In [3]:
import pandas as pd
import json
import numpy as np
import pickle
from sklearn.linear_model import LinearRegression

In [4]:
# The file holds one JSON document per line; each parsed line becomes one row of `inputs`.
with open('../../data/close_prices.jsons', 'r') as source:
    records = []
    for raw_line in source:
        records.append(json.loads(raw_line))
inputs = pd.DataFrame(records)

In [5]:
inputs.head()

Unnamed: 0,Close,Date
0,1536.339966,20968.0
1,1539.180054,20971.0
2,1530.949951,20972.0
3,1517.380005,20973.0
4,1490.719971,20974.0


In [6]:
close_prices = np.array(inputs['Close'])

We'll train an autoregressor that uses the last 15 days of prices to predict the next day's closing price.

In [7]:
def make_data(inputs, window_size=15, size=500):
    """Build a sliding-window (autoregressive) data set from a series.

    Row ``i`` of ``X`` is ``inputs[i : i + window_size]`` and row ``i`` of
    ``y`` is the single value that follows it, ``inputs[i + window_size]``.

    Parameters
    ----------
    inputs : sequence or 1-D array
        The series to window. It must support ``len()`` and slicing.
    window_size : int, default 15
        Number of consecutive values in each feature row.
    size : int, default 500
        Number of rows (windows) to produce, starting at index 0.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        For a 1-D series, ``X`` has shape ``(size, window_size)`` and ``y``
        has shape ``(size, 1)``.

    Raises
    ------
    ValueError
        If ``size`` is less than 1, or if ``inputs`` holds fewer than
        ``size + window_size`` values (the last window plus its target would
        run off the end of the series).
    """
    if size < 1:
        raise ValueError("size must be at least 1, got %r" % (size,))
    needed = size + window_size
    if len(inputs) < needed:
        raise ValueError(
            "need at least %d values for size=%d and window_size=%d, got %d"
            % (needed, size, window_size, len(inputs))
        )

    # np.vstack must be given a real sequence: NumPy deprecated, and newer
    # releases reject, passing a generator expression here.
    X = np.vstack([inputs[i:i + window_size] for i in range(size)])
    # Slice (rather than index) the target so every row is a length-1 vector
    # and y comes out as a column.
    y = np.vstack([inputs[i + window_size:i + window_size + 1] for i in range(size)])
    return X, y

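Two sets of training data, drawn from the first 1000 days and from days 1000 through 2000. Note that `make_data` defaults to `size = 500`, so each set contains the 500 windows that start in the first 500 days of its range.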

In [8]:
X1, y1 = make_data(close_prices[0:1000])
X2, y2 = make_data(close_prices[1000:2000])

Model "version 1"

In [9]:
lr = LinearRegression()

In [10]:
lr = lr.fit(X1, y1)

In [11]:
lr.score(X1, y1)

0.9931060961417425

We get a reasonable R² value of 0.9845 on the out-of-sample data.

In [12]:
lr.score(X2, y2)

0.98452029509333705

Save it to a file.

In [13]:
with open('lr_pickle1.pkl', 'wb') as f:
    pickle.dump(lr, f)

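Now we'll train it again on our out-of-sample data. Because least squares minimizes the error on the data it is fitted to, the refit model is guaranteed to fit this data at least as well as version 1 did.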

In [14]:
lr = lr.fit(X2, y2)

In [15]:
lr.score(X2, y2)

0.98662812694287827

And indeed it is, by a smidge: 0.98663 to 0.98452.

In [16]:
with open('lr_pickle2.pkl', 'wb') as f:
    pickle.dump(lr, f)

Deployment in FastScore.

In [17]:
from fastscoredeploy import ipmagic

In [18]:
%%schema close_price
{
    "type":"record",
    "name":"close_price",
    "fields":[
        {"type":"double", "name":"Date"},
        {"type":"double", "name":"Close"}
    ]
}

Schema loaded and bound to close_price variable


In [19]:
%%schema tagged_double
{
    "type":"record",
    "name":"tagged_double",
    "fields":[
        {"type":"string", "name":"name"},
        {"type":"double", "name":"value"}
    ]
}

Schema loaded and bound to tagged_double variable


In [20]:
%%model lr_model

# fastscore.schema.0: close_price
# fastscore.schema.1: tagged_double

import numpy as np
import pickle
from sklearn.linear_model import LinearRegression


def begin():
    """Set up the module-level state that action() relies on.

    Creates an empty price buffer (``window``), fixes its target length
    (``window_size``) at 15, and loads the fitted regressor from
    'lr_pickle1.pkl' into the global ``lr``.
    """
    global lr, window, window_size
    window_size = 15
    window = []
    # NOTE: unpickling runs code embedded in the file, so only load pickles
    # that you produced yourself.
    with open('lr_pickle1.pkl', 'rb') as model_file:
        lr = pickle.load(model_file)

def action(x):
    """Score one record and yield a tagged "price" value.

    The record's 'Close' value is appended to the global rolling buffer
    ``window``, which is kept at no more than ``window_size`` entries.

    * While the buffer holds fewer than ``window_size`` values, the incoming
      close price is passed through unchanged.
    * Once the buffer is full, it is fed to ``lr.predict`` as a single row
      and the ``[0, 0]`` element of the result is yielded.

    Parameters
    ----------
    x : mapping
        Input record; only its 'Close' entry is read.

    Yields
    ------
    dict
        ``{"name": "price", "value": <float>}``.
    """
    global window, window_size
    close = x['Close']
    # Append first, then trim to the newest window_size entries. Slicing the
    # old buffer with `1 - window_size` instead would keep the whole list when
    # window_size == 1, letting the buffer grow without bound.
    window = (window + [close])[-window_size:]
    if len(window) < window_size:
        yield {"name": "price", "value": close}
    else:
        X = np.array([window])  # one sample: shape (1, window_size)
        y = lr.predict(X)
        yield {"name": "price", "value": y[0, 0]}


Model loaded and bound to lr_model variable.


In [21]:
lr_model.score(inputs.to_dict(orient='records')[0:5])

[{'name': 'price', 'value': 1536.339966},
 {'name': 'price', 'value': 1539.1800539999999},
 {'name': 'price', 'value': 1530.9499510000001},
 {'name': 'price', 'value': 1517.380005},
 {'name': 'price', 'value': 1490.719971}]

In [22]:
from fastscoredeploy.suite import Connect

# Connect to the FastScore endpoint at https://dashboard:8000
# (presumably the fleet's dashboard/proxy service -- confirm for your deployment).
c = Connect('https://dashboard:8000')
# Service handles used by later cells: `mm` is passed to lr_model.update(),
# `eng` is passed to lr_model.deploy() and used for eng.score() / eng.reset().
mm = c.lookup('model-manage')
eng = c.lookup('engine')

In [24]:
lr_model.update(model_manage=mm)

True

In [25]:
import tarfile

from fastscore.attachment import Attachment

# The model's begin() opens 'lr_pickle1.pkl', but the attachment uploaded
# below is 'lr_pickle1.tar.gz', which no earlier cell creates. Build the
# archive here from the pickle saved above so that the notebook runs from a
# fresh kernel and the uploaded model always matches the one just trained.
# (Presumably the engine unpacks the attachment next to the model -- confirm
# against the FastScore attachment documentation.)
with tarfile.open('lr_pickle1.tar.gz', 'w:gz') as archive:
    archive.add('lr_pickle1.pkl')

att = Attachment('att', datafile='lr_pickle1.tar.gz')
att.upload(lr_model)

In [26]:
lr_model.deploy(eng)

In [27]:
eng.score(inputs.to_dict(orient='records')[0:25])

[{'name': 'price', 'value': 1536.339966},
 {'name': 'price', 'value': 1539.180054},
 {'name': 'price', 'value': 1530.949951},
 {'name': 'price', 'value': 1517.380005},
 {'name': 'price', 'value': 1490.719971},
 {'name': 'price', 'value': 1507.670044},
 {'name': 'price', 'value': 1509.119995},
 {'name': 'price', 'value': 1493.0},
 {'name': 'price', 'value': 1515.670044},
 {'name': 'price', 'value': 1522.969971},
 {'name': 'price', 'value': 1532.910034},
 {'name': 'price', 'value': 1531.050049},
 {'name': 'price', 'value': 1533.699951},
 {'name': 'price', 'value': 1512.839966},
 {'name': 'price', 'value': 1520.2010890340941},
 {'name': 'price', 'value': 1499.109418056728},
 {'name': 'price', 'value': 1500.3373565964112},
 {'name': 'price', 'value': 1493.5678127290635},
 {'name': 'price', 'value': 1501.4576692651551},
 {'name': 'price', 'value': 1504.4839249005067},
 {'name': 'price', 'value': 1501.5688830062377},
 {'name': 'price', 'value': 1516.1651036901176},
 {'name': 'price', 'value'

In [25]:
eng.reset()

The model works, so let's save what we made to files.

In [None]:
# Write the model source out verbatim.
with open('lr_model.py3', 'w') as model_file:
    model_file.write(lr_model.source)

# Write each schema's source, serialised with json.dumps, to its own .avsc file.
for schema, path in ((close_price, 'close_price.avsc'),
                     (tagged_double, 'tagged_double.avsc')):
    with open(path, 'w') as schema_file:
        schema_file.write(json.dumps(schema.source))