In [1]:
import numpy as np
import pickle
from sklearn import linear_model, metrics
import scipy as sp

In [2]:
def load_object(file_name):
    """load the pickled object"""
    with open(file_name, 'rb') as f:
        return pickle.load(f)

def view_data(data_path):
    data = load_object(data_path)
    prices = data['prices']
    names = data['features']['names']
    features = data['features']['values']
    print(prices.shape)
    print(names)
    print(features.shape)
    return prices, features

In [3]:
class Strategy():
    def __init__(self):
        pass

    def handle_update(self, inx, price, factors):
        """Put your logic here
        Args:
            inx: zero-based inx in days
            price: [num_assets, ]
            factors: [num_assets, num_factors]
        Return:
            allocation: [num_assets, ]
        """
        assert price.shape[0] == factors.shape[0]
        return np.array([1.0] * price.shape[0])

In [4]:
# load data
data = load_object("data/C3_train.pkl")
prices = data['prices']
names = data['features']['names']
features = data['features']['values']
y = returns = np.array([[(prices[i][j]-prices[i-1][j])/prices[i-1][j] for i in range(1, 757)] for j in range(680)])
x = np.array([[[features[j][k][i] for i in range(10)] for j in range(0, 756)] for k in range(680)])

In [5]:
# training/testing data
x_train, x_test = x[:, :504, :], x[:, 504:, :]
y_train, y_test = y[:, :504], y[:, 504:]

In [6]:
# OLS models
def train_models(x, y):
    models = []
    for i in range(680):
        model = linear_model.LinearRegression(normalize=True)
        model.fit(x[i], y[i])
        models.append(model)
    return models

def test_models(models, x, y):
    for i in range(680):
        model = models[i]
        pred = model.predict(x[i])
        print("mae:", metrics.mean_absolute_error(pred, y[i])*100)

In [7]:
OLS_models = train_models(x_train, y_train)
test_models(OLS_models, x_test, y_test)

mae: 1.621455712232172
mae: 0.8049883682105222
mae: 0.49983220446904236
mae: 1.3447325710923939
mae: 1.6712593788426267
mae: 0.6662579398563382
mae: 0.7171966775796527
mae: 0.736636043185221
mae: 0.7079627115113113
mae: 1.1224539882559301
mae: 1.0111232177109113
mae: 1.7466894063512408
mae: 4.595267348795535
mae: 0.8922713903433317
mae: 0.7618235938632036
mae: 1.3646004451166254
mae: 0.7885655666618769
mae: 2.8536123299407796
mae: 2.248013476465086
mae: 0.7199385720259578
mae: 1.2080794928457594
mae: 2.336046893276485
mae: 1.3741725044276465
mae: 1.2235358649086503
mae: 1.4104672248961636
mae: 3.7813351309686207
mae: 0.41432917845233513
mae: 1.02506644699791
mae: 1.853588373386617
mae: 1.0556371990142537
mae: 1.2882053276890733
mae: 1.0731860874153303
mae: 1.1629909242518797
mae: 0.989429607938692
mae: 1.8482343807699273
mae: 0.4529744611000146
mae: 0.6534559346708759
mae: 3.2461688851233927
mae: 2.5963896602824112
mae: 0.6974868244474053
mae: 3.5161760996694666
mae: 0.7533971789992198

mae: 2.626244027020146
mae: 9.501675671328
mae: 3.822378943188287
mae: 1.415841030134188
mae: 2.1433307690559995
mae: 1.0447073774522582
mae: 0.3452420034563937
mae: 0.8528563791558447
mae: 1.3026015488979978
mae: 0.49087939338524966
mae: 2.8475893302345225
mae: 2.208187070282473
mae: 1.701196547562121
mae: 1.6343787199118966
mae: 1.1382163302769424
mae: 1.0428702178303377
mae: 0.7750659520402466
mae: 1.2021214103685738
mae: 2.3708081412145408
mae: 1.0935992576015814
mae: 1.595355656694713
mae: 0.6464152531348107
mae: 1.3000928030776173
mae: 3.2330885958145164
mae: 0.962668714753968
mae: 1.4726169178414052
mae: 1.6978140823959038
mae: 3.1421273175781104
mae: 1.5690521326232036
mae: 1.3010602985784576
mae: 1.203143656114393
mae: 1.427187195478318
mae: 1.408238784171837
mae: 0.6030666384575469
mae: 1.143989193314527
mae: 1.0795566879722918
mae: 2.46087471341686
mae: 1.734078709217171
mae: 1.0498344643038722
mae: 1.341560617425642
mae: 1.0362902715571296
mae: 0.6935911953351583
mae: 0.744

In [11]:
# Bayesian models
def train_models(x, y):
    # prior param
    mu_0 = np.zeros(x.shape[-1])
    lambda_0 = np.identity(x.shape[-1])
    eta_0 = x.shape[-1]+1
    s_0 = np.identity(x.shape[-1])
    nu_0 = 2
    sigma_sq_0 = 1
    
    # sampler
    beta_0_samples, sigma_0_samples = [], []
    beta_j_samples, sigma_sq_samples = [], []

    # initial values
    beta_0_n = np.transpose(np.random.multivariate_normal(mu_0, lambda_0))
    sigma_0_n = np.linalg.inv(sp.stats.wishart.rvs(eta_0, s_0))
    beta_j_n = [np.transpose(np.random.multivariate_normal(beta_0_n, sigma_0_n)) for j in range(680)]
    sigma_sq_n = 1/np.random.gamma(nu_0/2, nu_0*sigma_sq_0/2)

    beta_0_samples.append(beta_0_n)
    sigma_0_samples.append(sigma_0_n)
    beta_j_samples.append(beta_j_n)
    sigma_sq_samples.append(sigma_sq_n)

    for i in range(2, 5000):
        # beta_j
        beta_j_n = []
        for j in range(680):
            vbeta_j = np.linalg.inv(np.linalg.inv(sigma_0_n) + np.matmul(np.transpose(x[j]), x[j])/sigma_sq_n)
            ebeta_j = np.matmul(vbeta_j, np.matmul(np.linalg.inv(sigma_0_n), beta_0_n) + np.matmul(np.transpose(x[j]), y[j])/sigma_sq_n)
            beta_j_n.append(np.transpose(np.random.multivariate_normal(ebeta_j, vbeta_j)))
        beta_j_samples.append(beta_j_n)
        
        # sigma_sq
        nu_n = nu_0 + 680*x.shape[1]
        ss = nu_0*sigma_sq_0
        for j in range(680):
            ss += sum((y[j] - np.matmul(x[j], beta_j_n[j]))**2)
        sigma_sq_n = 1/np.random.gamma(nu_n/2, ss/2)
        sigma_sq_samples.append(sigma_sq_n)
        
        # beta_0
        vbeta_0 = np.linalg.inv(np.linalg.inv(lambda_0) + 680*np.linalg.inv(sigma_0_n))
        ebeta_0 = np.matmul(vbeta_0, np.matmul(np.linalg.inv(lambda_0), mu_0) + np.matmul(np.linalg.inv(sigma_0_n), np.sum(beta_j_n, axis=0)))
        beta_0_n = np.transpose(np.random.multivariate_normal(ebeta_0, vbeta_0))
        beta_0_samples.append(beta_0_n)
        
        # sigma_0
        esigma_0 = eta_0 + 680
        s_beta_0 = np.zeros((10, 10))
        for j in range(680):
            s_beta_0 = np.add(s_beta_0, np.matmul(beta_j_n[j] - beta_0_n, np.transpose(beta_j_n[j] - beta_0_n)))
        ss = s_0 + s_beta_0
        sigma_0_n = np.linalg.inv(sp.stats.wishart.rvs(esigma_0, np.linalg.inv(ss)))
        sigma_0_samples.append(sigma_0_n)
    
    models = []
    for n in range(680):
        model = beta_j_samples[2500][n]
        for j in range(2501, len(beta_j_samples)):
            model = np.add(model, beta_j_samples[j][n])
        model = model/2500
        models.append(model)
    
    return models

def test_models(models, x, y):
    for i in range(680):
        model = models[i]
        pred = [np.dot(model, x[i][j]) for j in range(len(x[i]))]
        print("mae:", metrics.mean_absolute_error(pred, y[i])*100)

In [12]:
Bayesian_models = train_models(x_train, y_train)
test_models(Bayesian_models, x_test, y_test)

mae: 1.2949991517120503
mae: 0.802296577756189
mae: 0.49912217832893113
mae: 1.287997143784044
mae: 1.740871871057142
mae: 0.6645405886957837
mae: 0.7535025049795023
mae: 0.7479215041255766
mae: 0.688574324259841
mae: 1.0930110949480538
mae: 0.7155095705372563
mae: 1.7623056759450009
mae: 4.530128510327625
mae: 0.8925540356991881
mae: 0.7638840247797757
mae: 1.3534604555815268
mae: 0.7819043669987166
mae: 2.2389005584954043
mae: 2.067658602158104
mae: 0.6982930834430289
mae: 1.2026045449557325
mae: 2.214049066827282
mae: 1.1850483391022257
mae: 1.3335349927846751
mae: 1.4027173658985763
mae: 3.5867793940392882
mae: 0.4142217441543213
mae: 0.9465166562543574
mae: 1.8152022143345807
mae: 0.8960892382788814
mae: 1.678089563705087
mae: 0.9595509570920816
mae: 1.1993864606029998
mae: 0.9927792617327031
mae: 1.6683900728477832
mae: 0.4535371742915203
mae: 0.637327002756102
mae: 3.21971169587182
mae: 2.531144679832083
mae: 0.660124438034861
mae: 3.6313558858959096
mae: 0.7510064829195856
mae:

mae: 3.176323140388836
mae: 3.7814058358939993
mae: 0.9719079809011146
mae: 1.3373099190743218
mae: 1.4920087385508949
mae: 0.9833411996983992
mae: 1.8548948564293795
mae: 0.3658133801604678
mae: 0.8064695246840823
mae: 0.6724054079330312
mae: 8.644817917673171
mae: 1.060713523763337
mae: 0.7708992045653249
mae: 1.0432494985598149
mae: 1.8730518477410332
mae: 1.5653963697466184
mae: 0.4959799959200863
mae: 1.3302548082714902
mae: 3.8073776492530222
mae: 0.9438217751052181
mae: 0.7056709182182561
mae: 1.3868325597027715
mae: 0.6419185313887349
mae: 2.1141831997877034
mae: 0.994310108768004
mae: 0.9894435478948311
mae: 1.536856599773819
mae: 0.9180844823396588
mae: 2.1029669030606426
mae: 0.47596500605156816
mae: 6.010297874617534
mae: 1.95652346675468
mae: 2.2022244739271604
mae: 0.504622521203452
mae: 1.3684391589178069
mae: 1.3203311032599367
mae: 1.547979785640627
mae: 2.144056773789427
mae: 1.008282937035463
mae: 1.9343346685409972
mae: 2.1664470118249004
mae: 1.259755744566435
mae:

In [13]:
with open('bayesian_weights.pkl', 'wb') as f:
    pickle.dump(Bayesian_models, f)