In [1]:
import numpy as np
import pandas as pd
import itertools
from Regession_Model import RegressionModel

# Settings for the train/test split and the feature scaling
TRAIN_DATE_ID_CUTOFF = 478  # rows with date_id below this value form the training set
FEATURE_SCALE = 10000       # multiplier applied to every non-target column
TARGET_COLUMN = 'target'


def split_features_target(frame):
    """Split ``frame`` into ``(x, y)`` NumPy arrays.

    ``x`` holds every column other than ``TARGET_COLUMN``, multiplied by
    ``FEATURE_SCALE``; ``y`` holds the ``TARGET_COLUMN`` column(s) as a 2-D array.
    """
    is_target = frame.columns == TARGET_COLUMN
    # NOTE(review): the scaling is applied to all non-target columns, date_id
    # included -- confirm that this is intended.
    x = (frame.loc[:, ~is_target] * FEATURE_SCALE).to_numpy()
    y = frame.loc[:, is_target].to_numpy()
    return x, y


# Read in the data you would like to enter to the model and discard rows with
# missing values (without mutating a frame in place, so the cell is re-runnable)
model_data = pd.read_parquet('model_data.gzip').dropna()

# Training split: rows whose date_id is strictly below the cutoff
train_all = model_data[model_data['date_id'] < TRAIN_DATE_ID_CUTOFF]
train_x, train_y = split_features_target(train_all)

# Testing split: rows whose date_id is at or above the cutoff
test_all = model_data[model_data['date_id'] >= TRAIN_DATE_ID_CUTOFF]
test_x, test_y = split_features_target(test_all)

# Initialise model with the train and test datasets
model = RegressionModel(train_x, train_y, test_x, test_y)

In [2]:
# Baseline: fit a plain linear regression, then report every metric on the
# training split followed by the testing split
RegressionModel.fit_model(model, model_name='linear_regression')

# Training split
pred_train_y = RegressionModel.predict_model(model, train_or_test='train')
train_eval = RegressionModel.eval_fit(
    model, pred_train_y, metric_name='all', train_or_test='train'
)
print('\nTrain Evaluation Metrics:')
print('\n'.join([f'{position}:{metric}' for position, metric in enumerate(train_eval)]))

# Testing split
pred_test_y = RegressionModel.predict_model(model, train_or_test='test')
test_eval = RegressionModel.eval_fit(
    model, pred_test_y, metric_name='all', train_or_test='test'
)
print('\nTest Evaluation Metrics:')
print('\n'.join([f'{position}:{metric}' for position, metric in enumerate(test_eval)]))


Train Evaluation Metrics:
0:('mae', 6.353305009117273)
1:('mse', 87.9360103493126)
2:('rmse', 9.377420239560164)
3:('r2', 0.008518855269190717)
4:('exp_var', 0.008518855269190828)
5:('corr_coef', 0.09229764498182137)

Test Evaluation Metrics:
0:('mae', 5.267011780305497)
1:('mse', 61.57272451038838)
2:('rmse', 7.846828946165985)
3:('r2', -0.00020142440000170403)
4:('exp_var', -0.00012270296961958138)
5:('corr_coef', 0.0431459732278345)


In [3]:
# Fit a ridge regression model to the dataset and evaluate the fit for a range of lambdas
# (each lambda is passed to the model as ``reg_alpha``).
# NOTE(review): the saved output shows a warning originating from a
# ``linalg.solve`` call on every fit -- presumably an ill-conditioning warning;
# verify, and consider standardising the features if so.

lambdas = [0.1, 0.5, 1, 10]

for lambda_i in lambdas:
    # Refit on the training data with the current regularisation strength
    RegressionModel.fit_model(model, model_name='ridge', reg_alpha=lambda_i)

    # Metrics on the training split
    pred_train_y = RegressionModel.predict_model(model, train_or_test='train')
    train_eval = RegressionModel.eval_fit(model, pred_train_y, metric_name='all', train_or_test='train')
    print(f'\nTrain Evaluation Metrics (Lambda = {lambda_i}):')
    print('\n'.join(f'{idx}:{metric}' for idx, metric in enumerate(train_eval)))

    # Metrics on the testing split
    pred_test_y = RegressionModel.predict_model(model, train_or_test='test')
    test_eval = RegressionModel.eval_fit(model, pred_test_y, metric_name='all', train_or_test='test')
    print(f'\nTest Evaluation Metrics (Lambda = {lambda_i}):')
    print('\n'.join(f'{idx}:{metric}' for idx, metric in enumerate(test_eval)))

  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T



Train Evaluation Metrics (Lambda = 0.1):
0:('mae', 6.353304985916613)
1:('mse', 87.93601034941346)
2:('rmse', 9.377420239565541)
3:('r2', 0.008518855268053516)
4:('exp_var', 0.008518855268053516)
5:('corr_coef', 0.09229764497575962)

Test Evaluation Metrics (Lambda = 0.1):
0:('mae', 5.267011716596985)
1:('mse', 61.57272427242554)
2:('rmse', 7.846828931002991)
3:('r2', -0.00020142053447869124)
4:('exp_var', -0.0001226991761436036)
5:('corr_coef', 0.043145927404555764)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T



Train Evaluation Metrics (Lambda = 0.5):
0:('mae', 6.353304893225199)
1:('mse', 87.93601035182911)
2:('rmse', 9.377420239694343)
3:('r2', 0.00851885524081697)
4:('exp_var', 0.00851885524081708)
5:('corr_coef', 0.09229764483036469)

Test Evaluation Metrics (Lambda = 0.5):
0:('mae', 5.267011461882067)
1:('mse', 61.572723322292276)
2:('rmse', 7.846828870460492)
3:('r2', -0.0002014051002965367)
4:('exp_var', -0.00012268403001214345)
5:('corr_coef', 0.04314574414429791)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T



Train Evaluation Metrics (Lambda = 1):
0:('mae', 6.353304777663116)
1:('mse', 87.9360103593689)
2:('rmse', 9.377420240096361)
3:('r2', 0.008518855155805749)
4:('exp_var', 0.008518855155805749)
5:('corr_coef', 0.09229764437656333)

Test Evaluation Metrics (Lambda = 1):
0:('mae', 5.2670111437556395)
1:('mse', 61.572722138479655)
2:('rmse', 7.846828795027942)
3:('r2', -0.00020138587017370924)
4:('exp_var', -0.00012266515964709512)
5:('corr_coef', 0.04314551514297299)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T



Train Evaluation Metrics (Lambda = 10):
0:('mae', 6.35330275028468)
1:('mse', 87.93601133783795)
2:('rmse', 9.377420292267908)
3:('r2', 0.008518844123537872)
4:('exp_var', 0.008518844123537872)
5:('corr_coef', 0.09229758547047585)

Test Evaluation Metrics (Lambda = 10):
0:('mae', 5.267005467774684)
1:('mse', 61.5727015499151)
2:('rmse', 7.846827483124317)
3:('r2', -0.00020105142482051974)
4:('exp_var', -0.00012233713249121259)
5:('corr_coef', 0.04314140728521088)


In [4]:
# Fit a lasso regression model to the dataset and evaluate the fit for a range of lambdas
# (each lambda is passed to the model as ``reg_alpha``).

lambdas = [0.1, 0.5, 1, 10]

for lambda_i in lambdas:
    # Refit on the training data with the current regularisation strength
    RegressionModel.fit_model(model, model_name='lasso', reg_alpha=lambda_i)

    # Metrics on the training split
    pred_train_y = RegressionModel.predict_model(model, train_or_test='train')
    train_eval = RegressionModel.eval_fit(model, pred_train_y, metric_name='all', train_or_test='train')
    print(f'\nTrain Evaluation Metrics (Lambda = {lambda_i}):')
    print('\n'.join(f'{idx}:{metric}' for idx, metric in enumerate(train_eval)))

    # Metrics on the testing split
    pred_test_y = RegressionModel.predict_model(model, train_or_test='test')
    test_eval = RegressionModel.eval_fit(model, pred_test_y, metric_name='all', train_or_test='test')
    print(f'\nTest Evaluation Metrics (Lambda = {lambda_i}):')
    print('\n'.join(f'{idx}:{metric}' for idx, metric in enumerate(test_eval)))


Train Evaluation Metrics (Lambda = 0.1):
0:('mae', 6.353335263387279)
1:('mse', 87.95216778600808)
2:('rmse', 9.37828170754153)
3:('r2', 0.008336679687570459)
4:('exp_var', 0.00833667968757057)
5:('corr_coef', 0.09140942058260633)

Test Evaluation Metrics (Lambda = 0.1):
0:('mae', 5.264133663711215)
1:('mse', 61.52628173764188)
2:('rmse', 7.8438690540856095)
3:('r2', 0.0005530026263077525)
4:('exp_var', 0.0006278546452416212)
5:('corr_coef', 0.04496561921588842)

Train Evaluation Metrics (Lambda = 0.5):
0:('mae', 6.355589125453331)
1:('mse', 88.01016437561904)
2:('rmse', 9.381373267044598)
3:('r2', 0.007682766406429509)
4:('exp_var', 0.00768276640642962)
5:('corr_coef', 0.08972285693175068)

Test Evaluation Metrics (Lambda = 0.5):
0:('mae', 5.261669720452266)
1:('mse', 61.48139479868086)
2:('rmse', 7.841007256639982)
3:('r2', 0.001282156332640727)
4:('exp_var', 0.001354174261893215)
5:('corr_coef', 0.0432363444193667)

Train Evaluation Metrics (Lambda = 1):
0:('mae', 6.362811194455652

In [27]:
# Fit an elastic net regression model to the dataset and evaluate the fit for
# every combination of regularisation strength (alpha) and L1 ratio

lambdas = [0.1, 0.5, 1, 10]
l1_ratios = [0.1, 0.25, 0.5, 0.75, 0.9]

# Cartesian product: one (alpha, l1_ratio) pair per model fit
lambda_ratio_comb = list(itertools.product(lambdas, l1_ratios))

for alpha_i, l1_ratio_i in lambda_ratio_comb:
    # Refit on the training data with the current hyper-parameter pair
    RegressionModel.fit_model(model, model_name='elastic_net', reg_alpha=alpha_i, reg_l1_ratio=l1_ratio_i)

    # Metrics on the training split
    pred_train_y = RegressionModel.predict_model(model, train_or_test='train')
    train_eval = RegressionModel.eval_fit(model, pred_train_y, metric_name='all', train_or_test='train')
    print(f'\nTrain Evaluation Metrics (alpha = {alpha_i} and l1_ratio = {l1_ratio_i}):')
    print('\n'.join(f'{idx}:{metric}' for idx, metric in enumerate(train_eval)))

    # Metrics on the testing split
    pred_test_y = RegressionModel.predict_model(model, train_or_test='test')
    test_eval = RegressionModel.eval_fit(model, pred_test_y, metric_name='all', train_or_test='test')
    print(f'\nTest Evaluation Metrics (alpha = {alpha_i} and l1_ratio = {l1_ratio_i}):')
    print('\n'.join(f'{idx}:{metric}' for idx, metric in enumerate(test_eval)))


Train Evaluation Metrics (alpha = 0.1 and l1_ratio = 0.1):
0:('mae', 6.353543723244869)
1:('mse', 87.94967148129872)
2:('rmse', 9.378148616933874)
3:('r2', 0.00836482559777263)
4:('exp_var', 0.00836482559777274)
5:('corr_coef', 0.09146872957207659)

Test Evaluation Metrics (alpha = 0.1 and l1_ratio = 0.1):
0:('mae', 5.264953542109881)
1:('mse', 61.53886828589315)
2:('rmse', 7.844671330648159)
3:('r2', 0.0003485438567907284)
4:('exp_var', 0.00042497820571685274)
5:('corr_coef', 0.045204640423494015)

Train Evaluation Metrics (alpha = 0.1 and l1_ratio = 0.25):
0:('mae', 6.353446212689722)
1:('mse', 87.94994941983451)
2:('rmse', 9.378163435333942)
3:('r2', 0.00836169183247526)
4:('exp_var', 0.00836169183247526)
5:('corr_coef', 0.09146195611286696)

Test Evaluation Metrics (alpha = 0.1 and l1_ratio = 0.25):
0:('mae', 5.264628705621637)
1:('mse', 61.53330901052376)
2:('rmse', 7.844316988146499)
3:('r2', 0.0004388499978185134)
4:('exp_var', 0.0005147225130119892)
5:('corr_coef', 0.045443386