In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import pickle

### Read Data

In [2]:
# Load the pickled data subsets. Later cells index the result as
# subsets[year]['train'] / subsets[year]['test'].
# The context manager closes the handle even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code — only load files
# that come from a trusted source.
with open('../data/subsets.pkl', 'rb') as file:
    subsets = pickle.load(file)

In [3]:
# Load the pickled LightGBM parameter sets; the printed keys show which
# entries are available for lookup.
# The context manager closes the handle even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code — only load files
# that come from a trusted source.
with open('../model/best_parameters3.pkl', 'rb') as file:
    params_dict = pickle.load(file)
print(params_dict.keys())

dict_keys(['2013', '2014', '2015', '2016', '2017', '2018', '2019'])


### Train One Model per Year

In [4]:
# Columns removed from every frame before it is handed to the model.
non_features = [
    # presumably identifier / date columns — TODO confirm against the data dictionary
    'gvkey',
    'datacqtr',
    'cusip',
    'tic',
    'announcement_date',
    'analyst_date',
    # actual outcome columns ('nq_eps_actual_direction' is the training label)
    'nq_eps_actual_direction',
    'nq_eps_actual_change',
    # presumably analyst-forecast-derived columns — TODO confirm
    'nq_eps_predicted_mean_direction',
    'nq_eps_predicted_mean_change',
    'nq_eps_predicted_median_direction',
    'nq_eps_predicted_median_change',
    # remaining excluded columns
    'mkvaltq',
    'gsector',
]

In [5]:
# Registry of fitted models, filled in by the training loop (key: year).
models_by_year = dict()

In [6]:
# Fit one LightGBM model per year on that year's training subset, then pickle
# the complete {year: model} mapping once every year has been fitted.
for year in sorted(subsets.keys()):

    # split into training and testing
    df_train = subsets[year]['train']
    df_test = subsets[year]['test']

    # drop the non-feature columns once per split and reuse the result
    train_frame = df_train.drop(columns=non_features)
    test_frame = df_test.drop(columns=non_features)

    # create feature names
    feature_names = list(test_frame.columns)

    # The model is fitted on bare arrays, so column order is the only thing
    # tying train features to test features; fail loudly on a mismatch.
    if list(train_frame.columns) != feature_names:
        raise ValueError(
            f"train/test feature columns differ for year {year!r}"
        )

    # create train and test features
    train_features = np.array(train_frame)
    test_features = np.array(test_frame)

    # create train and test labels
    train_labels = np.array(df_train['nq_eps_actual_direction'])
    test_labels = np.array(df_test['nq_eps_actual_direction'])
    # (the test arrays and feature_names are not used for fitting here; they
    # stay in the notebook namespace for any later cell that reads them)

    # train model with the parameter set stored for this year
    params = params_dict[year]

    train_data = lgb.Dataset(train_features, label=train_labels)
    model = lgb.train(params, train_data)

    # save model
    models_by_year[year] = model

    # progress marker
    print(year)

# persist every fitted model in a single pickle file
with open('../model/models_by_year.pkl', 'wb') as file:
    pickle.dump(models_by_year, file)



2013




2014




2015




2016




2017




2018




2019
