In [1]:
import os
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
import csv
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.init as init
import random
import torch.optim as optim
import logging
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
import ray

## Initial setup

In [2]:
sns.set()

In [3]:
os.chdir('/zfs/projects/darc/wolee_edehaan_suzienoh-exploratory-ml')

In [4]:
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('Neural Network')
logger.setLevel(level=logging.INFO)

In [5]:
# Declare global variables
global con_list
global dum_list
global embed_list
global deps
global header

In [6]:
# List of continuous variables
con_list = ['absacc', 'acc', 'aeavol', 'age', 'agr', 'baspread', 'beta', 
            'betasq', 'bm', 'bm_ia', 'cash', 'cashdebt', 'cashpr','cfp', 
            'cfp_ia', 'chatoia', 'chcsho', 'chempia', 'chfeps', 'chinv', 
            'chmom', 'chnanalyst', 'chpmia', 'chtx', 'cinvest', 'currat', 
            'depr', 'disp', 'dolvol', 'dy', 'ear', 'egr', 'ep', 'fgr5yr', 
            'gma', 'grcapx', 'grltnoa', 'herf', 'hire', 'idiovol', 'ill', 
            'indmom', 'invest', 'lev', 'lgr', 'maxret', 'mom12m', 'mom1m', 
            'mom36m', 'mom6m', 'ms', 'mve', 'mve_ia', 'nanalyst', 'nincr', 
            'operprof', 'orgcap', 'pchcapx_ia', 'pchcurrat', 'pchdepr', 
            'pchgm_pchsale', 'pchquick', 'pchsale_pchinvt', 'pchsale_pchrect', 
            'pchsale_pchxsga', 'pchsaleinv', 'pctacc', 'pricedelay', 'ps', 
            'quick', 'rd_mve', 'rd_sale', 'realestate', 'retvol', 'roaq', 
            'roavol', 'roeq', 'roic', 'rsup', 'salecash', 'saleinv', 
            'salerec', 'secured', 'sfe', 'sgr', 'sp', 'std_dolvol', 
            'std_turn', 'stdacc', 'stdcf', 'sue', 'tang', 'tb', 'turn', 
            'zerotrade']


# List of dummy variables
dum_list = ['convind', 'divi', 'divo', 'ipo', 'rd', 'securedind', 'sin'] # Categorical variable binary

# List of embedding variables
embed_list = ['permno']

# List of dependent variable
deps = con_list + dum_list + embed_list + ['date']


# Headers
header = ['permno','pyear']

## Load and preprocess data

In [7]:
def load_and_preprocess_data(file_path, period):
    
    """
    Loads and preprocesses the input data.

    Args:
    file_path (str): The path to the CSV file to be loaded.

    Returns:
    DataFrame: Preprocessed pandas DataFrame.
    """
    
    # Load data
    df = pd.read_csv(file_path)
    df.columns = [e.lower() for e in df.columns]
    
    df['date'] = df['date'].copy()
    df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
    # df['date'] = df['date'].dt.strftime('%m-%d-%Y')

    # Extract year
    df['pyear'] = df['date'].dt.year
    # Remove months if quarterly, otherwise, monthly, keep all months
    if period == 'quarter':
        df = df[df['date'].dt.month.isin([1,4,7,10])]

    # df.sort_values(['permno','date'], inplace=True)
    df.sort_values(['date', 'permno'], inplace=True)
    df['date'] = df['date'].dt.strftime('%Y-%m')
    del df['fpedats']
    
    print(df[['date', 'permno']].head())
    print('-' * 50)
        
    return df

In [8]:
class CustomWinsorizer(BaseEstimator, TransformerMixin):
    
    """
    A custom transformer for Winsorizing numeric data.

    Attributes:
    lower_percentile (int): The lower percentile for clipping data.
    upper_percentile (int): The upper percentile for clipping data.
    """
    
    def __init__(self, lower_percentile, upper_percentile):
        self.lower_percentile = lower_percentile
        self.upper_percentile = upper_percentile

    def fit(self, X, y=None):
        self.lower_bound_ = np.percentile(X, self.lower_percentile)
        self.upper_bound_ = np.percentile(X, self.upper_percentile)
        return self

    def transform(self, X):
        X_clipped = np.clip(X, self.lower_bound_, self.upper_bound_)
        
        return X_clipped

In [9]:
class timePeriodMeanTransformer(BaseEstimator, TransformerMixin):
    
    """
    A custom transformer for imputing missing data based on time period means.

    Attributes:
    date_column (str): The column name representing dates.
    numeric_columns (list): List of numeric column names for which means are calculated.
    period (str): The time period for grouping data, either 'quarter' or 'month'.
    """
    
    def __init__(self, date_column, numeric_columns, period='quarter'):
        self.date_column = date_column
        self.numeric_columns = numeric_columns
        self.period = period

    def fit(self, X, y=None):
        X[self.date_column] = pd.to_datetime(X[self.date_column])
        if self.period == 'quarter':
            X['Period'] = X[self.date_column].dt.quarter
        elif self.period == 'month':
            X['Period'] = X[self.date_column].dt.month
        else:
            raise ValueError("period must be 'quarter' or 'month'")
       
       # Calculate and store the means of each numeric column for each time period
        self.period_means_ = X.groupby('Period')[self.numeric_columns].mean()
        return self

    def transform(self, X):
        X[self.date_column] = pd.to_datetime(X[self.date_column])
        if self.period == 'quarter':
            X['Period'] = X[self.date_column].dt.quarter
        elif self.period == 'month':
            X['Period'] = X[self.date_column].dt.month
        
        for col in self.numeric_columns:
            X[col] = X.apply(lambda row: row[col] if not pd.isna(row[col]) 
                             else self.period_means_.loc[row['Period'], col], axis=1)
        # return X.drop(['Period'], axis=1)
        return X

In [10]:
def build_pipeline(con_list, dum_list, lower_percentile, upper_percentile, period):
    
    """
    Builds a preprocessing pipeline for both numeric and categorical data.

    Args:
    con_list (list): List of continuous variable names.
    dum_list (list): List of dummy (categorical) variable names.
    lower_percentile (float): Lower percentile for winsorization.
    upper_percentile (float): Upper percentile for winsorization.
    period (string): Period for getting mean values (month vs quarter)

    Returns:
    Pipeline: A composed preprocessing pipeline.
    """
    
    numeric_pipeline = Pipeline([
        # ('fill_na', SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=0)),
        ('winsorizer', CustomWinsorizer(lower_percentile=lower_percentile, upper_percentile=upper_percentile)),
        ('scaler', StandardScaler()),
        ('impute_con', SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=0))
    ])

    categorical_pipeline = Pipeline([
        ('impute_cat', SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=0)),
    ])

    preprocessing = ColumnTransformer(
        transformers=[
            ('num', numeric_pipeline, con_list),
            ('cat', categorical_pipeline, dum_list)
        ], remainder='passthrough')

    pipeline = Pipeline([
        ('Time_period_mean_imputation', timePeriodMeanTransformer('date', con_list, period)),
        ('Preprocessing', preprocessing),
    ])
    
    return pipeline

In [11]:
def build_pipeline(con_list, dum_list, embed_list, lower_percentile, upper_percentile, period):
    
    """
    Builds a preprocessing pipeline for both numeric and categorical data.

    Args:
    con_list (list): List of continuous variable names.
    dum_list (list): List of dummy (categorical) variable names.
    lower_percentile (float): Lower percentile for winsorization.
    upper_percentile (float): Upper percentile for winsorization.
    period (string): Period for getting mean values (month vs quarter)

    Returns:
    Pipeline: A composed preprocessing pipeline.
    """
    
    numeric_pipeline = Pipeline([
        # ('fill_na', SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=0)),
        ('winsorizer', CustomWinsorizer(lower_percentile=lower_percentile, upper_percentile=upper_percentile)),
        ('scaler', StandardScaler()),
        ('impute_con', SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=0))
    ])

    categorical_pipeline = Pipeline([
        ('impute_cat', SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=0)),
    ])
    
    embed_pipeline = Pipeline([
        ('impute_embed', SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=0)),
    ])

    preprocessing = ColumnTransformer(
        transformers=[
            ('num', numeric_pipeline, con_list),
            ('cat', categorical_pipeline, dum_list),
            ('embed', embed_pipeline, embed_list),
        ], remainder='passthrough')

    pipeline = Pipeline([
        ('Time_period_mean_imputation', timePeriodMeanTransformer('date', con_list, period)),
        ('Preprocessing', preprocessing),
    ])
    
    return pipeline

In [12]:
infile_path = 'Info Processing and Mutual Funds/masterv14.csv'
period = 'month'

In [13]:
if period == 'quarter':
    target = 'retq'
elif period == 'month':
    target = 'ret'
else:
    raise ValueError("period must be 'quarter' or 'month'")

In [14]:
# Load and preprocess data
print('\nLoading and preprocessing data...\n')
df = load_and_preprocess_data(infile_path, period)


Loading and preprocessing data...

      date  permno
0  1980-01   10006
1  1980-01   10057
2  1980-01   10103
3  1980-01   10137
4  1980-01   10145
--------------------------------------------------


In [15]:
# Drop null values in the target column and get years 2020 or prior
df1 = df.dropna(subset=[target])
df1 = df1[df1['pyear'] <= 2020]
df1.reset_index(drop=True, inplace=True)

In [16]:
df1

Unnamed: 0,permno,gvkey,adatadate,fyear,sic2,spi,mve_f,bm,ep,cashpr,...,std_dolvol,std_turn,ill,zerotrade,beta,betasq,rsq1,pricedelay,idiovol,pyear
0,10006,1010,12/31/1978,1978,37,0.0000,269.308500,1.180962,0.153022,-32.218678,...,0.881844,0.635898,2.565667e-08,1.115306e-07,1.060420,1.124491,0.343408,0.029859,0.025576,1980
1,10057,1098,09/30/1978,1978,36,0.0000,97.372000,0.956692,0.135131,-4.408581,...,1.368363,2.546787,2.719812e-07,6.199128e-08,1.526013,2.328716,0.307905,0.092667,0.037473,1980
2,10103,1012,10/31/1978,1978,33,,1.697500,3.362003,0.338144,-17.143817,...,,,,,1.759493,3.095816,0.096753,0.221851,0.087020,1980
3,10137,1279,12/31/1978,1978,49,,537.524500,1.330341,0.153238,-87.819837,...,0.553246,0.740017,1.765620e-08,9.726790e-08,0.492885,0.242936,0.189693,0.125777,0.017540,1980
4,10145,1300,12/31/1978,1978,99,-0.0031,805.633282,1.579284,0.149248,-22.050470,...,0.427617,0.657563,2.898901e-09,6.190654e-08,1.139163,1.297691,0.279437,0.024228,0.031201,1980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2165510,93422,154357,12/31/2019,2019,13,-0.0090,1069.650000,2.487356,-0.090964,-14.117559,...,0.797001,12.233361,7.505129e-09,5.571619e-09,2.691027,7.241625,0.265207,0.257939,0.132692,2020
2165511,93423,10567,12/31/2019,2019,79,0.0004,3817.839740,-0.187572,0.046902,19.464647,...,0.519033,17.649093,4.462048e-10,3.803709e-09,1.921529,3.692274,0.485215,0.068369,0.061119,2020
2165512,93426,185138,12/31/2019,2019,36,-0.0108,459.782000,0.524944,0.048258,1.095352,...,0.473367,2.144264,2.296462e-08,3.236729e-08,1.302016,1.695247,0.472220,0.037482,0.043174,2020
2165513,93434,184259,06/30/2019,2019,1,-0.1349,87.853920,1.138777,-0.105914,-13.505851,...,0.935967,0.897075,3.435272e-07,1.037670e-07,0.389842,0.151977,0.021429,-0.694649,0.073887,2020


In [17]:
permno_list = list(df1['permno'].sample(1000, random_state=42))

In [18]:
sample = df1.loc[df1['permno'].isin(permno_list)]

In [19]:
sample[['permno', 'date']]

Unnamed: 0,permno,date
31,10698,1980-01
41,10866,1980-01
46,10912,1980-01
60,11260,1980-01
62,11308,1980-01
...,...,...
2165462,93073,2020-12
2165466,93089,2020-12
2165472,93130,2020-12
2165484,93246,2020-12


## Transform data

In [20]:
print('Training in progress...\n')
# Build a training pipeline
pipeline = build_pipeline(con_list, dum_list, embed_list, 5, 95, period)

Training in progress...



In [21]:
pipeline

In [22]:
# Set year range of the sample
years = list(sample['pyear'].drop_duplicates().sort_values())

In [23]:
years

[1980,
 1981,
 1982,
 1983,
 1984,
 1985,
 1986,
 1987,
 1988,
 1989,
 1990,
 1991,
 1992,
 1993,
 1994,
 1995,
 1996,
 1997,
 1998,
 1999,
 2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019,
 2020]

In [24]:
year = 1986

In [25]:
train_data = sample.loc[(sample['pyear']<=year)]
test_data = sample.loc[(sample['pyear']==year+1)]

In [26]:
# Training and testing data
training_years = sorted(train_data.pyear.unique())
print(f'Training Years: {training_years}\n')
print(f'Testing Year: {test_data.pyear.unique()}')

Training Years: [1980, 1981, 1982, 1983, 1984, 1985, 1986]

Testing Year: [1987]


In [27]:
train_data

Unnamed: 0,permno,gvkey,adatadate,fyear,sic2,spi,mve_f,bm,ep,cashpr,...,std_dolvol,std_turn,ill,zerotrade,beta,betasq,rsq1,pricedelay,idiovol,pyear
31,10698,2040,12/31/1978,1978,12,0.0009,68.839375,1.269332,0.116634,-13.067487,...,0.753815,2.894962,4.539289e-08,2.609996e-08,0.589829,0.347898,0.063211,0.037030,0.038551,1980
41,10866,2436,10/31/1978,1978,56,-0.0038,180.971833,1.357946,0.184896,-6.925387,...,0.759439,0.317240,1.205612e-07,2.628598e-07,0.609418,0.371390,0.122947,-0.228825,0.028520,1980
46,10912,1141,03/31/1979,1978,73,0.0000,1.092000,2.217033,0.293956,-25.645299,...,,,,,1.274608,1.624625,0.041796,-0.013952,0.100517,1980
60,11260,3022,12/31/1978,1978,37,0.0000,548.843068,3.730699,-0.372784,-10.001630,...,0.753830,2.611047,1.942549e-08,3.251945e-08,0.972871,0.946478,0.100623,0.186219,0.049505,1980
62,11308,3144,12/31/1978,1978,20,0.0000,5419.612673,0.320984,0.069136,8.871277,...,0.579809,0.418106,3.221865e-09,1.362653e-07,0.744968,0.554978,0.261126,0.236456,0.020792,1980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
341710,91556,9248,01/31/1986,1985,56,,334.672000,0.377522,0.021080,43.089022,...,1.038543,21.132059,4.597927e-08,8.618303e-09,1.151457,1.325853,0.039875,-0.401413,0.068092,1986
341746,92401,10392,12/31/1985,1985,62,,5.498999,0.186761,-0.144935,3.140651,...,0.448634,1.470119,0.000000e+00,3.148666e-08,0.785494,0.617001,0.008787,1.368951,0.071624,1986
341756,92575,10728,12/31/1985,1985,27,,49.313250,0.535880,0.050494,6.586239,...,2.181790,0.154151,0.000000e+00,1.657895e+01,-0.242451,0.058782,-0.005332,0.886406,0.034116,1986
341758,92591,10754,12/31/1985,1985,35,,12.069216,0.099012,0.013671,11.071744,...,0.637323,1.596371,4.564405e-07,4.458273e-08,1.538611,2.367324,0.035727,0.251801,0.087214,1986


In [28]:
test_data

Unnamed: 0,permno,gvkey,adatadate,fyear,sic2,spi,mve_f,bm,ep,cashpr,...,std_dolvol,std_turn,ill,zerotrade,beta,betasq,rsq1,pricedelay,idiovol,pyear
341786,10027,12305,03/31/1986,1985,73,,25.500000,0.271882,0.041294,4.492978,...,1.564011,8.993560,6.312183e-06,9.545455e-01,,,,,,1987
341791,10048,12102,05/31/1986,1985,48,,272.249528,0.131820,0.003023,18.170654,...,1.286789,2.281508,1.393634e-06,4.541301e-08,,,,,,1987
341795,10070,12520,03/31/1986,1985,35,,29.527500,0.099704,-0.089442,6.329292,...,1.060865,5.913670,6.913003e-08,1.532746e-08,,,,,,1987
341841,10866,2436,10/31/1985,1985,56,0.0000,573.420000,0.657363,0.085053,-0.080908,...,0.681750,1.213941,5.256297e-09,4.533236e-08,0.632797,0.400433,0.065110,0.168126,0.032726,1987
341846,10912,1141,12/31/1985,1985,73,0.0000,5.711750,0.534337,0.092091,-1.132083,...,0.787355,1.160510,1.166170e-05,1.909091e+00,0.896344,0.803433,0.019788,0.309812,0.078399,1987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395600,91556,9248,01/31/1987,1986,56,-0.1543,135.124500,0.628642,-0.306747,-13.608253,...,0.776736,1.884524,9.858786e-08,3.520751e-08,1.734325,3.007883,0.187160,0.008384,0.079553,1987
395643,92401,10392,12/31/1986,1986,62,0.0000,6.203999,0.146518,-0.084623,11.499997,...,1.064012,1.973528,1.668580e-05,5.284194e-08,0.501638,0.251641,0.011662,0.569690,0.077391,1987
395652,92575,10728,12/31/1986,1986,27,-0.0301,39.026625,0.581321,0.024470,0.544062,...,2.221737,0.404125,0.000000e+00,1.785000e+01,0.180611,0.032620,0.001167,1.039796,0.040927,1987
395654,92591,10754,12/31/1986,1986,35,0.0000,26.523000,0.056215,0.009727,45.285132,...,0.728634,0.847776,6.489236e-07,8.257556e-08,1.456934,2.122657,0.103737,0.139770,0.089248,1987


In [29]:
def transform_data(train_data, test_data, features, target, pipeline):
    # Train data
    x_train = train_data.loc[:, features]
    y_train = train_data.loc[:, target]
    
    # Fit the pipeline to the train data
    pipeline.fit(x_train)
    x_train_tf = pipeline.transform(x_train)
    x_train_tf = x_train_tf[:, :-2]
    
    # Test data
    x_test = test_data.loc[:, features]
    y_test = test_data.loc[:, target]
    
    # Fit the pipeline to the test data
    x_test_tf = pipeline.transform(x_test)
    x_test_tf = x_test_tf[:, :-2]
    
    # Transform data into numpy array as type float32
    x_train_tf = x_train_tf.astype(np.float32)
    y_train_tf = y_train.to_numpy(np.float32)
    x_test_tf = x_test_tf.astype(np.float32)
    y_test_tf = y_test.to_numpy(np.float32)
    
    # # Transform them to tensor floats
    x_train_tf = torch.tensor(x_train_tf).float()
    y_train_tf = torch.tensor(y_train_tf).float()
    x_test_tf = torch.tensor(x_test_tf).float()
    y_test_tf = torch.tensor(y_test_tf).float()

    print(f'x_train shape: {x_train_tf.shape}')
    print(f'y_train shape: {y_train_tf.shape}\n')
    print(f'x_test shape: {x_test_tf.shape}')
    print(f'y_test shape: {y_test_tf.shape}\n')
    
    return x_train_tf, y_train_tf, x_test_tf, y_test_tf
    

In [30]:
x_train, y_train, x_test, y_test = transform_data(train_data, test_data, deps, target, pipeline)

  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


x_train shape: torch.Size([28988, 103])
y_train shape: torch.Size([28988])

x_test shape: torch.Size([4881, 103])
y_test shape: torch.Size([4881])



In [31]:
continuous_len = len(con_list) + len(dum_list)

In [32]:
continuous_len

102

In [33]:
x_train

tensor([[ 6.7609e-01,  7.9748e-01, -4.0289e-01,  ...,  0.0000e+00,
          0.0000e+00,  1.0698e+04],
        [ 8.1625e-02,  3.1391e-01, -6.1745e-01,  ...,  0.0000e+00,
          0.0000e+00,  1.0866e+04],
        [ 2.0111e-01,  4.1110e-01,  6.4145e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.0912e+04],
        ...,
        [ 9.1361e-03, -2.4392e-02, -1.0743e+00,  ...,  1.0000e+00,
          0.0000e+00,  9.2575e+04],
        [ 9.1361e-03, -2.4392e-02, -8.7273e-01,  ...,  0.0000e+00,
          0.0000e+00,  9.2591e+04],
        [ 9.1361e-03, -2.4392e-02,  7.2840e-01,  ...,  1.0000e+00,
          0.0000e+00,  9.3316e+04]])

In [34]:
x_train[:, :continuous_len].shape

torch.Size([28988, 102])

In [35]:
x_train[:, continuous_len:].shape

torch.Size([28988, 1])

In [36]:
# Since permno is already integers, no need to convert str to int for embedding step
df1['permno'].describe()

count    2.165515e+06
mean     5.777522e+04
std      2.776163e+04
min      1.000100e+04
25%      3.141400e+04
50%      6.552500e+04
75%      8.172100e+04
max      9.343600e+04
Name: permno, dtype: float64

## Finalize data for NN

In [37]:
class XandYDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [38]:
class XandYDataset(Dataset):
    def __init__(self, X_continuous_vars, X_embedding_vars, y):
        self.X_continuous_vars = X_continuous_vars
        if isinstance(X_embedding_vars, torch.Tensor):
            self.X_embedding_vars = X_embedding_vars.numpy()
        else:
            self.X_embedding_vars = X_embedding_vars
        self.y = y

        # Ensure the categorical variables are strings
        self.X_embedding_vars = self.X_embedding_vars.astype(str)
        
        # Create a mapping dictionary for each unique categorical variable to integer
        self.embedding_var_mappings = self._create_mappings(self.X_embedding_vars)

    def _create_mappings(self, X_embedding_vars):
        mappings = {}
        for i in range(X_embedding_vars.shape[1]):
            unique_values = set(X_embedding_vars[:, i])
            mappings[i] = {val: idx for idx, val in enumerate(unique_values)}
        return mappings

    def __len__(self):
        return len(self.X_continuous_vars)

    def __getitem__(self, idx):
        X_continuous = self.X_continuous_vars[idx]
        X_embedding = self.X_embedding_vars[idx]

        # Convert each categorical variable to its corresponding integer index
        X_embedding_int = torch.tensor([self.embedding_var_mappings[i][val] for i, val in enumerate(X_embedding)], dtype=torch.long)

        return X_continuous, X_embedding_int, self.y[idx]

In [39]:
torch.manual_seed(42)
batch_size = 32

In [40]:
train_dataset = XandYDataset(x_train[:, :continuous_len], x_train[:, continuous_len:], y_train)
test_dataset = XandYDataset(x_test[:, :continuous_len], x_test[:, continuous_len:], y_test)

In [41]:
x_train[:, :continuous_len].shape

torch.Size([28988, 102])

In [42]:
x_train[:, continuous_len:].shape

torch.Size([28988, 1])

In [43]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [44]:
next(iter(train_loader))

[tensor([[ 0.8267,  0.9200, -0.9892,  ...,  0.0000,  1.0000,  0.0000],
         [-0.8199, -0.4194, -0.9157,  ...,  0.0000,  0.0000,  0.0000],
         [-1.0121, -1.0184, -1.0640,  ...,  0.0000,  1.0000,  0.0000],
         ...,
         [ 0.4125,  0.5831,  4.2784,  ...,  0.0000,  0.0000,  0.0000],
         [-0.0065,  0.0153, -0.0284,  ...,  0.0000,  0.0000,  0.0000],
         [-0.9092, -0.4921, -0.8940,  ...,  0.0000,  0.0000,  0.0000]]),
 tensor([[280],
         [405],
         [355],
         [181],
         [231],
         [399],
         [446],
         [331],
         [325],
         [387],
         [422],
         [134],
         [442],
         [ 39],
         [ 38],
         [ 10],
         [ 78],
         [ 18],
         [169],
         [ 24],
         [260],
         [181],
         [193],
         [324],
         [259],
         [355],
         [ 49],
         [409],
         [199],
         [332],
         [244],
         [409]]),
 tensor([-0.0605,  0.0249,  0.1867,  0.3302,

In [45]:
X_continuous, X_embed, y = next(iter(train_loader))

In [46]:
print(X_continuous.shape, X_embed.shape, y.shape)

torch.Size([32, 102]) torch.Size([32, 1]) torch.Size([32])


In [47]:
print(f'train_dataset: {len(train_dataset)}')
print(f'test_dataset: {len(test_dataset)}')

train_dataset: 28988
test_dataset: 4881


In [48]:
train_dataset[12]

(tensor([-0.5848, -0.2282, -0.8616, -0.8411, -0.2423, -0.4317, -0.2812, -0.4452,
         -0.0032,  0.2094, -0.6951, -0.0164,  0.0296,  0.0331, -0.0057,  0.0416,
         -0.4384, -0.3467,  0.0000, -0.3798, -0.1366,  0.0000,  0.0797,  2.6947,
         -0.0051, -0.6563, -0.5054,  0.0000,  1.4770,  0.3292, -0.1324, -0.0749,
          0.3279,  0.0000,  0.0792, -0.1692,  0.5088, -0.2947, -0.2402, -0.9355,
         -0.3051,  2.7384,  0.0134, -0.1643, -0.2729,  0.0301,  1.1708,  0.0123,
         -0.5665,  0.7236,  0.4563,  1.9516,  1.6305,  0.0000, -0.8442,  0.3035,
         -0.5233, -0.1221, -0.0868, -0.1908,  0.0592, -0.0233,  0.2755,  0.2361,
         -0.4321,  0.0602, -0.1774,  0.2683,  0.5245, -0.5166, -0.9901, -0.2778,
         -0.0185, -0.4600,  0.4201, -0.8453,  0.5514,  0.5773,  0.7123, -0.2923,
         -0.1762, -0.0988, -0.0078,  0.0000, -0.2521, -0.1124, -0.9889, -0.6014,
         -0.4406, -0.4670,  0.3297, -0.2738,  2.2471, -0.2442, -0.4818,  0.0000,
          0.0000,  0.0000,  

## Modeling data with NN

In [49]:
class FlexibleNeuralNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout_rate=0.5):
        super(FlexibleNeuralNetwork, self).__init__()
        self.layers = nn.ModuleList()
        
        # Input layer
        self.layers.append(nn.Linear(input_dim, hidden_dim))
        
        # Hidden layers
        for _ in range(num_layers - 1):
            self.layers.append(nn.Linear(hidden_dim, hidden_dim))
            self.layers.append(nn.Dropout(dropout_rate))
        
        # Output layer
        self.layers.append(nn.Linear(hidden_dim, output_dim))
        
        self.relu = nn.ReLU()
        
        # Apply Xavier initialization to the layers
        self._initialize_weights()

    def _initialize_weights(self):
        for layer in self.layers:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)

    def forward(self, x):
        for layer in self.layers:
            if isinstance(layer, nn.Linear):
                x = self.relu(layer(x))
            else:
                x = layer(x)  # This applies dropout
        return x

In [50]:
class FlexibleNeuralNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, num_embeddings, embedding_dim, dropout_rate=0.5):
        super(FlexibleNeuralNetwork, self).__init__()
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        
        self.layers = nn.ModuleList()
        
        # Input layer (adjust input_dim to account for embedding_dim)
        self.layers.append(nn.Linear(input_dim + embedding_dim, hidden_dim))
        
        # Hidden layers
        for _ in range(num_layers - 1):
            self.layers.append(nn.Linear(hidden_dim, hidden_dim))
            self.layers.append(nn.Dropout(dropout_rate))
        
        # Output layer
        self.layers.append(nn.Linear(hidden_dim, output_dim))
        
        # Activation function, note that nn.ReLU() is not appropriate because of outputing non-negative number only
        self.activation = nn.LeakyReLU(negative_slope=4) # nn.Tanh()
        
        # Apply Xavier initialization to the layers
        self._initialize_weights()

    def _initialize_weights(self):
        for layer in self.layers:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)

    def forward(self, x_continuous, x_categorical):
        embedded = self.embedding(x_categorical)
        embedded = embedded.view(embedded.size(0), -1)  # Flatten the embedding
        x = torch.cat((x_continuous, embedded), dim=1)
        
        for layer in self.layers:
            if isinstance(layer, nn.Linear):
                x = self.activation(layer(x))
            else:
                x = layer(x)  # This applies dropout
        return x

In [51]:
def train(model, train_loader, loss_function, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        running_loss = 0.0
        for x_continuous, x_embedding_vars, targets in train_loader:
            x_continuous, x_embedding_vars, targets = \
                x_continuous.to(device), x_embedding_vars.to(device), targets.to(device)
            
            optimizer.zero_grad()
            # outputs is squeezed from shape [batch_size, 1] to [batch_size]
            outputs = model(x_continuous, x_embedding_vars).squeeze()
            loss = loss_function(outputs, targets)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}')

In [52]:
def evaluate(model, test_loader, loss_function):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for x_continuous, x_embedding_vars, targets in test_loader:
            x_continuous, x_embedding_vars, targets = \
                x_continuous.to(device), x_embedding_vars.to(device), targets.to(device)
            # outputs is squeezed from shape [batch_size, 1] to [batch_size]
            outputs = model(x_continuous, x_embedding_vars).squeeze()
            loss = loss_function(outputs, targets)
            
            total_loss += loss.item()
    
    average_loss = total_loss / len(test_loader)
    print(f'Average Loss: {average_loss}')
    return average_loss

In [53]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# Set the seed for reproducibility
set_seed(42)

In [54]:
def predict(model, data_loader):
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in data_loader:
            # If the loader provides three values, unpack and ignore the third (targets)
            if len(batch) == 3:
                x_continuous, x_embedding_vars, _ = batch
            else:
                x_continuous, x_embedding_vars = batch

            x_continuous, x_embedding_vars = x_continuous.to(device), x_embedding_vars.to(device)
            # outputs is squeezed from shape [batch_size, 1] to [batch_size]
            outputs = model(x_continuous, x_embedding_vars).squeeze()
            predictions.append(outputs.cpu().numpy())
    
    return np.concatenate(predictions)

In [55]:
# Params
input_dim = continuous_len
hidden_dim = 50
output_dim = 1 # 1 for regression
num_layers = 2  # Number of linear layers
num_embeddings = sample['permno'].nunique()
embedding_dim = 5
dropout_rate = 0.3
lr = 5e-4
weight_decay = 1e-5
num_epochs = 10

In [56]:
# Create the model
model = FlexibleNeuralNetwork(input_dim, hidden_dim, output_dim, num_layers, num_embeddings, embedding_dim, dropout_rate)

# Define the loss function and optimizer
loss_function = nn.L1Loss() # nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)  # weight_decay for L2 regularization



In [57]:
# Move model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

FlexibleNeuralNetwork(
  (embedding): Embedding(953, 5)
  (layers): ModuleList(
    (0): Linear(in_features=107, out_features=50, bias=True)
    (1): Linear(in_features=50, out_features=50, bias=True)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=50, out_features=1, bias=True)
  )
  (activation): LeakyReLU(negative_slope=4)
)

In [58]:
# Train and evaluate the model
train(model, train_loader, loss_function, optimizer, num_epochs=num_epochs)

Epoch 1/10, Loss: 5.525010971570383
Epoch 2/10, Loss: 1.1195433008552387
Epoch 3/10, Loss: 0.15644511839921768
Epoch 4/10, Loss: 0.10267533843814669
Epoch 5/10, Loss: 0.10199528280440019
Epoch 6/10, Loss: 0.10135087264376892
Epoch 7/10, Loss: 0.10103924645246654
Epoch 8/10, Loss: 0.09985176408067181
Epoch 9/10, Loss: 0.09953012656642507
Epoch 10/10, Loss: 0.09929188685882302


In [59]:
evaluate(model, test_loader, loss_function)

Average Loss: 0.11947521861959128


0.11947521861959128

In [60]:
train_data_final = train_data.copy()
test_data_final = test_data.copy()

In [61]:
train_data_final['pred'] = predict(model, train_loader)

In [62]:
test_data_final['pred'] = predict(model, test_loader)

In [63]:
mean_absolute_error(train_data_final[target], train_data_final['pred'])

0.09666238794759509

In [64]:
mean_absolute_error(test_data_final[target], test_data_final['pred'])

0.1193948435462886

In [65]:
train_data_final[[target, 'pred']].describe()

Unnamed: 0,ret,pred
count,28988.0,28988.0
mean,0.016174,0.015639
std,0.139005,0.022614
min,-0.733333,-0.372393
25%,-0.058252,0.010208
50%,0.001077,0.018224
75%,0.075862,0.025561
max,3.954545,0.256739


In [66]:
test_data_final[[target, 'pred']].describe()

Unnamed: 0,ret,pred
count,4881.0,4881.0
mean,0.004878,0.008822
std,0.168603,0.048172
min,-0.622222,-0.434961
25%,-0.069799,0.008567
50%,0.0,0.018721
75%,0.081967,0.026787
max,1.307692,0.233608


In [67]:
test_data_final[[target, 'pred']]

Unnamed: 0,ret,pred
341786,0.090909,0.009466
341791,0.066667,0.025469
341795,0.430556,0.042102
341841,0.072464,0.024669
341846,0.000000,0.021313
...,...,...
395600,0.031250,0.004891
395643,0.000000,0.018647
395652,0.083333,0.012125
395654,-0.192308,0.014399


# Hyperparameter tuning

In [68]:
# Initialize Ray
ray.init(num_cpus=10, num_gpus=0)

2024-06-20 10:31:56,322	INFO worker.py:1753 -- Started a local Ray instance.


0,1
Python version:,3.10.12
Ray version:,2.24.0


In [69]:
def evaluate(model, test_loader, loss_function, device):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for x_continuous, x_embedding_vars, targets in test_loader:
            x_continuous, x_embedding_vars, targets = x_continuous.to(device), x_embedding_vars.to(device), targets.to(device)
            outputs = model(x_continuous, x_embedding_vars).squeeze()
            loss = loss_function(outputs, targets)
            total_loss += loss.item()
    
    average_loss = total_loss / len(test_loader)
    return average_loss

In [70]:
def train_fnn(config, train_loader, test_loader, ray_tuning=True):
    device = torch.device("cuda" if config["gpu"] > 0 else "cpu")
    input_dim = config["input_dim"]
    hidden_dim = config["hidden_dim"]
    output_dim = 1
    num_layers = config["num_layers"]
    num_embeddings = config["num_embeddings"]
    embedding_dim = config["embedding_dim"]
    dropout_rate = config["dropout_rate"]
    lr = config["lr"]
    weight_decay = config["weight_decay"]
    num_epochs = config["num_epochs"]

    model = FlexibleNeuralNetwork(input_dim, hidden_dim, output_dim, num_layers, num_embeddings, embedding_dim, dropout_rate)
    model.to(device)
    
    loss_function = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    try:
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            for x_continuous, x_embedding_vars, targets in train_loader:
                x_continuous, x_embedding_vars, targets = x_continuous.to(device), x_embedding_vars.to(device), targets.to(device)
                
                optimizer.zero_grad()
                outputs = model(x_continuous, x_embedding_vars).squeeze()
                loss = loss_function(outputs, targets)
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item()
            
            avg_train_loss = running_loss / len(train_loader)
            avg_test_loss = evaluate(model, test_loader, loss_function, device)
            metrics = {
                'avg_train_loss': avg_train_loss,
                'avg_test_loss': avg_test_loss,
            }
            if ray_tuning:
                ray.train.report(metrics=metrics)
            else:
                logger.info(f'Epoch {epoch + 1}/{num_epochs}, metrics: {metrics}')

    except Exception as e:
        metrics = {
            'avg_train_loss': float('inf'),
            'avg_test_loss': float('inf'),
        }
        logger.error(f"Training failed with exception: {e}")
        if ray_tuning:
            ray.train.report(metrics=metrics)
        else:
            logger.error(f"Training failed with exception: {e}")
        
    return model

In [71]:
def get_best_trial(train_loader, test_loader, num_samples=10, max_num_epochs=10, gpus_per_trial=0):
    config = {
        "input_dim": continuous_len,
        "hidden_dim": tune.choice([i for i in range(10, 201, 10)]),
        "num_layers": tune.choice([1, 2, 3]),
        "num_embeddings": sample['permno'].nunique(),
        "embedding_dim": tune.choice([i for i in range(1, 50, 5)]),
        "dropout_rate": tune.uniform(0.01, 0.7),
        "lr": tune.loguniform(1e-6, 1e-2),
        "weight_decay": tune.loguniform(1e-6, 1e-3),
        "num_epochs": max_num_epochs,
        "gpu": gpus_per_trial,
    }
    
    scheduler = ASHAScheduler(
        metric="avg_test_loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    
    reporter = CLIReporter(
        metric_columns=["average_train_loss", "avg_test_loss", "training_iteration"])
    
    result = tune.run(
        tune.with_parameters(train_fnn, train_loader=train_loader, test_loader=test_loader),
        resources_per_trial={"cpu": 1, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        storage_path="/zfs/projects/darc/wolee_edehaan_suzienoh-exploratory-ml/kevin/ray_results",
        verbose=3, # to turn off a lot of messages
    )
    
    best_trial = result.get_best_trial("avg_test_loss", "min", "last")
    logger.info(f"Best trial config: {best_trial.config}")
    logger.info(f"Best trial training loss: {best_trial.last_result['avg_train_loss']}")
    logger.info(f"Best trial testing loss: {best_trial.last_result['avg_test_loss']}")
    
    return best_trial

In [72]:
logging.getLogger("ray").setLevel(logging.WARNING) # To turn off a lot of Ray messages
best_trial = get_best_trial(train_loader, test_loader, num_samples=40, max_num_epochs=30, gpus_per_trial=0)

== Status ==
Current time: 2024-06-20 10:31:57 (running for 00:00:00.33)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Logical resource usage: 0/10 CPUs, 0/0 GPUs
Result logdir: /tmp/ray/session_2024-06-20_10-31-52_670893_552272/artifacts/2024-06-20_10-31-57/train_fnn_2024-06-20_10-31-57/driver_artifacts
Number of trials: 40/40 (40 PENDING)
+-----------------------+----------+-------+----------------+-----------------+--------------+-------------+--------------+----------------+
| Trial name            | status   | loc   |   dropout_rate |   embedding_dim |   hidden_dim |          lr |   num_layers |   weight_decay |
|-----------------------+----------+-------+----------------+-----------------+--------------+-------------+--------------+----------------|
| train_fnn_fe339_00000 | PENDING  |       |      0.423074  |              36 |           70 | 4.20799e-06 |            1 |    2.93754e-06 |


Trial name,avg_test_loss,avg_train_loss,checkpoint_dir_name,date,done,hostname,iterations_since_restore,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_fnn_fe339_00000,11.9615,6.18317,,2024-06-20_10-32-06,True,yen4,1,171.67.96.198,601613,3.0964,3.0964,3.0964,1718904726,1,fe339_00000
train_fnn_fe339_00001,0.470559,1.03694,,2024-06-20_10-32-07,True,yen4,2,171.67.96.198,601614,4.74519,1.46096,4.74519,1718904727,2,fe339_00001
train_fnn_fe339_00002,88.2948,60.2973,,2024-06-20_10-32-06,True,yen4,1,171.67.96.198,601615,3.14271,3.14271,3.14271,1718904726,1,fe339_00002
train_fnn_fe339_00003,171.207,69.0931,,2024-06-20_10-32-08,True,yen4,2,171.67.96.198,601616,5.31816,1.726,5.31816,1718904728,2,fe339_00003
train_fnn_fe339_00004,0.19882,0.137026,,2024-06-20_10-32-25,True,yen4,16,171.67.96.198,601618,22.2446,1.2634,22.2446,1718904745,16,fe339_00004
train_fnn_fe339_00005,17.3654,20.7246,,2024-06-20_10-32-06,True,yen4,1,171.67.96.198,601617,3.21011,3.21011,3.21011,1718904726,1,fe339_00005
train_fnn_fe339_00006,46.3597,173.954,,2024-06-20_10-32-06,True,yen4,1,171.67.96.198,601619,3.31408,3.31408,3.31408,1718904726,1,fe339_00006
train_fnn_fe339_00007,91.6444,68.7719,,2024-06-20_10-32-06,True,yen4,1,171.67.96.198,601620,3.30781,3.30781,3.30781,1718904726,1,fe339_00007
train_fnn_fe339_00008,0.577009,0.144541,,2024-06-20_10-32-10,True,yen4,4,171.67.96.198,601621,7.42797,1.42345,7.42797,1718904730,4,fe339_00008
train_fnn_fe339_00009,0.119962,0.0932545,,2024-06-20_10-32-40,True,yen4,30,171.67.96.198,601622,37.2523,1.18551,37.2523,1718904760,30,fe339_00009


== Status ==
Current time: 2024-06-20 10:32:07 (running for 00:00:10.37)
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: -0.38412465973227633 | Iter 1.000: -6.925628302923215
Logical resource usage: 9.0/10 CPUs, 0/0 GPUs
Result logdir: /tmp/ray/session_2024-06-20_10-31-52_670893_552272/artifacts/2024-06-20_10-31-57/train_fnn_2024-06-20_10-31-57/driver_artifacts
Number of trials: 40/40 (30 PENDING, 5 RUNNING, 5 TERMINATED)
+-----------------------+------------+----------------------+----------------+-----------------+--------------+-------------+--------------+----------------+-----------------+----------------------+
| Trial name            | status     | loc                  |   dropout_rate |   embedding_dim |   hidden_dim |          lr |   num_layers |   weight_decay |   avg_test_loss |   training_iteration |
|-----------------------+------------+----------------------+----------------+-----------------+-------------

INFO:Neural Network:Best trial config: {'input_dim': 102, 'hidden_dim': 10, 'num_layers': 3, 'num_embeddings': 953, 'embedding_dim': 46, 'dropout_rate': 0.24295446826850328, 'lr': 0.005910698619088546, 'weight_decay': 9.324140221663508e-06, 'num_epochs': 30, 'gpu': 0}
INFO:Neural Network:Best trial training loss: 0.09510207540102747
INFO:Neural Network:Best trial testing loss: 0.11536250937900512


== Status ==
Current time: 2024-06-20 10:33:30 (running for 00:01:33.28)
Using AsyncHyperBand: num_stopped=40
Bracket: Iter 16.000: -0.12595723661916708 | Iter 8.000: -0.1242824012742323 | Iter 4.000: -0.17171807385055848 | Iter 2.000: -0.38412465973227633 | Iter 1.000: -4.537167676913193
Logical resource usage: 1.0/10 CPUs, 0/0 GPUs
Result logdir: /tmp/ray/session_2024-06-20_10-31-52_670893_552272/artifacts/2024-06-20_10-31-57/train_fnn_2024-06-20_10-31-57/driver_artifacts
Number of trials: 40/40 (40 TERMINATED)
+-----------------------+------------+----------------------+----------------+-----------------+--------------+-------------+--------------+----------------+-----------------+----------------------+
| Trial name            | status     | loc                  |   dropout_rate |   embedding_dim |   hidden_dim |          lr |   num_layers |   weight_decay |   avg_test_loss |   training_iteration |
|-----------------------+------------+----------------------+----------------+-----

## Retrain the model with the optimized parameters

In [73]:
"""
Training Process:
Training Years: [1980, 1981, 1982, 1983, 1984, 1985, 1986]
Testing Year: [1987] for hyperparameters tuning

Inference Process:
Training Years: [1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987] 
using optimized hyperparameters found during training
Inference Year: [1988]
""";

In [74]:
best_trial

train_fnn_fe339_00020

In [75]:
best_trial.config

{'input_dim': 102,
 'hidden_dim': 10,
 'num_layers': 3,
 'num_embeddings': 953,
 'embedding_dim': 46,
 'dropout_rate': 0.24295446826850328,
 'lr': 0.005910698619088546,
 'weight_decay': 9.324140221663508e-06,
 'num_epochs': 30,
 'gpu': 0}

In [76]:
trained_model = train_fnn(
    config=best_trial.config, 
    train_loader=train_loader, 
    test_loader=test_loader, 
    ray_tuning=False
)

INFO:Neural Network:Epoch 1/30, metrics: {'avg_train_loss': 1.6898387787703253, 'avg_test_loss': 0.12120474405051057}
INFO:Neural Network:Epoch 2/30, metrics: {'avg_train_loss': 0.10228859168162804, 'avg_test_loss': 0.12438325286885492}
INFO:Neural Network:Epoch 3/30, metrics: {'avg_train_loss': 0.09857141108932611, 'avg_test_loss': 0.11640679848447345}
INFO:Neural Network:Epoch 4/30, metrics: {'avg_train_loss': 0.09722370161339805, 'avg_test_loss': 0.11544685628192097}
INFO:Neural Network:Epoch 5/30, metrics: {'avg_train_loss': 0.09622465083108306, 'avg_test_loss': 0.11625250575004839}
INFO:Neural Network:Epoch 6/30, metrics: {'avg_train_loss': 0.0954351717766547, 'avg_test_loss': 0.11918131240150508}
INFO:Neural Network:Epoch 7/30, metrics: {'avg_train_loss': 0.09557125286484923, 'avg_test_loss': 0.12116529415148536}
INFO:Neural Network:Epoch 8/30, metrics: {'avg_train_loss': 0.09482452587099133, 'avg_test_loss': 0.12471790297650824}
INFO:Neural Network:Epoch 9/30, metrics: {'avg_tra

In [77]:
predict(trained_model, test_loader)

array([-0.0232391 , -0.0139509 , -0.02017702, ..., -0.02177294,
       -0.03343377, -0.02854417], dtype=float32)