In [2]:
import pandas as pd
import mlflow

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR

import numpy as np

In [3]:
# Read the compressed parquet price table and keep only the rows whose
# `ticker` column equals 'AAPL'.
data = (
    pd.read_parquet('./data/large.parquet.gz')
      .loc[lambda prices: prices['ticker'] == 'AAPL']
)

In [4]:
# Split by calendar year on `dateTime`: rows from 2021 form the training
# frame and rows from 2022 form the test frame (lower bound inclusive,
# upper bound exclusive).
# NOTE(review): the string bounds are compared directly against the column;
# this presumably relies on `dateTime` being a datetime dtype - confirm.
stamps = data['dateTime']
train_mask = (stamps >= '01-01-2021') & (stamps < '01-01-2022')
test_mask = (stamps >= '01-01-2022') & (stamps < '01-01-2023')
train, test = data[train_mask], data[test_mask]

In [5]:
# Reduce both frames to the raw values of their `close` column.
# Caution: `train` / `test` are rebound from DataFrames to arrays here, so
# this cell cannot be executed twice without re-running the split first.
train, test = [frame['close'].values for frame in (train, test)]

In [6]:
segment_len = 7


def make_windows(series, segment_len):
    """Cut a 1-D series into supervised (inputs, target) samples.

    Every sample is one run of ``segment_len`` consecutive values: the first
    ``segment_len - 1`` values are the model inputs and the last value of the
    same run is the target, i.e. the element that immediately follows the
    inputs.

    Parameters
    ----------
    series : array-like, 1-D
        Ordered observations.
    segment_len : int
        Total length of one sample (inputs plus the single target).

    Returns
    -------
    x : numpy.ndarray, shape (n_windows, segment_len - 1)
    y : numpy.ndarray, shape (n_windows,)
        ``n_windows`` is ``len(series) - segment_len + 1``, or 0 when the
        series is shorter than one segment.
    """
    series = np.asarray(series)
    n_windows = max(series.shape[0] - segment_len + 1, 0)
    # Row r of the index grid is [r, r + 1, ..., r + segment_len - 1].
    starts = np.arange(n_windows)[:, None]
    offsets = np.arange(segment_len)[None, :]
    windows = series[starts + offsets]
    return windows[:, :-1].copy(), windows[:, -1].copy()


# The previous version paired inputs series[i : i + 6] with the target
# series[i + 7], silently skipping series[i + 6]; the target is now the value
# right after the input window, and the final window is no longer dropped.
train_x, train_y = make_windows(train, segment_len)
test_x, test_y = make_windows(test, segment_len)

In [9]:
import time
from sklearn.metrics import mean_squared_error, mean_absolute_error, root_mean_squared_error, mean_absolute_percentage_error, r2_score


In [69]:
# Enable MLflow autologging for the estimators fitted below.
mlflow.autolog()

# Fixed seed so the tree-based estimators produce the same fit on every
# re-run of this cell.
SEED = 42

# Not filled in this cell; the name is kept in case other cells rely on it.
statistics = []

# Candidate regressors keyed by the name used for their MLflow run.
models = {  'LR' :  LinearRegression(),
            'DTR' : DecisionTreeRegressor(min_samples_leaf=10, random_state=SEED),
            'RFR' : RandomForestRegressor(random_state=SEED),
            'GBR' : GradientBoostingRegressor(random_state=SEED),
            'SVR' : SVR(kernel='linear', epsilon=1e-2)}

# Fit every candidate inside its own MLflow run.
for model_decription, model in models.items():
    with mlflow.start_run(run_name=model_decription):
        model.fit(train_x, train_y)

2024/02/02 18:49:56 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


In [10]:
# Enable MLflow autologging for the estimators fitted below.
mlflow.autolog()

statistics = []

# Only the linear regression and the linear-kernel SVR are compared here.
models = dict(
    LR=LinearRegression(),
    SVR=SVR(kernel='linear', epsilon=1e-2),
)

# Metric name -> scoring function; each one is called as fn(test_y, preds).
test_metrics = (
    ('mse', mean_squared_error),
    ('mae', mean_absolute_error),
    ('mape', mean_absolute_percentage_error),
    ('rmse', root_mean_squared_error),
    ('r2_score', r2_score),
)

# One MLflow run per estimator: fit on the training windows, predict the test
# windows and log every metric from the table above under its name.
for model_decription, model in models.items():
    with mlflow.start_run(run_name=model_decription):
        model.fit(train_x, train_y)
        preds = model.predict(test_x)
        mlflow.log_metrics(
            {metric_name: score(test_y, preds) for metric_name, score in test_metrics}
        )

2024/02/02 21:45:56 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


In [72]:
# NN methods
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, GRU, SimpleRNN, Conv1D, MaxPooling1D, Flatten, GlobalMaxPooling1D, Input, Dropout

import warnings
warnings.filterwarnings("ignore")


# Every recurrent / convolutional network below receives
# `sequence_length - 1` time steps with a single feature per step.
sequence_length = 7
neurons_per_layer = sequence_length - 1
input_shape = (neurons_per_layer, 1)


def _conv_front():
    """Return a new Conv1D input layer: 32 filters, kernel size 3, ReLU."""
    return Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape)


def _recurrent_stack(cell, depth):
    """Return `depth` stacked `cell` layers followed by a one-unit Dense head.

    The first layer receives `input_shape`; every layer except the last
    recurrent one is created with `return_sequences=True`.
    """
    stack = []
    for level in range(depth):
        options = {'units': neurons_per_layer}
        if level < depth - 1:
            options['return_sequences'] = True
        if level == 0:
            options['input_shape'] = input_shape
        stack.append(cell(**options))
    stack.append(Dense(units=1))
    return stack


def _dense_stack(hidden_layers):
    """Return `hidden_layers` Dense layers of `neurons_per_layer` units plus a one-unit head."""
    stack = [Dense(units=neurons_per_layer, input_shape=(neurons_per_layer,))]
    for _ in range(hidden_layers - 1):
        stack.append(Dense(units=neurons_per_layer))
    stack.append(Dense(units=1))
    return stack


# Run name -> ordered list of layer instances handed to `Sequential`.
# Layers are created in the same order as the keys are inserted.
topologies = {}
for label, cell in (('LSTM', LSTM), ('GRU', GRU), ('SimpleRNN', SimpleRNN)):
    topologies['CNN + ' + label] = [_conv_front(),
                                    cell(units=neurons_per_layer),
                                    Dense(units=1)]
    for depth in (3, 2, 1):
        topologies[label + ' x' + str(depth)] = _recurrent_stack(cell, depth)

topologies['CNN'] = [_conv_front(), Flatten(), Dense(units=1)]
topologies['MLP(2)'] = _dense_stack(2)
topologies['MLP(1)'] = _dense_stack(1)

In [73]:
# Training settings shared by every topology; they do not change between
# runs, so they are defined once outside the loop.
epochs = 5
batch_size = 32

# Train and evaluate each topology inside its own MLflow run.
for model_decription, topology in topologies.items():
    with mlflow.start_run(run_name=model_decription):
        # NOTE(review): `topologies` stores layer instances, so executing this
        # cell again presumably keeps training the same layers rather than
        # starting from fresh weights - re-run the topology cell first.
        current_topology_model = Sequential(topology)
        current_topology_model.compile(optimizer = 'Adamax', loss = 'mean_squared_error', metrics = ['mean_absolute_error'])
        current_topology_model.fit(x = train_x, y = train_y, epochs = epochs, batch_size = batch_size, verbose = 0)
        # evaluate() yields the loss followed by the compiled metrics, i.e.
        # (MSE, MAE) on the test windows for the compile() call above.
        mse, mae = current_topology_model.evaluate(test_x, test_y, verbose = 0)
        # Previously these values were computed and then discarded; log them
        # under the same names the scikit-learn runs use for their test scores.
        mlflow.log_metrics({'mse': float(mse), 'mae': float(mae)})



INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpep2od0n8\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpep2od0n8\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpk6syy482\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpk6syy482\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpy6bixe3e\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpy6bixe3e\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpzip4jvh5\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpzip4jvh5\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmptms9ulp4\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmptms9ulp4\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpe7gqie03\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpe7gqie03\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpixhqa8no\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpixhqa8no\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmp50rmczgb\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmp50rmczgb\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpaysxy_0a\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpaysxy_0a\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpw_qcgzhi\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpw_qcgzhi\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpn_u_9y04\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpn_u_9y04\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpw5zxidih\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpw5zxidih\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmp04oykfx4\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmp04oykfx4\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpsa7i161e\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmpsa7i161e\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmp9eytgal7\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\sixxio\AppData\Local\Temp\tmp9eytgal7\model\data\model\assets
