In [24]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
import os
from dotenv import load_dotenv
from financial.momentum.utilities import find_dotenv
import financial.data as fd
from financial.momentum.experiment.modelExperiment import ModelExperimentFactory
from financial.io.file.cache import FileCache
import pandas as pd

In [15]:
# Load the .env file located by find_dotenv() into the process environment
# (presumably the source of CACHE, MODEL and DATA — confirm).
load_dotenv(dotenv_path=find_dotenv())

cache = os.environ["CACHE"]
model = os.environ["MODEL"]
print(model)
print(os.environ["DATA"])

# The file cache is handed the cache directory with a trailing slash appended.
file_cache = FileCache(cache_path=f"{cache}/")
datastore = fd.CachedDataStore(path=os.environ["DATA"], cache=file_cache)
print(datastore)

/home/manidmt/Universidad/TFG/OTRI/models/scikit-learn
/home/manidmt/Universidad/TFG/OTRI/data
CachedDataStore with 946 data sources [cache stats: {'size': 0, 'hit': 0, 'miss': 0, 'write': 0, 'read': 0, 'update': 0}]


In [16]:
import financial.model as fm
import financial.lab.models as labmodels
import sklearn.linear_model

class LinearScikitLearnModelFactory(labmodels.ModelFactory):
    '''
    Factory that produces Scikit-Learn linear regression models.
    '''

    def create_model_from_descriptors(self,
                                      model_id: str,
                                      hyperparameters: dict,
                                      input_descriptor: fd.DataDescriptor,
                                      output_descriptor: fd.DataDescriptor) -> fm.Model:
        '''
        Wrap a new LinearRegression estimator in an fm.ScikitLearnModel.

        The estimator itself is created with its default constructor
        arguments; `hyperparameters` is only forwarded to the wrapper.
        '''
        return fm.ScikitLearnModel(
            model_id,
            input_descriptor,
            output_descriptor,
            sklearn.linear_model.LinearRegression(),
            hyperparameters,
        )
        

factory = LinearScikitLearnModelFactory()

In [17]:
# Symbol and date window reused by every experiment configured in this notebook.
ticker = "^GSPC"
start_date, end_date = "1990-01-01", "2023-12-31"

# Settings handed to ModelExperimentFactory.create_experiment() for the linear model.
# NOTE(review): "start_year"/"end_year" receive full ISO dates, not years — confirm
# that this is what the experiment factory expects.
config = {
    "mode": "global",
    "datastore": datastore, "ticker": ticker,
    "model_factory": factory,
    "name": "global_first_experiment",
    "start_year": start_date, "end_year": end_date,
}

In [18]:
import pickle

# File names of the pickled train/test results stored in the cache directory.
# Single quotes are used inside the replacement fields so the cell also parses on
# Python < 3.12 (reusing the enclosing quote character needs PEP 701).
results_prefix = f"{config['name']}.results.{config['ticker']}"
path_train = os.path.join(cache, f"{results_prefix}.train.pickle")
path_test = os.path.join(cache, f"{results_prefix}.test.pickle")

# NOTE: pickle.load can execute arbitrary code — only load files from a trusted cache.
with open(path_train, 'rb') as file:
    predictions_train = pickle.load(file)
print(predictions_train)

with open(path_test, 'rb') as file:
    predictions_test = pickle.load(file)
print(predictions_test)

global_first_experiment.^GSPC.train
global_first_experiment.^GSPC.test


In [19]:
from financial.lab.experiment import ExperimentBatch

# Create a named batch and ask it to read the pickled training results.
experimentBatch = ExperimentBatch("TestLinearTest")
experimentBatch.from_file(path_train)

# NOTE(review): the recorded output is "TestLinearTest experiments=0 n=0", so nothing
# appears to have been loaded — confirm whether from_file() fills this instance or
# returns a new batch that has to be assigned.
print(experimentBatch)
#experimentBatch['train'][ticker]

TestLinearTest experiments=0 n=0


In [20]:
# Build the experiment described by `config` (linear model factory) and execute it.
experiment_linear = ModelExperimentFactory.create_experiment(config)
experiment_linear.run()
# Show the predictions produced by the run (a date-indexed float series in the recorded output).
print(experiment_linear.predictions)

1990-05-10   -0.002445
1990-05-11    0.000419
1990-05-14    0.000412
1990-05-15   -0.002300
1990-05-16   -0.001051
                ...   
2023-12-22   -0.003897
2023-12-26   -0.002925
2023-12-27    0.000292
2023-12-28    0.000937
2023-12-29    0.000158
Length: 8475, dtype: float64


In [21]:
absolute_predictions_linear = experiment_linear.reconstruct_absolute_predictions_from_relative()

In [22]:
from financial.momentum.models.randomForest import RandomForestModelFactory

# Same ticker and date window as before, now with the random-forest model factory.
factory = RandomForestModelFactory()

# NOTE(review): "start_year"/"end_year" receive full ISO dates, not years — confirm
# that this is what the experiment factory expects.
config = {
    "mode": "global",
    "datastore": datastore, "ticker": ticker,
    "model_factory": factory,
    "name": "global_forest_first_experiment",
    "start_year": start_date, "end_year": end_date,
}


In [25]:
experiment_forest = ModelExperimentFactory.create_experiment(config)
experiment_forest.run()

In [26]:
import pickle
# Pickle stored under the MODEL directory and named after the experiment.
# Single quotes inside the replacement field keep the cell valid on Python < 3.12
# (reusing the enclosing quote character needs PEP 701).
path = os.path.join(model, f"{config['name']}.pickle")

# NOTE: pickle.load can execute arbitrary code — only load files from a trusted location.
with open(path, 'rb') as file:
    predictions = pickle.load(file)

# NOTE(review): `predictions` loaded above is not used in this cell; what gets printed
# is the in-memory result of the experiment run — confirm which one was intended.
print(experiment_forest.predictions)

1990-05-10   -0.031734
1990-05-11   -0.016927
1990-05-14   -0.025651
1990-05-15   -0.024577
1990-05-16   -0.023041
                ...   
2023-12-22   -0.035808
2023-12-26   -0.031003
2023-12-27   -0.033587
2023-12-28   -0.031352
2023-12-29   -0.033377
Length: 8475, dtype: float64


In [27]:
data = datastore.get_data(ticker, start_date, end_date)

In [28]:
'''
stdev = experiment_forest.target[0].stdev
mean = experiment_forest.target[0].mean
deconstructed_predictions = (experiment_forest.predictions /stdev) - mean


def reconstruct_relative(data: pd.Series, model_output: pd.Series) -> pd.Series:
    #print(model_output[:-lookahead])
    reconstructed_change = - (mean+stdev*model_output) # Sign reversal @ shift(-lookahead)
    #print("reconstructed change")
    #print(reconstructed_change[:-lookahead]) 
    # change = (final-inicial)/final => change*final = final-inicial => final = inicial/(1-change)
    reconstructed_final = data / (1-reconstructed_change)
    #print("reconstructed final")
    #print(reconstructed_final[:-lookahead].dropna()) 
    return reconstructed_final.shift(20).dropna()


print(reconstruct_relative(data, deconstructed_predictions))
'''

AttributeError: 'Change' object has no attribute 'stdev'

In [None]:
print(data)

In [None]:
# Result of reconstruct_absolute_predictions_from_relative() for the random-forest experiment.
absolute_predictions_forest = experiment_forest.reconstruct_absolute_predictions_from_relative()
# Print the series assigned above; no variable called `absolute_predictions` is defined
# in this notebook, so printing that name raises NameError on a fresh kernel.
print(absolute_predictions_forest)

In [None]:
from financial.momentum.models.SVR import SVRModelFactory

# Same ticker and date window as before, now with the SVR model factory.
factory = SVRModelFactory()

# NOTE(review): "start_year"/"end_year" receive full ISO dates, not years — confirm
# that this is what the experiment factory expects.
config = {
    "mode": "global",
    "datastore": datastore, "ticker": ticker,
    "model_factory": factory,
    "name": "global_svr_first_experiment",
    "start_year": start_date, "end_year": end_date,
}

In [None]:
experiment_svr = ModelExperimentFactory.create_experiment(config)
experiment_svr.run()
print(experiment_svr.predictions)

In [None]:
absolute_predictions_svr = experiment_svr.reconstruct_absolute_predictions_from_relative()
print(absolute_predictions_svr)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import style

# Adjusting the size of matplotlib
import matplotlib as mpl

mpl.rc('figure', figsize=(16, 9))
mpl.__version__
style.use('ggplot')
def plot_series(series: list[pd.Series], start_date: str=None, end_date: str=None):
    '''
    Plot every series in `series`, restricted to the slice [start_date:end_date].

    The first series of the list is drawn with a thicker line (3.0) than the
    remaining ones (1.0). Leaving both bounds as None slices the whole series.
    '''
    for position, current in enumerate(series):
        # Thicker stroke for the first entry of the list, default width otherwise.
        width = 3.0 if position == 0 else 1.0
        window = current[start_date:end_date]
        window.plot(linewidth=width)

In [None]:
plot_series([data, absolute_predictions_linear, absolute_predictions_svr, absolute_predictions_forest])

In [None]:
plot_series([data, absolute_predictions_linear], "2020-01-01", "2021-01-01")

In [None]:
new_abs_linear = absolute_predictions_linear.shift(-20).dropna()
plot_series([data, new_abs_linear], "2020-01-01", "2021-01-01")

In [None]:
plot_series([data, absolute_predictions_svr], "2020-01-01", "2021-01-01")

In [None]:
plot_series([data, absolute_predictions_forest], "2020-01-01", "2021-01-01")

In [None]:
plot_series([data, absolute_predictions_linear, absolute_predictions_svr, absolute_predictions_forest], "2020-01-01", "2021-01-01")

In [None]:
# Print the sizes of the SVR predictions, the reconstructed absolute predictions and the raw data.
print(experiment_svr.predictions.size)
print(absolute_predictions_svr.size)
print(data.size)
# Skip the first 20+90 entries of `data` to build the series the metrics are computed against.
# NOTE(review): 20 matches the shift applied to the predictions elsewhere in this notebook
# (presumably the lookahead); the meaning of 90 is not visible here — confirm both and
# consider promoting them to named constants.
target = data[20+90:]
print(target.size)
from financial.lab.experiment import Experiment
from sklearn.metrics import r2_score

In [None]:
# Metrics of the linear model's absolute predictions against `target`.
metrics_linear = Experiment(experiment_linear.name, absolute_predictions_linear, target)
r2 = r2_score(target, absolute_predictions_linear)

print("GLOBAL:")
print(f"n={metrics_linear.samples()} MSE={metrics_linear.MSE():.4f} RMSE={metrics_linear.RMSE():.4f} MAE={metrics_linear.MAE():.4f} MAPE={metrics_linear.MAPE():.4f} R² = {r2:.4f}")

# The train and test splits of the experiment share one report format.
for heading, split in (("TRAIN: ", "train"), ("TEST: ", "test")):
    print(heading)
    results = getattr(experiment_linear, split)
    print(f"n={results.samples()} MSE={results.MSE():.4f} RMSE={results.RMSE():.4f} MAE={results.MAE():.4f} MAPE={results.MAPE():.4f}")

In [None]:
# Metrics of the SVR model's absolute predictions against `target`.
metrics_svr = Experiment(experiment_svr.name, absolute_predictions_svr, target)
r2 = r2_score(target, absolute_predictions_svr)

print("GLOBAL:")
print(f"n={metrics_svr.samples()} MSE={metrics_svr.MSE():.4f} RMSE={metrics_svr.RMSE():.4f} MAE={metrics_svr.MAE():.4f} MAPE={metrics_svr.MAPE():.4f} R² = {r2:.4f}")

# The train and test splits of the experiment share one report format.
for heading, split in (("TRAIN: ", "train"), ("TEST: ", "test")):
    print(heading)
    results = getattr(experiment_svr, split)
    print(f"n={results.samples()} MSE={results.MSE():.4f} RMSE={results.RMSE():.4f} MAE={results.MAE():.4f} MAPE={results.MAPE():.4f}")

In [None]:
# Metrics of the random-forest model's absolute predictions against `target`.
# The experiment is labelled with the forest experiment's own name, so the
# forest results are not reported under the SVR experiment's name.
metrics_forest = Experiment(experiment_forest.name, absolute_predictions_forest, target)
r2 = r2_score(target, absolute_predictions_forest)

print("GLOBAL:")
print(f"n={metrics_forest.samples()} MSE={metrics_forest.MSE():.4f} RMSE={metrics_forest.RMSE():.4f} MAE={metrics_forest.MAE():.4f} MAPE={metrics_forest.MAPE():.4f} R² = {r2:.4f}")

# Metrics of the training split of the forest experiment.
print("TRAIN: ")
results = experiment_forest.train
print(f"n={results.samples()} MSE={results.MSE():.4f} RMSE={results.RMSE():.4f} MAE={results.MAE():.4f} MAPE={results.MAPE():.4f}")

# Metrics of the test split of the forest experiment.
print("TEST: ")
results = experiment_forest.test
print(f"n={results.samples()} MSE={results.MSE():.4f} RMSE={results.RMSE():.4f} MAE={results.MAE():.4f} MAPE={results.MAPE():.4f}")