In [2]:
import itertools
import json
import os
import pickle
import threading
import warnings

import numpy as np
import yfinance as yf
from statsmodels.tsa.statespace.sarimax import SARIMAX

#### Entrenamiento de modelos ARIMA

In [3]:
# Silence every warning raised from here on (the 'ignore' action is installed
# with no message/category/module restriction).
# NOTE(review): presumably meant to hide the warnings emitted during the many
# SARIMAX fits below; it also hides deprecation warnings from other libraries.
warnings.filterwarnings('ignore')

In [4]:
# Read the companies.json file. The context manager guarantees the file
# handle is closed once the JSON has been parsed.
with open('companies.json') as companies_file:
    companies = json.load(companies_file)


def _download_sector(sector, start='2021-01-01', end='2021-12-31'):
    """Download daily 'Close' and 'Volume' history for every ticker of a sector.

    Parameters
    ----------
    sector : str
        Key of ``companies`` whose entries are iterated; each element yielded
        by that iteration is passed to ``yf.Ticker`` as the ticker symbol.
    start, end : str
        Date bounds forwarded unchanged to ``Ticker.history``.

    Returns
    -------
    list
        One two-column frame (['Close', 'Volume']) per ticker, in the
        iteration order of ``companies[sector]``.
    """
    # `interval='1d'` requests daily bars. The date range is given explicitly
    # through start/end, so no `period` (a look-back span such as '1d' or
    # '1mo') is passed alongside it.
    return [
        yf.Ticker(ticker).history(interval='1d', start=start, end=end)[['Close', 'Volume']]
        for ticker in companies[sector]
    ]


health = _download_sector('Health')
banking = _download_sector('Banking')
technology = _download_sector('Technology')
renewable_energy = _download_sector('RenewableEnergies')
raw_materials = _download_sector('RawMaterials')

In [5]:
# Concrete example for the first Health stock; the same would have to be done
# for all of them.
# Training window bounds.
tr_start = '2021-01-01'
tr_end = '2021-12-01'
# Test window bounds.
te_start = '2021-12-02'
te_end = '2021-12-31'

In [6]:
# Candidate values for each of the three components of an ARIMA order.
lista = [0,1,2,3]
# Every ordered (p, d, q) triple over `lista`. The Cartesian product is needed
# because the three positions are not interchangeable: an unordered
# "combinations with replacement" grid only contains non-decreasing triples
# and would never propose orders such as (1, 0, 0) or (1, 1, 0).
posibles_coefs = list(itertools.product(lista, repeat=3))
def auto_arimax(posibles_coefs,tra,tes):
    """
    Pick the SARIMAX order with the lowest walk-forward squared error and refit it.

    For every candidate order the model is refit once per test row, using the
    training rows plus the test rows already "seen", and the squared error of
    the one-step-ahead forecast of 'Close' is accumulated ('Volume' is the
    exogenous regressor). A candidate is discarded (its error becomes
    infinity) when the last of those fits has any p-value above 0.1, or when
    its accumulated error is not finite.

    Parameters
    ----------
    posibles_coefs : sequence of tuple
        Candidate ``order`` values passed to SARIMAX; must be non-empty and
        support ``len`` and indexing.
    tra : mapping of columns (e.g. DataFrame)
        Training rows; must provide 'Close' and 'Volume'.
    tes : mapping of columns (e.g. DataFrame)
        Test rows with the same columns; must contain at least one row.

    Returns
    -------
    The fitted result of SARIMAX trained on ``tra`` followed by ``tes`` with
    the winning order. Ties are resolved in favour of the earliest candidate;
    if every candidate was discarded, the first candidate is used.

    Raises
    ------
    ValueError
        If ``posibles_coefs`` or ``tes`` is empty.
    """
    if len(posibles_coefs) == 0:
        raise ValueError("posibles_coefs must contain at least one order")
    n_test = len(tes)
    if n_test == 0:
        # Without test rows no candidate can be scored (and there would be no
        # fitted model to read p-values from).
        raise ValueError("tes must contain at least one row to score the candidate orders")

    # Convert each column to a plain list once; all later access is then
    # purely positional and independent of the frames' index labels.
    tra_close, tra_volume = list(tra['Close']), list(tra['Volume'])
    tes_close, tes_volume = list(tes['Close']), list(tes['Volume'])

    errores = []
    for order in posibles_coefs:
        error = 0.0
        # One-step-ahead (walk-forward) evaluation: slow, but each forecast
        # uses all the information available up to the previous test row.
        for i in range(n_test):
            ml = SARIMAX(tra_close + tes_close[:i],
                         exog=tra_volume + tes_volume[:i],
                         order=order,
                         seasonal_order=(0,0,0,0)).fit(disp=False)  # disp=False: no optimizer log per fit
            prediccion = ml.get_forecast(steps=1, exog=tes_volume[i]).predicted_mean[0]
            error += (prediccion - tes_close[i])**2
        # Significance is judged on the last walk-forward fit of this order.
        no_significativo = (np.asarray(ml.pvalues) > 0.1).any()
        # A NaN error must not take part in min(): depending on its position
        # it could be returned as the "minimum" and win the selection.
        if no_significativo or not np.isfinite(error):
            error = np.inf
        errores.append(error)

    mejor_orden = posibles_coefs[errores.index(min(errores))]
    return SARIMAX(tra_close + tes_close,
                   exog=tra_volume + tes_volume,
                   order=mejor_orden,
                   seasonal_order=(0,0,0,0)).fit(disp=False)

In [None]:
def train_and_save(sector,list_sector):
    """Fit one model per frame of a sector and pickle each one to disk.

    Each frame is cut into a training window (module-level ``tr_start`` to
    ``tr_end``) and a test window (``te_start`` to ``te_end``), rows with
    missing values are dropped, and the result of
    ``auto_arimax(posibles_coefs, tra, tes)`` is written to
    ``modelosEntrenados/<sector>/<name>.pkl``.

    Parameters
    ----------
    sector : str
        Key of the module-level ``companies`` mapping; also the name of the
        output sub-directory.
    list_sector : sequence
        Frames with 'Close' and 'Volume' columns. The i-th frame is saved
        under the i-th key of ``companies[sector]``, so both must be in the
        same order.

    Raises
    ------
    IndexError
        If ``list_sector`` has more entries than ``companies[sector]``.
    """
    nombres = list(companies[sector].keys())
    directorio = 'modelosEntrenados/'+sector
    # Create the output directory up front so that a missing folder cannot
    # make the run fail only after the (slow) training has finished.
    # exist_ok=True keeps this safe when the directory is already there.
    os.makedirs(directorio, exist_ok=True)
    for i, datos in enumerate(list_sector):
        # Resolve the file name before training so an order mismatch between
        # list_sector and companies[sector] fails fast.
        nombre = nombres[i]
        precios = datos[['Close', 'Volume']]
        tra = precios.loc[tr_start:tr_end].dropna()
        tes = precios.loc[te_start:te_end].dropna()
        ml = auto_arimax(posibles_coefs,tra,tes)
        with open(directorio+'/'+nombre+'.pkl', 'wb') as outfile:
            pickle.dump(ml, outfile)
# Train and persist the Health sector models synchronously, in this cell.
# NOTE(review): the threaded cell that follows also starts
# train_and_save('Health', health), so running both cells trains the Health
# models twice — confirm whether this call is still wanted.
train_and_save('Health',health)

In [None]:
# One worker thread per sector; each runs train_and_save(sector, frames).
sector_jobs = [
    ('Health', health),
    ('Banking', banking),
    ('Technology', technology),
    ('RenewableEnergies', renewable_energy),
    ('RawMaterials', raw_materials),
]
th1, th2, th3, th4, th5 = (
    threading.Thread(target=train_and_save, args=job) for job in sector_jobs
)
workers = (th1, th2, th3, th4, th5)

# Start every worker first, then block until all of them have finished.
for worker in workers:
    worker.start()
for worker in workers:
    worker.join()