In [1]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sktime.transformations.panel.rocket import MiniRocketMultivariate
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
import yfinance as yf

import sys
sys.path.append('../')
from src.parameters import STR_CONN

import warnings
warnings.filterwarnings('ignore')

2024-09-18 00:04:01.053710: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-18 00:04:01.057618: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-18 00:04:01.067397: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-18 00:04:01.083597: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-18 00:04:01.088339: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-18 00:04:01.101944: I tensorflow/core/platform/cpu_feature_gu

In [2]:
data = pd.read_sql("SELECT symbol, date, close FROM stock_prices WHERE symbol='META'", con=STR_CONN)
data = data.set_index('date').loc[:, ['close']]

In [65]:
def split_data(data, test_size=0.2):
    return train_test_split(data, test_size=test_size, shuffle=False)

def train_test_arima(train, test, order=(1,1,1)):
    model = ARIMA(train, order=order)
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=len(test))
    mse = mean_squared_error(test, forecast)
    return model_fit, forecast, mse

def train_test_garch(train, test, p=1, q=1):
    model = arch_model(train, vol='GARCH', p=p, q=q)
    model_fit = model.fit()
    forecast = model_fit.forecast(horizon=len(test))
    mse = mean_squared_error(test, forecast.mean.iloc[-1])
    return model_fit, forecast.mean.iloc[-1], mse

# Modelo LSTM
def train_test_lstm(train, test, look_back=1):
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
    
    def create_dataset(dataset, look_back=1):
        X, y = [], []
        for i in range(len(dataset) - look_back):
            X.append(dataset[i:(i + look_back), 0])
            y.append(dataset[i + look_back, 0])
        return np.array(X), np.array(y)
    
    X_train, y_train = create_dataset(train_scaled, look_back)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    
    model = Sequential()
    model.add(LSTM(50, input_shape=(look_back, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=0)
    
    test_scaled = scaler.transform(test.values.reshape(-1, 1))
    X_test, y_test = create_dataset(test_scaled, look_back)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    
    predictions = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions)
    mse = mean_squared_error(test[look_back:], predictions)
    
    return model, predictions, mse

def train_test_minirocket_lgbm(train, test, num_kernels=1000):
    train_2d = train.values.reshape((1, 1, -1))
    test_2d = test.values.reshape((1, 1, -1))
    
    rocket = MiniRocketMultivariate(num_kernels=num_kernels)
    rocket.fit(train_2d)
    train_transform = rocket.transform(train_2d)
    test_transform = rocket.transform(test_2d)
    
    model = LGBMRegressor()
    model.fit(train_transform, train.values)
    
    predictions = model.predict(test_transform)
    mse = mean_squared_error(test, predictions)
    
    return model, predictions, mse

def train_test_lgbm(train, test, look_back=10):
    def create_features(data, look_back):
        X, y = [], []
        for i in range(len(data) - look_back):
            X.append(data[i:i+look_back])
            y.append(data[i+look_back])
        return np.array(X), np.array(y)
    
    X_train, y_train = create_features(train.values, look_back)
    X_test, y_test = create_features(test.values, look_back)
    
    model = LGBMRegressor()
    model.fit(X_train, y_train)
    
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    
    return model, predictions, mse

def train_test_xgboost(train, test, look_back=10):
    def create_features(data, look_back):
        X, y = [], []
        for i in range(len(data) - look_back):
            X.append(data[i:i+look_back])
            y.append(data[i+look_back])
        return np.array(X), np.array(y)
    
    X_train, y_train = create_features(train.values, look_back)
    X_test, y_test = create_features(test.values, look_back)
    
    model = XGBRegressor()
    model.fit(X_train, y_train)
    
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    
    return model, predictions, mse

def run_all_models(data):
    train, test = split_data(data)
    results = {}
    
    # ARIMA
    arima_model, arima_forecast, arima_mse = train_test_arima(train, test)
    results['ARIMA'] = {'model': arima_model, 'forecast': arima_forecast, 'mse': arima_mse}
    
    # GARCH
    garch_model, garch_forecast, garch_mse = train_test_garch(train, test)
    results['GARCH'] = {'model': garch_model, 'forecast': garch_forecast, 'mse': garch_mse}
    
    # LSTM
    lstm_model, lstm_predictions, lstm_mse = train_test_lstm(train, test)
    results['LSTM'] = {'model': lstm_model, 'predictions': lstm_predictions, 'mse': lstm_mse}
    
    # # MiniRocket + LGBM
    # minirocket_lgbm_model, minirocket_lgbm_predictions, minirocket_lgbm_mse = train_test_minirocket_lgbm(train, test)
    # results['MiniRocket+LGBM'] = {'model': minirocket_lgbm_model, 'predictions': minirocket_lgbm_predictions, 'mse': minirocket_lgbm_mse}
    
    # LGBM
    # lgbm_model, lgbm_predictions, lgbm_mse = train_test_lgbm(train, test)
    # results['LGBM'] = {'model': lgbm_model, 'predictions': lgbm_predictions, 'mse': lgbm_mse}
    
    # XGBoost
    # xgboost_model, xgboost_predictions, xgboost_mse = train_test_xgboost(train, test)
    # results['XGBoost'] = {'model': xgboost_model, 'predictions': xgboost_predictions, 'mse': xgboost_mse}
    
    return results

In [66]:
data = pd.read_sql("SELECT symbol, date, close FROM stock_prices WHERE symbol='META'", con=STR_CONN)
data = data.set_index('date').loc[:, ['close']]

train, test = split_data(data)

In [67]:

results = run_all_models(data)

# Imprimir resultados
for model_name, model_results in results.items():
    print(f"{model_name} MSE: {model_results['mse']}")

# Para hacer predicciones futuras, use el modelo con el MSE más bajo
best_model = min(results, key=lambda x: results[x]['mse'])
print(f"El mejor modelo es: {best_model}")

# Ejemplo de predicción futura (asumiendo que queremos predecir los próximos 30 días)
# Nota: Este es un ejemplo simplificado. En la práctica, necesitarías ajustar esto según el modelo específico
future_days = 30
best_model_data = results[best_model]
if best_model in ['ARIMA', 'GARCH']:
    future_prediction = best_model_data['model'].forecast(steps=future_days)
else:
    # Para los otros modelos, necesitarías preparar los datos de entrada adecuadamente
    # Este es solo un placeholder y necesitaría ser ajustado según el modelo específico
    last_known_data = results[best_model]['predictions'][-10:]  # Últimos 10 puntos de datos
    future_prediction = best_model_data['model'].predict(np.array([last_known_data]))

print(f"Predicción para los próximos {future_days} días usando {best_model}:")
print(future_prediction)

Iteration:      1,   Func. Count:      6,   Neg. LLF: 11498.697263621332
Iteration:      2,   Func. Count:     12,   Neg. LLF: 2271.4626766960737
Iteration:      3,   Func. Count:     17,   Neg. LLF: 2245.7253430494175
Iteration:      4,   Func. Count:     22,   Neg. LLF: 2235.528181109994
Iteration:      5,   Func. Count:     27,   Neg. LLF: 2232.393094977644
Iteration:      6,   Func. Count:     32,   Neg. LLF: 2232.1725597402587
Iteration:      7,   Func. Count:     37,   Neg. LLF: 2232.0589830231606
Iteration:      8,   Func. Count:     42,   Neg. LLF: 2232.0431925461
Iteration:      9,   Func. Count:     47,   Neg. LLF: 2232.0023774260817
Iteration:     10,   Func. Count:     52,   Neg. LLF: 2231.9153681319726
Iteration:     11,   Func. Count:     57,   Neg. LLF: 2231.6621545035323
Iteration:     12,   Func. Count:     62,   Neg. LLF: 2231.004214335363
Iteration:     13,   Func. Count:     67,   Neg. LLF: 2229.1733521540637
Iteration:     14,   Func. Count:     72,   Neg. LLF: 222