### Dataset

In [19]:
import pandas as pd
from auto_ts import auto_timeseries
import dill
import talib
import numpy as np
import warnings

warnings.filterwarnings('ignore')

from sklearn.model_selection import TimeSeriesSplit
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Flatten, Conv1D, MaxPooling1D, BatchNormalization
from keras.regularizers import l2
from keras.optimizers import Adam, RMSprop, SGD
from scikeras.wrappers import KerasRegressor
from keras.callbacks import EarlyStopping
from skopt import BayesSearchCV
from sklearn.preprocessing import LabelEncoder
import tensorflow.keras.backend as K
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from keras.losses import categorical_crossentropy
from sklearn.metrics import log_loss
from keras.utils import to_categorical

In [20]:
# Evito que ciertas columnas se transformen a notacion cientifica en las predicciones
pd.set_option('display.float_format', lambda x: '%.2f' % x)
pd.set_option('display.max_columns', None)

In [21]:
columns = [
    'Open_time',
    'Open',
    'High',
    'Low',
    # 'Close',
    'Number of trades',
    'Close_BTCUSDT',
    'Volume_BTCUSDT',
    'Number_of_trades_BTCUSDT',
    'Close_ETHUSDT',
    'Volume_ETHUSDT',
    'Number_of_trades_ETHUSDT',
    'Close_BNBUSDT',
    'Volume_BNBUSDT',
    'Number_of_trades_BNBUSDT',
    'SMA_20',
    'EMA_20',
    'Upper_Band',
    'Middle_Band',
    'Lower_Band',
    'RSI',
    'buy_1000x_high_coinbase',
    'sell_1000x_high_coinbase',
    'total_trades_coinbase',	
    'Tweets_Utilizados',
    'Tweets_Utilizados_coin',
    'Tweets_Utilizados_referentes',
    'Tweets_Utilizados_whale_alert',
    'Buy_1000x_high',
    'sell_1000x_high',
    'total_trades_binance'
]

### Armado y entrenamiento de un clasificador a partir de los datos originales

#### Modelo LSTM

In [22]:
complete_dataset = pd.read_csv('/Users/mmarchetta/Desktop/Tesis-2024/data-visualization/final_dataset.csv') 
classifier_dataset = complete_dataset[columns]
classifier_dataset['Open_time'] = pd.to_datetime(classifier_dataset['Open_time'])
classifier_dataset['Tendencia'] = complete_dataset['Tendencia']

clasifier_validation = classifier_dataset[-5:]
classifier_dataset = classifier_dataset[:-5]

In [23]:
display(classifier_dataset.tail())

Unnamed: 0,Open_time,Open,High,Low,Number of trades,Close_BTCUSDT,Volume_BTCUSDT,Number_of_trades_BTCUSDT,Close_ETHUSDT,Volume_ETHUSDT,Number_of_trades_ETHUSDT,Close_BNBUSDT,Volume_BNBUSDT,Number_of_trades_BNBUSDT,SMA_20,EMA_20,Upper_Band,Middle_Band,Lower_Band,RSI,buy_1000x_high_coinbase,sell_1000x_high_coinbase,total_trades_coinbase,Tweets_Utilizados,Tweets_Utilizados_coin,Tweets_Utilizados_referentes,Tweets_Utilizados_whale_alert,Buy_1000x_high,sell_1000x_high,total_trades_binance,Tendencia
898,2024-03-13,10.74,11.46,10.64,276468.0,73072.41,52659.71,2501197.0,4004.79,482305.78,1536498.0,630.5,2526002.56,1265237.0,9.45,9.7,11.82,9.45,7.07,73.43,64.0,81.0,92576.0,275,205,1.0,73.0,578.0,553.0,164000.0,Alcista
899,2024-03-14,11.37,11.89,10.68,536988.0,71388.94,71757.63,2994869.0,3881.7,648237.52,1919963.0,603.2,2119540.3,1038297.0,9.65,9.88,12.03,9.65,7.27,74.51,102.0,133.0,145727.0,211,181,4.0,29.0,754.0,677.0,327000.0,Alcista
900,2024-03-15,11.56,11.71,9.97,557152.0,69499.85,103334.04,3904445.0,3742.19,947537.41,2487337.0,632.7,3066312.79,1365283.0,9.8,9.97,12.07,9.8,7.52,63.27,88.0,83.0,147460.0,238,106,0.0,25.0,493.0,430.0,360000.0,Bajista
901,2024-03-16,10.81,10.9,9.5,330505.0,65300.63,55926.95,2729019.0,3523.09,548288.16,1798939.0,576.4,1811838.04,1025452.0,9.89,9.94,11.99,9.89,7.78,50.82,30.0,49.0,88095.0,670,471,0.0,20.0,513.0,403.0,209000.0,Bajista
902,2024-03-17,9.68,10.25,9.19,229683.0,68393.48,49742.22,2449156.0,3644.71,517790.99,1721355.0,571.7,1712920.34,802297.0,9.98,9.95,11.93,9.98,8.04,54.27,36.0,48.0,71390.0,693,413,0.0,21.0,295.0,277.0,150000.0,Alcista


In [24]:
classifier_dataset.shape

(903, 31)

In [25]:
X = classifier_dataset.drop(columns=["Tendencia", "Open_time"])
y = classifier_dataset["Tendencia"]

# label_encoder = LabelEncoder()
# y_encoded = label_encoder.fit_transform(classifier_dataset["Tendencia"])

y = y.to_numpy().reshape(-1, 1)
onehot_encoder = OneHotEncoder(sparse=False)
y_one_hot = onehot_encoder.fit_transform(y)


In [26]:
display(y_one_hot)

array([[0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       ...,
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [27]:
import warnings
warnings.filterwarnings('ignore')

def create_model(activation, units, dropout, learning_rate, l2_penalty, depth, optimizer='adam'):
    model = Sequential()
    model.add(LSTM(units=int(units/2), activation=activation, input_shape=(len(X.columns), 1), return_sequences=True, kernel_regularizer=l2(l2_penalty)))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    
    for _ in range(depth - 1):
        model.add(LSTM(units=units, activation=activation, return_sequences=True, kernel_regularizer=l2(l2_penalty)),)
        model.add(Dropout(dropout))
        model.add(BatchNormalization())
    
    model.add(LSTM(units=int(units*2), activation=activation, kernel_regularizer=l2(l2_penalty)))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    model.add(Dense(units=3, activation='softmax'))
    
    if optimizer == 'adam':
        optimizer = Adam(learning_rate=learning_rate)
    elif optimizer == 'rmsprop':
        optimizer = RMSprop(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        optimizer = SGD(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
regressor = KerasRegressor(build_fn=create_model, verbose=0, activation='relu', units=50, dropout=0.2, learning_rate=0.1, l2_penalty=0.001, depth=2, optimizer='adam')

cv = TimeSeriesSplit(n_splits=10).split(X)
param_space = {
    'depth': [2, 3, 4, 5],
    'activation': ['relu', 'tanh', 'swish', 'selu'],
    'units': [64, 128, 256, 512],
    'dropout': [0.1, 0.2, 0.3, 0.4],
    'learning_rate': [0.01, 0.001, 0.0001],
    'epochs': [10, 20, 30, 50, 100],
    'batch_size': [32, 64, 128],
    'optimizer': ['adam', 'rmsprop', 'sgd'],
    'l2_penalty': [0.001, 0.01, 0.1]
}

def categorical_crossentropy_loss(estimator, X_test, y_test):
    y_pred = estimator.predict(X_test)
    
    if np.isnan(y_pred).any():
        y_pred[np.isnan(y_pred)] = 0
    
    loss = log_loss(y_test, y_pred)
    return loss

bayes_search = BayesSearchCV(regressor, param_space, scoring=categorical_crossentropy_loss, cv=cv, verbose=0)#10)
bayes_result = bayes_search.fit(X, y_one_hot, callbacks=[early_stopping])

In [28]:
# Show best results
print("Best score:", bayes_result.best_score_)
print("Best parameters:", bayes_result.best_params_)

# Entrenar el modelo con los mejores hiperparámetros
best_model = bayes_result.best_estimator_
best_model.fit(X, y_one_hot)

Best score: 5.120278119888486
Best parameters: OrderedDict([('activation', 'selu'), ('batch_size', 64), ('depth', 4), ('dropout', 0.1), ('epochs', 10), ('l2_penalty', 0.001), ('learning_rate', 0.001), ('optimizer', 'rmsprop'), ('units', 512)])


In [29]:
import json

# Obtener los hiperparámetros y puntajes de los 5 mejores modelos
top_n_models = 5
best_params_list = []
best_scores_list = []

for i in range(min(top_n_models, len(bayes_search.cv_results_['params']))):
    best_params_list.append(bayes_search.cv_results_['params'][i])
    best_scores_list.append(bayes_search.cv_results_['mean_test_score'][i])

# Guardar los hiperparámetros de los 5 mejores modelos en un archivo JSON
with open('lstm_classifier/top_5_hyperparameters.json', 'w') as f:
    json.dump({'best_params': best_params_list, 'best_scores': best_scores_list}, f)

# O imprimir los hiperparámetros
print("Top 5 mejores modelos:")
for i in range(len(best_params_list)):
    print("Modelo", i+1)
    print("Hiperparámetros:", best_params_list[i])
    print("Puntaje:", best_scores_list[i])


Top 5 mejores modelos:
Modelo 1
Hiperparámetros: OrderedDict([('activation', 'relu'), ('batch_size', 64), ('depth', 4), ('dropout', 0.2), ('epochs', 30), ('l2_penalty', 0.01), ('learning_rate', 0.0001), ('optimizer', 'adam'), ('units', 64)])
Puntaje: 1.157736949168703
Modelo 2
Hiperparámetros: OrderedDict([('activation', 'selu'), ('batch_size', 32), ('depth', 4), ('dropout', 0.4), ('epochs', 20), ('l2_penalty', 0.1), ('learning_rate', 0.01), ('optimizer', 'sgd'), ('units', 128)])
Puntaje: 1.1129858996812934
Modelo 3
Hiperparámetros: OrderedDict([('activation', 'relu'), ('batch_size', 64), ('depth', 5), ('dropout', 0.4), ('epochs', 20), ('l2_penalty', 0.1), ('learning_rate', 0.0001), ('optimizer', 'sgd'), ('units', 64)])
Puntaje: 1.1185720457866446
Modelo 4
Hiperparámetros: OrderedDict([('activation', 'relu'), ('batch_size', 128), ('depth', 3), ('dropout', 0.3), ('epochs', 50), ('l2_penalty', 0.1), ('learning_rate', 0.001), ('optimizer', 'sgd'), ('units', 512)])
Puntaje: 2.2327541293501

#### Armado del ensamble

In [30]:
def generate_prime_seeds(n):
    seeds = []
    num = 70001  # Comenzamos desde el primer número primo mayor que 70000
    while len(seeds) < n:
        is_prime = True
        for i in range(2, int(num**0.5) + 1):
            if num % i == 0:
                is_prime = False
                break
        if is_prime:
            seeds.append(num)
        num += 1
    return seeds


In [31]:
## Clase personalizada para hacer el ensamble, dado que sklearn no provee ninguna clase que permita hacer ensmble
## de modelos re regresion multivariados
class MultivariableVotingRegressor:
    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        for model in self.models:
            model.fit(X, y)

    def predict(self, X):
        # Hacer predicciones con cada modelo
        predictions = np.array([model.predict(X) for model in self.models])
        
        # Calcular la moda de las predicciones
        mode_predictions = np.argmax(np.sum(predictions, axis=0), axis=1)
        
        return mode_predictions


In [32]:

import json

# Leer los hiperparámetros desde el archivo JSON
with open('lstm_classifier/top_5_hyperparameters.json', 'r') as f:
    top_hyperparameters = json.load(f)

prime_seeds = generate_prime_seeds(30)
models = []
best_seeds= {}

# Train models with different seeds for each set of hyperparameters
for mode_number, params in enumerate(top_hyperparameters['best_params']):
    best_validation_errors = {}
    
    for seed_number, seed in enumerate(prime_seeds):
        model = KerasRegressor(build_fn=create_model, random_state=seed, verbose=1, **params)
        model.fit(X, y_one_hot)
        
        # Make predictions with the model
        model_predictions = model.predict(X)
        
        # Calculate error (training error)
        train_error = categorical_crossentropy(y_one_hot, model_predictions)
        
        mean_train_error = np.mean(train_error)

        # Update best validation error for this seed
        best_validation_errors[seed] = mean_train_error
    
        print(f"model number {mode_number}, seed number {seed_number}")
    # print("Best validation errors:", best_validation_errors)

    # Find the best seed for this set of hyperparameters
    best_seed_for_params = min(best_validation_errors, key=lambda k: best_validation_errors[k])
    best_seeds[str(params)] = best_seed_for_params
    
    # Create and train the model with the best seed
    model = KerasRegressor(build_fn=create_model, random_state=best_seed_for_params, verbose=1, **params)
    model.fit(X, y_one_hot)
    models.append(model)

# Define and train the ensemble model
ensemble = MultivariableVotingRegressor(models)
ensemble.fit(X, y_one_hot)

# Save the best seeds to a JSON file
with open('lstm_classifier/best_seeds.json', 'w') as f:
    json.dump(best_seeds, f)    

Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


#### Clasificacion con el ensamble sobre las redicciones de los modelos generativos

In [33]:
with open('lstm_classifier/best_seeds.json', 'r') as f:
    best_seeds = json.load(f)

# 21 Crear y entrenar los modelos con los hiperparámetros y semillas guardados
models = []
for params_str, seed in best_seeds.items():
    params = json.loads(params_str.replace("'", "\""))
    model = KerasRegressor(build_fn=create_model, random_state=seed, **params)
    model.fit(X, y_one_hot)
    models.append(model)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoc

In [34]:
ensemble = MultivariableVotingRegressor(models)
ensemble.fit(X, y_one_hot)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoc

##### Datos originales:

In [35]:
display(clasifier_validation)

Unnamed: 0,Open_time,Open,High,Low,Number of trades,Close_BTCUSDT,Volume_BTCUSDT,Number_of_trades_BTCUSDT,Close_ETHUSDT,Volume_ETHUSDT,Number_of_trades_ETHUSDT,Close_BNBUSDT,Volume_BNBUSDT,Number_of_trades_BNBUSDT,SMA_20,EMA_20,Upper_Band,Middle_Band,Lower_Band,RSI,buy_1000x_high_coinbase,sell_1000x_high_coinbase,total_trades_coinbase,Tweets_Utilizados,Tweets_Utilizados_coin,Tweets_Utilizados_referentes,Tweets_Utilizados_whale_alert,Buy_1000x_high,sell_1000x_high,total_trades_binance,Tendencia
903,2024-03-18,10.08,10.46,9.6,245319.0,67609.99,55691.08,2464515.0,3520.46,570901.29,1906387.0,555.4,2284301.81,994512.0,10.06,9.95,11.86,10.06,8.26,52.48,34.0,43.0,84706.0,696,471,0.0,43.0,343.0,228.0,154000.0,Bajista
904,2024-03-19,9.9,9.99,8.6,341363.0,61937.4,101005.32,3593832.0,3158.64,1049629.69,2647385.0,507.7,2551361.51,1213572.0,10.08,9.84,11.81,10.08,8.35,42.93,120.0,126.0,135180.0,961,509,1.0,56.0,534.0,433.0,221000.0,Bajista
905,2024-03-20,8.77,9.57,8.49,267797.0,67840.51,90420.59,3549793.0,3516.53,1207322.82,2987953.0,556.8,1425296.58,809335.0,10.14,9.8,11.68,10.14,8.6,49.21,185.0,117.0,112997.0,866,555,1.0,40.0,473.0,386.0,171000.0,Alcista
906,2024-03-21,9.48,9.58,9.07,156774.0,65501.27,53357.48,2388390.0,3492.85,602755.21,1791989.0,553.8,953921.37,563996.0,10.17,9.74,11.63,10.17,8.71,46.85,64.0,81.0,66543.0,692,533,0.0,24.0,350.0,290.0,101000.0,Bajista
907,2024-03-22,9.18,9.37,8.69,147578.0,63796.64,51482.38,2492881.0,3336.35,558848.89,1747756.0,553.8,1181298.51,712381.0,10.14,9.67,11.67,10.14,8.62,45.0,57.0,66.0,68616.0,681,546,0.0,41.0,252.0,206.0,92000.0,Bajista


In [36]:
n_clases = 3 

validation_predictions = ensemble.predict(clasifier_validation.drop(columns=["Open_time", "Tendencia"]))
predicciones_one_hot = to_categorical(validation_predictions, num_classes=n_clases)
etiquetas_numericas = np.argmax(predicciones_one_hot, axis=1)
categorias_clases = onehot_encoder.categories_[0]
nombres_clases = [categorias_clases[indice] for indice in etiquetas_numericas]

display(validation_predictions)
display(categorias_clases)
display(nombres_clases)



array([1, 1, 1, 0, 1])

array(['Alcista', 'Bajista', 'Lateral'], dtype=object)

['Bajista', 'Bajista', 'Bajista', 'Alcista', 'Bajista']

##### Datos generados por auto ml con prophet:

In [37]:
auto_ml_prophet_df = pd.read_csv('auto_timeseries_models_prophet/predicciones.csv')
auto_mp_prophet_predictions = ensemble.predict(auto_ml_prophet_df.drop(columns=["Open_time", "Close"]))

predicciones_one_hot = to_categorical(auto_mp_prophet_predictions, num_classes=n_clases)
etiquetas_numericas = np.argmax(predicciones_one_hot, axis=1)
categorias_clases = onehot_encoder.categories_[0]
nombres_clases = [categorias_clases[indice] for indice in etiquetas_numericas]

display(auto_mp_prophet_predictions)
display(categorias_clases)
display(nombres_clases)



array([1, 1, 1, 1, 0])

array(['Alcista', 'Bajista', 'Lateral'], dtype=object)

['Bajista', 'Bajista', 'Bajista', 'Bajista', 'Alcista']

##### Datos generados por auto ml con stats:

In [38]:
auto_ml_stats_df = pd.read_csv('auto_timeseries_models/predicciones.csv')
auto_mp_stats_predictions = ensemble.predict(auto_ml_stats_df.drop(columns=["Open_time", "Close"]))

predicciones_one_hot = to_categorical(auto_mp_stats_predictions, num_classes=n_clases)
etiquetas_numericas = np.argmax(predicciones_one_hot, axis=1)
categorias_clases = onehot_encoder.categories_[0]
nombres_clases = [categorias_clases[indice] for indice in etiquetas_numericas]

display(auto_mp_stats_predictions)
display(categorias_clases)
display(nombres_clases)



array([1, 1, 1, 1, 1])

array(['Alcista', 'Bajista', 'Lateral'], dtype=object)

['Bajista', 'Bajista', 'Bajista', 'Bajista', 'Bajista']

##### Datos generados por auto ml con modelos clasicos:

In [39]:
auto_ml_df = pd.read_csv('h2o_models/predicciones.csv')
auto_mp_predictions = ensemble.predict(auto_ml_df.drop(columns=["Open_time", "Next_Day_Target", "Close"]))

predicciones_one_hot = to_categorical(auto_mp_predictions, num_classes=n_clases)
etiquetas_numericas = np.argmax(predicciones_one_hot, axis=1)
categorias_clases = onehot_encoder.categories_[0]
nombres_clases = [categorias_clases[indice] for indice in etiquetas_numericas]

display(auto_mp_predictions)
display(categorias_clases)
display(nombres_clases)



array([1, 1, 1, 1, 1])

array(['Alcista', 'Bajista', 'Lateral'], dtype=object)

['Bajista', 'Bajista', 'Bajista', 'Bajista', 'Bajista']