In [200]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from statsmodels.tsa.vector_ar.var_model import VAR
import matplotlib.pyplot as plt
from IPython.display import display

pd.set_option('display.max_columns', None)

In [201]:
dataset = pd.read_csv('/Users/mmarchetta/Desktop/Tesis-2024/data-visualization/final_dataset.csv')
dataset1 = pd.read_csv('/Users/mmarchetta/Desktop/Tesis-2024/data-cleaning/final_dataset_first_cleaning.csv')
dataset2= pd.read_csv('/Users/mmarchetta/Desktop/Tesis-2024/data-cleaning/final_dataset_cleaned_kbest.csv')
dataset3 = pd.read_csv('/Users/mmarchetta/Desktop/Tesis-2024/data-cleaning/final_dataset_cleaned_random_forest_feature_selector.csv')

dataset4 = pd.read_csv('/Users/mmarchetta/Desktop/Tesis-2024/data-cleaning/final_dataset_first_cleaning_denormalized.csv')
dataset5= pd.read_csv('/Users/mmarchetta/Desktop/Tesis-2024/data-cleaning/final_dataset_cleaned_kbest_denormalized.csv')
dataset6 = pd.read_csv('/Users/mmarchetta/Desktop/Tesis-2024/data-cleaning/final_dataset_cleaned_random_forest_feature_selector_denormalized.csv')

dataset.drop(['Open_time'], axis=1, inplace=True)
dataset.drop(['Sentimiento'], axis=1, inplace=True)
dataset.drop(['Sentimiento_coin'], axis=1, inplace=True)
dataset.drop(['Sentimiento_referentes'], axis=1, inplace=True)

dataset1.drop(['Open_time'], axis=1, inplace=True)
dataset2.drop(['Open_time'], axis=1, inplace=True)
dataset3.drop(['Open_time'], axis=1, inplace=True)
dataset4.drop(['Open_time'], axis=1, inplace=True)
dataset5.drop(['Open_time'], axis=1, inplace=True)
dataset6.drop(['Open_time'], axis=1, inplace=True)


In [202]:
display(dataset[['Close', 'Tendencia']].head(20))

Unnamed: 0,Close,Tendencia
0,27.71,Lateral
1,26.31,Bajista
2,27.28,Alcista
3,28.62,Alcista
4,31.94,Alcista
5,32.01,Lateral
6,32.11,Lateral
7,31.15,Bajista
8,31.39,Lateral
9,32.19,Alcista


In [203]:
def basic_logistic_regression(dataset):
    # Dividir los datos en conjunto de entrenamiento y conjunto de prueba
    X_train, X_test, y_train, y_test = train_test_split(dataset.drop(columns=["Tendencia"]), 
                                                        dataset["Tendencia"], 
                                                        test_size=0.2, 
                                                        random_state=42)

    # Entrenar un modelo final de Regresión Logística utilizando las características seleccionadas
    final_model = LogisticRegression()
    final_model.fit(X_train, y_train)

    # Evaluar el modelo final
    accuracy = final_model.score(X_test, y_test)
    
    y_pred = final_model.predict(X_test)
    
    # Calcular el F1-score
    f1score = f1_score(y_test, y_pred, average='weighted')
    
    roc_auc = roc_auc_score(y_test, final_model.predict_proba(X_test), multi_class='ovr')

    return accuracy, f1score, roc_auc, final_model


In [204]:
def basic_gradient_boosting(dataset):
    X_train, X_test, y_train, y_test = train_test_split(dataset.drop(columns=["Tendencia"]), 
                                                        dataset["Tendencia"], 
                                                        test_size=0.2, 
                                                        random_state=42)

    # Crear el clasificador GradientBoostingClassifier
    gb_model = GradientBoostingClassifier(
        n_estimators=1000,  # Usar 1000 estimadores
        learning_rate=0.1,  # Tasa de aprendizaje
        max_depth=5,  # Profundidad máxima de cada árbol
        min_samples_split=2,  # Número mínimo de muestras requeridas para dividir un nodo interno
        min_samples_leaf=1,  # Número mínimo de muestras requeridas para estar en una hoja
        subsample=0.8,  # Fracción de muestras a utilizar para ajustar los estimadores base
        max_features='sqrt',  # Número máximo de características a considerar al dividir nodos: raíz cuadrada del número de características
        random_state=42
    )

    # Entrenar el modelo
    gb_model.fit(X_train, y_train)

    # Evaluar el modelo
    accuracy = gb_model.score(X_test, y_test)
    
    y_pred = gb_model.predict(X_test)
    
    # Calcular el F1-score
    f1score = f1_score(y_test, y_pred, average='weighted')
    
    roc_auc = roc_auc_score(y_test, gb_model.predict_proba(X_test), multi_class='ovr')

    return accuracy, f1score, roc_auc, gb_model


In [205]:
def basic_svm(dataset):
    X_train, X_test, y_train, y_test = train_test_split(dataset.drop(columns=["Tendencia"]), 
                                                        dataset["Tendencia"], 
                                                        test_size=0.2, 
                                                        random_state=42)

    # Codificar las etiquetas de destino numéricamente
    label_encoder = LabelEncoder()
    y_train_encoded = label_encoder.fit_transform(y_train)
    y_test_encoded = label_encoder.transform(y_test)

    svc_model = SVC(
        kernel='rbf',  # Kernel radial
        C=10.0,  # Parámetro de regularización
        gamma='scale',  # Coeficiente de kernel para 'rbf'
        probability=True,  # Habilitar el cálculo de probabilidades
        random_state=42
    )

    # Entrenar el modelo
    svc_model.fit(X_train, y_train_encoded)

    # Predecir las probabilidades de clase
    y_prob = svc_model.predict_proba(X_test)

    # Calcular el F1-score
    f1score = f1_score(y_test_encoded, y_prob.argmax(axis=1), average='weighted')

    # Calcular el ROC AUC
    roc_auc = roc_auc_score(y_test_encoded, y_prob, multi_class='ovr')

    # Calcular la exactitud
    accuracy = svc_model.score(X_test, y_test_encoded)

    return accuracy, f1score, roc_auc, svc_model

In [206]:
def basic_MLP(dataset):
    X_train, X_test, y_train, y_test = train_test_split(dataset.drop(columns=["Tendencia"]), 
                                                        dataset["Tendencia"], 
                                                        test_size=0.2, 
                                                        random_state=42)

    mlp_model = MLPClassifier(
        hidden_layer_sizes=(100, 50),  # Dos capas ocultas con 100 y 50 neuronas respectivamente
        activation='relu',  # Función de activación ReLU
        solver='adam',  # Optimizador Adam
        alpha=0.0001,  # Tasa de regularización L2
        learning_rate='adaptive',  # Tasa de aprendizaje adaptativa
        max_iter=1000,  # Número máximo de iteraciones
        random_state=42
    )

    # Entrenar el modelo
    mlp_model.fit(X_train, y_train)

    # Evaluar el modelo
    accuracy = mlp_model.score(X_test, y_test)
    
    y_pred = mlp_model.predict(X_test)
    
    # Calcular el F1-score
    f1score = f1_score(y_test, y_pred, average='weighted')

    roc_auc = roc_auc_score(y_test, mlp_model.predict_proba(X_test), multi_class='ovr')

    return accuracy, f1score, roc_auc, mlp_model

In [207]:
def predict_constant_class(y_true, constant_class):
    y_pred_constant = [constant_class] * len(y_true)
    
    # Calcular el accuracy entre las etiquetas verdaderas y las predicciones constantes
    accuracy = accuracy_score(y_true, y_pred_constant)
    
    return accuracy

In [208]:
performance_rl_0, f1_score_rl_0, roc_auc_rl_0, modelo_rl_0 = basic_logistic_regression(dataset)
performance_rl_1, f1_score_rl_1, roc_auc_rl_1, modelo_rl_1 = basic_logistic_regression(dataset1)
performance_rl_2, f1_score_rl_2, roc_auc_rl_2, modelo_rl_2 = basic_logistic_regression(dataset2)
performance_rl_3, f1_score_rl_3, roc_auc_rl_3, modelo_rl_3 = basic_logistic_regression(dataset3)
performance_rl_4, f1_score_rl_4, roc_auc_rl_4, modelo_rl_4 = basic_logistic_regression(dataset4)
performance_rl_5, f1_score_rl_5, roc_auc_rl_5, modelo_rl_5 = basic_logistic_regression(dataset5)
performance_rl_6, f1_score_rl_6, roc_auc_rl_6, modelo_rl_6 = basic_logistic_regression(dataset6)

# Imprimir las métricas
print("Métricas de los modelos:")
print(f"Regresión Logística 0: Accuracy={performance_rl_0}, F1-Score={f1_score_rl_0}, ROC AUC={roc_auc_rl_0}")
print(f"Regresión Logística 1: Accuracy={performance_rl_1}, F1-Score={f1_score_rl_1}, ROC AUC={roc_auc_rl_1}")
print(f"Regresión Logística 2: Accuracy={performance_rl_2}, F1-Score={f1_score_rl_2}, ROC AUC={roc_auc_rl_2}")
print(f"Regresión Logística 3: Accuracy={performance_rl_3}, F1-Score={f1_score_rl_3}, ROC AUC={roc_auc_rl_3}")
print(f"Regresión Logística 4: Accuracy={performance_rl_4}, F1-Score={f1_score_rl_4}, ROC AUC={roc_auc_rl_4}")
print(f"Regresión Logística 5: Accuracy={performance_rl_5}, F1-Score={f1_score_rl_5}, ROC AUC={roc_auc_rl_5}")
print(f"Regresión Logística 6: Accuracy={performance_rl_6}, F1-Score={f1_score_rl_6}, ROC AUC={roc_auc_rl_6}")
print("-----------------------------------")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Métricas de los modelos:
Regresión Logística 0: Accuracy=0.3485714285714286, F1-Score=0.26467992766726944, ROC AUC=0.6951934654759687
Regresión Logística 1: Accuracy=0.3942857142857143, F1-Score=0.3502284430373844, ROC AUC=0.6656924028980788
Regresión Logística 2: Accuracy=0.37142857142857144, F1-Score=0.2586673070883597, ROC AUC=0.7152827801832217
Regresión Logística 3: Accuracy=0.33714285714285713, F1-Score=0.23662038240851488, ROC AUC=0.715072890139859
Regresión Logística 4: Accuracy=0.3657142857142857, F1-Score=0.31975064935064934, ROC AUC=0.6981562681399223
Regresión Logística 5: Accuracy=0.33714285714285713, F1-Score=0.2552544551562957, ROC AUC=0.6961907297006752
Regresión Logística 6: Accuracy=0.35428571428571426, F1-Score=0.26814862530378003, ROC AUC=0.6995025918149591
-----------------------------------


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [209]:
performance_gb_0, f1_score_gb_0, roc_auc_gb_0, modelo_gb_0 = basic_gradient_boosting(dataset)
performance_gb_1, f1_score_gb_1, roc_auc_gb_1, modelo_gb_1 = basic_gradient_boosting(dataset1)
performance_gb_2, f1_score_gb_2, roc_auc_gb_2, modelo_gb_2 = basic_gradient_boosting(dataset2)
performance_gb_3, f1_score_gb_3, roc_auc_gb_3, modelo_gb_3 = basic_gradient_boosting(dataset3)
performance_gb_4, f1_score_gb_4, roc_auc_gb_4, modelo_gb_4 = basic_gradient_boosting(dataset4)
performance_gb_5, f1_score_gb_5, roc_auc_gb_5, modelo_gb_5 = basic_gradient_boosting(dataset5)
performance_gb_6, f1_score_gb_6, roc_auc_gb_6, modelo_gb_6 = basic_gradient_boosting(dataset6)

# Imprimir las métricas
print(f"Gradient Boosting 0: Accuracy={performance_gb_0}, F1-Score={f1_score_gb_0}, ROC AUC={roc_auc_gb_0}")
print(f"Gradient Boosting 1: Accuracy={performance_gb_1}, F1-Score={f1_score_gb_1}, ROC AUC={roc_auc_gb_1}")
print(f"Gradient Boosting 2: Accuracy={performance_gb_2}, F1-Score={f1_score_gb_2}, ROC AUC={roc_auc_gb_2}")
print(f"Gradient Boosting 3: Accuracy={performance_gb_3}, F1-Score={f1_score_gb_3}, ROC AUC={roc_auc_gb_3}")
print(f"Gradient Boosting 4: Accuracy={performance_gb_4}, F1-Score={f1_score_gb_4}, ROC AUC={roc_auc_gb_4}")
print(f"Gradient Boosting 5: Accuracy={performance_gb_5}, F1-Score={f1_score_gb_5}, ROC AUC={roc_auc_gb_5}")
print(f"Gradient Boosting 6: Accuracy={performance_gb_6}, F1-Score={f1_score_gb_6}, ROC AUC={roc_auc_gb_6}")
print("-----------------------------------")

Gradient Boosting 0: Accuracy=0.6571428571428571, F1-Score=0.6571428571428571, ROC AUC=0.7946012886593596
Gradient Boosting 1: Accuracy=0.6171428571428571, F1-Score=0.6175002652000424, ROC AUC=0.7886180502151171
Gradient Boosting 2: Accuracy=0.5371428571428571, F1-Score=0.5377112796338047, ROC AUC=0.7492338579307405
Gradient Boosting 3: Accuracy=0.5371428571428571, F1-Score=0.535557886373698, ROC AUC=0.7280869285456159
Gradient Boosting 4: Accuracy=0.6342857142857142, F1-Score=0.633522701518471, ROC AUC=0.798150656284281
Gradient Boosting 5: Accuracy=0.5485714285714286, F1-Score=0.5496099633699634, ROC AUC=0.7599795637781308
Gradient Boosting 6: Accuracy=0.5371428571428571, F1-Score=0.535557886373698, ROC AUC=0.7280869285456159
-----------------------------------


In [210]:
performance_svm_0, f1_score_svm_0, roc_auc_svm_0, modelo_svm_0 = basic_svm(dataset)
performance_svm_1, f1_score_svm_1, roc_auc_svm_1, modelo_svm_1 = basic_svm(dataset1)
performance_svm_2, f1_score_svm_2, roc_auc_svm_2, modelo_svm_2 = basic_svm(dataset2)
performance_svm_3, f1_score_svm_3, roc_auc_svm_3, modelo_svm_3 = basic_svm(dataset3)
performance_svm_4, f1_score_svm_4, roc_auc_svm_4, modelo_svm_4 = basic_svm(dataset4)
performance_svm_5, f1_score_svm_5, roc_auc_svm_5, modelo_svm_5 = basic_svm(dataset5)
performance_svm_6, f1_score_svm_6, roc_auc_svm_6, modelo_svm_6 = basic_svm(dataset6)


# Imprimir las métricas
print("Métricas de los modelos de SVM:")
print(f"SVM 0: Accuracy={performance_svm_0}, F1-Score={f1_score_svm_0}, ROC AUC={roc_auc_svm_0}")
print(f"SVM 1: Accuracy={performance_svm_1}, F1-Score={f1_score_svm_1}, ROC AUC={roc_auc_svm_1}")
print(f"SVM 2: Accuracy={performance_svm_2}, F1-Score={f1_score_svm_2}, ROC AUC={roc_auc_svm_2}")
print(f"SVM 3: Accuracy={performance_svm_3}, F1-Score={f1_score_svm_3}, ROC AUC={roc_auc_svm_3}")
print(f"SVM 4: Accuracy={performance_svm_4}, F1-Score={f1_score_svm_4}, ROC AUC={roc_auc_svm_4}")
print(f"SVM 5: Accuracy={performance_svm_5}, F1-Score={f1_score_svm_5}, ROC AUC={roc_auc_svm_5}")
print(f"SVM 6: Accuracy={performance_svm_6}, F1-Score={f1_score_svm_6}, ROC AUC={roc_auc_svm_6}")
print("-----------------------------------")

Métricas de los modelos de SVM:
SVM 0: Accuracy=0.5371428571428571, F1-Score=0.4104784743633665, ROC AUC=0.6495643963494045
SVM 1: Accuracy=0.5371428571428571, F1-Score=0.5215056560158928, ROC AUC=0.6752456695463019
SVM 2: Accuracy=0.5142857142857142, F1-Score=0.38654287187039765, ROC AUC=0.6382685222913524
SVM 3: Accuracy=0.5142857142857142, F1-Score=0.4011094133441078, ROC AUC=0.6388620428668191
SVM 4: Accuracy=0.5428571428571428, F1-Score=0.41656225367425787, ROC AUC=0.6725577925801188
SVM 5: Accuracy=0.5428571428571428, F1-Score=0.41656225367425787, ROC AUC=0.6715068968002905
SVM 6: Accuracy=0.5371428571428571, F1-Score=0.41916305916305924, ROC AUC=0.6735649560452579
-----------------------------------


In [211]:
performance_mlp_0, f1_score_mlp_0, roc_auc_mlp_0, modelo_mlp_0 = basic_MLP(dataset)
performance_mlp_1, f1_score_mlp_1, roc_auc_mlp_1, modelo_mlp_1 = basic_MLP(dataset1)
performance_mlp_2, f1_score_mlp_2, roc_auc_mlp_2, modelo_mlp_2 = basic_MLP(dataset2)
performance_mlp_3, f1_score_mlp_3, roc_auc_mlp_3, modelo_mlp_3 = basic_MLP(dataset3)
performance_mlp_4, f1_score_mlp_4, roc_auc_mlp_4, modelo_mlp_4 = basic_MLP(dataset4)
performance_mlp_5, f1_score_mlp_5, roc_auc_mlp_5, modelo_mlp_5 = basic_MLP(dataset5)
performance_mlp_6, f1_score_mlp_6, roc_auc_mlp_6, modelo_mlp_6 = basic_MLP(dataset6)

# Imprimir las métricas
print("Métricas de los modelos de MLP:")
print(f"MLP 0: Accuracy={performance_mlp_0}, F1-Score={f1_score_mlp_0}, ROC AUC={roc_auc_mlp_0}")
print(f"MLP 1: Accuracy={performance_mlp_1}, F1-Score={f1_score_mlp_1}, ROC AUC={roc_auc_mlp_1}")
print(f"MLP 2: Accuracy={performance_mlp_2}, F1-Score={f1_score_mlp_2}, ROC AUC={roc_auc_mlp_2}")
print(f"MLP 3: Accuracy={performance_mlp_3}, F1-Score={f1_score_mlp_3}, ROC AUC={roc_auc_mlp_3}")
print(f"MLP 4: Accuracy={performance_mlp_4}, F1-Score={f1_score_mlp_4}, ROC AUC={roc_auc_mlp_4}")
print(f"MLP 5: Accuracy={performance_mlp_5}, F1-Score={f1_score_mlp_5}, ROC AUC={roc_auc_mlp_5}")
print(f"MLP 6: Accuracy={performance_mlp_6}, F1-Score={f1_score_mlp_6}, ROC AUC={roc_auc_mlp_6}")
print("-----------------------------------")

Métricas de los modelos de MLP:
MLP 0: Accuracy=0.45714285714285713, F1-Score=0.3811259847404007, ROC AUC=0.5725002108690732
MLP 1: Accuracy=0.37142857142857144, F1-Score=0.2589137652875358, ROC AUC=0.49930565901719565
MLP 2: Accuracy=0.28, F1-Score=0.13296803652968037, ROC AUC=0.502227568329792
MLP 3: Accuracy=0.3142857142857143, F1-Score=0.17199954587649177, ROC AUC=0.5056513872115741
MLP 4: Accuracy=0.41714285714285715, F1-Score=0.3013821138211383, ROC AUC=0.5313533097042024
MLP 5: Accuracy=0.29714285714285715, F1-Score=0.13673830594184574, ROC AUC=0.49981180367359235
MLP 6: Accuracy=0.4, F1-Score=0.2621565091958168, ROC AUC=0.5160565389299766
-----------------------------------


In [214]:
print("Exactitud clase constante - Alcista:", predict_constant_class(dataset['Tendencia'], "Alcista"))
print("Exactitud clase constante - Lateral:", predict_constant_class(dataset['Tendencia'], "Lateral"))
print("Exactitud clase constante - Bajistave:", predict_constant_class(dataset['Tendencia'], "Bajista"))

Exactitud clase constante - Alcista: 0.330654420206659
Exactitud clase constante - Lateral: 0.33295063145809417
Exactitud clase constante - Bajistave: 0.33639494833524686


DPara determinar cuál podría ser el mejor dataset para entrenar futuros modelos, es importante considerar la consistencia de los resultados entre los diferentes modelos. Aquí hay algunas formas de analizar los datos:

1. **Promedio de métricas**: Calcula el promedio de las métricas (Accuracy, F1-Score, ROC AUC) para cada dataset y compara los resultados. Esto te dará una idea general de cuál dataset tuvo el mejor rendimiento en promedio para todos los modelos.

2. **Análisis individual de métricas**: Examina cada métrica por separado para ver qué dataset obtuvo el mejor rendimiento en cada una de ellas. Por ejemplo, puede ser que un dataset tenga el mejor Accuracy pero no el mejor F1-Score.

3. **Consistencia entre modelos**: Observa si hay algún dataset que consistentemente produce buenos resultados en todos los modelos. Esto podría indicar que ese dataset tiene características que son fáciles de aprender para diferentes tipos de algoritmos.

A continuación, realizaré estos análisis:

### 1. Promedio de métricas:

- **Regresión Logística**: 0.343 Accuracy, 0.279 F1-Score, 0.701 ROC AUC
- **Gradient Boosting**: 0.567 Accuracy, 0.566 F1-Score, 0.759 ROC AUC
- **SVM**: 0.529 Accuracy, 0.441 F1-Score, 0.657 ROC AUC
- **MLP**: 0.362 Accuracy, 0.228 F1-Score, 0.510 ROC AUC

Según el promedio de métricas, el mejor rendimiento promedio lo tuvo el modelo de Gradient Boosting.

### 2. Análisis individual de métricas:

- **Accuracy**: Los modelos de Gradient Boosting tienen el mejor rendimiento promedio, seguidos por SVM, Regresión Logística y MLP.
- **F1-Score**: Similar a Accuracy, los modelos de Gradient Boosting tienen el mejor rendimiento promedio, seguidos por SVM, Regresión Logística y MLP.
- **ROC AUC**: Los modelos de Gradient Boosting tienen el mejor rendimiento promedio, seguidos por Regresión Logística, SVM y MLP.

### 3. Consistencia entre modelos:

- El modelo de Gradient Boosting parece ser el más consistente en términos de rendimiento entre los diferentes datasets.
- Los modelos de Regresión Logística y SVM también muestran cierta consistencia, aunque no tan alta como Gradient Boosting.
- MLP muestra un rendimiento inconsistente en general y tiende a tener resultados más bajos en comparación con los otros modelos.

Conclusión: Basándonos en estos análisis, parece que los datasets que funcionaron mejor para los modelos de Gradient Boosting también podrían ser buenos para futuros modelos. Es por esto, que futuros analisis de realizaran primariamente con los datasets 0 y/o 4.

In [213]:
class_distribution = dataset['Tendencia'].value_counts()

# Imprimir la distribución de clases
print(class_distribution)

Tendencia
Bajista    293
Lateral    290
Alcista    288
Name: count, dtype: int64
