# Entrenamiento

### Carga de datos

In [None]:
# pandas provides read_csv; imported in this cell so it runs on a fresh kernel
import pandas as pd

# Load the seven CSV parts and expose each one as a module-level df_part_<i>
# variable, which is how the training cells look them up via globals().
for i in range(1, 8):
    file_name = f'../data/dataFrame_final/df_final_part_1_{i}.csv'
    globals()[f'df_part_{i}'] = pd.read_csv(file_name, encoding='ISO-8859-1')
    print(f'Archivo {file_name} cargado en df_part_{i}')

### Normalización


La normalización es necesaria para redes neuronales y recomendada para regresión logística, SVM y regresión lineal y polinomial.

### Definición de funciones

In [None]:
# Install dask, dask-ml and joblib by shelling out to the pip3.12 executable.
# NOTE(review): `!pip3.12` runs whichever pip3.12 is on PATH, which may not belong
# to the interpreter this kernel uses — confirm, or consider the `%pip` magic.
!pip3.12 install dask dask-ml joblib


In [None]:
import joblib
from sklearn import metrics
from collections import defaultdict

# Evaluate a fitted model, persist it, and report its classification metrics
def evaluate_model(X_test, y_test, model, model_name):
    """Score an already-fitted binary classifier on held-out data and save it.

    The model is not trained here: ``model.predict`` is called directly, so the
    caller must fit it beforehand.

    Args:
        X_test: Feature matrix passed to ``model.predict``.
        y_test: True binary labels aligned with ``X_test``.
        model: Fitted estimator exposing ``predict`` and, optionally,
            ``predict_proba`` or ``decision_function``.
        model_name: Label used in the printed report and as the stem of the
            ``<model_name>.joblib`` file that the model is dumped to.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall``,
        ``f1_score`` and ``auc``.
    """
    predictions = model.predict(X_test)

    accuracy = metrics.accuracy_score(y_test, predictions)
    precision, recall, fscore, _ = metrics.precision_recall_fscore_support(
        y_test, predictions, average='binary'
    )

    # ROC AUC measures how well the model *ranks* samples, so feed it a
    # continuous score when the estimator offers one; hard 0/1 predictions
    # collapse the curve to a single operating point.
    if hasattr(model, 'predict_proba'):
        # Column 1 is the probability of the second (greater) class label
        scores = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, 'decision_function'):
        scores = model.decision_function(X_test)
    else:
        scores = predictions
    auc = metrics.roc_auc_score(y_test, scores)

    # Persist the fitted model next to the notebook
    joblib.dump(model, f"{model_name}.joblib")

    results = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': fscore,
        'auc': auc
    }

    print(f"Resultados para {model_name}: {results}")
    return results


In [None]:
import matplotlib.pyplot as plt

# Plot one bar chart per metric so the models can be compared side by side
def plot_model_comparison(results_dict):
    """Draw a row of bar charts comparing every model on each metric.

    Args:
        results_dict: Mapping of model name to a metrics dict containing the
            keys ``accuracy``, ``precision``, ``recall``, ``f1_score`` and
            ``auc``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Named metric_names so the local does not shadow sklearn's `metrics` module
    metric_names = ['accuracy', 'precision', 'recall', 'f1_score', 'auc']
    model_names = list(results_dict)
    _, axes = plt.subplots(1, len(metric_names), figsize=(18, 5), sharey=True)

    for axis, metric in zip(axes, metric_names):
        axis.bar(model_names, [results_dict[name][metric] for name in model_names])
        axis.set_title(metric.capitalize())
        axis.set_ylim([0, 1])
        # Rotate the labels the bar plot already created; calling
        # set_xticklabels without fixing the ticks first makes matplotlib warn.
        axis.tick_params(axis='x', labelrotation=45)

    plt.tight_layout()
    plt.show()


### Regresión Lineal

In [None]:
from sklearn.linear_model import SGDRegressor
import numpy as np
from joblib import dump

# Linear model trained with stochastic gradient descent so it can learn one part at a time
model = SGDRegressor(tol=1e-3, max_iter=1000)

for part_number in range(1, 8):
    # Look up the dataframe that the loading cell stored as df_part_<n>
    part = globals()[f'df_part_{part_number}']
    features = part.drop(columns=['label']).values
    targets = part['label'].values

    # One incremental update using this part only
    model.partial_fit(features, targets)

# Persist the trained model
dump(model, 'linear_regression_model.joblib')


### Regresión Polinomial

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

# Polynomial regression: expand the features to degree 3, then fit a linear model
polynomial_model = Pipeline([
    ('poly_features', PolynomialFeatures(degree=3)),
    ('linear_regression', LinearRegression())
])

# evaluate_model takes (X_test, y_test, model, model_name) and only predicts and
# scores, so the pipeline has to be fitted here first.
# X_train, X_test, y_train, y_test and results_dict are not defined in this cell;
# they must already exist in the notebook session.
# NOTE(review): LinearRegression predicts continuous values while evaluate_model
# computes classification metrics — confirm whether the predictions need thresholding.
polynomial_model.fit(X_train, y_train)
results_dict['PolynomialRegression'] = evaluate_model(X_test, y_test, polynomial_model, 'PolynomialRegression')


### Regresión Logística

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression

# LogisticRegression has no partial_fit, so incremental training uses SGDClassifier
# with the logistic loss, which fits a logistic-regression model batch by batch.
from sklearn.linear_model import SGDClassifier

model = SGDClassifier(loss='log_loss')

# partial_fit needs the complete set of classes on its first call, so gather the
# labels from every part up front instead of taking them from a single batch.
all_classes = np.unique(
    np.concatenate([globals()[f'df_part_{i}']['label'].values for i in range(1, 8)])
)

for i in range(1, 8):
    data_frame = globals()[f'df_part_{i}']
    # Uses 'label', the target column read by the linear-regression cell.
    # NOTE(review): confirm this is the intended target for classification too.
    X_train = data_frame.drop(columns=['label']).values
    y_train = data_frame['label'].values

    # Incremental training step on this part
    model.partial_fit(X_train, y_train, classes=all_classes)


In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression model
logistic_model = LogisticRegression(max_iter=1000)

# evaluate_model takes (X_test, y_test, model, model_name) and only predicts and
# scores, so the model has to be fitted here first.
# X_test, y_test and results_dict are not defined in this cell; they must already
# exist in the notebook session.
logistic_model.fit(X_train, y_train)
results_dict['LogisticRegression'] = evaluate_model(X_test, y_test, logistic_model, 'LogisticRegression')


### Árboles de decisión

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree model
tree_model = DecisionTreeClassifier()

# evaluate_model takes (X_test, y_test, model, model_name) and only predicts and
# scores, so the model has to be fitted here first.
# X_train, X_test, y_train, y_test and results_dict are not defined in this cell;
# they must already exist in the notebook session.
tree_model.fit(X_train, y_train)
results_dict['DecisionTree'] = evaluate_model(X_test, y_test, tree_model, 'DecisionTree')


### Basado en instancias: KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# K-nearest-neighbours model
knn_model = KNeighborsClassifier(n_neighbors=5)

# evaluate_model takes (X_test, y_test, model, model_name) and only predicts and
# scores, so the model has to be fitted here first.
# X_train, X_test, y_train, y_test and results_dict are not defined in this cell;
# they must already exist in the notebook session.
knn_model.fit(X_train, y_train)
results_dict['KNeighbors'] = evaluate_model(X_test, y_test, knn_model, 'KNeighbors')


### Máquinas de soporte vectorial: SVC

In [None]:
from sklearn.svm import SVC

# Support vector machine with an RBF kernel
svm_model = SVC(kernel='rbf', probability=True)

# evaluate_model takes (X_test, y_test, model, model_name) and only predicts and
# scores, so the model has to be fitted here first.
# X_train, X_test, y_train, y_test and results_dict are not defined in this cell;
# they must already exist in the notebook session.
svm_model.fit(X_train, y_train)
results_dict['SVM'] = evaluate_model(X_test, y_test, svm_model, 'SVM')


### Redes neuronales 

In [None]:
from sklearn.neural_network import MLPClassifier

# Neural network model (multi-layer perceptron with 64 and 32 hidden units)
nn_model = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=1000, random_state=42)

# evaluate_model takes (X_test, y_test, model, model_name) and only predicts and
# scores, so the model has to be fitted here first.
# X_train, X_test, y_train, y_test and results_dict are not defined in this cell;
# they must already exist in the notebook session.
nn_model.fit(X_train, y_train)
results_dict['NeuralNetwork'] = evaluate_model(X_test, y_test, nn_model, 'NeuralNetwork')


## Comparación de resultados

In [None]:
# Plot the metric comparison for the models stored in results_dict
plot_model_comparison(results_dict)

## Ajuste de hiperparámetros

## Nuevos resultados