# Modelos de Predicción:

In [1]:
# Importaciones necesarias para el correcto funcionamiento de todos los modelos y demás operaciones con los datos
import pandas as pd 
import numpy as np
import sklearn
import subprocess

In [2]:
# Path (relative to the notebook) of the CSV used for training and testing
csv_path = "Fitabase Data 4.12.16-5.12.16/test_train_data.csv"

# Load the file and show pandas' truncated text preview of the frame
result_df = pd.read_csv(csv_path)
print(result_df)

                Id                 Time   HeartRate  Intensity  Calories
0       2022484408  2016-04-12 07:21:00  101.600000          1   3.32064
1       2022484408  2016-04-12 07:22:00   87.888889          1   3.94326
2       2022484408  2016-04-12 07:23:00   58.000000          0   1.34901
3       2022484408  2016-04-12 07:24:00   58.000000          0   1.03770
4       2022484408  2016-04-12 07:25:00   56.777778          0   1.03770
...            ...                  ...         ...        ...       ...
333141  8877689391  2016-05-12 13:55:00   60.666667          0   1.33353
333142  8877689391  2016-05-12 13:56:00   61.875000          0   1.33353
333143  8877689391  2016-05-12 13:57:00   58.142857          0   1.33353
333144  8877689391  2016-05-12 13:58:00   61.200000          0   1.33353
333145  8877689391  2016-05-12 13:59:00   58.000000          0   1.33353

[333146 rows x 5 columns]


# Preprocesamiento de datos:

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline

# Parse the 'Time' column of result_df into pandas datetimes
result_df['Time'] = pd.to_datetime(result_df['Time'])

# Derive calendar features from the parsed timestamps
time_parts = result_df['Time'].dt
result_df['Hour'] = time_parts.hour
result_df['Minutes'] = time_parts.minute
result_df['Weekday'] = time_parts.weekday

# Column groups: 'Id' is treated as categorical, the rest as numeric.
# The model inputs are the categorical columns followed by the numeric ones.
categorical_features = ['Id']
numeric_features = ['Hour', 'Minutes', 'Intensity', 'Calories']
features = categorical_features + numeric_features
target = 'HeartRate'

# Feature matrix and target vector
X = result_df[features]
y = result_df[target]

# Hold out 17% of the rows for testing; the seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.17,
    random_state=777,
)

# Preprocessing shared by the pipeline-based models:
# standardize the numeric columns and one-hot encode the categorical ones
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features),
    ]
)



# Instalación de librerías necesarias

In [4]:
# Install XGBoost with the pip that belongs to the interpreter running this kernel.
# A bare `pip` is looked up on PATH and may install into a different environment;
# check=True makes a failed installation raise instead of passing silently.
import sys

subprocess.run([sys.executable, '-m', 'pip', 'install', 'xgboost'], check=True)


CompletedProcess(args=['pip', 'install', 'xgboost'], returncode=0)

In [5]:
# Install LightGBM with the pip that belongs to the interpreter running this kernel.
# A bare `pip` is looked up on PATH and may install into a different environment;
# check=True makes a failed installation raise instead of passing silently.
import sys

subprocess.run([sys.executable, '-m', 'pip', 'install', 'lightgbm'], check=True)

CompletedProcess(args=['pip', 'install', 'lightgbm'], returncode=0)

In [6]:
# Install TensorFlow and scikit-learn with the pip that belongs to the interpreter
# running this kernel. A bare `pip` is looked up on PATH and may install into a
# different environment; check=True makes a failed installation raise instead of
# passing silently.
import sys

subprocess.run([sys.executable, '-m', 'pip', 'install', 'tensorflow', 'scikit-learn'], check=True)

CompletedProcess(args=['pip', 'install', 'tensorflow', 'scikit-learn'], returncode=0)

# Gradient Boosted Trees : XGBoost

In [7]:
from xgboost import XGBRegressor

# Baseline XGBoost regressor: only the objective and the seed are set explicitly
model = XGBRegressor(random_state=42, objective='reg:squarederror')

# fit() returns the fitted estimator, so training and test-set prediction are chained
y_pred = model.fit(X_train, y_train).predict(X_test)

# R2 on the hold-out split, printed in bold blue through ANSI escape codes
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"\033[94;1mR2 Score: {r2:.6f}\033[0m")

[94;1mR2 Score: 0.861030[0m


In [8]:
# Hyperparameters for the tuned XGBoost model (hard-coded in this cell)
best_params = {'colsample_bytree': 1.0, 'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 200, 'subsample': 1.0}

# Every key of best_params is an XGBRegressor keyword, so the dict is unpacked
# directly next to the fixed objective and seed
optimal_model = XGBRegressor(objective='reg:squarederror', random_state=42, **best_params)

# Train on the training split, then predict the hold-out split
optimal_model.fit(X_train, y_train)
y_pred = optimal_model.predict(X_test)

# R2 of the tuned model on the hold-out split
r2 = r2_score(y_test, y_pred)

# Hyperparameters in red, score in bold blue (ANSI escape codes)
print("\033[91m" + f"Mejores hiperparámetros: {best_params}" + "\033[0m")
print(f"\033[94;1mR2 Score: {r2:.6f}\033[0m")

[91mMejores hiperparámetros: {'colsample_bytree': 1.0, 'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 200, 'subsample': 1.0}[0m
[94;1mR2 Score: 0.863465[0m


# LightGBM

In [9]:
import lightgbm as lgb

# Baseline LightGBM regressor: only the objective and the seed are set explicitly
model = lgb.LGBMRegressor(random_state=42, objective='regression')

# fit() returns the fitted estimator, so training and test-set prediction are chained
y_pred = model.fit(X_train, y_train).predict(X_test)

# R2 on the hold-out split, printed in bold blue through ANSI escape codes
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"\033[94;1mR2 Score (LightGBM): {r2:.6f}\033[0m")

[WinError 2] El sistema no puede encontrar el archivo especificado
  File "C:\Users\34634\anaconda3\lib\site-packages\joblib\externals\loky\backend\context.py", line 227, in _count_physical_cores
    cpu_info = subprocess.run(
  File "C:\Users\34634\anaconda3\lib\subprocess.py", line 505, in run
    with Popen(*popenargs, **kwargs) as process:
  File "C:\Users\34634\anaconda3\lib\subprocess.py", line 951, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\34634\anaconda3\lib\subprocess.py", line 1420, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003017 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 358
[LightGBM] [Info] Number of data points in the train set: 276511, number of used features: 5
[LightGBM] [Info] Start training from score 73.705764
[94;1mR2 Score (LightGBM): 0.855260[0m


In [10]:
import lightgbm as lgb
from sklearn.metrics import r2_score

# Hyperparameters for the tuned LightGBM model (hard-coded in this cell)
best_params = {'learning_rate': 0.2, 'n_estimators': 200, 'num_leaves': 40}

# Tuned LightGBM regressor; each hyperparameter is passed by name
best_model = lgb.LGBMRegressor(
    objective='regression',
    random_state=42,
    learning_rate=best_params['learning_rate'],
    n_estimators=best_params['n_estimators'],
    num_leaves=best_params['num_leaves'],
)

# Train on the training split, then predict the hold-out split
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)

# Coloured report: red label and green hyperparameters on one line, then the R2 in blue
print("\033[91m" + "Mejores hiperparámetros:" + "\033[0m", end=" ")
print("\033[92m" + f"{best_params}" + "\033[0m")
print("\033[94m" + f"Mejor modelo (LightGBM) - R2 Score: {r2_score(y_test, y_pred):.6f}" + "\033[0m")


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005043 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 358
[LightGBM] [Info] Number of data points in the train set: 276511, number of used features: 5
[LightGBM] [Info] Start training from score 73.705764
[91mMejores hiperparámetros:[0m [92m{'learning_rate': 0.2, 'n_estimators': 200, 'num_leaves': 40}[0m
[94mMejor modelo (LightGBM) - R2 Score: 0.863881[0m


# LSTM: Long Short Term Memory

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Standardize the numeric columns; the scaler is fitted on the training split only
# and then reused for the test split. The 'Id' column is not fed to this model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train[numeric_features])
X_test_scaled = scaler.transform(X_test[numeric_features])

# Insert a time axis of length 1 to obtain the (n_samples, time steps, features) layout
X_train_reshaped = X_train_scaled.reshape(-1, 1, X_train_scaled.shape[1])
X_test_reshaped = X_test_scaled.reshape(-1, 1, X_test_scaled.shape[1])

# Network: one LSTM layer with 100 units followed by a single-unit Dense output layer
model = Sequential([
    LSTM(100, activation='relu', input_shape=(1, len(numeric_features))),
    Dense(1),
])
model.compile(loss='mse', optimizer='adam')

# Train for 15 epochs in mini-batches of 32 samples
model.fit(X_train_reshaped, y_train, batch_size=32, epochs=15, verbose=1)

# Predict the hold-out split
y_pred = model.predict(X_test_reshaped)

# R2 on the hold-out split, printed in bold blue through ANSI escape codes
r2 = r2_score(y_test, y_pred)
print(f"\033[94;1mR2 Score: {r2:.6f}\033[0m")


# MLPRegressor: MultiLayer Perceptron Regressor

In [None]:
from sklearn.neural_network import MLPRegressor


# Pipeline that applies the column preprocessing and then fits an MLPRegressor
# (fixed seed, at most 500 training iterations)
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', MLPRegressor(random_state=42, max_iter=500))])
pipeline.fit(X_train, y_train)

# Pipeline.score() of a regressor returns R2 on the given data.
# Print `score` itself: the previous code printed `r2`, a value left over from
# an earlier cell, so the reported number did not belong to this model.
score = pipeline.score(X_test, y_test)
print(f"\033[94;1m R2 Score: {score:.6f}\033[0m")


In [None]:
# Hyperparameters for the tuned MLP, keyed with the pipeline step prefix 'regressor__'
best_params = {'regressor__activation': 'tanh', 'regressor__alpha': 0.001, 'regressor__hidden_layer_sizes': (100, 50)}

# Drop the 'regressor__' prefix so the values can be handed to MLPRegressor as keywords
mlp_kwargs = {name.split('__', 1)[1]: value for name, value in best_params.items()}
mlp = MLPRegressor(random_state=42, max_iter=500, **mlp_kwargs)

# Column preprocessing followed by the tuned MLP
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', mlp),
])

# Train on the training split, then predict the hold-out split
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# R2 of the tuned model; hyperparameters in red, score line in blue (ANSI escape codes)
score = r2_score(y_test, y_pred)
print("\033[91m" + f"Mejores hiperparámetros: {best_params}" + "\033[0m")
print("\033[94m" + f"R2 score con mejores hiperparámetros: {best_params} - R2 Score: {score:.6f}\033[0m")


# KNN Regressor

In [None]:


from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.neighbors import KNeighborsRegressor


# Column groups consumed by the preprocessing step
numeric_features = ['Hour', 'Minutes', 'Intensity', 'Calories']
categorical_features = ['Id']

# Standardize the numeric columns and one-hot encode 'Id'.
# remainder='passthrough' forwards any column not listed above unchanged;
# it does not control whether the transformed output is sparse.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ], remainder='passthrough')

# KNN with 5 neighbours and brute-force search
# ('ball_tree' and 'kd_tree' are the alternative search algorithms)
model = KNeighborsRegressor(n_neighbors=5, algorithm='brute')

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', model)
])

# Train on the training split, then predict the hold-out split
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Report R2 like the other model sections do; previously the predictions were
# computed and then discarded without any evaluation.
r2 = r2_score(y_test, y_pred)
print(f"\033[94;1mR2 Score (KNN): {r2:.6f}\033[0m")


In [None]:
# GridSearchCV was used below without ever being imported (NameError on a fresh kernel)
from sklearn.model_selection import GridSearchCV

# Candidate values for the number of neighbours: 1 to 20
n_neighbors_range = np.arange(1, 21)

# The 'model__' prefix addresses the parameter of the pipeline step named 'model'
param_grid = {'model__n_neighbors': n_neighbors_range}

# Column preprocessing followed by a KNN regressor
model = KNeighborsRegressor()
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', model)
])

# Exhaustive search over param_grid with 5-fold cross-validation, scored by R2
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='r2')
grid_search.fit(X_train, y_train)

# Best parameter combination; keys keep the 'model__' prefix
best_params = grid_search.best_params_
print("\033[91m" + f"Mejores hiperparámetros: {best_params}" + "\033[0m")

# With the default refit=True, best_estimator_ is the whole pipeline refitted on
# all of X_train with the best parameters. Reuse it instead of building
# KNeighborsRegressor(**best_params): the prefixed keys are not valid KNN
# keywords (TypeError), and a bare KNN would skip the preprocessing that was
# applied during the search.
model = grid_search.best_estimator_

# R2 of the tuned pipeline on the hold-out split
score = model.score(X_test, y_test)
print(f"\033[94;1mR2 Score (con mejores hiperparámetros): {score:.4f}\033[0m")

# Decision Tree Regressor

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline  # Make sure to import Pipeline

# Decision tree regressor with a fixed seed
dt_regressor = DecisionTreeRegressor(random_state=11101)

# Column preprocessing followed by the tree
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', dt_regressor)
])

# 5-fold cross-validation on the training split. The 'neg_mean_squared_error'
# scorer yields the negated MSE, so the sign is flipped to get the MSE per fold.
scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
mse_scores = -scores
print(f'Cross-Validation MSE: {mse_scores.mean():.4f} (+/- {mse_scores.std():.4f})')

# Fit on the full training split and predict the hold-out split
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

mse_test = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error on Test Set: {mse_test}')

# A confusion matrix is a classification tool and does not apply to a regressor;
# the old output merely re-printed the MSE under that label. Report R2 instead,
# which is the metric the other model sections use.
r2 = r2_score(y_test, y_pred)
print(f"\033[94;1mR2 Score: {r2:.6f}\033[0m")


# SVM: Support Vector Machine Regressor

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# The SVM is trained on the numeric columns only (no 'Id').
# Everything in this cell uses svm-specific names: overwriting the notebook-wide
# X / y / X_train / X_test / y_train / y_test with an 'Id'-less split breaks every
# later cell whose preprocessor selects the 'Id' column.
svm_features = ['Hour', 'Minutes', 'Intensity', 'Calories']
target = 'HeartRate'

X_svm = result_df[svm_features]
y_svm = result_df[target]

# Dedicated 70/30 split for this model
X_svm_train, X_svm_test, y_svm_train, y_svm_test = train_test_split(
    X_svm, y_svm, test_size=0.3, random_state=11101)

# SVMs are not scale invariant, so the features are standardized before the
# RBF-kernel SVR; the scaler is fitted on the training split only.
# NOTE: SVR fit time grows more than quadratically with the number of samples,
# so training on the full dataset can take very long.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('svr', SVR(kernel='rbf', gamma='auto', epsilon=0.1)),
])

# Train on the SVM training split, then predict the SVM test split
model.fit(X_svm_train, y_svm_train)
y_pred = model.predict(X_svm_test)

# R2 on the SVM test split
score = r2_score(y_svm_test, y_pred)
print("R2 score:", score)

# Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with 100 trees and a fixed seed
model = RandomForestRegressor(random_state=11101, n_estimators=100)

# Column preprocessing followed by the forest.
# The preprocessor selects the 'Id' column, so X_train / X_test must contain it.
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])

# Train on the training split
pipeline.fit(X_train, y_train)

# Pipeline.score() of a regressor returns R2 on the given data
score = pipeline.score(X_test, y_test)

print(f'R2 Score on Test Set: {score}')