In [5]:
# Tratamiento de datos
# -----------------------------------------------------------------------
import numpy as np
import pandas as pd

# Gráficos
# ------------------------------------------------------------------------
import matplotlib.pyplot as plt
import seaborn as sns


# Guardar transformers
import pickle


# Preprocesado
from sklearn.preprocessing import RobustScaler


#  Modelado y evaluación
# -----------------------------------------------------------------------------
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [6]:
# Load the cleaned bike-rental dataset and preview its first rows.
# NOTE(review): `df` is not referenced again in the cells visible here.
ruta_datos = "../data/bikes_limpio.pkl"
df = pd.read_pickle(ruta_datos)
df.head()

Unnamed: 0,fecha,estacion,año,mes,festivo,dia_semana,laboral,clima,temperatura,sensacion_termica,humedad,viento,casual,registrado,total
0,2018-01-01,invierno,2018,enero,sí,lunes,no,niebla,14.1,18.2,80.6,10.7,331,654,985
1,2018-02-01,invierno,2018,febrero,no,jueves,sí,niebla,14.9,17.7,69.6,16.7,131,670,801
2,2018-03-01,invierno,2018,marzo,no,jueves,sí,cielo despejado,8.1,9.5,43.7,16.6,120,1229,1349
3,2018-04-01,primavera,2018,abril,no,domingo,no,cielo despejado,8.2,10.6,59.0,10.7,108,1454,1562
4,2018-05-01,primavera,2018,mayo,no,martes,sí,cielo despejado,9.3,11.5,43.7,12.5,82,1518,1600


In [7]:
# Raw feature values for the single day we want to score.
# Key order is kept as-is: it becomes the column order of the frame built from it.
datos_prediccion = {
    # Calendar features
    "estacion": "verano",
    "año": 2019,
    "mes": "julio",
    "festivo": "no",
    "dia_semana": "jueves",
    "laboral": "sí",
    # Weather features
    "clima": "cielo despejado",
    "temperatura": 27,
    "sensacion_termica": 27,
    "humedad": 67,
    "viento": 7,
}

In [8]:
# Wrap the input dict in a one-row DataFrame. Every value in the dict is a
# scalar, so pandas needs an explicit index to know how many rows to create.
prediccion = pd.DataFrame(datos_prediccion, index = [0])
prediccion

Unnamed: 0,estacion,año,mes,festivo,dia_semana,laboral,clima,temperatura,sensacion_termica,humedad,viento
0,verano,2019,julio,no,jueves,sí,cielo despejado,27,27,67,7


# Estandarización

In [9]:
# Restore the previously saved scaler from disk.
# pickle.load can execute arbitrary code from the file: only load artifacts
# that were produced by this project.
with open("modelos/scaler_registrados.pkl", "rb") as archivo_scaler:
    scaler = pickle.load(archivo_scaler)

In [10]:
# Columns passed to `scaler`: every numeric column except "año" and "viento"
# ("año" is encoded and "viento" is scaled in later cells).
columnas_excluidas = ["año", "viento"]
num = prediccion.select_dtypes(include=np.number).drop(columns=columnas_excluidas)
num

Unnamed: 0,temperatura,sensacion_termica,humedad
0,27,27,67


In [11]:
# Apply the fitted scaler and rebuild a labelled frame from the resulting array.
# The original row labels are reused (index=num.index) because the scaled
# columns are assigned back into `prediccion` by index alignment; with a
# default RangeIndex any non-default index would silently yield NaN there.
num_scaler = pd.DataFrame(
    scaler.transform(num),
    columns=num.columns,
    index=num.index,
)
num_scaler

Unnamed: 0,temperatura,sensacion_termica,humedad
0,0.890573,0.401963,0.293349


In [12]:
# Overwrite the raw numeric columns with their scaled values (same column
# names, matched by row index), then display the updated row.
prediccion[num_scaler.columns] = num_scaler
prediccion

Unnamed: 0,estacion,año,mes,festivo,dia_semana,laboral,clima,temperatura,sensacion_termica,humedad,viento
0,verano,2019,julio,no,jueves,sí,cielo despejado,0.890573,0.401963,0.293349,7


In [13]:
# Restore the saved scaler that is applied to "viento" in the next cells.
# pickle.load can execute arbitrary code from the file: only load artifacts
# that were produced by this project.
with open("modelos/robust_registrados.pkl", "rb") as archivo_robust:
    robust = pickle.load(archivo_robust)

In [14]:
# reset_index() moves the row labels into an extra "index" column, so `num2`
# has two columns: "index" and "viento". Both are passed to `robust` below.
# NOTE(review): presumably `robust` was fitted on this same two-column layout;
# confirm against the training notebook before dropping the "index" column.
num2 = prediccion[["viento"]].reset_index()

In [15]:
# Scale the columns of `num2` and put the resulting array back into a frame
# that carries the same column names.
viento_escalado = robust.transform(num2)
viento = pd.DataFrame(viento_escalado, columns=num2.columns)

In [16]:
# Write every column of `viento` back into `prediccion`. This replaces
# "viento" with its scaled value and also adds the scaled helper column
# "index", which a later cell drops again.
prediccion[viento.columns] = viento
prediccion

Unnamed: 0,estacion,año,mes,festivo,dia_semana,laboral,clima,temperatura,sensacion_termica,humedad,viento,index
0,verano,2019,julio,no,jueves,sí,cielo despejado,0.890573,0.401963,0.293349,-0.772727,-1.0


# Encoding

In [17]:
# Ordinal encodings: each category label is replaced by a numeric code.
dict_estacion = {"invierno": 0, "otoño": 1, "primavera": 1.5, "verano": 2}
dict_mes = {"enero": 0, "febrero": 0, "diciembre": 0, "noviembre": 1, "marzo": 2, "abril": 2, "septiembre": 2, "octubre": 2, "mayo": 3, "junio": 3, "julio": 3, "agosto": 3}
dict_semana = {"jueves": 0, "viernes": 0, "sábado": 0, "miércoles": 0, "domingo": 0.5, "martes": 0.5, "lunes": 1}
dict_clima = {"precipitaciones ligeras": 0, "niebla": 1, "cielo despejado": 2}
# The "año" column holds integers (e.g. 2019), so string-only keys never match
# and Series.map returns NaN. Integer keys are added for that case; the string
# keys are kept so any lookup that passes the year as text keeps working.
dict_año = {2018: 0, 2019: 1, "2018": 0, "2019": 1}
dict_festivo = {"sí": 0, "no": 1}

In [18]:
# Which encoding dictionary applies to which column.
dict_map = {
    "estacion": dict_estacion,
    "mes": dict_mes,
    "dia_semana": dict_semana,
    "clima": dict_clima,
    "año": dict_año,
    "festivo": dict_festivo,
}

# Replace each categorical value by its numeric code. Series.map yields NaN
# for any value that is not a key of the corresponding dictionary.
for nombre_columna in dict_map:
    codigos = dict_map[nombre_columna]
    prediccion[nombre_columna] = prediccion[nombre_columna].map(codigos)

In [19]:
# Encode the year from the actual input instead of hard-coding 1, which is
# only correct for 2019. The keys of dict_año are normalised to int so the
# lookup works whether the dictionary stores years as text or as integers.
codigos_año = {int(clave): valor for clave, valor in dict_año.items()}
año_entrada = int(datos_prediccion["año"])
if año_entrada not in codigos_año:
    # Fail loudly rather than scoring a row with a made-up year code.
    raise ValueError(
        f"No encoding defined for year {año_entrada}; known years: {sorted(codigos_año)}"
    )
prediccion["año"] = codigos_año[año_entrada]
prediccion

Unnamed: 0,estacion,año,mes,festivo,dia_semana,laboral,clima,temperatura,sensacion_termica,humedad,viento,index
0,2.0,1,3,1,0.0,sí,2,0.890573,0.401963,0.293349,-0.772727,-1.0


In [20]:
# Restore the saved encoder used for the "laboral" column.
# pickle.load can execute arbitrary code from the file: only load artifacts
# that were produced by this project.
with open("modelos/oh_registrados.pkl", "rb") as archivo_oh:
    oh = pickle.load(archivo_oh)

In [21]:
# Encode "laboral" with the fitted encoder and append the resulting columns.
laboral = oh.transform(prediccion[["laboral"]])
# The encoded frame reuses the row labels of `prediccion`: concat(axis=1)
# aligns on the index, so a default RangeIndex here would produce extra
# NaN-filled rows whenever `prediccion` is not indexed 0..n-1.
oh_df = pd.DataFrame(
    laboral.toarray(),  # transform() returned a sparse matrix; densify it
    columns=oh.get_feature_names_out(),
    index=prediccion.index,
)
prediccion = pd.concat([prediccion, oh_df], axis=1)

In [22]:
# Remove the helper "index" column added while scaling "viento" and the raw
# "laboral" column, which the appended encoded columns replace.
columnas_sobrantes = ["index", "laboral"]
prediccion = prediccion.drop(columns=columnas_sobrantes)
prediccion

Unnamed: 0,estacion,año,mes,festivo,dia_semana,clima,temperatura,sensacion_termica,humedad,viento,laboral_no,laboral_sí
0,2.0,1,3,1,0.0,2,0.890573,0.401963,0.293349,-0.772727,0.0,1.0


# Predicción

In [23]:
# Restore the trained model from disk.
# pickle.load can execute arbitrary code from the file: only load artifacts
# that were produced by this project.
with open("modelos/modelo_registrados.pkl", "rb") as archivo_modelo:
    modelo = pickle.load(archivo_modelo)

In [24]:
# Score the preprocessed row; predict() returns one value per row, so the
# first element is the prediction for our single input.
# NOTE(review): assumes the column order of `prediccion` matches the order
# used when the model was trained; confirm against the training notebook.
valores_predichos = modelo.predict(prediccion)
valores_predichos[0]

5543.499521086214