# Cursos de Series de tiempo con *Machine Learning*
## Modulo 400. Streamlit aplicación ML Forecasting Time Series 
                        Elaborado por: Naren Castellon

<center><img src="./imagen/streamlit ML Forecasting time series.png" width="900" height="500"></center>

In [19]:
# Cursos de Series de tiempo con Machine Learning
# Modulo 400. Aplicación con Streamlit ML Forecasting Time Series

#                       Elaborado por: Naren Castellon

# Cargar las librerias 

# Manipulacion y tratamiento de Datos
import numpy as np
import pandas as pd

# Desarrollo de Apps
import streamlit as st

# Modelacion
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor

# Evaluar el modelo
from sklearn.metrics import mean_squared_error

# Visualizacion de datos
import plotly.express as px
import plotly.graph_objects as go


# Generar datos de ventas ficticios

np.random.seed(42)

# Generar fechas
dates = pd.date_range(start= '2010-01-01', periods= 180, freq= 'MS')

sales_data = {
    'fecha': dates,
    'ventas': np.random.randint(50, 500, size = (180,)),
    'precio_unitario': np.random.uniform(5.0, 15.0, size =(180,)),
    'descuento': np.random.uniform(0, 0.25, size = (180,))
}

df_sales = pd.DataFrame(sales_data)
df_sales


Unnamed: 0,fecha,ventas,precio_unitario,descuento
0,2010-01-01,152,10.398411,0.038760
1,2010-02-01,485,7.030612,0.245460
2,2010-03-01,398,14.428536,0.209733
3,2010-04-01,320,10.988655,0.215101
4,2010-05-01,156,11.947849,0.062563
...,...,...,...,...
175,2024-08-01,437,5.276168,0.186053
176,2024-09-01,271,10.788649,0.170260
177,2024-10-01,456,9.384741,0.059377
178,2024-11-01,280,11.720261,0.100056


In [48]:
import pandas as pd

lags = [1, 2, 3]
# Crear las columnas de rezagos y añadirlas al DataFrame
for lag in lags:
    df_sales[f'lag_{lag}'] = df_sales['ventas'].shift(lag)

# Mostrar el DataFrame resultante
df_sales.dropna(inplace = True)
print(df_sales)


         fecha  ventas  precio_unitario  descuento  lag_1  lag_2  lag_3
3   2010-04-01     320        10.988655   0.215101  398.0  485.0  152.0
4   2010-05-01     156        11.947849   0.062563  320.0  398.0  485.0
5   2010-06-01     121        13.804678   0.009709  156.0  320.0  398.0
6   2010-07-01     238        11.243540   0.075816  121.0  156.0  320.0
7   2010-08-01      70         7.956337   0.134271  238.0  121.0  156.0
..         ...     ...              ...        ...    ...    ...    ...
175 2024-08-01     437         5.276168   0.186053  145.0  101.0  290.0
176 2024-09-01     271        10.788649   0.170260  437.0  145.0  101.0
177 2024-10-01     456         9.384741   0.059377  271.0  437.0  145.0
178 2024-11-01     280        11.720261   0.100056  456.0  271.0  437.0
179 2024-12-01     286         8.281527   0.119429  280.0  456.0  271.0

[177 rows x 7 columns]


In [25]:
dates = pd.date_range(start= '2021-01-01', periods= 100, freq= 'M')

In [26]:
dates[-12]

Timestamp('2028-05-31 00:00:00')

In [29]:
pd.date_range(start= df_sales["fecha"][:-24], periods= 9 + 1, freq='MS')[1:]

TypeError: Cannot convert input [0     2010-01-01
1     2010-02-01
2     2010-03-01
3     2010-04-01
4     2010-05-01
         ...    
151   2022-08-01
152   2022-09-01
153   2022-10-01
154   2022-11-01
155   2022-12-01
Name: fecha, Length: 156, dtype: datetime64[ns]] of type <class 'pandas.core.series.Series'> to Timestamp

In [20]:
df_sales.to_csv("ventas.csv", index= False)

In [21]:
pd.read_csv("ventas.csv")

Unnamed: 0,fecha,ventas,precio_unitario,descuento
0,2010-01-01,152,10.398411,0.038760
1,2010-02-01,485,7.030612,0.245460
2,2010-03-01,398,14.428536,0.209733
3,2010-04-01,320,10.988655,0.215101
4,2010-05-01,156,11.947849,0.062563
...,...,...,...,...
175,2024-08-01,437,5.276168,0.186053
176,2024-09-01,271,10.788649,0.170260
177,2024-10-01,456,9.384741,0.059377
178,2024-11-01,280,11.720261,0.100056


In [None]:
# agragar una nueva variable

df_sales["total"] = df_sales['ventas'] * df_sales['precio_unitario']*(1 - df_sales['descuento'])

print(df_sales)

# Prepar los datos para el entrenamiento

X = df_sales[['precio_unitario', 'descuento']]
y = df_sales['ventas']

# Dividir los datos en entrenamiento y prueba

X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size= 0.20, random_state= 42, shuffle= False)



In [23]:
df_sales["fecha"][-24:]

156   2023-01-01
157   2023-02-01
158   2023-03-01
159   2023-04-01
160   2023-05-01
161   2023-06-01
162   2023-07-01
163   2023-08-01
164   2023-09-01
165   2023-10-01
166   2023-11-01
167   2023-12-01
168   2024-01-01
169   2024-02-01
170   2024-03-01
171   2024-04-01
172   2024-05-01
173   2024-06-01
174   2024-07-01
175   2024-08-01
176   2024-09-01
177   2024-10-01
178   2024-11-01
179   2024-12-01
Name: fecha, dtype: datetime64[ns]

In [None]:
# Generar fechas futuras para el pronóstico
future_dates = pd.date_range(start= df_sales["fecha"][-24], periods=horizonte + 1, freq='MS')[1:]
future_data = pd.DataFrame({
    'fecha': future_dates,
    'precio_unitario': np.random.uniform(5.0, 15.0, size=(horizonte,)),
    'descuento': np.random.uniform(0, 0.25, size=(horizonte,))
})
future_X = future_data[['precio_unitario', 'descuento']]