In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from IPython.display import HTML
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.stattools import pacf
from scipy.optimize import curve_fit
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.ar_model import AutoReg
from time import time
from sklearn.metrics import mean_squared_error
from math import sqrt
import warnings



In [None]:
# Silence every warning category for the rest of the session.
# NOTE(review): a blanket "ignore" also hides deprecation and numerical warnings
# raised by the libraries imported above — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")


In [None]:
# Load the raw trip records (Latin-1 encoded CSV) and display the number of rows read
csv_path = './Datasets/recorridos-realizados-2018.csv'
df = pd.read_csv(csv_path, encoding='latin-1')
df.shape[0]

In [None]:
# Convert the pickup timestamp column (day/month/year text) to datetime64 and
# derive `date`, the pickup time truncated to the start of its hour.
# The vectorised parser replaces a per-row strptime call; it also accepts values
# that are already datetimes, so re-running this cell no longer raises.

df['bici_Fecha_hora_retiro'] = pd.to_datetime(
    df['bici_Fecha_hora_retiro'], format='%d/%m/%Y %H:%M:%S'
)
# The parsed format has second resolution, so flooring to the hour is the same
# as zeroing the minute and second fields
df['date'] = df['bici_Fecha_hora_retiro'].dt.floor('h')

In [None]:
# Data cleaning: keep trips picked up on or after the cutoff, then build daily trip counts

fecha_limite = pd.Timestamp('2016-08-01 00:00:00')
after_cutoff = df['date'] >= fecha_limite
df_shorten = df.loc[after_cutoff].copy()
# Non-null user ids per hourly bucket, then summed into calendar days
hourly_counts = df_shorten.groupby('date')['bici_id_usuario'].count()
bicis_por_dia = hourly_counts.resample('D').sum()

In [None]:
# Parse rental-duration strings so the usage time can later be expressed in minutes

def parse_duration(duration_str):
    """Convert an ``H[:M[:S]]`` duration string into a ``timedelta``.

    Args:
        duration_str: Colon-separated text. The first field is hours, the
            optional second field minutes and the optional third field
            seconds; any further fields are ignored.

    Returns:
        A ``datetime.timedelta`` with the parsed hours, minutes and seconds
        (missing minutes/seconds count as 0).

    Raises:
        ValueError: If a used field is not an integer literal.
    """
    # Split once instead of re-splitting the string for every field
    parts = duration_str.split(':')
    hours = int(parts[0])
    minutes = int(parts[1]) if len(parts) >= 2 else 0
    seconds = int(parts[2]) if len(parts) >= 3 else 0
    return timedelta(hours=hours, minutes=minutes, seconds=seconds)

# Usage time as a timedelta column, then the same duration expressed in minutes
df_shorten['bici_tiempo_uso_delta'] = df_shorten['bici_tiempo_uso'].apply(parse_duration)
usage_seconds = df_shorten['bici_tiempo_uso_delta'].dt.total_seconds()
df_shorten['total_minutes'] = usage_seconds / 60

In [None]:
# Keep only pickups whose hourly bucket is on or before the end of 2017-12-01,
# then rebuild the daily trip counts for that window.
# NOTE(review): the source file name mentions 2018 — confirm this cutoff keeps the intended rows.
upper_cutoff = pd.Timestamp('2017-12-01 23:59:59')
df_trimmed = df_shorten.loc[df_shorten['date'] <= upper_cutoff].copy()
bicis_por_dia_trimmed = (
    df_trimmed.groupby('date')['bici_id_usuario']
    .count()
    .resample('D')
    .sum()
)

### LSTM 

In [None]:
# Turn the daily-count Series into a frame with the date as a regular column,
# and display it
X = bicis_por_dia_trimmed.to_frame().reset_index()
X

In [None]:
# Daily counts as a float32 vector, with a quick visual check of the raw series
timeseries = X['bici_id_usuario'].to_numpy(dtype='float32')
plt.plot(timeseries)
plt.xticks(rotation=45)
plt.show()
# Column layout (n_days, 1): one feature per time step
timeseries = timeseries[:, np.newaxis]


In [None]:
# Chronological split for the time series: the first 67% of the steps train the
# model, the remaining steps are held out for testing
n_steps = len(timeseries)
train_size = int(n_steps * 0.67)
test_size = n_steps - train_size
train, test = np.split(timeseries, [train_size])

In [None]:
import torch

def create_dataset(dataset, lookback):
    """Transform a time series into a prediction dataset

    Every sample pairs a window of ``lookback`` consecutive steps with the same
    window shifted one step into the future, so features and targets have the
    same length.

    Args:
        dataset: A numpy array of time series, first dimension is the time steps
        lookback: Size of window for prediction

    Returns:
        Tuple ``(X, y)`` of float32 tensors, each shaped
        ``(len(dataset) - lookback, lookback, *dataset.shape[1:])``. When the
        series has no more than ``lookback`` steps both tensors are empty.
    """
    dataset = np.asarray(dataset)
    n_windows = len(dataset) - lookback
    features = [dataset[i:i + lookback] for i in range(n_windows)]
    # Target window is the feature window shifted by one step
    targets = [dataset[i + 1:i + lookback + 1] for i in range(n_windows)]
    # Stack into a single ndarray before handing over to torch: building a
    # tensor directly from a list of ndarrays is slow and emits a warning
    X = torch.tensor(np.array(features), dtype=torch.float32)
    y = torch.tensor(np.array(targets), dtype=torch.float32)
    return X, y


In [None]:
# Sanity check: dimensions of the training split before it is cut into windows
train.shape

In [None]:
# Window length: every sample covers `lookback` consecutive time steps

lookback = 14
X_train, y_train = create_dataset(train, lookback=lookback)
X_test, y_test = create_dataset(test, lookback=lookback)

# Report feature/target tensor shapes, train split first and test split second
for features, targets in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, targets.shape)

In [None]:
import torch.nn as nn

class BikePredModel(nn.Module):
    """LSTM followed by a linear read-out applied at every time step.

    Args:
        input_size: Number of features per time step (default 1).
        hidden_size: Width of the LSTM hidden state; also the input width of
            the linear layer, so the two can no longer drift apart (default 50).
        num_layers: Number of stacked LSTM layers (default 1).

    The defaults reproduce the original fixed architecture, so
    ``BikePredModel()`` behaves as before.
    """

    def __init__(self, input_size=1, hidden_size=50, num_layers=1):
        super().__init__()
        # batch_first=True: inputs and outputs are (batch, seq_len, features)
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map ``(batch, seq_len, input_size)`` to ``(batch, seq_len, 1)``."""
        # Keep the per-step hidden outputs; the final (h, c) state is not needed
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x

In [None]:
import torch.optim as optim
import torch.utils.data as data

# Seed before constructing the model so the initial weights are reproducible
# (seeding only right before the training loop is too late for initialisation)
torch.manual_seed(42)

model = BikePredModel()
# NOTE(review): lr=0.5 is far above Adam's default of 1e-3 — confirm it is intentional
optimizer = optim.Adam(model.parameters(), lr=0.5)
loss_fn = nn.MSELoss()
#loss_fn = nn.HuberLoss()
# Shuffled mini-batches of 64 windows; drop_last discards the final incomplete batch.
# NOTE(review): the dataset wraps X_train / y_train as they are bound right now;
# rebinding those names afterwards does not change what this loader iterates over.
loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=64, drop_last=True)

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the windowed features and targets. Each scaler is fitted on the
# training windows only and then applied unchanged to the test windows.
# Windows are (n_samples, lookback, 1): the trailing axis is dropped for
# scikit-learn (which expects 2-D input) and restored afterwards. squeeze(-1)
# removes only that axis, so a split with a single window keeps its batch axis.
scaler_x = MinMaxScaler()
X_train = torch.tensor(scaler_x.fit_transform(X_train.squeeze(-1).numpy()), dtype=torch.float32).unsqueeze(-1)
X_test = torch.tensor(scaler_x.transform(X_test.squeeze(-1).numpy()), dtype=torch.float32).unsqueeze(-1)

# Separate scaler for the targets
scaler_y = MinMaxScaler()
y_train = torch.tensor(scaler_y.fit_transform(y_train.squeeze(-1).numpy()), dtype=torch.float32).unsqueeze(-1)
y_test = torch.tensor(scaler_y.transform(y_test.squeeze(-1).numpy()), dtype=torch.float32).unsqueeze(-1)

# Rebuild the training loader: the one created before this cell still wraps the
# unscaled tensors, so without this the model would be trained on raw values
# while being evaluated on the scaled ones.
loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=64, drop_last=True)

In [None]:

# Set random seed for reproducibility (covers the loader's shuffling order)
torch.manual_seed(42)
np.random.seed(42)

# Metric histories, one entry per evaluated epoch
train_loss = []
test_loss = []
train_rmse = []
test_rmse = []
gradient_norm = []  # declared here but not filled in by this loop

n_epochs = 3000
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluate and log only on every 100th epoch
    if epoch % 100 != 0:
        continue

    model.eval()
    with torch.no_grad():
        y_pred_train = model(X_train)
        y_pred_test = model(X_test)
        # Both splits are scored against their targets as stored (same scale
        # for train and test). Previously the test predictions were
        # inverse-transformed but compared with still-scaled targets, which
        # made the test numbers meaningless next to the train numbers.
        train_mse = loss_fn(y_pred_train, y_train).item()
        test_mse = loss_fn(y_pred_test, y_test).item()

    train_loss.append(train_mse)
    train_rmse.append(np.sqrt(train_mse))
    test_loss.append(test_mse)
    test_rmse.append(np.sqrt(test_mse))

    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse[-1], test_rmse[-1]))
    print("Epoch %d: train loss %.4f, test loss %.4f" % (epoch, train_loss[-1], test_loss[-1]))


### Plot loss, RMSE and gradient norms

In [None]:
# x positions: the epochs at which metrics were recorded (every 100th epoch)
epochs = range(0, n_epochs, 100)

fig, ax = plt.subplots()
ax.plot(epochs, train_loss, label='Train Loss')
ax.plot(epochs, test_loss, label='Test Loss')
ax.set(
    title='Training and Test Loss over Epochs',
    xlabel='Epochs',
    ylabel='Loss',
)
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Overlay one-step-ahead predictions on the full series.
# NOTE(review): the curves are drawn in the model's output scale. X_train / X_test
# were min-max scaled earlier, so if the model was trained on scaled targets the
# predictions need scaler_y.inverse_transform before they are comparable with
# the raw `timeseries` — confirm.
model.eval()
with torch.no_grad():
    # Last time step of every window = prediction for the step right after it
    y_pred = model(X_train)[:, -1, :]
    y_pred_last_test = model(X_test)[:, -1, :]

# shift train predictions for plotting: the first `lookback` steps have no prediction
train_plot = np.full_like(timeseries, np.nan)
train_plot[lookback:train_size] = y_pred.numpy()

# shift test predictions for plotting
test_plot = np.full_like(timeseries, np.nan)
test_plot[train_size+lookback:len(timeseries)] = y_pred_last_test.numpy()

# plot
plt.plot(timeseries, c='b', label='Observed')
plt.plot(train_plot, c='r', label='Train prediction')
plt.plot(test_plot, c='g', label='Test prediction')
plt.legend()
plt.show()