In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; the CSV
# loaded further down is read from a path underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Load the raw table from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/Team7/ForwardKeys_data.csv')

# Parse the ISO-formatted date strings, then derive the weekday index per row.
df = df.assign(Date=pd.to_datetime(df['Date'], format='%Y-%m-%d'))
df = df.assign(DayOfWeek=df['Date'].dt.dayofweek)

# One indicator column per weekday value (column names prefixed with "Day"),
# appended to the main frame by index.
dayofweek_onehot = pd.get_dummies(df['DayOfWeek'], prefix='Day')
df = df.join(dayofweek_onehot)

# Columns of df that are turned into model sequences, one series per location.
locations = [
    'Visitors in Blue Lagoon',
    'Visitors in Machu Picchu',
    'Visitors in Taj Mahal',
    'Visitors in Doge\'s Palace',
    'Visitors in Louvre Museum',
]

# (location, time slot) -> (X_tensor, y_tensor); populated by the loop below.
sequence_data_all = dict()

# Min-max scaler that maps a visitor series into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
# Number of consecutive observations in every input window.
seq_length = 50

# One fitted scaler per (location, time slot). The original code refit a single
# shared scaler on every pass, so only the last series could ever be mapped
# back to visitor counts with inverse_transform.
scalers_all = {}


def create_sequences(data, seq_length):
    """Slice a feature table into sliding windows and next-step targets.

    Args:
        data: DataFrame whose first column is the target series and whose
            remaining columns are extra per-row features.
        seq_length: number of consecutive rows in each input window.

    Returns:
        (xs, ys) where xs has shape (n_windows, seq_length, n_columns) and
        ys has shape (n_windows,), with n_windows = len(data) - seq_length.
        ys[i] is the first-column value of the row right after window i.
        When the table has too few rows for a single window, both arrays are
        empty but keep their rank and dtype so downstream tensor code still
        sees well-formed shapes.
    """
    values = data.to_numpy()
    n_windows = len(values) - seq_length
    if n_windows <= 0:
        empty_x = np.empty((0, seq_length, values.shape[1]), dtype=values.dtype)
        empty_y = np.empty((0,), dtype=values.dtype)
        return empty_x, empty_y
    xs = np.stack([values[i:i + seq_length] for i in range(n_windows)])
    # Target of window i is column 0 of row i + seq_length; copy() makes the
    # slice contiguous so it can be reshaped as a tensor view later.
    ys = values[seq_length:, 0].copy()
    return xs, ys


onehot_columns = dayofweek_onehot.columns.tolist()

for location in locations:
    for time_slot in df['Time'].unique():

        # Rows of the current time slot, put in date order so that every
        # window really is a run of consecutive observations (mergesort is
        # stable, so rows sharing a date keep their original order).
        df_filtered = (
            df[df['Time'] == time_slot][['Date', location] + onehot_columns]
            .copy()
            .sort_values('Date', kind='mergesort')
        )

        # Fresh scaler per series, kept for later inverse transforms. The name
        # `scaler` is still rebound so it ends up holding the last fitted one,
        # exactly as before.
        # NOTE(review): the scaler is fit on the whole series, including rows
        # that are later held out for testing.
        scaler = MinMaxScaler(feature_range=(0, 1))
        df_filtered[location] = scaler.fit_transform(df_filtered[[location]])
        scalers_all[(location, time_slot)] = scaler

        # Model features: scaled target first, then the weekday indicators.
        sequence_data = df_filtered[[location] + onehot_columns].astype(np.float32)

        X, y = create_sequences(sequence_data, seq_length)

        # Convert to PyTorch tensors; targets become a column vector.
        X_tensor = torch.from_numpy(X)
        y_tensor = torch.from_numpy(y).view(-1, 1)
        sequence_data_all[(location, time_slot)] = (X_tensor, y_tensor)


In [None]:

# LSTM model class
class LSTMModel(nn.Module):
    """Stacked LSTM whose final time step is passed through a linear layer."""

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the linear read-out of the LSTM output at the last time step."""
        sequence_output, _ = self.lstm(x)  # final hidden/cell states are unused
        last_step = sequence_output[:, -1, :]
        return self.fc(last_step)

# train and evaluate
def train_evaluate_model(X_train, X_test, y_train, y_test, time_slot,
                         hidden_dim=64, num_layers=3, lr=0.001,
                         num_epochs=170, log_every=10):
    """Train an LSTMModel on one series and score it on held-out data.

    Training is full-batch: every epoch runs a single optimizer step on the
    whole of X_train. R-squared and RMSE are computed on whatever scale the
    targets are passed in.

    Args:
        X_train, X_test: float tensors whose last dimension is the feature
            count; they are fed to the model as-is.
        y_train, y_test: target tensors matching the model output shape.
        time_slot: label that is only used in the printed progress lines.
        hidden_dim: LSTM hidden size.
        num_layers: number of stacked LSTM layers.
        lr: Adam learning rate.
        num_epochs: number of full-batch training steps.
        log_every: print the training loss every this many epochs; 0 or None
            silences the per-epoch log.

    Returns:
        (model, r_squared, rmse): the trained model left in eval mode and the
        two test-set metrics. Earlier versions returned None, so callers that
        ignore the return value are unaffected.
    """
    lstm_model = LSTMModel(input_dim=X_train.shape[-1], hidden_dim=hidden_dim,
                           num_layers=num_layers, output_dim=1)
    lstm_criterion = nn.MSELoss()
    lstm_optimizer = torch.optim.Adam(lstm_model.parameters(), lr=lr)

    # Training mode only needs to be set once; nothing toggles it in the loop.
    lstm_model.train()
    for epoch in range(num_epochs):
        lstm_optimizer.zero_grad()
        lstm_output = lstm_model(X_train)
        lstm_loss = lstm_criterion(lstm_output, y_train)
        lstm_loss.backward()
        lstm_optimizer.step()
        if log_every and epoch % log_every == 0:
            print(f'LSTM - Epoch {epoch}, Loss: {lstm_loss.item()} at {time_slot}')

    lstm_model.eval()
    with torch.no_grad():
        lstm_predictions = lstm_model(X_test)

    y_true = y_test.numpy()
    y_pred = lstm_predictions.numpy()
    lstm_r_squared = r2_score(y_true, y_pred)
    lstm_rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    print(f'Time Slot: {time_slot}, LSTM R-squared: {lstm_r_squared}, RMSE: {lstm_rmse}')
    return lstm_model, lstm_r_squared, lstm_rmse

# Series to model and the time slots evaluated for it.
location = 'Visitors in Taj Mahal'
time_slots = ['8:00', '10:00', '12:00', '14:00', '16:00', '18:00']
for time_slot in time_slots:
    X_all, y_all = sequence_data_all[(location, time_slot)]
    # Hold out the final 20% of windows without shuffling. The windows are
    # overlapping slices of one series, so a shuffled split puts held-out
    # targets inside training inputs and lets training see observations that
    # come after the test ones, which inflates the reported scores.
    # NOTE(review): assumes the windows are stored in chronological order.
    X_train, X_test, y_train, y_test = train_test_split(
        X_all, y_all, test_size=0.2, shuffle=False
    )
    train_evaluate_model(X_train, X_test, y_train, y_test, time_slot)


LSTM - Epoch 0, Loss: 0.19154991209506989 at 8:00
LSTM - Epoch 10, Loss: 0.11313506960868835 at 8:00
LSTM - Epoch 20, Loss: 0.10728836059570312 at 8:00
LSTM - Epoch 30, Loss: 0.10543463379144669 at 8:00
LSTM - Epoch 40, Loss: 0.1041603833436966 at 8:00
LSTM - Epoch 50, Loss: 0.10086940973997116 at 8:00
LSTM - Epoch 60, Loss: 0.0906166285276413 at 8:00
LSTM - Epoch 70, Loss: 0.049151461571455 at 8:00
LSTM - Epoch 80, Loss: 0.03175458684563637 at 8:00
LSTM - Epoch 90, Loss: 0.02410230040550232 at 8:00
LSTM - Epoch 100, Loss: 0.01693207398056984 at 8:00
LSTM - Epoch 110, Loss: 0.013494571670889854 at 8:00
LSTM - Epoch 120, Loss: 0.012184220366179943 at 8:00
LSTM - Epoch 130, Loss: 0.011714310385286808 at 8:00
LSTM - Epoch 140, Loss: 0.011515283025801182 at 8:00
LSTM - Epoch 150, Loss: 0.01135043241083622 at 8:00
LSTM - Epoch 160, Loss: 0.011230788193643093 at 8:00
Time Slot: 8:00, LSTM R-squared: 0.8815473650209716, RMSE: 0.10950367152690887
LSTM - Epoch 0, Loss: 0.29247206449508667 at 10