## 0. Imports

In [None]:
#!/usr/bin/env python
import gc
import os
from datetime import datetime

import pandas as pd
import torch
from sklearn.model_selection import train_test_split

from src.model import RNNBinaryClassifier

from src.data import preprocess_data, embed_data

## 1. Data Preprocessing

In [None]:
# Build the train/eval frames with the project helpers; what each step does
# is defined in src.data and is not visible from this notebook.
df_train, df_eval = preprocess_data()
df_train, df_eval = embed_data(df_train, df_eval)

# Training features: every column except the label ('EventType') and the
# identifier columns. The label becomes the target vector.
X = df_train.drop(columns=['EventType', 'MatchID', 'PeriodID', 'ID']).values
y = df_train['EventType'].values

# Evaluation set: keep the IDs aside so predictions can be matched back to
# rows, then drop the identifier columns from the feature matrix.
X_id = df_eval['ID'].values
X_eval = df_eval.drop(columns=['MatchID', 'PeriodID', 'ID']).values

# 70/30 train/validation split. The seed is fixed so that the split -- and
# therefore the validation accuracy reported later and embedded in the
# prediction folder name -- is reproducible from run to run.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.3, random_state=42
)

## 2. Model Training

In [None]:
# Helper that writes a Kaggle submission file for a given classifier
def save_predictions_model(clf, params, accuracy):
    """Refit ``clf`` on the full training data and write its eval predictions to CSV.

    Reads the notebook-level globals ``X``, ``y``, ``X_eval`` and ``X_id``.

    Output layout (relative to the current working directory):
        pred-<YYYYMMDD>/<ClassName>-<accuracy>/<params>/<ClassName>_predictions.csv

    Args:
        clf: Object exposing ``fit(X, y)`` and ``predict(X)``.
        params: Human-readable description of the run; spaces are stripped
            before it is used as a directory name.
        accuracy: Value interpolated as-is into the directory name.

    Returns:
        The directory (with trailing slash) the CSV was written to.
    """
    # Train on everything (no validation hold-out) before predicting.
    clf.fit(X, y)
    submission = pd.DataFrame({'ID': X_id, 'EventType': clf.predict(X_eval)})

    # The date is taken after fitting, so it reflects when predictions were made.
    compact_params = params.replace(' ', '')
    date_stamp = datetime.today().strftime('%Y%m%d')
    model_name = clf.__class__.__name__

    out_dir = f"pred-{date_stamp}/{model_name}-{accuracy}/{compact_params}/"
    os.makedirs(out_dir, exist_ok=True)

    csv_path = os.path.join(out_dir, f"{model_name}_predictions.csv")
    submission.to_csv(csv_path, index=False)
    print(f"Predictions saved to {csv_path}")
    return out_dir


## RNN

In [None]:
# Free unreferenced Python objects and PyTorch's cached CUDA blocks left over
# from previous runs of this cell before allocating new tensors.
gc.collect()
torch.cuda.empty_cache()

# Hyperparameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size = X.shape[1]  # one input feature per column of X
hidden_size = 128
num_layers = 4
output_size = 1
# NOTE(review): learning_rate is not passed to RNNBinaryClassifier or fit()
# in this cell; it is only recorded in the run description when predictions
# are saved. Confirm it matches the rate the model actually uses.
learning_rate = 1e-4
batch_size = 64
num_epochs = 1000
stop_threshold = 1e-6

# Convert data to Tensor (created directly on the selected device)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32, device=device)

X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32, device=device)
y_valid_tensor = torch.tensor(y_valid, dtype=torch.float32, device=device)

# Initialize the model
rnn = RNNBinaryClassifier(input_size, hidden_size, num_layers, output_size)
# The data tensors live on `device`, so the model parameters must too;
# otherwise training fails with a device mismatch on CUDA machines.
# Assumes RNNBinaryClassifier is a torch.nn.Module (it exposes state_dict());
# this is a no-op if the model is already on `device`.
rnn.to(device)

# Train the model
rnn.fit(X_train_tensor, y_train_tensor, num_epochs=num_epochs, batch_size=batch_size, stop_threshold=stop_threshold)

In [None]:
# Evaluate the model on the held-out validation tensors.
# NOTE(review): the value returned by `evaluate` is defined in src.model and is
# not visible here; it is printed and later interpolated into the prediction
# folder name as-is -- confirm it is a plain number.
acc = rnn.evaluate(X_valid_tensor, y_valid_tensor)
print(f"Validation Accuracy: {acc}")

In [None]:
# Persist the trained weights to a date-stamped checkpoint in the working directory
date_stamp = datetime.today().strftime('%Y%m%d')
checkpoint_filename = f"rnn_checkpoint_{date_stamp}.pt"
torch.save(rnn.state_dict(), checkpoint_filename)
print(f"Model saved to {checkpoint_filename}")

# Write the submission file. save_predictions_model calls clf.fit(X, y) on the
# full training arrays before predicting, so the checkpoint saved above holds
# the weights from before that refit.
run_description = f"hidden_size={hidden_size}, num_layers={num_layers}, lr={learning_rate}, epochs={num_epochs}"
save_predictions_model(rnn, run_description, accuracy=acc)