#### **Part 0: Imports**

In [15]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

#### **Part 1: Load CSVs and standardize features**

In [16]:
# Read the training and test tables from the local data directory.
train_df = pd.read_csv("./data/train.csv")
test_df = pd.read_csv("./data/test.csv")

# Targets are the Y1/Y2 columns; every other training column is a feature.
y_train = train_df.loc[:, ["Y1", "Y2"]].values
X_train = train_df.drop(["Y1", "Y2"], axis=1).values

# Keep the test ids aside for the submission; the remaining columns are features.
test_ids = test_df["id"].values
X_test = test_df.drop(["id"], axis=1).values

# Fit the scaler on the training features only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [17]:
def r2_score(y_true, y_pred):
    """Compute the coefficient of determination (R^2) for each output column.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    numpy.ndarray or numpy scalar
        ``1 - SS_res / SS_tot`` reduced over axis 0, i.e. one score per
        column for 2-D inputs and a single score for 1-D inputs.

    Notes
    -----
    A column of ``y_true`` with zero variance makes ``SS_tot`` zero, so the
    division follows NumPy's floating-point rules (``inf``/``nan`` plus a
    runtime warning) instead of raising.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Residual sum of squares: squared difference between truth and prediction.
    ss_res = np.sum((y_true - y_pred) ** 2, axis=0)
    # Total sum of squares: squared deviation of the truth from its column mean.
    ss_tot = np.sum((y_true - np.mean(y_true, axis=0)) ** 2, axis=0)
    r2 = 1 - ss_res / ss_tot
    return r2

#### **Part 2: Dataset Classes**

In [18]:
class DatasetXY(Dataset):
    """Tensor-backed dataset of features with optional targets.

    ``X`` (and ``y`` when given) are converted to ``float32`` tensors once at
    construction time. Indexing yields ``(X[idx], y[idx])`` when targets were
    supplied and ``X[idx]`` alone otherwise.
    """

    def __init__(self, X, y = None):
        self.X = torch.tensor(X, dtype=torch.float32)
        # Targets are optional so the same class can serve unlabeled data.
        self.y = None if y is None else torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        features = self.X[idx]
        if self.y is None:
            return features
        return features, self.y[idx]

In [19]:
# Wrap the arrays in datasets; the test set carries no targets.
train_dataset = DatasetXY(X_train, y_train)
test_dataset = DatasetXY(X_test)

# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
# Test batches keep their original order so predictions line up with test_ids.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)

#### **Part 3: Define RNN Model**

In [20]:
class RNNModel(nn.Module):
    """Recurrent network (``nn.RNN``) followed by a linear output layer.

    Parameters
    ----------
    input_dim : int
        Size of each input vector fed to the RNN.
    hidden_dim : int
        Size of the RNN hidden state.
    output_dim : int
        Number of values produced by the final linear layer.
    num_layers : int, default 1
        Number of stacked RNN layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):
        super().__init__()
        self.rnn = nn.RNN(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a batch of inputs to ``output_dim`` predictions per row.

        A sequence axis of length 1 is inserted at dim 1 before the RNN, so
        each row is processed as a one-step sequence.
        """
        # assumes x is (batch, input_dim) -- TODO confirm against callers
        sequence = x.unsqueeze(dim=1)
        rnn_out, _hidden = self.rnn(sequence)
        # Keep only the output of the last time step.
        last_step = rnn_out[:, -1, :]
        return self.fc(last_step)

#### **Part 4: Train the model**

In [21]:
# Network dimensions: one input per feature column, two regression outputs.
input_dim = X_train.shape[1]
hidden_dim = 32
output_dim = 2

# Model, loss function and optimizer.
model = RNNModel(input_dim, hidden_dim, output_dim)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

epochs = 20
# Nothing inside the loop leaves training mode, so it is enabled once up front.
model.train()
for epoch in range(epochs):
    train_loss = 0
    for xb, yb in train_loader:
        # Forward pass.
        pred = model(xb)
        loss = criterion(pred, yb)

        # Backward pass: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Report the mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss/len(train_loader):.4f}")

Epoch 1/20, Train Loss: 0.3472
Epoch 2/20, Train Loss: 0.2758
Epoch 3/20, Train Loss: 0.2578
Epoch 4/20, Train Loss: 0.2496
Epoch 5/20, Train Loss: 0.2449
Epoch 6/20, Train Loss: 0.2406
Epoch 7/20, Train Loss: 0.2374
Epoch 8/20, Train Loss: 0.2371
Epoch 9/20, Train Loss: 0.2343
Epoch 10/20, Train Loss: 0.2325
Epoch 11/20, Train Loss: 0.2320
Epoch 12/20, Train Loss: 0.2298
Epoch 13/20, Train Loss: 0.2277
Epoch 14/20, Train Loss: 0.2266
Epoch 15/20, Train Loss: 0.2273
Epoch 16/20, Train Loss: 0.2266
Epoch 17/20, Train Loss: 0.2248
Epoch 18/20, Train Loss: 0.2255
Epoch 19/20, Train Loss: 0.2239
Epoch 20/20, Train Loss: 0.2228


#### **Part 5: Make predictions on test set**

In [22]:
# Switch to evaluation mode and run the test batches without tracking gradients.
model.eval()
with torch.no_grad():
    batch_outputs = [model(xb).numpy() for xb in test_loader]

# Stack the per-batch arrays row-wise into a single prediction matrix.
predictions = np.vstack(batch_outputs)

#### **Part 6: Create submission**

In [23]:
# Assemble the submission column by column: ids first, then the two outputs.
submission = pd.DataFrame({"id": test_ids})
submission["Y1"] = predictions[:, 0]
submission["Y2"] = predictions[:, 1]

# Write without the index so the file contains only id, Y1 and Y2.
submission.to_csv("submission.csv", index=False)