# Deep Learning Architectures Assignment 2
The goal is to design and train a neural network for a regression task.
Detailed requirements were presented during the last lecture.

# Summary
# Dataset
* The dataset contains 2210 examples.
* Each example consists of 2500 time steps.
* In each time step, 8 values are recorded.
* The target for each example consists of two real numbers representing coordinates.

## TODO:
* Split the dataset into three sets: test, validation, and training.
* Choose an architecture and design the model.
* Design the training procedure.
* Present the results.

A correctly trained model should make predictions with an average error of < 2.

## What will be evaluated:
* Understanding of the topic, exploration of the dataset, justification of architecture choice [5 points]
* Correctness of the training implementation [5 points]
* Error obtained on test and validation data [5 points]
* Presentation of the achieved results [5 points]


## Extended task for extra points:
* Design a model for noisy data.
* To add noise, use the `addNoise` function.
* Start tests with low noise: 0.01 or 0.001.

In [None]:
from urllib.request import urlopen
import numpy as np
import pickle
import os

def download_part(filename):
  """Download one dataset part file into the current working directory.

  Args:
    filename: Name of the part file. It is used both as the remote file
      name in the download URL and as the local path that is written.

  Raises:
    urllib.error.URLError: If the HTTP request fails (raised by ``urlopen``).
    OSError: If the local file cannot be written.
  """
  base_url = f"https://github.com/pa-k/AGU/blob/main/assignment2/{filename}?raw=true"
  # Use the response as a context manager so the connection is always closed,
  # even when reading fails part-way through.
  with urlopen(base_url) as response:
    binary_data = response.read()
  with open(filename, "wb") as f:
    f.write(binary_data)

def loadDataset():
    """Load the dataset, downloading any part file that is not present locally.

    Each part file contains one pickled ``bytes`` chunk. The chunks are
    joined in the listed order and the joined payload is unpickled.

    Returns:
        The object stored in the reassembled pickle. The caller in this file
        unpacks it as ``x, y``.
    """
    parts = ["DLAA2.0.pkl", "DLAA2.1.pkl", "DLAA2.2.pkl", "DLAA2.3.pkl"]
    chunks = []
    for part in parts:
        if not os.path.exists(part):
            download_part(part)
        with open(part, "rb") as f:
            # SECURITY NOTE: pickle can execute arbitrary code while loading.
            # These files come from the network, so only use this loader with
            # a source you trust.
            chunks.append(pickle.load(f))
    # Join once instead of growing a bytes object with += on every iteration.
    return pickle.loads(b"".join(chunks))

def addNoise(input, noiseLevel=0.1):
  """Return ``input`` plus Gaussian noise scaled relative to its maximum.

  Standard-normal samples with the same shape as ``input`` are multiplied by
  ``noiseLevel`` and by the largest value in ``input`` before being added.

  Args:
    input: NumPy array to perturb (it is not modified in place).
    noiseLevel: Noise amplitude as a fraction of ``np.max(input)``.

  Returns:
    A new array with the same shape as ``input``.
  """
  peak = np.max(input)
  gaussian = np.random.randn(*input.shape)
  return input + gaussian * noiseLevel * peak


## Load dataset

In [None]:
# Load the inputs and targets, then show both array shapes and one sample target.
x, y = loadDataset()
print(x.shape, y.shape, sep="\n")
print(y[1000])

# Your solution

In [None]:
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn as nn
import numpy as np
import torch

In [None]:
# --- Train / validation / test split ---------------------------------------
# A fixed seed keeps the same examples in each split on every run. Without it,
# re-running this cell reshuffles the data, so a model that is already trained
# could be evaluated on examples it was trained on.
SPLIT_SEED = 42

train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15
if not np.isclose(train_ratio + val_ratio + test_ratio, 1.0):
    raise ValueError("train_ratio, val_ratio and test_ratio must sum to 1")

total_samples = len(x)
train_size = int(total_samples * train_ratio)
val_size = int(total_samples * val_ratio)
# The test split takes the remainder so no example is lost to int() truncation.
test_size = total_samples - train_size - val_size

indices = np.random.default_rng(SPLIT_SEED).permutation(total_samples)
train_indices = indices[:train_size]
val_indices = indices[train_size:train_size + val_size]
test_indices = indices[train_size + val_size:]

x_train, y_train = x[train_indices], y[train_indices]
x_val, y_val = x[val_indices], y[val_indices]
x_test, y_test = x[test_indices], y[test_indices]

# --- Input normalisation ----------------------------------------------------
# The arrays are flattened to (rows, last-axis size) so that one mean/std is
# fitted per entry of the last axis, then reshaped back.
# The scaler is fitted on the training split only; validation and test data
# are transformed with the training statistics.
scaler = StandardScaler()
x_train_normalized = scaler.fit_transform(x_train.reshape(-1, x_train.shape[-1])).reshape(x_train.shape)
x_val_normalized = scaler.transform(x_val.reshape(-1, x_val.shape[-1])).reshape(x_val.shape)
x_test_normalized = scaler.transform(x_test.reshape(-1, x_test.shape[-1])).reshape(x_test.shape)

# --- Target normalisation ---------------------------------------------------
# Targets are standardised with training-split statistics as well. Predictions
# made in this space must be rescaled with y_scaler before being interpreted
# in the original target units.
y_scaler = StandardScaler()
y_train_normalized = y_scaler.fit_transform(y_train)
y_val_normalized = y_scaler.transform(y_val)
y_test_normalized = y_scaler.transform(y_test)


In [None]:
def _to_float_tensor(array):
    """Copy an array into a new float32 torch tensor."""
    return torch.tensor(array, dtype=torch.float32)


# Convert every normalised split to float32 tensors.
x_train_tensor, y_train_tensor = _to_float_tensor(x_train_normalized), _to_float_tensor(y_train_normalized)
x_val_tensor, y_val_tensor = _to_float_tensor(x_val_normalized), _to_float_tensor(y_val_normalized)
x_test_tensor, y_test_tensor = _to_float_tensor(x_test_normalized), _to_float_tensor(y_test_normalized)

# Pair inputs with targets for each split.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

# Only the training loader shuffles; validation and test keep a fixed order.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size) for split in (val_dataset, test_dataset)
)

In [None]:
class RegressionModel(nn.Module):
    """1-D convolutional network mapping a multichannel sequence to two outputs.

    Two Conv1d -> ReLU6 -> MaxPool1d stages are followed by three fully
    connected layers, with LayerNorm and dropout after the first one.
    Inputs are expected as ``(batch, seq_len, in_channels)``; the output has
    shape ``(batch, 2)``.

    Args:
        in_channels: Number of values recorded per time step (default 8).
        seq_len: Number of time steps per example (default 2500). It sets the
            input size of the first fully connected layer.

    Raises:
        ValueError: If ``seq_len`` is too short to survive both pooling stages.
    """

    def __init__(self, in_channels: int = 8, seq_len: int = 2500):
        super().__init__()
        # The convolutions keep the length (kernel 3, padding 1); each pooling
        # stage halves it with floor division.
        pooled_len = (seq_len // 2) // 2
        if pooled_len < 1:
            raise ValueError("seq_len must be at least 4")

        self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(128 * pooled_len, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 2)
        # The attribute is called ``relu`` but the activation is ReLU6, which
        # also clamps values above 6.
        self.relu = nn.ReLU6(inplace=True)
        self.dropout = nn.Dropout(0.3)
        self.layer_norm = nn.LayerNorm(128)

        # Re-initialise conv/linear weights with Kaiming-uniform; biases keep
        # their default initialisation.
        for m in self.modules():
            if isinstance(m, (nn.Conv1d, nn.Linear)):
                nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')

    def forward(self, x):
        """Run the network on ``x`` of shape ``(batch, seq_len, in_channels)``."""
        # Conv1d expects channels before the length axis.
        x = x.transpose(1, 2)
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten everything except the batch axis for the dense head.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.layer_norm(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = RegressionModel().to(device)

# Huber loss optimised with Adam at its default settings.
criterion = nn.HuberLoss()
optimizer = optim.Adam(model.parameters())

# Plateau-based learning-rate scheduler: factor 0.1, patience 10, minimising
# the monitored value.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=10,
)

In [None]:
num_epochs = 100
train_losses = []
val_losses = []


def _epoch_loss(loader, training):
    """Run one pass over ``loader`` and return the mean per-batch loss.

    When ``training`` is true the model is put in train mode and the
    optimizer is stepped after every batch. Otherwise the model is put in
    eval mode and gradients are disabled.
    """
    model.train(training)
    running = 0.0
    with torch.set_grad_enabled(training):
        for batch_inputs, batch_targets in loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)
            if training:
                optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_targets)
            if training:
                batch_loss.backward()
                optimizer.step()
            running += batch_loss.item()
    return running / len(loader)


for epoch in range(num_epochs):
    epoch_train_loss = _epoch_loss(train_loader, training=True)
    epoch_val_loss = _epoch_loss(val_loader, training=False)

    # Both histories are extended only after the full epoch has finished.
    train_losses.append(epoch_train_loss)
    val_losses.append(epoch_val_loss)
    # NOTE: the learning-rate scheduler is currently not stepped in this loop.
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]}, Val Loss: {val_losses[-1]}')

In [None]:
# Plot the per-epoch training and validation loss curves.
# The x-axes are derived from the number of recorded epochs rather than from
# num_epochs, so the plot still works when training was stopped early.
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
plt.title('Training and Validation Losses')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Mean per-batch loss on the test set, measured on the normalised targets
# with the same criterion used during training.
# Switch to eval mode explicitly so dropout is disabled even when this cell is
# run without the training loop having just finished.
model.eval()
test_loss = 0.0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        test_loss += loss.item()

test_loss /= len(test_loader)
print(f'Test Loss: {test_loss:.4f}')

In [None]:
# Mean absolute error per target coordinate on the test set, reported in the
# ORIGINAL target units.
#
# The model is trained on targets standardised by y_scaler, so the raw
# difference between outputs and y_batch is in standardised units.
# StandardScaler computes (y - mean_) / scale_, hence an absolute error in
# standardised space converts back to original units by multiplying with
# scale_ (the mean cancels in the difference).
model.eval()  # make sure dropout is disabled during evaluation
target_scale = torch.tensor(y_scaler.scale_, dtype=torch.float32, device=device)

total_error_x = 0.0
total_error_y = 0.0
n_test_samples = 0  # separate name so the dataset-wide total_samples is not overwritten

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch)

        # Per-sample absolute errors rescaled to original units, summed over
        # the batch for each of the two coordinates.
        batch_abs_error = torch.abs(outputs - y_batch) * target_scale
        error_x, error_y = batch_abs_error.sum(dim=0).tolist()

        total_error_x += error_x
        total_error_y += error_y
        n_test_samples += X_batch.size(0)

avg_error_x = total_error_x / n_test_samples
avg_error_y = total_error_y / n_test_samples

print(f'Avg error for X dimension: {avg_error_x:.2f}')
print(f'Avg error for Y dimension: {avg_error_y:.2f}')