# Deep Learning

## Examen Agosto 2022

## Ejercicio 2

Utilizando el mismo dataset que en el link anterior, diseñar una red neuronal que haga overfitting sobre el dataset. ¿Cómo saben que la red neuronal está haciendo overfitting? Agregar regularización y encontrar los hiperparámetros adecuados para lograr un buen ajuste (fit). Explicar el procedimiento realizado.

### Librerías y funciones globales

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch.utils.data import Dataset, DataLoader
from sklearn import metrics

In [2]:
# Load the dataset from a CSV file; the path is relative to the working directory.
df = pd.read_csv('data/dataset_1.csv')

In [3]:
# Extract the "# X" (input) and "Y" (target) columns as column vectors of
# shape (n, 1).
x = df["# X"].to_numpy()[:, np.newaxis]
y = df["Y"].to_numpy()[:, np.newaxis]

In [5]:
# Randomly permute the row indices, then cut them 80% / 20% into train and
# test index sets.
n = len(x)
idx = np.random.permutation(np.arange(n))
train_idx, test_idx = idx[:int(0.80 * n)], idx[int(0.80 * n):]

In [6]:
# Select the rows of each split using the shuffled index sets.
X_train, y_train = x[train_idx, :], y[train_idx]
X_test, y_test = x[test_idx, :], y[test_idx]

# Print the shape of every split, one per line.
print(*(part.shape for part in (X_train, y_train, X_test, y_test)), sep="\n")

(8000, 1)
(8000, 1)
(2000, 1)
(2000, 1)


### NNet

In [7]:
class MyDataset(Dataset):
    """Dataset that pairs each row of ``X`` with the same-index row of ``y``.

    Both containers are stored exactly as passed in (no copy, no conversion).
    ``X`` must expose a ``shape`` attribute and both must support indexing.
    """

    def __init__(self, X, y):
        super().__init__()
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, i.e. the size of ``X``'s first axis."""
        n_rows = self.X.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        features = self.X[idx]
        target = self.y[idx]
        return features, target

In [8]:
# Wrap the training arrays in a Dataset so a DataLoader can index them.
train = MyDataset(X_train, y_train)

In [9]:
# Wrap the held-out test arrays in a Dataset as well.
test = MyDataset(X_test, y_test)

In [10]:
# Mini-batch loaders with 128 samples per batch. Training batches are
# reshuffled every epoch; the test loader keeps the dataset order so that
# evaluation is deterministic and batched outputs stay aligned with y_test.
train_dataloader = DataLoader(train, batch_size=128, shuffle=True)
test_dataloader = DataLoader(test, batch_size=128, shuffle=False)

In [11]:
class NNet(torch.nn.Module):
    """Fully connected network mapping 1 input feature to 1 output value.

    Five hidden ``Linear`` + ``ReLU`` stages with widths 1024, 2048, 2048,
    1024 and 512 are followed by a final ``Linear`` layer with a single
    output and no activation.
    """

    def __init__(self):
        super().__init__()
        # Consecutive entries give the (in, out) sizes of each hidden layer.
        widths = (1, 1024, 2048, 2048, 1024, 512)
        for k, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            # Registered as linear_<k> followed by relu_<k>.
            setattr(
                self,
                f"linear_{k}",
                torch.nn.Linear(in_features=n_in, out_features=n_out, bias=True),
            )
            setattr(self, f"relu_{k}", torch.nn.ReLU())
        self.output = torch.nn.Linear(in_features=widths[-1], out_features=1, bias=True)

    def forward(self, x):
        """Run ``x`` through the hidden stages in order, then the output layer."""
        stages = (
            (self.linear_1, self.relu_1),
            (self.linear_2, self.relu_2),
            (self.linear_3, self.relu_3),
            (self.linear_4, self.relu_4),
            (self.linear_5, self.relu_5),
        )
        for affine, activation in stages:
            x = activation(affine(x))
        return self.output(x)

In [12]:
# Instantiate the network used in the rest of the notebook.
nnet = NNet()

In [13]:
# Sanity check: push one training batch through the untrained network and
# confirm the input type and the input/output shapes.
x_batch = next(iter(train_dataloader))[0]
print(type(x_batch))

# Cast to float32 before the forward pass.
x_batch = x_batch.float()
y_batch = nnet(x_batch)
print(x_batch.shape, y_batch.shape, sep="\n")

<class 'torch.Tensor'>
torch.Size([128, 1])
torch.Size([128, 1])


In [14]:
# Mean-squared-error objective, minimised with Adam at a learning rate of 1e-3.
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=nnet.parameters(), lr=1e-3)

In [15]:
# Use the first CUDA device when CUDA is available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

print(device)

cpu


In [None]:
nnet.to(device)

# Mean training loss per sample, one entry per epoch.
loss_list = []
for epoch in range(10):

    # predict() switches the network to eval mode and leaves it there, so
    # re-enable training mode in case this cell is re-run after predicting.
    nnet.train()

    running_loss = 0.0
    running_y_score = []
    running_y_label = []
    n_samples = 0
    for X_batch, Y_batch in train_dataloader:

        # batch data
        X_batch = X_batch.to(device).float()
        Y_batch = Y_batch.to(device).float()
        batch_size = X_batch.shape[0]

        # forward
        optimizer.zero_grad()
        # Give the prediction the same shape as the target. Comparing a
        # flattened (B,) prediction against a (B, 1) target makes MSELoss
        # broadcast both to (B, B) and optimise the wrong quantity.
        y_batch_score = nnet(X_batch).reshape(Y_batch.shape)

        # backpropagation
        loss = loss_function(y_batch_score, Y_batch)  # compute the loss
        loss.backward()  # compute the gradients of the loss

        # parameter update
        optimizer.step()

        # loss is a per-batch mean; weight it by the batch size so a smaller
        # final batch contributes proportionally to the epoch average.
        running_loss += loss.item() * batch_size
        n_samples += batch_size
        running_y_score += y_batch_score.detach().cpu().reshape(-1).tolist()
        running_y_label += Y_batch.detach().cpu().reshape(-1).tolist()

    # metrics
    epoch_loss = running_loss / max(n_samples, 1)
    loss_list.append(epoch_loss)
    print(f"Epoch {epoch + 1} | Training loss: {epoch_loss}")

In [16]:
def predict(nnet, X):
    """Return the predictions of ``nnet`` for ``X`` without tracking gradients.

    The model is switched to evaluation mode and is left in that mode.

    Args:
        nnet: A ``torch.nn.Module``.
        X: Input batch, either a tensor or anything ``torch.as_tensor``
            accepts (for example a NumPy array). It is cast to float32.

    Returns:
        The tensor produced by calling ``nnet`` on the converted input.
    """
    nnet.eval()
    X = torch.as_tensor(X)
    # Run on the device the model's parameters live on rather than relying on
    # a module-level variable; a parameter-free model keeps the input's device.
    first_param = next(nnet.parameters(), None)
    target_device = X.device if first_param is None else first_param.device
    with torch.no_grad():
        X = X.to(target_device).float()
        # Call the module itself (not .forward) so registered hooks run.
        return nnet(X)

In [None]:
# Convert the training inputs to a tensor so they can be passed to predict().
# NOTE(review): the name is misspelled ("tain"); the prediction cell reads it
# under this exact name, so any rename has to be done in both places.
X_tain_tensor = torch.from_numpy(X_train)

In [None]:
# Predict on the full training set in a single forward pass.
y_hat_train = predict(nnet, X_tain_tensor)

In [None]:
# Copy the predictions to host memory before converting: .numpy() raises for
# a tensor that lives on a CUDA device.
y_hat = y_hat_train.detach().cpu().numpy()

In [None]:
# Overlay the true targets and the network's predictions against the training
# inputs; both curves are drawn on the same axes.
plot = sns.lineplot(x=np.squeeze(X_train), y=np.squeeze(y_train), label='real y')
plot = sns.lineplot(x=np.squeeze(X_train), y=np.squeeze(y_hat), label='predicted y')
plot.set(xlabel ='x', ylabel='y', title='TRAIN: y real vs y predicted')
# Render the figure (an argument-less Axes.plot() call draws nothing).
plt.show()