In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import numpy as np
import pandas as pd
import seaborn as sns
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# Reaching this line means none of the imports above raised an error
print("All packages imported successfully!")

In [None]:
# Select the CPU as the device that tensors and the model are moved to
device = torch.device(type="cpu")

# 1 - Data preprocessing

In [None]:
# Load the bottle table from the local _data folder
xy = pd.read_csv(filepath_or_buffer="_data/bottle.csv")

# Preview the first five rows as the cell output
xy.head(n=5)

In [None]:
# retrieve salinity and temperature columns (selected by position, not by name)
# NOTE(review): the labels below disagree with the "reshape data" cell, which tags
# x as temperature and y as salinity. If this file is the CalCOFI bottle table,
# column 5 is presumably T_degC and column 6 Salnty, i.e. the opposite of the
# labels here — TODO confirm with xy.columns[[5, 6]] before trusting either label.
x_raw = xy.iloc[:,5] # salinity
y_raw = xy.iloc[:,6] # temperature

In [None]:
# Share of NaN entries in each selected column, expressed as a percentage
salinity_nan_pct = x_raw.isna().sum() / len(x_raw) * 100
temperature_nan_pct = y_raw.isna().sum() / len(y_raw) * 100

print(f'Salinity missing data summation (%): {salinity_nan_pct:.3f} %')
print(f'Temperature missing data summation (%): {temperature_nan_pct:.3f} %')

In [None]:
# TODO: fill missing data with ffill

# YOUR CODE HERE

In [None]:
# Count the NaN entries left in each column; both should be 0 once the fill step ran
salinity_nan_count = x_raw.isna().sum()
temperature_nan_count = y_raw.isna().sum()

print(f'Salinity missing data summation: {salinity_nan_count}')
print(f'Temperature missing data summation: {temperature_nan_count}')

In [None]:
# reshape data: turn each 1-D column into an (n_samples, 1) array
# NOTE(review): x is built from x_raw and y from y_raw, but the labels on the two
# lines below contradict the column-selection cell, which tags x_raw as salinity
# and y_raw as temperature — TODO confirm against xy.columns and make both agree.
x = np.array(x_raw).reshape(-1,1) # Temperature (label unverified, see NOTE)
y = np.array(y_raw).reshape(-1,1) # Salinity (label unverified, see NOTE)

In [None]:
# TODO: Split into train+validation and test sets using sklearn library

# YOUR CODE HERE

In [None]:
# TODO: Normalize the input (x in [0,1])

# YOUR CODE HERE

# 2 - Define Model

In [None]:
# Tunable hyperparameters (change them to see their influence in training)
learning_rate = 1e-3  # intended for the optimizer
batch_size = 100      # intended for the data loaders
num_epochs = 20       # number of passes made by the training loop

# DON'T CHANGE THIS
input_size, output_size = 1, 1  # one input feature, one output value

In [None]:
# TODO: Create dataloaders to Pytorch models
class Salinity(Dataset):
    ''' Map-style dataset pairing every input sample with its expected output

        Input:
            x_data (float tensor): input values
            y_data (float tensor): expected output value

        The constructor has to store the attributes read by the other
        methods: self.x, self.y and self.n_samples.
    '''
    def __init__(self, x_data, y_data):
        # TODO: complete the constructor

        # YOUR CODE HERE (delete pass after filling with your code)

        pass

    def __len__(self):
        ''' Number of samples, as stored in self.n_samples '''
        return self.n_samples

    def __getitem__(self, index):
        ''' Return the (input, expected output) pair found at position index '''
        sample_x = self.x[index]
        sample_y = self.y[index]
        return sample_x, sample_y

# Wrap every split as a pair of float32 tensors inside a Salinity dataset
train_dataset, val_dataset, test_dataset = (
    Salinity(
        torch.from_numpy(inputs.astype(np.float32)),
        torch.from_numpy(targets.astype(np.float32)),
    )
    for inputs, targets in ((x_train, y_train), (x_val, y_val), (x_test, y_test))
)

# TODO: complete the dataloader (see https://pytorch.org/docs/stable/data.html)
# Placeholders to be replaced, presumably by DataLoader objects built from the
# datasets above. The training loop iterates over train_loader, so it cannot run
# while train_loader is still None.
train_loader = None # YOUR CODE HERE
val_loader = None # YOUR CODE HERE
test_loader = None # YOUR CODE HERE

In [None]:
# TODO: build the neural network architecture for linear regression
class NeuralNet(nn.Module):
    ''' Skeleton of the linear-regression network (layers still to be added)

        Input:
            input_size (int): number of input features
            output_size (int): number of output values
    '''
    def __init__(self, input_size, output_size):
        super().__init__()
        # TODO: complete with the Neural Network architecture (set up the linear layer)

        # YOUR CODE HERE (delete pass after filling with your code)

        pass

    def forward(self, x):
        ''' Forward pass on x; returns None until the placeholder is filled in '''
        # TODO: complete the forward method

        # YOUR CODE HERE (delete pass after filling with your code)

        pass

In [None]:
# TODO: instantiate the model (presumably NeuralNet with input_size and output_size)
# NOTE: model.to(device) raises AttributeError while model is still None
model = None # YOUR CODE HERE
model.to(device)

In [None]:
# Loss function and optimizer

# TODO: complete with the loss function (hint: use Mean Squared Error - MSE)
criterion = None # YOUR CODE HERE

# TODO: complete with the optimizer (hint: use Stochastic Gradient Descent - SGD)
optimizer = None # YOUR CODE HERE

# Show the model's printed representation
print(model)

# 3 - Train model

In [None]:
# iterations: num_epochs passes over train_loader, one batch at a time
for e in tqdm(range(1, num_epochs+1)):
    for X_train_batch, y_train_batch in train_loader:
        # move the batch to the selected device
        X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
        
        # clean the gradients
        optimizer.zero_grad() 
        
        # TODO: complete the forward pass
        y_train_pred = None # YOUR CODE HERE      
        train_loss = None # YOUR CODE HERE
        
        # TODO: complete the backward pass
        # YOUR CODE HERE
        
        # TODO: complete the weight update
        # YOUR CODE HERE

        # clean the gradients (redundant: also done at the start of every iteration)
        optimizer.zero_grad()

    # train_loss is the loss of the last batch of the epoch, not an epoch average.
    # NOTE: the format spec raises TypeError while train_loss is still None.
    print(f'Epoch: {e}, Loss: {train_loss:.4f}')

# 4 - Inference

In [None]:
# inference

# transform the test inputs into a float32 tensor and move it to the same device
# the model and the training batches were moved to; a dedicated name is used so
# the `x` array created in the preprocessing cells is not overwritten on re-run
x_test_tensor = torch.from_numpy(x_test.astype(np.float32)).to(device)

# prediction: no gradients are needed here, so autograd is disabled; the result
# is brought back to the CPU before the conversion to a NumPy array
with torch.no_grad():
    predicted = model(x_test_tensor).cpu().numpy()

In [None]:
from sklearn import linear_model

# Baseline: scikit-learn's least-squares regression, fitted on the training
# split and evaluated on the test inputs
regr = linear_model.LinearRegression().fit(x_train, y_train)
y_pred = regr.predict(x_test)

# Draw both sets of predictions on one square figure
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(x_test, y_pred, label='Least Square')  # Least Square Method approach
ax.plot(x_test, predicted, label='Neural Network')  # Neural Network approach
ax.set_xlabel('Salinity')
ax.set_ylabel('Temperature')
ax.grid(True)
ax.legend()
plt.show()