# A deeper dive into loading data

In [None]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

import torchmetrics

## Using the TensorDataset class

In [None]:
# Build a toy dataset: 12 samples with 8 features each and one target value.
# np.random.rand already returns an ndarray, so no extra np.array() copy is needed.
np_features = np.random.rand(12, 8)
np_target = np.random.rand(12, 1)

# torch.tensor copies the NumPy data into new tensors.
torch_features = torch.tensor(np_features)
torch_target = torch.tensor(np_target)

# Pair each feature row with its target so indexing yields (features, target).
dataset = TensorDataset(torch_features, torch_target)

# Show the last sample of the dataset.
print(dataset[-1])

## From data loading to running a forward pass

In [None]:
# Load the CSV into a DataFrame.
# NOTE(review): the file is named animals.csv but the columns selected below
# look like water-quality fields — confirm this is the intended file.
dataframe = pd.read_csv("./data/animals.csv")

# Split the four input columns from the column used as the target.
features_array = dataframe[['ph', 'Sulfate', 'Conductivity', 'Organic_carbon']]
target_array = dataframe['Potability']

# Convert both to float32 tensors.
features = torch.tensor(np.array(features_array), dtype=torch.float32)
target = torch.tensor(np.array(target_array), dtype=torch.float32)

# Wrap the tensors so they can be served in shuffled batches of two samples.
dataset = TensorDataset(features, target)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# Pull the first batch from the loader.
first_batch = next(iter(dataloader))
x, y = first_batch

# Two stacked linear layers: 4 inputs -> 2 hidden units -> 1 output.
hidden_layer = nn.Linear(4, 2)
output_layer = nn.Linear(2, 1)
model = nn.Sequential(hidden_layer, output_layer)

# NOTE(review): the forward pass runs on the full feature tensor, not on the
# batch `x` fetched above — confirm this is intended.
output = model(features)
print(output)

# Evaluating model performance

## Writing the evaluation loop

In [None]:
# Switch the model to evaluation mode for the validation pass
model.eval()
validation_loss = 0.0

# Gradient tracking is disabled while the validation loss is measured
with torch.no_grad():
    for data in validationloader:
        # Each item is indexed as (inputs, targets)
        inputs, targets = data[0], data[1]
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Accumulate this batch's loss as a plain Python number
        validation_loss += loss.item()

# Divide the accumulated loss by len(validationloader) to get the mean
validation_loss_epoch = validation_loss / len(validationloader)
print(validation_loss_epoch)

# Put the model back into training mode
model.train()

## Calculating accuracy using torchmetrics

In [None]:
# Multiclass accuracy metric configured for three classes
metric = torchmetrics.Accuracy(num_classes=3, task="multiclass")
for data in dataloader:
    features, labels = data
    outputs = model(features)

    # Feed this batch to the metric: softmax scores against the argmax of the
    # labels (assumes labels are one-hot encoded — TODO confirm)
    predicted_scores = outputs.softmax(dim=-1)
    true_classes = labels.argmax(dim=-1)
    acc = metric(predicted_scores, true_classes)

# Accuracy aggregated over all batches passed to the metric above
acc = metric.compute()

# Clear the metric state before the next epoch
metric.reset()
plot_errors(model, dataloader)

# Fighting overfitting

## Experimenting with dropout

In [None]:
# Small network: one linear layer (3072 -> 16), a ReLU, then dropout with p=0.8.
layers = [
    nn.Linear(3072, 16),
    nn.ReLU(),
    nn.Dropout(p=0.8),
]
model = nn.Sequential(*layers)

# One random input row with 3072 values, passed through the model.
input_tensor = torch.randn(1, 3072)
model(input_tensor)

# Improving model performance

## Implementing random search

In [None]:
# Draw 10 random (learning rate, momentum) pairs.
# The learning rate is 10 ** -u with u drawn uniformly between 2 and 4, so it is
# sampled on a log scale; the momentum is drawn uniformly between 0.85 and 0.99.
values = [
    (10 ** -np.random.uniform(2, 4), np.random.uniform(0.85, 0.99))
    for _ in range(10)
]

print(values)