# Hyperparameter search for Movielens VAE

This is the ipynb I will use to actually look for the best configuration of the model and its hyperparameters


## Input info 

In [15]:
import pandas as pd
import numpy as np
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

raw_data = pd.read_csv("./data/Movielens100/u.data", sep = None, names=["userId", "movieId", "rating", "timestamp"])
raw_data = raw_data.loc[:, raw_data.columns != "timestamp"]
#make indices start at 0
raw_data["userId"] -= 1
raw_data["movieId"] -= 1
#make ratings center around 0
raw_data["rating"] -= 3

# create (943, 1682) matrix of user ratings per movie
user_ratings = pd.DataFrame(np.zeros((943,1682)))
for i in raw_data.index:
    user_ratings[raw_data["movieId"][i]][raw_data["userId"][i]] = raw_data["rating"][i]
user_ratings = user_ratings.to_numpy()    
user_ratings

  raw_data = pd.read_csv("./data/Movielens100/u.data", sep = None, names=["userId", "movieId", "rating", "timestamp"])


array([[2., 0., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [2., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 2., 0., ..., 0., 0., 0.]])

## Begin model and tuning of Hyperparameters

In [58]:
from torch.utils.data import Dataset, DataLoader
from torch import nn 
import torch
from sklearn.metrics import precision_recall_fscore_support
device = "cuda"

class MovielensDataset(Dataset):
    def __init__(self, data, mask_magnitude):
        self.data = data
        # create a mask of 0 and 1 values where half are 0 and half are 1. 
        #The ratio of masked values is something that can and should be optimized. 
        self.random_mask = np.clip((np.random.randn(1682) + mask_magnitude).round(), a_max = 1, a_min = 0)
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        np.random.shuffle(self.random_mask)
        return self.data[idx] * self.random_mask, self.data[idx]


class VariationalAutoEncoder(nn.Module):
    def __init__(self, dropout_rate):
        super().__init__()
        self.encoder = nn.Sequential(
            #Encoder
            nn.Linear(1682, 1024),
            nn.LeakyReLU(),
            nn.Linear(1024,1024),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
        )
        self.decoder = nn.Sequential(
            #Decoder
            nn.Linear(512,1024),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(1024,1682),
            nn.Tanh()
        )
        # distribution parameters
        self.fc_mu = nn.Linear(1024, 512)
        self.fc_var = nn.Linear(1024, 512)
        self.f1 = 0.0
        # for the gaussian likelihood
        self.log_scale = nn.Parameter(torch.Tensor([0.0]))
        
        
    def gaussian_likelihood(self, x_hat, logscale, x):
        scale = torch.exp(logscale)
        mean = x_hat
        dist = torch.distributions.Normal(mean, scale)

        # measure prob of seeing data under p(x|z)
        log_pxz = dist.log_prob(x)
        return log_pxz.sum(dim=1)
    
    def kl_divergence(self, z, mu, std):
        # --------------------------
        # Monte carlo KL divergence
        # --------------------------
        # 1. define the first two probabilities (in this case Normal for both)
        p = torch.distributions.Normal(torch.zeros_like(mu), torch.ones_like(std))
        q = torch.distributions.Normal(mu, std)

        # 2. get the probabilities from the equation
        log_qzx = q.log_prob(z)
        log_pz = p.log_prob(z)

        # kl
        kl = (log_qzx - log_pz)
        kl = kl.sum(-1)
        return kl
    
    def forward(self, x):
        # encode x to get the mu and variance parameters
        x_encoded = self.encoder(x)
        mu, log_var = self.fc_mu(x_encoded), self.fc_var(x_encoded)

        # sample z from q
        std = torch.exp(log_var / 2)
        
        #perform the kernel trick to allow for backprop through sampling
        
        epsilon = torch.distributions.Normal(0, 1).rsample()
        z = mu + epsilon * std
        # decoded
        ratings = self.decoder(z) * 2
        return ratings, z, mu, std
    
    def vae_loss(self, x_hat, x, z, mu, std):
        # reconstruction loss
        recon_loss = self.gaussian_likelihood(x_hat, self.log_scale, x)
    
        # kl
        kl = self.kl_divergence(z, mu, std)

        # elbo
        elbo = (kl - recon_loss)
        elbo = elbo.mean()

        return elbo
def get_precision_and_recall(labels, predictions):
    relevance_labels = labels > 0.5
    
    #To evaluate for comparison to other papers we mask by setting 20% of labels to false 
    
    pred_relevance = predictions > 0.5
    
    precision_recall_fscore_support(relevance_labels.flatten().cpu(), pred_relevance.flatten().cpu(), average="binary")

def train(dataloader, model,  optimizer, epoch):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X,y) in enumerate(dataloader):
        X, y = X.to(device).to(torch.float32), y.to(device).to(torch.float32)
        
        #compute prediction error
        pred, z, mu, std = model(X)
        loss = model.vae_loss(pred, y, z, mu, std)
        mse_loss = mse_loss_fcn(pred, y)
        
        #Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if epoch % 100 == 0 and batch % 64 == 0:
            loss , current = loss.item(), (batch+1) * len(X)
            print("Epoch : " + str(epoch))
            print(f"loss: {loss:>7f}")
            print(f"MSE loss: {mse_loss:>7f}")
            losses.append(loss)
            mse_losses.append(mse_loss.item())

def test(dataloader, model, epoch):
    size = len(dataloader.dataset)
    num_batches= len(dataloader)
    model.eval()
    test_loss, correct, test_mse_loss = 0,0,0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device).to(torch.float32), y.to(device).to(torch.float32)
            
            
            pred, z, mu, std = model(X)
            test_loss += model.vae_loss(pred, y, z, mu, std).item()
            test_mse_loss += mse_loss_fcn(pred, y)
    test_loss /= num_batches
    test_mse_loss /= num_batches
    if epoch % 100 ==0:
        test_losses.append(test_loss)
        #Come back and actually do the accuracy where it sees if it's at least on the right side of it or compute precision/recall or something like that
        print(f"Test Error: \n Avg Loss : {test_loss:>8f} ")
        print(f" MSE loss: {test_mse_loss:>7f}")
        test_mse_losses.append(test_mse_loss.item())
        
        #compute precision and recall
        relevance_labels = y > 0
        pred_relevance = pred > 0.5
        model.f1 = precision_recall_fscore_support(relevance_labels.flatten().cpu(), pred_relevance.flatten().cpu(), average="binary")
        
        
        

## The actual search 
The following contains all of the logic you need to do to restart everything over so long as you've already loaded the ones above

In [None]:
epochs = 2000
highest_f1_score = [0,0,0,0]
mask_magnitude = 1.3
#Search 

for l2_weight in [0., 0.1, 0.2, 0.3]:
    for dropout_rate in [0., 0.1, 0.2, 0.3]:
        print("using hyperparameters : " + str( l2_weight) + " , " + str(dropout_rate))
        train_dataset = MovielensDataset(user_ratings[:843], mask_magnitude)
        test_dataset = MovielensDataset(user_ratings[843:], mask_magnitude)
        train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
        test_dataloader = DataLoader(test_dataset, batch_size=100, shuffle=True)


        losses = []
        mse_losses = []    
        test_losses = []
        test_mse_losses = []

        model = VariationalAutoEncoder(dropout_rate = dropout_rate).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4, weight_decay = l2_weight)
        mse_loss_fcn = nn.MSELoss()

        for t in range(epochs):
            train(train_dataloader, model,  optimizer, t)
            test(test_dataloader, model, t )
            if t % 100 == 0:
                print(" F1 Score: " + str(model.f1) + "\n")
                if model.f1[2] > highest_f1_score[2]:
                    highest_f1_score = model.f1
                    torch.save(model.state_dict(), "./models/best.pth")
                    print("New best using hyper parameters : " + str(mask_magnitude) + " ," + str(l2_weight) + " ," + str(dropout_rate)  )

Epoch : 0
loss: 1794.213989
MSE loss: 0.295352
Test Error: 
 Avg Loss : 1635.590332 
 MSE loss: 0.099956
 F1 Score: (0.0, 0.0, 0.0, None)



  _warn_prf(average, modifier, msg_start, len(result))


Epoch : 100
loss: 1404.353027
MSE loss: 0.077986
Test Error: 
 Avg Loss : 1424.222412 
 MSE loss: 0.080196
 F1 Score: (0.5707348656523147, 0.2857837574971632, 0.3808597969323828, None)

New best using hyper parameters : 1.3 ,0.0 ,0.0
Epoch : 200
loss: 1164.096680
MSE loss: 0.058970
Test Error: 
 Avg Loss : 1188.011353 
 MSE loss: 0.074344
 F1 Score: (0.627349319507453, 0.31382720051872265, 0.4183684494867639, None)

New best using hyper parameters : 1.3 ,0.0 ,0.0
Epoch : 300
loss: 904.481812
MSE loss: 0.045473
Test Error: 
 Avg Loss : 996.119995 
 MSE loss: 0.072100
 F1 Score: (0.6593198992443325, 0.3394391311395688, 0.4481540930979133, None)

New best using hyper parameters : 1.3 ,0.0 ,0.0
Epoch : 400
loss: 712.950134
MSE loss: 0.034307
Test Error: 
 Avg Loss : 800.585144 
 MSE loss: 0.070057
 F1 Score: (0.6704275534441805, 0.3660236667207003, 0.4735241690258991, None)

New best using hyper parameters : 1.3 ,0.0 ,0.0
Epoch : 500
loss: 404.868042
MSE loss: 0.033360
Test Error: 
 Avg Lo

Epoch : 400
loss: 747.882324
MSE loss: 0.046658
Test Error: 
 Avg Loss : 804.863281 
 MSE loss: 0.074972
 F1 Score: (0.6963053187170117, 0.27800291781488085, 0.3973586654309546, None)

Epoch : 500
loss: 546.911987
MSE loss: 0.040373
Test Error: 
 Avg Loss : 650.266052 
 MSE loss: 0.077613
 F1 Score: (0.6335078534031413, 0.2942130004863025, 0.4018153641797653, None)

Epoch : 600
loss: 312.975891
MSE loss: 0.033687
Test Error: 
 Avg Loss : 535.353516 
 MSE loss: 0.086998
 F1 Score: (0.7104984093319194, 0.10860755389852489, 0.18841394825646796, None)

Epoch : 700
loss: 83.139427
MSE loss: 0.028836
Test Error: 
 Avg Loss : 355.680847 
 MSE loss: 0.073949
 F1 Score: (0.7622133599202393, 0.24785216404603663, 0.3740672782874618, None)

Epoch : 800
loss: -87.972610
MSE loss: 0.026279
Test Error: 
 Avg Loss : 282.899384 
 MSE loss: 0.074340
 F1 Score: (0.7610574478901881, 0.2426649375911817, 0.3679941002949852, None)

Epoch : 900
loss: -223.077591
MSE loss: 0.024642
Test Error: 
 Avg Loss : 246

Epoch : 900
loss: -188.440460
MSE loss: 0.030721
Test Error: 
 Avg Loss : 235.290421 
 MSE loss: 0.075802
 F1 Score: (0.5787420770355924, 0.38482736261954936, 0.462272417486126, None)

Epoch : 1000
loss: -397.793152
MSE loss: 0.025652
Test Error: 
 Avg Loss : 282.985535 
 MSE loss: 0.076792
 F1 Score: (0.5669694852084789, 0.3945534122224023, 0.46530300133817626, None)

Epoch : 1100
loss: -531.477783
MSE loss: 0.020243
Test Error: 
 Avg Loss : 317.364838 
 MSE loss: 0.078167
 F1 Score: (0.5661040787623066, 0.3914734965148322, 0.46286535697172976, None)

Epoch : 1200
loss: -749.105896
MSE loss: 0.017869
Test Error: 
 Avg Loss : 516.059448 
 MSE loss: 0.077568
 F1 Score: (0.5580541532813217, 0.3942292105689739, 0.46204996675216103, None)

Epoch : 1300
loss: -839.298706
MSE loss: 0.018540
Test Error: 
 Avg Loss : 833.666626 
 MSE loss: 0.078491
 F1 Score: (0.5733366312144447, 0.37574971632355325, 0.45397571484528004, None)

Epoch : 1400
loss: -832.228638
MSE loss: 0.018379
Test Error: 
 Av

  _warn_prf(average, modifier, msg_start, len(result))


Epoch : 100
loss: 1397.565918
MSE loss: 0.066814
Test Error: 
 Avg Loss : 1393.563721 
 MSE loss: 0.080472
 F1 Score: (0.5679666792881485, 0.24315124007132435, 0.340522133938706, None)

Epoch : 200
loss: 1194.846191
MSE loss: 0.082975
Test Error: 
 Avg Loss : 1190.466797 
 MSE loss: 0.078478
 F1 Score: (0.5654685494223364, 0.28562165667044903, 0.379536887452881, None)

Epoch : 300
loss: 1013.297485
MSE loss: 0.069729
Test Error: 
 Avg Loss : 1007.967102 
 MSE loss: 0.077270
 F1 Score: (0.6115702479338843, 0.26390014589074406, 0.3687011663458272, None)

Epoch : 400
loss: 796.904236
MSE loss: 0.060836
Test Error: 
 Avg Loss : 798.995361 
 MSE loss: 0.076278
 F1 Score: (0.601219120949631, 0.30377694926244125, 0.4036183502046091, None)

Epoch : 500
loss: 625.374512
MSE loss: 0.065109
Test Error: 
 Avg Loss : 645.813171 
 MSE loss: 0.075034
 F1 Score: (0.5935574229691877, 0.3434916518074242, 0.43515761371804085, None)

Epoch : 600
loss: 365.662598
MSE loss: 0.053543
Test Error: 
 Avg Loss :

Epoch : 600
loss: 438.323425
MSE loss: 0.061390
Test Error: 
 Avg Loss : 490.466003 
 MSE loss: 0.074814
 F1 Score: (0.6104972375690608, 0.3224185443345761, 0.4219794208125597, None)

Epoch : 700
loss: 307.642456
MSE loss: 0.062523
Test Error: 
 Avg Loss : 363.479645 
 MSE loss: 0.074454
 F1 Score: (0.6158998252766453, 0.34284324850056735, 0.44048734770384257, None)

Epoch : 800
loss: 162.183380
MSE loss: 0.060154
Test Error: 
 Avg Loss : 289.770782 
 MSE loss: 0.074419
 F1 Score: (0.6256814052089643, 0.3349003079915708, 0.4362791679864851, None)

Epoch : 900
loss: 6.335465
MSE loss: 0.050483
Test Error: 
 Avg Loss : 222.893158 
 MSE loss: 0.074051
 F1 Score: (0.6298780487804878, 0.3349003079915708, 0.4372949518467563, None)

Epoch : 1000
loss: -169.191193
MSE loss: 0.041891
Test Error: 
 Avg Loss : 200.170700 
 MSE loss: 0.073928
 F1 Score: (0.6424546023794615, 0.3326308964175717, 0.43832105094520984, None)

Epoch : 1100
loss: -227.051941
MSE loss: 0.039853
Test Error: 
 Avg Loss : 24