>> **Machine Learning Academy: Seven Step Template**






<center><img src="https://www.dropbox.com/s/r86beo9nt8xrgh9/MLA%20Seven%20Step%20Process.png?raw=1" height=300px width=1000px></img></center>
---



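# Step - 1 : Frame The Problem

Given the MovieLens 100k ratings (user, movie, rating), we want to predict the rating a user would give to movies they have not rated yet. We do this with an autoencoder that learns to reconstruct each user's rating vector.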

# Step - 2 : Obtain the Data

## Import Libraries

In [0]:
#Installing modules we need. And doing it only once.
import pkgutil; 
if not pkgutil.find_loader("torch"):
  !pip install torch -q

In [0]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [0]:
torch.cuda.is_available() # True when PyTorch can see a usable CUDA GPU; this only reports availability and moves nothing onto the GPU

### Getting Data

In [0]:
!wget https://www.dropbox.com/s/o13uszgy955mdh8/ml-100k.zip -q
!unzip ml-100k.zip >0

In [0]:
ls -l # list the working directory; ml-100k.zip and the extracted ml-100k/ folder should show up


In [0]:
# Load the ready-made u1 train/test split. Both files are tab-separated and
# carry no header row, so the column names are supplied here. An integer NumPy
# copy of each frame is kept for the positional indexing done further down.
training_set_df = pd.read_csv(
    'ml-100k/u1.base',
    sep='\t',
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
)
training_set = training_set_df.to_numpy(dtype='int', copy=True)
test_set_df = pd.read_csv(
    'ml-100k/u1.test',
    sep='\t',
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
)
test_set = test_set_df.to_numpy(dtype='int', copy=True)

# Step - 3 : Analyse the Data

In [0]:
# Column dtypes and non-null counts of the training split.
training_set_df.info()

In [0]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the training split.
training_set_df.describe()

In [0]:
# Column dtypes and non-null counts of the test split.
test_set_df.info()

# Step - 4 : Feature Engineering

## Feature Engineering

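The ratings arrive as one row per (user, movie, rating, timestamp). Below we reshape them into a user × movie matrix with one row per user and one column per movie, where a 0 marks a movie the user has not rated. No imputation is performed: unrated entries simply stay 0 and are masked out when the loss is computed.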

In [0]:
# Getting the number of users and movies
# The largest id seen in either split (column 0 = user, column 1 = movie) is
# used as the size of the corresponding axis of the rating matrix.
nb_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))
nb_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))
print(nb_users, nb_movies)

In [0]:
# Converting the data into an array with users in lines and movies in columns
def convert(data, n_users=None, n_movies=None):
    """Pivot rating rows into a dense user-by-movie list of lists.

    Parameters
    ----------
    data : numpy.ndarray
        2-D integer array; column 0 is the 1-based user id, column 1 the
        1-based movie id and column 2 the rating. Further columns are ignored.
    n_users, n_movies : int, optional
        Number of rows / columns of the result. When omitted they fall back to
        the notebook-level ``nb_users`` / ``nb_movies``, so ``convert(data)``
        behaves as before.

    Returns
    -------
    list of list of float
        ``n_users`` rows with ``n_movies`` entries each; entry ``[u-1][m-1]``
        is the rating user ``u`` gave movie ``m`` and 0.0 where ``data`` has
        no such row.
    """
    if n_users is None:
        n_users = nb_users
    if n_movies is None:
        n_movies = nb_movies
    user_ids = data[:, 0]
    # The previous per-user loop only ever looked at ids 1..n_users; keep that
    # by dropping rows with any other user id before the scatter below.
    known = (user_ids >= 1) & (user_ids <= n_users)
    ratings = np.zeros((n_users, n_movies))
    # One vectorised scatter instead of building a boolean mask over all rows
    # (twice) for every single user.
    ratings[user_ids[known] - 1, data[known, 1] - 1] = data[known, 2]
    return ratings.tolist()
# Pivot both splits with the same helper so they share one user x movie layout.
training_list, test_list = map(convert, (training_set, test_set))

In [0]:
# Number of rows in the pivoted training data: convert() emits one row per user id.
len(training_list)

In [0]:
# Length of the first user's row: convert() allocates one slot per movie id.
len(training_list[0])

In [0]:
# Wrap the nested rating lists as 32-bit float tensors for PyTorch.
training_torch = torch.tensor(training_list, dtype=torch.float32)
test_torch = torch.tensor(test_list, dtype=torch.float32)

# Step - 5 : Model Creation

<center><img src="https://www.dropbox.com/s/i37mgynkrf1d3vb/supervised_flow_chart.png?raw=1" height=300px width=1000px></img></center>

## Train Test Split

In [0]:
# Done above: the u1.base / u1.test files loaded in Step 2 already form the train/test split.

## Building a Model

In [0]:
# Creating the architecture of the Neural Network
class AutoEncoder(nn.Module):
    """Fully connected autoencoder over one user's rating vector.

    Layer widths are ``n_movies -> 20 -> 10 -> 20 -> n_movies``. A sigmoid
    follows every layer except the last, so the reconstruction is unbounded.

    Parameters
    ----------
    n_movies : int, optional
        Width of the input and output layers. When omitted it falls back to
        the notebook-level ``nb_movies``, so ``AutoEncoder()`` keeps working.
    """
    def __init__(self, n_movies=None):
        super().__init__()
        if n_movies is None:
            n_movies = nb_movies
        # Encoder: compress the rating vector down to 10 features.
        self.fc1 = nn.Linear(n_movies, 20)
        self.fc2 = nn.Linear(20, 10)
        # Decoder: expand back to one predicted rating per movie.
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, n_movies)
        self.activation = nn.Sigmoid()
    def forward(self, x):
        """Return the reconstructed ratings for ``x`` (last dim = n_movies)."""
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        # No activation on the output layer.
        x = self.fc4(x)
        return x
# Seed PyTorch's RNG before the layers are created so the random weight
# initialisation is the same on every Restart & Run All.
torch.manual_seed(0)
sae = AutoEncoder()
# Mean squared error between reconstructed and actual ratings.
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)

In [0]:
# Training the AutoEncoder
# One optimisation step per user who has at least one rating in the training
# split; the value printed per epoch is the mean per-user RMSE over the movies
# that user actually rated.
nb_epoch = 20 # 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(nb_users):
        user_ratings = training_torch[id_user].unsqueeze(0)  # add a batch dimension of 1
        target = user_ratings.clone()
        n_rated = int((target > 0).sum())
        if n_rated > 0:
            # PyTorch accumulates gradients across backward() calls; without
            # this reset every step would also apply all earlier users' gradients.
            optimizer.zero_grad()
            output = sae(user_ratings)
            # Zero the predictions for unrated movies so they add nothing to the loss.
            output = output.masked_fill(target == 0, 0.)
            loss = criterion(output, target)
            # MSELoss averages over all nb_movies entries; rescale so the
            # reported value is the mean over the rated movies only.
            mean_corrector = nb_movies/float(n_rated + 1e-10)
            loss.backward()
            train_loss += np.sqrt(loss.item()*mean_corrector)
            s += 1.
            optimizer.step()
    print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

## Predict using Model

In [0]:
# Testing the SAE
# Each user's training ratings are fed to the model and the reconstruction is
# scored against that user's ratings in the test split (rated movies only).
test_loss = 0
s = 0.
with torch.no_grad():  # evaluation only: no autograd graph needed
    for id_user in range(nb_users):
        user_ratings = training_torch[id_user].unsqueeze(0)  # add a batch dimension of 1
        target = test_torch[id_user]
        n_rated = int((target > 0).sum())
        if n_rated > 0:
            # Single forward pass; drop the batch dimension so the output has
            # the same shape as target.
            output = sae(user_ratings).squeeze(0)
            # Ignore movies the user did not rate in the test split.
            output = output.masked_fill(target == 0, 0.)
            loss = criterion(output, target)
            # MSELoss averages over all nb_movies entries; rescale so the
            # reported value is the mean over the rated movies only.
            mean_corrector = nb_movies/float(n_rated + 1e-10)
            test_loss += np.sqrt(loss.item()*mean_corrector)
            s += 1.
print('test loss: '+str(test_loss/s))

Let's move on to evaluate our model.

# Step - 6 : Evaluation

In [0]:
# Mean per-user test RMSE accumulated by the test loop above (test_loss over s users).
print(f'test loss: {test_loss/s}')