In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as otpim
from torch.autograd import Variable

### Reading Trainning Data

In [2]:
trainning_set = pd.read_csv('ml-100k/u1.base',sep='\t',header=None)
test_set = pd.read_csv('ml-100k/u1.test',sep='\t',header=None)

In [3]:
trainning_set = np.array(trainning_set)
test_set = np.array(test_set)

In [4]:
trainning_set[0]

array([        1,         1,         5, 874965758])

In [5]:
number_of_users = np.max(trainning_set[:,0])
number_of_movies = np.max(trainning_set[:,1])

### Building No of users * No of Movies Matrix

In [6]:
def covert_data(input_data):
    master_matrix = []
    for i in range(1,number_of_users+1):

        ratings = np.zeros(number_of_movies)
        movies_watched = input_data[:,1][input_data[:,0] == i]
        ratings_given = input_data[:,2][input_data[:,0] == i]
        ratings[movies_watched-1] = ratings_given
        master_matrix.append(ratings)
    return np.array(master_matrix)

In [7]:
trainning_set = covert_data(trainning_set)
test_set = covert_data(test_set)

### Coverting data to torch tensors

In [8]:
trainning_set = torch.FloatTensor(trainning_set)
test_set = torch.FloatTensor(test_set)

### Building Auto Encoder Class

In [9]:
class Auto_Encoder(nn.Module):
    
    def __init__(self):
        
        super(Auto_Encoder,self).__init__()
        self.fc1 = nn.Linear(number_of_movies,20)
        self.fc2 = nn.Linear(20,10)
        self.fc3 = nn.Linear(10,20)
        self.fc4 = nn.Linear(20,number_of_movies)
        self.activation = nn.Sigmoid()
    
    def forward_pass(self,x):
        
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x
    
        

### Creating AE object, Loss and Optimiser objects

In [10]:
ae = Auto_Encoder()
mse = nn.MSELoss()
opti = otpim.RMSprop(ae.parameters(),lr=0.001,weight_decay=0.5)

### Trainning

In [11]:
no_of_epoc = 10

for i in range(no_of_epoc):
    
    train_loss = 0
    counter = 0
    
    for j in range(number_of_users):
        
        input = Variable(trainning_set[j]).unsqueeze(0)
        target = input
        
        if (sum(trainning_set[j].data > 0) > 0):
            
            output = ae.forward_pass(input)
            output[target == 0] = 0
            loss = mse(output,target)
            loss.backward()
            train_loss += np.sqrt(loss.data)
            opti.step()
            counter += 1
    
    print("Number of Epoc:{},Train Loss:{}".format(i,train_loss/counter))
        

Number of Epoc:0,Train Loss:0.3576861321926117
Number of Epoc:1,Train Loss:0.21932445466518402
Number of Epoc:2,Train Loss:0.2102953940629959
Number of Epoc:3,Train Loss:0.2071165144443512
Number of Epoc:4,Train Loss:0.20552925765514374
Number of Epoc:5,Train Loss:0.20459482073783875
Number of Epoc:6,Train Loss:0.2039918601512909
Number of Epoc:7,Train Loss:0.20357933640480042
Number of Epoc:8,Train Loss:0.20328310132026672
Number of Epoc:9,Train Loss:0.20306333899497986


### Testing

In [15]:
test_loss = 0
counter = 0
for j in range(number_of_users):
    
    input = Variable(trainning_set[j]).unsqueeze(0)
    target = Variable(test_set[j]).unsqueeze(0)
    
    if (sum(trainning_set[j].data > 0) > 0):
            
            output = ae.forward_pass(input)
            output[target == 0] = 0
            loss = mse(output,target)
            test_loss += np.sqrt(loss.data)
            print
            counter += 1
    

print("Test Loss:{}".format(i,test_loss/counter))
        

Test Loss:9
