In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import torch.nn.parallel
import pandas as pd
import numpy as np

In [2]:
training_data = pd.read_csv('ml-100k/u1.base',header=None,delimiter='\t')
test_data = pd.read_csv('ml-100k/u1.test',header=None,delimiter='\t')

In [3]:
training_data = np.array(training_data,dtype=int)
test_data = np.array(test_data,dtype=int)

In [4]:
training_data[:10]

array([[        1,         1,         5, 874965758],
       [        1,         2,         3, 876893171],
       [        1,         3,         4, 878542960],
       [        1,         4,         3, 876893119],
       [        1,         5,         3, 889751712],
       [        1,         7,         4, 875071561],
       [        1,         8,         1, 875072484],
       [        1,         9,         5, 878543541],
       [        1,        11,         2, 875072262],
       [        1,        13,         5, 875071805]])

In [5]:
nb_movies = int(max(max(training_data[:,1]),max(test_data[:,1])))
nb_users = int(max(max(training_data[:,0]),max(test_data[:,0])))

In [6]:
nb_users

943

In [7]:
nb_movies

1682

In [8]:
def convert(data):
    new_data = []
    for i in range(1,nb_users+1):
        mov = data[data[:,0] == i][:,1]
        rate = data[data[:,0]==i][:,2]
        rating = np.zeros(nb_movies)
        rating[mov-1] = rate
        new_data.append(list(rating))
    return new_data

training_data = convert(training_data)
test_data = convert(test_data)

In [9]:
len(training_data)

943

In [10]:
len(training_data[0])

1682

In [11]:
training_data = torch.FloatTensor(training_data)
test_data = torch.FloatTensor(test_data)

In [12]:
class SAE(nn.Module):
    def __init__(self,):
        super(SAE,self).__init__()
        self.fc1 = nn.Linear(nb_movies,20)
        self.fc2 = nn.Linear(20,10)
        self.fc3 = nn.Linear(10,20)
        self.fc4 = nn.Linear(20,nb_movies)
        self.activation = nn.Sigmoid()
        
    def forward(self,x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x

sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(),lr=0.01,weight_decay=0.5)
epochs = 200
for epoch in range(1,epochs+1):
    train_loss = 0
    s = 0.
    for id in range(nb_users):
        input = Variable(training_data[id]).unsqueeze(0)
        target = input.clone()
        if torch.sum(target.data > 0) > 0: 
            output = sae(input)
            target.require_grad = False
            output[target == 0] = 0
            loss = criterion(output,target)
            mean_corrector =  nb_movies/float(torch.sum(target.data > 0) + 1e-10)
            loss.backward()
            train_loss += np.sqrt(loss.data*mean_corrector)
            s += 1.
            optimizer.step()
    print("epoch: "+str(epoch)+" train loss: "+str(train_loss/s))

epoch: 1 train loss: tensor(1.7709)
epoch: 2 train loss: tensor(1.0968)
epoch: 3 train loss: tensor(1.0534)
epoch: 4 train loss: tensor(1.0384)
epoch: 5 train loss: tensor(1.0310)
epoch: 6 train loss: tensor(1.0264)
epoch: 7 train loss: tensor(1.0240)
epoch: 8 train loss: tensor(1.0218)
epoch: 9 train loss: tensor(1.0207)
epoch: 10 train loss: tensor(1.0197)
epoch: 11 train loss: tensor(1.0190)
epoch: 12 train loss: tensor(1.0185)
epoch: 13 train loss: tensor(1.0179)
epoch: 14 train loss: tensor(1.0176)
epoch: 15 train loss: tensor(1.0171)
epoch: 16 train loss: tensor(1.0167)
epoch: 17 train loss: tensor(1.0164)
epoch: 18 train loss: tensor(1.0166)
epoch: 19 train loss: tensor(1.0165)
epoch: 20 train loss: tensor(1.0162)
epoch: 21 train loss: tensor(1.0159)
epoch: 22 train loss: tensor(1.0161)
epoch: 23 train loss: tensor(1.0160)
epoch: 24 train loss: tensor(1.0157)
epoch: 25 train loss: tensor(1.0155)
epoch: 26 train loss: tensor(1.0156)
epoch: 27 train loss: tensor(1.0153)
epoch: 28 

In [15]:
loss.data

tensor(0.1338)

In [16]:
mean_corrector

10.011904761904763

In [20]:
test_loss = 0
s = 0.
for id in range(nb_users):
    input = Variable(training_data[id]).unsqueeze(0)
    target = Variable(test_data[id]).unsqueeze(0)
    if torch.sum(target.data > 0) > 0: 
        output = sae(input)
        target.require_grad = False
        output[target == 0] = 0
        loss = criterion(output,target)
        mean_corrector =  nb_movies/float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.data*mean_corrector)
        s += 1.
print("train loss: "+str(train_loss/s))

train loss: tensor(1.8828)
