In [0]:
!wget https://sds-platform-private.s3-us-east-2.amazonaws.com/uploads/P16-AutoEncoders.zip

In [0]:
!unzip P16-AutoEncoders.zip

In [0]:
cd /content/AutoEncoders

In [0]:
!unzip ml-100k.zip

In [0]:
!unzip ml-1m.zip

In [0]:
import numpy as np
import pandas as pd

In [0]:
movies = pd.read_csv('/content/AutoEncoders/ml-1m/movies.dat' , sep='::' , header = None, engine='python' , encoding='latin-1')
users = pd.read_csv('/content/AutoEncoders/ml-1m/users.dat' , sep='::' , header = None, engine='python' , encoding='latin-1')
ratings = pd.read_csv('/content/AutoEncoders/ml-1m/ratings.dat' , sep='::' , header = None, engine='python' , encoding='latin-1')

In [0]:
training_set = pd.read_csv('/content/AutoEncoders/ml-100k/u1.base' , delimiter= '\t')
training_set = np.array(training_set, dtype='int')
test_set = pd.read_csv('/content/AutoEncoders/ml-100k/u1.test', delimiter= '\t')
test_set = np.array(test_set, dtype='int')

In [0]:
nb_users = int(max(max(training_set[:,0]),max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]),max(test_set[:,1])))

In [0]:
def convert(data):
  new_data = []
  for id_users in range(1 , nb_users+1):
    id_movies = data[:,1][data[:,0] == id_users]
    id_ratings = data[:,2][data[:,0] == id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies-1] = id_ratings  
    new_data.append(ratings)
  return new_data

training_set = np.array(convert(training_set))
test_set = np.array(convert(test_set))

In [0]:
from keras.models import Model
from keras.layers import Dense , Input
from keras.optimizers import RMSprop
import keras.backend as K
K.clear_session()

rms = RMSprop(learning_rate=0.01 , decay=0.5)

input = Input(shape=(nb_movies,))
encoded1 = Dense(160 , activation='sigmoid')(input)
encoded2 = Dense(80 , activation='sigmoid')(encoded1)
decoded1 = Dense(160 , activation='sigmoid')(encoded2)
output = Dense(nb_movies)(decoded1)

autoencoder = Model(input,output)
autoencoder.compile(optimizer=rms, loss='mean_squared_error', metrics=['mse'])

In [191]:
epochs = 100
batch_size = 1

for epoch in range(epochs):
  train_loss=0
  s=0
  for i in range(int(training_set.shape[0]/batch_size)):
    batch_input = training_set[(i) * batch_size:(i + 1) * batch_size]
    target = np.copy(batch_input)
    if np.sum(training_set[i]) > 0:
      predicted = autoencoder.predict_on_batch(training_set[(i) * batch_size:(i + 1) * batch_size])
      K.stop_gradient(target)
      predicted[target == 0] = 0
      loss = autoencoder.train_on_batch(target,predicted)
      mean_corrector = nb_movies/float(np.sum(target) + 1e-10)
      train_loss +=  np.sqrt(loss[0]*mean_corrector)
      s += 1

  print('epoch: ' +str(epoch) + ' loss: '+str(train_loss/s))

epoch: 0 loss: 0.01740862315735166
epoch: 1 loss: 0.0020120151799797687
epoch: 2 loss: 0.00176524660739645
epoch: 3 loss: 0.001648073398923099
epoch: 4 loss: 0.0015767720735243185
epoch: 5 loss: 0.0015275437254003241
epoch: 6 loss: 0.001490704539544367
epoch: 7 loss: 0.0014615202788846246
epoch: 8 loss: 0.001437405646316554
epoch: 9 loss: 0.0014168372251656255
epoch: 10 loss: 0.001398868081145623
epoch: 11 loss: 0.0013828827198464405
epoch: 12 loss: 0.0013684634245880668
epoch: 13 loss: 0.0013553164771937189
epoch: 14 loss: 0.0013432304779787553
epoch: 15 loss: 0.001332043868982863
epoch: 16 loss: 0.001321636159978018
epoch: 17 loss: 0.0013119081315178462
epoch: 18 loss: 0.0013027823350046856
epoch: 19 loss: 0.001294192427440752
epoch: 20 loss: 0.0012860848614767739
epoch: 21 loss: 0.0012784133282439847
epoch: 22 loss: 0.0012711385504171238
epoch: 23 loss: 0.0012642247883640134
epoch: 24 loss: 0.0012576430982944365
epoch: 25 loss: 0.0012513662327938593
epoch: 26 loss: 0.001245370889501

In [193]:
i=0
batch_size=1
predicted_xd = autoencoder.predict_on_batch(training_set[(i) * batch_size:(i + 1) * batch_size])
predicted_xd[0,0:20] , test_set[0,0:20]

(array([-0.01080162,  0.00159429, -0.00303094, -0.00087448,  0.00229199,
         0.00223315,  0.00309813,  0.0024549 , -0.00139659, -0.00440375,
        -0.00176306,  0.00563586,  0.00282036, -0.00320526, -0.002957  ,
        -0.00225092, -0.0027782 , -0.00112683, -0.00085548, -0.00248449],
       dtype=float32),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 3., 0., 5., 0., 5., 0., 0., 3.,
        0., 0., 4.]))

In [192]:
test_loss=0
s=0
for i in range(int(training_set.shape[0]/batch_size)):
  batch_input = training_set[(i) * batch_size:(i + 1) * batch_size]
  target = test_set[(i) * batch_size:(i + 1) * batch_size]
  if np.sum(test_set[i]) > 0:
    predicted = autoencoder.predict_on_batch(training_set[(i) * batch_size:(i + 1) * batch_size])
    K.stop_gradient(target)
    predicted[target == 0] = 0
    loss = autoencoder.train_on_batch(target,predicted)
    mean_corrector = nb_movies/float(np.sum(target) + 1e-10)
    test_loss +=  np.sqrt(loss[0]*mean_corrector)
    s += 1

print(' loss: '+str(test_loss/s))

 loss: 0.0025676181844012224
