<a href="https://colab.research.google.com/github/ntolayd/Projects/blob/main/Recommendation_Engines/Rec_Eng_Restricted_Boltzmann_Machine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

reference:

https://github.com/IBM/dl-learning-path-assets/blob/main/unsupervised-deeplearning/notebooks/CollabortiveFilteringUsingRBM.ipynb

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!unzip "/content/gdrive/My Drive/ml-1m.zip"

Archive:  /content/gdrive/My Drive/ml-1m.zip
   creating: ml-1m/
  inflating: ml-1m/movies.dat        
  inflating: ml-1m/ratings.dat       
  inflating: ml-1m/README            
  inflating: ml-1m/users.dat         


In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
import seaborn as sns
import warnings
import tensorflow as tf
warnings.filterwarnings('ignore')

In [4]:
movies = pd.read_csv("ml-1m/movies.dat", sep='::', engine='python', header=None)
movies.columns = ['movie_id', 'movie_name', 'genre']
ratings = pd.read_csv("ml-1m/ratings.dat", sep='::', engine='python', header=None)
ratings.columns=['user_id', 'movie_id', 'rating','timestamp']

In [5]:
#converting to user - item matrix
train_matrix = pd.pivot_table(ratings, index='user_id', columns='movie_id', values='rating')

In [8]:
user_rating = train_matrix.copy().fillna(0).values / 5

In [9]:
user_rating

array([[1. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0.6, 0. , 0. , ..., 0. , 0. , 0. ]])

In [30]:
#Defining Restricted Bottzmann Machine

#number of unique movies as visible units
visible_units = len(train_matrix.columns)

#Number of hidden units
hidden_units = 15

#initializing biases and weights
visible_bias = tf.Variable(tf.zeros([visible_units], tf.float32))
hidden_bias = tf.Variable(tf.zeros([hidden_units], tf.float32))
weights = tf.Variable(tf.zeros([visible_units, hidden_units], tf.float32))

#feedforward 
def feedforward(inp, weights, hidden_bias):
  hidden_prob = tf.nn.sigmoid(tf.matmul([inp], weights) + hidden_bias) 
  hidden_state = tf.nn.relu(tf.sign(hidden_prob - tf.random.uniform(tf.shape(hidden_prob))))
  return hidden_state

#reconstruction the input layer
def reconstruct(hidden_state, weights, visible_bias):
  visible_prob =  tf.nn.sigmoid(tf.matmul(hidden_state, tf.transpose(weights)) + visible_bias)
  visible_state = tf.nn.relu(tf.sign(visible_prob - tf.random.uniform(tf.shape(visible_prob))))
  return visible_state[0]

In [17]:
#input layer
inp = tf.zeros([visible_units], tf.float32)
#hidden layer
hidden_values = feedforward(inp, weights=weights, hidden_bias=hidden_bias)
#visible output
visible_values = reconstruct(hidden_values, weights=weights, visible_bias=visible_bias)

In [23]:
visible_state = inp

In [29]:
[inp]

[<tf.Tensor: shape=(3706,), dtype=float32, numpy=array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)>]

In [None]:
#Training
epochs= 5 
batch_size=128
errors = []
final_weights = []
K=1
learning_rate=0.1

training_data = tf.data.Dataset.from_tensor_slices((np.float32(user_rating))).batch(batch_size)

def error(v0_state, v1_state):
    return tf.reduce_mean(tf.square(v0_state - v1_state))

for epoch in range(epochs):
  batch_number = 0
  for batch_x in training_data:
    for sample in range(len(batch_x)):
      for k in range(K):
        visible_state = batch_x[sample]
        hidden_state= feedforward(visible_state, weights, hidden_bias)
        visible1_state = reconstruct(hidden_state, weights, visible_bias)
        hidden1_state = feedforward(visible1_state, weights, hidden_bias)

        delta_weights = tf.matmul(tf.transpose([visible_state]), hidden_state) - tf.matmul(tf.transpose([visible1_state]), hidden1_state)

        weights = weights + learning_rate * delta_weights
        visible_bias = visible_bias + learning_rate * tf.reduce_mean(visible_state - visible1_state, 0)
        hidden_bias = hidden_bias + learning_rate * tf.reduce_mean(hidden_state - hidden1_state, 0)

        visible_state = visible1_state

      if sample == len(batch_x)-1:
        err = error(batch_x[sample], visible1_state)
        final_weights.append(weights)
        errors.append(err)
        print ( 'Epoch: %d' % (epoch + 1), 
                "batch #: %i " % batch_number, "of %i" % (len(user_rating)/batch_size), 
                "sample #: %i" % sample,
                'reconstruction error: %f' % err)
    batch_number += 1

plt.plot(errors)
plt.ylabel('Error')
plt.xlabel('Epoch')
plt.show()

In [51]:
def make_recommendation(user_id,n):
  user_input = user_rating[user_id-1].reshape(1, -1)
  user_input = tf.convert_to_tensor(user_rating[user_id-1],"float32")
  h0 = tf.nn.sigmoid(tf.matmul([user_input], weights) + hidden_bias)
  v1 = tf.nn.sigmoid(tf.matmul(h0, tf.transpose(weights)) + visible_bias)
  scored_movies = movies[movies['movie_id'].isin(train_matrix.columns)]
  scored_movies['Recommedation_Score'] = v1[0]
  rated_movies = ratings[ratings['user_id'] == user_id]
  return rated_movies.merge(scored_movies, on='movie_id', how='outer').sort_values(["Recommedation_Score"], ascending=False).head(n)


In [52]:
make_recommendation(25,20)

Unnamed: 0,user_id,movie_id,rating,timestamp,movie_name,genre,Recommedation_Score
18,25.0,2571,3.0,978132158.0,"Matrix, The (1999)",Action|Sci-Fi|Thriller,0.938058
73,25.0,1210,5.0,978132138.0,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Romance|Sci-Fi|War,0.933135
1213,,1240,,,"Terminator, The (1984)",Action|Sci-Fi|Thriller,0.875581
1,25.0,2628,5.0,978130670.0,Star Wars: Episode I - The Phantom Menace (1999),Action|Adventure|Fantasy|Sci-Fi,0.845902
521,,457,,,"Fugitive, The (1993)",Action|Thriller,0.836734
32,25.0,260,5.0,978131930.0,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi,0.808847
78,25.0,1580,4.0,978132347.0,Men in Black (1997),Action|Adventure|Comedy|Sci-Fi,0.791091
55,25.0,1610,5.0,978132793.0,"Hunt for Red October, The (1990)",Action|Thriller,0.708065
13,25.0,1196,5.0,978132002.0,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Drama|Sci-Fi|War,0.699974
1095,,1097,,,E.T. the Extra-Terrestrial (1982),Children's|Drama|Fantasy|Sci-Fi,0.662231


In [34]:
#Feeding in the user and reconstructing the input

h0 = tf.nn.sigmoid(tf.matmul([user_input], weights) + hidden_bias)

v1 = tf.nn.sigmoid(tf.matmul(h0, tf.transpose(weights)) + visible_bias)


In [35]:
scored_movies_df_mock = scored_movies_df_mock.assign(RecommendationScore = rec[0])
scored_movies_df_mock.sort_values(["RecommendationScore"], ascending=False).head(20)

<tf.Tensor: shape=(1, 3706), dtype=float32, numpy=
array([[0.12079933, 0.00209603, 0.00068572, ..., 0.00047657, 0.00076959,
        0.00221562]], dtype=float32)>

In [40]:
movies.shape

(3883, 3)