In [1]:
# download movie lens 100k dataset

# !wget http://files.grouplens.org/datasets/movielens/ml-100k.zip
# !unzip ml-100k.zip
import pandas as pd
import numpy as np
ratings = pd.read_csv('ml-100k/u.data', sep='\t', header=None, names=['user_id', 'item_id', 'rating', 'timestamp'])
ratings.drop('timestamp', axis=1, inplace=True)
ratings.head()

Unnamed: 0,user_id,item_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [2]:
#perform some preprocessing to encode users and movies as integer indices.

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
ratings['user_id'] = le.fit_transform(ratings['user_id'])
ratings['item_id'] = le.fit_transform(ratings['item_id'])

In [3]:
# prompt: Prepare training and validation data

from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(ratings, test_size=0.2)
train_data.head()

Unnamed: 0,user_id,item_id,rating
85678,477,149,4
86962,549,248,4
42076,642,281,3
17958,434,251,2
23488,293,346,5


In [4]:
num_users = len(train_data.user_id.unique())
num_movies = len(train_data.item_id.unique())
min_rating = train_data['rating'].min()
max_rating = train_data['rating'].max()
print(     "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(         num_users, num_movies, min_rating, max_rating     ) )

Number of users: 943, Number of Movies: 1653, Min rating: 1, Max rating: 5


In [5]:
def set_label(data: pd.DataFrame):
    return 1 if data.rating >= 3 else 0

train_data['target'] = train_data.apply(lambda x: 1 if x['rating'] >= 3 else 0, axis=1)
test_data['target'] = test_data.apply(lambda x: 1 if x['rating'] >= 3 else 0, axis=1)

x_train = train_data.drop(columns='target')
y_train = train_data['target']

x_test = test_data.drop(columns='target')
y_test = test_data['target']

In [6]:
class Network:

    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        self.__input_size = input_size
        self.__hidden_size = hidden_size
        self.__output_size = output_size

        self.W1 = np.zeros((self.__hidden_size, self.__input_size))
        self.b1 = np.zeros((self.__hidden_size,1))

        self.W2 = np.zeros((self.__output_size, self.__hidden_size))
        self.b2 = np.zeros((self.__output_size,1))

        self.z1 = None
        self.z2 = None

    def fit(self, x_input: pd.DataFrame, y_input: pd.DataFrame, iteration=100, learning_rate=0.1):

        y_input_one_hot = pd.get_dummies(y_input).to_numpy()
        x_input = x_input.to_numpy()
        m = y_input_one_hot.shape[0]

        for _ in range(iteration):

            #first layer
            self.z1 = np.matmul(x_input, self.W1.T).T
            self.z1 += self.b1

            #second layer
            a1 = self.leaky_relu(self.z1)
            self.z2 = np.matmul(a1.T, self.W2.T).T
            self.z2 += self.b2

            y_pred = self.softmax(self.z2)

            loss = self.compute_loss(y_real=y_input, y_predicted=y_pred)
            print(np.sum(loss))

            a2 =  self.z2

            #Derivatives
            dz2 = a2 - y_input_one_hot.T
            dw2 = (1/m) * np.matmul(dz2, a1.T)
            db2 = (1/m) * np.sum(dz2)


            da1 = np.matmul(self.W2.T, dz2)
            dz1 = da1 * np.vectorize(self.leaky_relu_derivative)(self.z1)
            dw1 = (1/m) * np.matmul(dz1, x_input)
            db1 = (1./m) * np.sum(dz1)


            # Gradient Descent
            self.W1 = self.W1 - learning_rate * dw1
            self.W2 = self.W2 - learning_rate * dw2

            self.b1 = self.b1 - learning_rate * db1
            self.b2 = self.b2 - learning_rate * db2


    def predict(self, x_input: pd.DataFrame):
        z1 = np.matmul(x_input, self.W1.T).T
        z1 += self.b1

        #second layer
        a1 = np.vectorize(self.leaky_relu)(z1)
        z2 = np.matmul(a1.T, self.W2.T).T
        z2 += self.b2
        return np.argmax(self.softmax(z2), axis=0)

    @classmethod
    def leaky_relu(cls, x, alpha=0.01):
        return np.maximum(x*alpha, alpha)

    @classmethod
    def leaky_relu_derivative(cls, x, alpha=0.01):
        return 1.0 if x >= 0.0 else alpha

    @classmethod
    def softmax(cls, x):
        return np.exp(x)/np.sum(np.exp(x))

    @classmethod
    def compute_loss(cls, y_real, y_predicted: pd.DataFrame):
        y_real_one_hot = pd.get_dummies(y_real)

        loss_sum = np.sum(np.multiply(y_real_one_hot.T, np.log(y_predicted)))
        m = y_real_one_hot.shape[0]
        loss = -(1/m) * loss_sum
        return loss

In [7]:
nn = Network(input_size=3, hidden_size=64, output_size=2)

In [11]:
nn.fit(x_train, y_train, iteration=50, learning_rate=0.07)

11.985245304434827
11.98514886971152
11.985052499897105
11.984956194943072
11.984859954800964
11.984763779422348
11.984667668758838
11.984571622762092
11.98447564138379
11.984379724575659
11.984283872289469
11.984188084477015
11.98409236109014
11.983996702080724
11.983901107400676
11.983805577001956
11.983710110836558
11.983614708856502
11.98351937101387
11.983424097260752
11.983328887549295
11.98323374183168
11.983138660060128
11.983043642186896
11.982948688164264
11.982853797944577
11.982758971480195
11.98266420872353
11.982569509627016
11.982474874143147
11.982380302224428
11.982285793823424
11.982191348892714
11.982096967384942
11.982002649252774
11.981908394448903
11.981814202926083
11.981720074637096
11.981626009534748
11.981532007571897
11.981438068701424
11.981344192876271
11.981250380049401
11.981156630173803
11.98106294320253
11.980969319088652
11.980875757785284
11.980782259245569
11.980688823422708
11.980595450269906


In [12]:
x = nn.predict(x_test)
x

array([1, 1, 1, ..., 1, 1, 1], dtype=int64)

In [13]:
from sklearn.metrics import accuracy_score, classification_report
cr = classification_report(y_test, x)
print(cr)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      3514
           1       0.82      1.00      0.90     16486

    accuracy                           0.82     20000
   macro avg       0.41      0.50      0.45     20000
weighted avg       0.68      0.82      0.74     20000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
