In [54]:
import pandas as pd
import numpy as np
from zipfile import ZipFile
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Model, Sequential
from pathlib import Path
import matplotlib.pyplot as plt

In [55]:
rating1 = pd.read_csv('https://raw.githubusercontent.com/kunal-mallick/Anime_Recommendations_System/refs/heads/main/Dataset/rating1.csv')

rating1

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
...,...,...,...
3906863,36791,2795,7
3906864,36791,2904,10
3906865,36791,3270,10
3906866,36791,3299,8


In [56]:
rating2 = pd.read_csv('https://raw.githubusercontent.com/kunal-mallick/Anime_Recommendations_System/refs/heads/main/Dataset/rating2.csv')

rating2

Unnamed: 0,user_id,anime_id,rating
0,36791,3467,7
1,36791,3503,8
2,36791,3549,9
3,36791,3653,8
4,36791,3654,7
...,...,...,...
3906864,73515,16512,7
3906865,73515,17187,9
3906866,73515,22145,10
3906867,73516,790,9


In [57]:
df = pd.concat([rating1, rating2],axis=0)
df.reset_index(inplace=True)
df.drop(columns='index', inplace=True)
df.columns = ["userId", "movieId", "rating"]
df.loc[df['rating'] ==-1,'rating'] = 0

df

Unnamed: 0,userId,movieId,rating
0,1,20,0
1,1,24,0
2,1,79,0
3,1,226,0
4,1,241,0
...,...,...,...
7813732,73515,16512,7
7813733,73515,17187,9
7813734,73515,22145,10
7813735,73516,790,9


In [58]:
user_ids = df["userId"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
movie_ids = df["movieId"].unique().tolist()
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}
df["user"] = df["userId"].map(user2user_encoded)
df["movie"] = df["movieId"].map(movie2movie_encoded)

In [59]:
num_users = len(user2user_encoded)
num_movies = len(movie2movie_encoded)
df["rating"] = df["rating"].values.astype(np.float32)
# min and max ratings will be used to normalize the ratings later
min_rating = min(df["rating"])
max_rating = max(df["rating"])

print(
    "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(
        num_users, num_movies, min_rating, max_rating))

Number of users: 73515, Number of Movies: 11200, Min rating: 0.0, Max rating: 10.0


In [60]:
movie2movie_encoded

{20: 0,
 24: 1,
 79: 2,
 226: 3,
 241: 4,
 355: 5,
 356: 6,
 442: 7,
 487: 8,
 846: 9,
 936: 10,
 1546: 11,
 1692: 12,
 1836: 13,
 2001: 14,
 2025: 15,
 2144: 16,
 2787: 17,
 2993: 18,
 3455: 19,
 4063: 20,
 4214: 21,
 4224: 22,
 4581: 23,
 4744: 24,
 4898: 25,
 4999: 26,
 5034: 27,
 5277: 28,
 5667: 29,
 5781: 30,
 5958: 31,
 6163: 32,
 6205: 33,
 6324: 34,
 6500: 35,
 6547: 36,
 6682: 37,
 6707: 38,
 6747: 39,
 6773: 40,
 6793: 41,
 7088: 42,
 7148: 43,
 7593: 44,
 7739: 45,
 7858: 46,
 8074: 47,
 8407: 48,
 8424: 49,
 8525: 50,
 8630: 51,
 8841: 52,
 9041: 53,
 9062: 54,
 9136: 55,
 9181: 56,
 9330: 57,
 9367: 58,
 9515: 59,
 9581: 60,
 9675: 61,
 9750: 62,
 9790: 63,
 9919: 64,
 10067: 65,
 10073: 66,
 10076: 67,
 10079: 68,
 10080: 69,
 10209: 70,
 10578: 71,
 10604: 72,
 10719: 73,
 10790: 74,
 10793: 75,
 10794: 76,
 10805: 77,
 10897: 78,
 11161: 79,
 11266: 80,
 11617: 81,
 11737: 82,
 11757: 83,
 11759: 84,
 11771: 85,
 12293: 86,
 12549: 87,
 12729: 88,
 13357: 89,
 13367: 9

In [61]:
df.head()

Unnamed: 0,userId,movieId,rating,user,movie
0,1,20,0.0,0,0
1,1,24,0.0,0,1
2,1,79,0.0,0,2
3,1,226,0.0,0,3
4,1,241,0.0,0,4


In [62]:
num_users = len(user2user_encoded)
num_movies = len(movie2movie_encoded)
df["rating"] = df["rating"].values.astype(np.float32)
# min and max ratings will be used to normalize the ratings later
min_rating = min(df["rating"])
max_rating = max(df["rating"])

print(
    "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(
        num_users, num_movies, min_rating, max_rating))


Number of users: 73515, Number of Movies: 11200, Min rating: 0.0, Max rating: 10.0


In [63]:
df = df.sample(frac=1, random_state=42) # randomly sample the data-set
x = df[["user", "movie"]].values
# Normalize the targets between 0 and 1. Makes it easy to train.
y = df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * df.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

In [64]:
x_train.shape

(7032363, 2)

In [65]:
y_train.shape

(7032363,)

In [66]:
## defining the model
embedding_size = 50

user_ips= layers.Input(shape=[1])
user_embedding = layers.Embedding(num_users,embedding_size,embeddings_initializer="he_normal",embeddings_regularizer=keras.regularizers.l2(1e-6))(user_ips)
# Embedding layer will create 610 vectors look-up table of 50 dimension each
user_vect= layers.Flatten()(user_embedding)

movie_ips= layers.Input(shape=[1])
movie_embedding = layers.Embedding(num_movies, embedding_size, embeddings_initializer="he_normal",embeddings_regularizer=keras.regularizers.l2(1e-6))(movie_ips)
movie_vect= layers.Flatten()(movie_embedding)

prod = layers.dot(inputs=[user_vect, movie_vect],axes=1)

dense1= layers.Dense(150, activation='relu', kernel_initializer="he_normal")(prod)
dense2= layers.Dense(50, activation='relu',kernel_initializer="he_normal")(dense1)
dense3= layers.Dense(1,activation='relu')(dense2)

model = Model([user_ips, movie_ips], dense3)
model.compile(optimizer='adam',loss='mean_squared_error')

keras.utils.plot_model(model,show_shapes=True)

You must install pydot (`pip install pydot`) for `plot_model` to work.


In [67]:
history = model.fit([x_train[:,0], x_train[:,1]], y_train, batch_size=64,epochs=10,verbose=1)

Epoch 1/10




[1m109881/109881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6346s[0m 58ms/step - loss: 0.5146
Epoch 2/10
[1m 59429/109881[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m47:38[0m 57ms/step - loss: 0.5145

KeyboardInterrupt: 

In [None]:
pred=model.predict([x_train[4:5,0], x_train[4:5,1]])
pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step


array([[0.7133116]], dtype=float32)