In [1]:
import os
from pathlib import Path
from zipfile import ZipFile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import get_file

In [2]:
# --- Load Dataset ---
# Column labels applied later when the ratings file is read with pandas.
cols = [
    'userId',
    'movieId',
    'rating',
    'timestamp',
]

# Fetch the "train" split of the MovieLens 100k movie table through TFDS and
# materialise it as a pandas DataFrame.
movies = tfds.load(name="movielens/100k-movies", split="train")
movies_df = tfds.as_dataframe(ds=movies)

In [3]:
# Decode byte strings
def _decode_if_bytes(value):
    """Return `value` decoded as UTF-8 when it is `bytes`; otherwise return it unchanged."""
    return value.decode('utf-8') if isinstance(value, bytes) else value


# Rename before touching the id column: `DataFrame.rename` ignores labels that
# are not present, so this cell stays re-runnable. Every later step operates on
# the final column names and is a no-op on already-converted values.
movies_df = movies_df.rename(columns={'movie_id': 'movieId'})
movies_df['movieId'] = movies_df['movieId'].apply(_decode_if_bytes).astype(int)
movies_df['movie_title'] = movies_df['movie_title'].apply(_decode_if_bytes)

In [4]:
# Map genres
# Integer id -> genre name for the values stored in `movie_genres`.
# The ids are class-label indices from the TFDS MovieLens dataset, whose label
# list has 21 entries and includes 'IMAX' (11) and 'Unknown' (17). Omitting
# those shifts every later genre onto the wrong name (e.g. 14 is Romance, not
# Sci-Fi).
# NOTE(review): confirm against
# tfds.builder("movielens/100k-movies").info.features['movie_genres'].
genre_map = {
    0: 'Action', 1: 'Adventure', 2: 'Animation', 3: 'Children', 4: 'Comedy',
    5: 'Crime', 6: 'Documentary', 7: 'Drama', 8: 'Fantasy', 9: 'Film-Noir',
    10: 'Horror', 11: 'IMAX', 12: 'Musical', 13: 'Mystery', 14: 'Romance',
    15: 'Sci-Fi', 16: 'Thriller', 17: 'Unknown', 18: 'War', 19: 'Western',
    20: '(no genres listed)'
}

# Translate each movie's list of genre ids into a list of names; any id that is
# missing from the table falls back to 'Unknown'.
movies_df['genre_names'] = movies_df['movie_genres'].apply(
    lambda genre_ids: [genre_map.get(gid, 'Unknown') for gid in genre_ids]
)
movies_df.head()

Unnamed: 0,movie_genres,movieId,movie_title,genre_names
0,[4],1681,You So Crazy (1994),[Comedy]
1,"[4, 7]",1457,Love Is All There Is (1996),"[Comedy, Drama]"
2,"[1, 3]",500,Fly Away Home (1996),"[Adventure, Children]"
3,[0],838,In the Line of Duty 2 (1987),[Action]
4,[7],1648,"Niagara, Niagara (1997)",[Drama]


In [5]:

# --- Load ratings ---
# Directory holding the MovieLens 100k `u.data` file. Set the ML100K_DATA_DIR
# environment variable to point at a local copy; when it is unset the
# original machine-specific location is used so existing setups keep working.
ML100K_DATA_DIR = Path(
    os.environ.get('ML100K_DATA_DIR', r'C:\Users\EIG_Lab(1)\Documents\ml-100k\ml-100k')
)

# Tab-separated file; column labels come from `cols`.
df = pd.read_csv(ML100K_DATA_DIR / 'u.data', sep='\t', names=cols)



In [6]:
# Encode IDs
# Give every distinct raw id a consecutive integer index, numbered in the
# order the ids are returned by `unique()`.
unique_user_ids = df['userId'].unique()
unique_movie_ids = df['movieId'].unique()
user2user_encoded = dict(zip(unique_user_ids, range(len(unique_user_ids))))
movie2movie_encoded = dict(zip(unique_movie_ids, range(len(unique_movie_ids))))

# Attach the encoded indices alongside the raw ids.
df['user'] = df['userId'].map(user2user_encoded)
df['movie'] = df['movieId'].map(movie2movie_encoded)
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,user,movie
0,196,242,3,881250949,0,0
1,186,302,3,891717742,1,1
2,22,377,1,878887116,2,2
3,244,51,2,880606923,3,3
4,166,346,1,886397596,4,4
