# Modules

In [2]:
import numpy as np
import pandas as pd

In [3]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from tensorflow.keras import layers

2024-09-09 21:51:18.653931: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-09 21:51:18.679692: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-09 21:51:18.679724: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-09 21:51:18.680673: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-09 21:51:18.686120: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-09 21:51:18.687055: I tensorflow/core/platform/cpu_feature_guard.cc:1

# Data import

In [19]:
# Ratings dataset; the cells below read its user_id, movie_title,
# user_rating and movie_genres features.
ratings = tfds.load(name="movielens/100k-ratings", split="all")

# Movie catalogue: features of every available movie.
movies = tfds.load(name="movielens/100k-movies", split="all")

In [20]:
# Convert the ratings dataset into a pandas DataFrame so the per-user
# groupby/merge aggregations below can run on it.
# Note: user_id values show up as bytes (e.g. b'366') in the tables below.
df = tfds.as_dataframe(ratings)

# Identifying users with the highest genre-specific viewing percentages

In [10]:
# Genre name -> integer id. Ids are assigned by position in the list below,
# so the order of the entries is significant.
# The sections further down use Children (3) and Horror (10).
genre_mapping = {
    genre: genre_id
    for genre_id, genre in enumerate([
        'Action',              # 0
        'Adventure',           # 1
        'Animation',           # 2
        'Children',            # 3
        'Comedy',              # 4
        'Crime',               # 5
        'Documentary',         # 6
        'Drama',               # 7
        'Fantasy',             # 8
        'Film-Noir',           # 9
        'Horror',              # 10
        'IMAX',                # 11
        'Musical',             # 12
        'Mystery',             # 13
        'Romance',             # 14
        'Sci-Fi',              # 15
        'Thriller',            # 16
        'Unknown',             # 17
        'War',                 # 18
        'Western',             # 19
        '(no genres listed)',  # 20
    ])
}

### Rank users by genre share, then get the top user's viewing history

## Horror (10)

In [44]:
# Share of Horror ratings per user.
#
# Inputs (defined in earlier cells): `df` (ratings DataFrame) and
# `genre_mapping` (genre name -> id).
# Outputs: `user_movie_data` (per-user counts and percentage) and
# `top_horror_percentage_users` (same table, highest percentage first).

# Look the genre id up by name instead of hard-coding 10.
HORROR_GENRE_ID = genre_mapping['Horror']

# Number of rating rows per user; each row is counted as one watched movie.
total_movies_per_user = df.groupby('user_id').size().reset_index(name='total_movie_count')

# Rows whose 'movie_genres' ids include the Horror id.
is_horror = df['movie_genres'].apply(
    lambda genre_ids: any(int(genre_id) == HORROR_GENRE_ID for genre_id in genre_ids)
)
df_horror = df[is_horror]

# Number of Horror rating rows per user.
horror_movies_per_user = df_horror.groupby('user_id').size().reset_index(name='horror_movie_count')

# Left merge keeps users without any Horror rating; their count is NaN
# after the merge, so it is replaced with 0.
user_movie_data = pd.merge(total_movies_per_user, horror_movies_per_user, on='user_id', how='left')
user_movie_data['horror_movie_count'] = user_movie_data['horror_movie_count'].fillna(0)

# Percentage of each user's ratings that are Horror.
user_movie_data['horror_percentage'] = (user_movie_data['horror_movie_count'] / user_movie_data['total_movie_count']) * 100

# Highest Horror percentage first.
top_horror_percentage_users = user_movie_data.sort_values(by='horror_percentage', ascending=False)

# Display the top five users.
top_horror_percentage_users.head()

Unnamed: 0,user_id,total_movie_count,horror_movie_count,horror_percentage
296,b'366',33,25.0,75.757576
794,b'814',35,25.0,71.428571
561,b'604',27,17.0,62.962963
298,b'368',45,26.0,57.777778
297,b'367',58,31.0,53.448276


In [48]:
# User with the highest horror percentage in the table above.
user_id = "366"

# Keep only the rating records that belong to this user.
filtered_ratings = ratings.filter(
    lambda rating: rating['user_id'] == tf.constant(user_id)
)

# Print each movie the user rated together with the rating given.
for record in filtered_ratings:
    # movie_title is a bytes tensor; decode it for display.
    movie_title = record['movie_title'].numpy().decode('utf-8')
    user_rating = record['user_rating'].numpy()
    print(f"Movie: {movie_title}, Rating: {user_rating}")

Movie: Natural Born Killers (1994), Rating: 5.0
Movie: Pulp Fiction (1994), Rating: 5.0
Movie: Scream (1996), Rating: 4.0
Movie: Tales from the Crypt Presents: Bordello of Blood (1996), Rating: 4.0
Movie: Omen, The (1976), Rating: 5.0
Movie: Believers, The (1987), Rating: 2.0
Movie: Body Snatchers (1993), Rating: 5.0
Movie: Braindead (1992), Rating: 5.0
Movie: Psycho (1960), Rating: 5.0
Movie: Mute Witness (1994), Rating: 3.0
Movie: Army of Darkness (1993), Rating: 4.0
Movie: Jaws (1975), Rating: 1.0
Movie: Candyman (1992), Rating: 5.0
Movie: Interview with the Vampire (1994), Rating: 5.0
Movie: Bram Stoker's Dracula (1992), Rating: 5.0
Movie: Bride of Frankenstein (1935), Rating: 5.0
Movie: Silence of the Lambs, The (1991), Rating: 5.0
Movie: American Werewolf in London, An (1981), Rating: 5.0
Movie: Evil Dead II (1987), Rating: 5.0
Movie: Nosferatu (Nosferatu, eine Symphonie des Grauens) (1922), Rating: 4.0
Movie: Shining, The (1980), Rating: 5.0
Movie: Lawnmower Man 2: Beyond Cybers

## Children (3)

In [43]:
# Share of Children ratings per user.
#
# Inputs (defined in earlier cells): `df` (ratings DataFrame) and
# `genre_mapping` (genre name -> id).
# Outputs: `user_movie_data` (per-user counts and percentage) and
# `top_children_percentage_users` (same table, highest percentage first).
# NOTE: `total_movies_per_user` and `user_movie_data` are reassigned here,
# replacing the values built in the Horror section.

# Look the genre id up by name instead of hard-coding 3.
CHILDREN_GENRE_ID = genre_mapping['Children']

# Number of rating rows per user; each row is counted as one watched movie.
total_movies_per_user = df.groupby('user_id').size().reset_index(name='total_movie_count')

# Rows whose 'movie_genres' ids include the Children id.
is_children = df['movie_genres'].apply(
    lambda genre_ids: any(int(genre_id) == CHILDREN_GENRE_ID for genre_id in genre_ids)
)
df_children = df[is_children]

# Number of Children rating rows per user.
children_movies_per_user = df_children.groupby('user_id').size().reset_index(name='children_movie_count')

# Left merge keeps users without any Children rating; their count is NaN
# after the merge, so it is replaced with 0.
user_movie_data = pd.merge(total_movies_per_user, children_movies_per_user, on='user_id', how='left')
user_movie_data['children_movie_count'] = user_movie_data['children_movie_count'].fillna(0)

# Percentage of each user's ratings that are Children.
user_movie_data['children_percentage'] = (user_movie_data['children_movie_count'] / user_movie_data['total_movie_count']) * 100

# Highest Children percentage first.
top_children_percentage_users = user_movie_data.sort_values(by='children_percentage', ascending=False)

# Display the top five users.
top_children_percentage_users.head()

Unnamed: 0,user_id,total_movie_count,children_movie_count,children_percentage
413,b'471',31,26.0,83.870968
579,b'620',110,44.0,40.0
172,b'254',159,52.0,32.704403
491,b'541',133,43.0,32.330827
803,b'822',25,8.0,32.0


### Get user viewing history

In [47]:
# User with the highest children percentage in the table above.
user_id = "471"

# Keep only the rating records that belong to this user.
filtered_ratings = ratings.filter(
    lambda rating: rating['user_id'] == tf.constant(user_id)
)

# Print each movie the user rated together with the rating given.
for record in filtered_ratings:
    # movie_title is a bytes tensor; decode it for display.
    movie_title = record['movie_title'].numpy().decode('utf-8')
    user_rating = record['user_rating'].numpy()
    print(f"Movie: {movie_title}, Rating: {user_rating}")

Movie: First Kid (1996), Rating: 5.0
Movie: Jungle Book, The (1994), Rating: 5.0
Movie: That Darn Cat! (1997), Rating: 4.0
Movie: Snow White and the Seven Dwarfs (1937), Rating: 2.0
Movie: Cinderella (1950), Rating: 3.0
Movie: Casper (1995), Rating: 3.0
Movie: Hunchback of Notre Dame, The (1996), Rating: 1.0
Movie: Lion King, The (1994), Rating: 3.0
Movie: Toy Story (1995), Rating: 4.0
Movie: Mrs. Doubtfire (1993), Rating: 5.0
Movie: Aladdin (1992), Rating: 4.0
Movie: Home Alone (1990), Rating: 5.0
Movie: Homeward Bound: The Incredible Journey (1993), Rating: 5.0
Movie: Pinocchio (1940), Rating: 2.0
Movie: Star Wars (1977), Rating: 3.0
Movie: Fox and the Hound, The (1981), Rating: 2.0
Movie: Empire Strikes Back, The (1980), Rating: 4.0
Movie: Babe (1995), Rating: 5.0
Movie: Willy Wonka and the Chocolate Factory (1971), Rating: 2.0
Movie: Winnie the Pooh and the Blustery Day (1968), Rating: 2.0
Movie: Alice in Wonderland (1951), Rating: 1.0
Movie: Goofy Movie, A (1995), Rating: 4.0
Movi