<h1 style="color: blue; font-style: italic; font-family: sans-serif; text-align: center;">Revolutionizing Recommendations as a Personalization Strategy:<p style="color:brown; text-align: center;">Deep Learning - Neural Network Weight</p></h1>

### [Article: Revolutionizing Recommendations as a Personalization Strategy: Deep Learning](https://medium.com/@shukla.shankar.ravi/revolutionizing-recommendations-as-a-personalization-strategy-deep-learning-6b7d33804eb9)

# Deep Learning - Neural Network Weight

In [1]:
import numpy as np
import pandas as pd
import random

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import plot_model

import warnings
warnings.filterwarnings("ignore")
random.seed(9973)

## Step 1. Load the datasets

In [2]:
#https://grouplens.org/datasets/movielens/
movies_df = pd.read_csv('./data/movies.csv')  # movieId, title, genres
ratings_df = pd.read_csv('./data/ratings.csv')  # userId, movieId, rating, timestamp
tags_df = pd.read_csv('./data/tags.csv')  # userId, movieId, tag, timestamp


## Step 2. Data Preprocessing

In [3]:
# Reindex userId and movieId to start from 0
ratings_df['userId'] = pd.factorize(ratings_df['userId'])[0]
ratings_df['movieId'] = pd.factorize(ratings_df['movieId'])[0]
tags_df['userId'] = pd.factorize(tags_df['userId'])[0]
tags_df['movieId'] = pd.factorize(tags_df['movieId'])[0]

# Get unique values for number of users and number of movies
num_users = ratings_df['userId'].nunique()
num_movies = ratings_df['movieId'].nunique()

# Incorporating Time-based Weightage for ratings
# Convert timestamp to datetime format
ratings_df['timestamp'] = pd.to_datetime(ratings_df['timestamp'], unit='s')

# Calculate the weightage based on recency (more recent ratings should have higher weight)
ratings_df['weight'] = (ratings_df['timestamp'].max() - ratings_df['timestamp']).dt.days + 1
ratings_df['weight'] = 1 / (ratings_df['weight'])  # Inverse weighting: recent ratings get higher weight

# Prepare input and target data for training
user_ids = ratings_df['userId'].values
movie_ids = ratings_df['movieId'].values
ratings = ratings_df['rating'].values
weights = ratings_df['weight'].values

# Incorporate Tags: Most frequent tags for each movie
# For simplicity, we'll take the most frequent tag for each movie and use it as a categorical feature
tag_counts = tags_df.groupby('movieId')['tag'].value_counts().reset_index(name='count')
tag_counts = tag_counts.loc[tag_counts.groupby('movieId')['count'].idxmax()]  # Keep the most frequent tag

# Merge tags with ratings dataset
ratings_with_tags = ratings_df.merge(tag_counts[['movieId', 'tag']], on='movieId', how='left')

# Encode tags
tag_encoder = LabelEncoder()
ratings_with_tags['tag_encoded'] = tag_encoder.fit_transform(ratings_with_tags['tag'].fillna('Unknown'))

# Prepare tags as additional input for the model
tags = ratings_with_tags['tag_encoded'].values


## Step 3. Design Neural Network Architectures

In [4]:
# Model Definition (Using Keras Sequential)

embedding_size = 50  # Size of the embedding vectors for users and movies

# Define inputs
user_input = layers.Input(shape=(1,), name='user')
movie_input = layers.Input(shape=(1,), name='movie')
tag_input = layers.Input(shape=(1,), name='tag')  # Input for tags

# Embedding layers for users and movies
user_embedding = layers.Embedding(input_dim=num_users, output_dim=embedding_size, input_length=1)(user_input)
movie_embedding = layers.Embedding(input_dim=num_movies, output_dim=embedding_size, input_length=1)(movie_input)

# Flatten the embeddings
user_vec = layers.Flatten()(user_embedding)
movie_vec = layers.Flatten()(movie_embedding)

# Tag embedding
tag_embedding = layers.Embedding(input_dim=len(tag_encoder.classes_), output_dim=embedding_size, input_length=1)(tag_input)
tag_vec = layers.Flatten()(tag_embedding)

# Concatenate the user, movie, and tag embeddings
concat = layers.Concatenate()([user_vec, movie_vec, tag_vec])

# Add a fully connected network (MLP)
fc1 = layers.Dense(128, activation='relu')(concat)
fc2 = layers.Dense(64, activation='relu')(fc1)
output = layers.Dense(1)(fc2)  # Single output to predict the rating

# Build the model
model = models.Model(inputs=[user_input, movie_input, tag_input], outputs=output)

# Compile the model with Adam optimizer and MSE loss function
model.compile(optimizer=Adam(), loss='mse', weighted_metrics=['accuracy'])

# Visualize Model
#plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)


## Step 4.  Train the Model

In [5]:

# Split data into training and testing sets
train_user_ids, test_user_ids, train_movie_ids, test_movie_ids, train_tags, test_tags, train_ratings, test_ratings, train_weights, test_weights = train_test_split(
    user_ids, movie_ids, tags, ratings, weights, test_size=0.2, random_state=42
)

# Train the model
history = model.fit(
    [train_user_ids, train_movie_ids, train_tags], train_ratings,
    epochs=10,
    batch_size=64,
    sample_weight=train_weights,  # Use the weightage of ratings during training
    validation_data=([test_user_ids, test_movie_ids, test_tags], test_ratings, test_weights)
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Step 5. Generate Recommendations for a user

In [6]:

def recommend_movies_for_user(user_id, model, num_recommendations=10):
    """
    Recommend top N movies for a given user based on predicted ratings from a trained model.

    Args:
        user_id (int): ID of the user to recommend movies to.
        model (keras.Model): Trained Keras model that predicts user-movie ratings.
        num_recommendations (int): Number of top recommendations to return.

    Returns:
        pd.DataFrame: DataFrame containing recommended movie IDs and titles.
    """
    
    # Generate an array of all possible movie IDs (e.g., from 0 to num_movies-1)
    all_movie_ids = np.arange(num_movies)  # All movie IDs
    
    # Generate predicted ratings for the user and all movies
    # Inputs: [user_id repeated for each movie, movie IDs, dummy input (e.g., zeros for tag or context)]
    predicted_ratings = model.predict([np.array([user_id] * num_movies), all_movie_ids, np.zeros(num_movies)])
    
    # Sort movies based on predicted ratings (in descending order)
    recommended_movie_ids = np.argsort(predicted_ratings.flatten())[::-1][:num_recommendations]
    
    # Get movie details for the recommended movie IDs
    recommended_movies = movies_df.iloc[recommended_movie_ids]
    
    # Return just the movie ID and title for display
    return recommended_movies[['movieId', 'title']]

In [7]:
# Example: Get top 10 recommended movies for a user (say user_id=1)
user_id = 1
recommended_movies = recommend_movies_for_user(user_id, model, num_recommendations=10)

print(f"Top 10 recommended movies for User {user_id}:\n")
print(recommended_movies)

Top 10 recommended movies for User 1:

      movieId                                            title
7659    88672                         Our Idiot Brother (2011)
2872     3840                               Pumpkinhead (1988)
3537     4835                     Coal Miner's Daughter (1980)
6514    53894                                     Sicko (2007)
4748     7072                                Stagecoach (1939)
8135   101741                                    Trance (2013)
258       298                  Pushing Hands (Tui shou) (1992)
5240     8588                         Killing Me Softly (2002)
4750     7074                            Navigator, The (1924)
7678    89118  Skin I Live In, The (La piel que habito) (2011)
