In [1]:
import pandas as pd
df = pd.read_csv('movies.csv')
df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [2]:
df1 = pd.read_csv('ratings.csv')
df1.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


# Merging the two dataset together 

In [3]:
merged_df = pd.merge(df,df1, on = 'movieId', how = 'inner')
merged_df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,2,5.0,859046895
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,1303501039
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8,5.0,858610933
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11,4.0,850815810
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,14,4.0,851766286


In [4]:
merged_df.shape

(105339, 6)

In [5]:
merged_df['title'].nunique()

10323

In [6]:
merged_df['genres'].value_counts()

Drama                                      7678
Comedy                                     6676
Comedy|Romance                             3733
Drama|Romance                              3407
Comedy|Drama                               3101
                                           ... 
Adventure|Drama|Romance|Sci-Fi|Thriller       1
Action|Comedy|Drama|Horror                    1
Comedy|Crime|Drama|Film-Noir|Thriller         1
Adventure|Children|Fantasy|Western            1
Action|Fantasy|Mystery                        1
Name: genres, Length: 938, dtype: int64

In [7]:
merged_df.isnull().sum()

movieId      0
title        0
genres       0
userId       0
rating       0
timestamp    0
dtype: int64

In [8]:
merged_df.describe()

Unnamed: 0,movieId,userId,rating,timestamp
count,105339.0,105339.0,105339.0,105339.0
mean,13381.312477,364.924539,3.51685,1130424000.0
std,26170.456869,197.486905,1.044872,180266000.0
min,1.0,1.0,0.5,828565000.0
25%,1073.0,192.0,3.0,971100800.0
50%,2497.0,383.0,3.5,1115154000.0
75%,5991.0,557.0,4.0,1275496000.0
max,149532.0,668.0,5.0,1452405000.0


In [9]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 105339 entries, 0 to 105338
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   movieId    105339 non-null  int64  
 1   title      105339 non-null  object 
 2   genres     105339 non-null  object 
 3   userId     105339 non-null  int64  
 4   rating     105339 non-null  float64
 5   timestamp  105339 non-null  int64  
dtypes: float64(1), int64(3), object(2)
memory usage: 5.6+ MB


# Objective: 1 

Create a popularity-basedrecommender system at a genre level. The userwill input a genre (g), minimum ratingthreshold (t) for a movie,and no. ofrecommendations(N) for which it should be recommended top N movies which are most popular within that genre (g) ordered by ratings in descending order where each movie has at least (t) reviews.Example:Input:•Genre (g) : Comedy•Minimumreviewsthreshold(t):100•Num recommendations (N) : 5

In [10]:
#Take user input for genre (g)
genre = input("Enter the genre: ")

# Take user input for minimum review threshold (t)
min_reviews_threshold = int(input("Enter the minimum review threshold: "))

#  Take user input for the number of recommendations (N)
N = int(input("Enter the number of recommendations (N): "))

# Filter the DataFrame based on the specified genre (g)
genre_df = merged_df[merged_df['genres'].str.contains(genre, case=False)]

# Filter movies based on the minimum review threshold (t)
popular_movies = genre_df[genre_df['userId'] >= min_reviews_threshold]

# Sort the remaining movies by ratings in descending order
sorted_movies = popular_movies.sort_values(by='rating', ascending=False)

# Get the top N recommendations
top_recommendations = sorted_movies.head(N)

# Step 8: Display the recommendations
print(f'\nTop {N} recommended movies in the "{genre}" genre with at least {min_reviews_threshold} reviews:')
print(top_recommendations[['movieId', 'title', 'rating', 'userId']])


Enter the genre: comedy
Enter the minimum review threshold: 300
Enter the number of recommendations (N): 5

Top 5 recommended movies in the "comedy" genre with at least 300 reviews:
       movieId                                       title  rating  userId
105          1                            Toy Story (1995)     5.0     303
50004     2324  Life Is Beautiful (La Vita è bella) (1997)     5.0     646
50003     2324  Life Is Beautiful (La Vita è bella) (1997)     5.0     632
50002     2324  Life Is Beautiful (La Vita è bella) (1997)     5.0     631
49999     2324  Life Is Beautiful (La Vita è bella) (1997)     5.0     605


In [11]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import pandas as pd

# Assuming 'merged_df' is your merged DataFrame

# Step 1: Create a Dash web application
app = dash.Dash(__name__)

# Step 2: Define the layout of the dashboard
app.layout = html.Div([
    html.H1("Movie Recommendations Dashboard"),
    
    # Input components
    html.Label("Select Genre:"),
    dcc.Input(id='genre-input', type='text', value='Comedy'),
    
    html.Label("Minimum Review Threshold:"),
    dcc.Input(id='min-reviews-input', type='number', value=100),
    
    html.Label("Number of Recommendations:"),
    dcc.Input(id='num-recommendations-input', type='number', value=5),
    
    # Output component to display recommendations
    html.Div(id='output-recommendations'),
])

# Step 3: Define the callback to update recommendations based on user input
@app.callback(
    Output('output-recommendations', 'children'),
    [Input('genre-input', 'value'),
     Input('min-reviews-input', 'value'),
     Input('num-recommendations-input', 'value')]
)
def update_recommendations(genre, min_reviews_threshold, N):
    genre_df = merged_df[merged_df['genres'].str.contains(genre, case=False)]
    popular_movies = genre_df[genre_df['userId'] >= min_reviews_threshold]
    sorted_movies = popular_movies.sort_values(by='rating', ascending=False)
    top_recommendations = sorted_movies.head(N)
    
    recommendations_table = html.Table(
        # Header
        [html.Tr([html.Th(col) for col in top_recommendations.columns])] +
        # Body
        [html.Tr([html.Td(top_recommendations.iloc[i][col]) for col in top_recommendations.columns]) for i in range(len(top_recommendations))]
    )
    
    return recommendations_table

# Step 4: Run the application
if __name__ == '__main__':
    app.run_server(debug=True)


The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


In [12]:


# Take user input for genre (g)
genre = input("Enter the genre: ")

# Take user input for minimum review threshold (t)
min_reviews_threshold = int(input("Enter the minimum review threshold: "))

# Take user input for the number of recommendations (N)
N = int(input("Enter the number of recommendations (N): "))

# Filter the DataFrame based on the specified genre (g)
genre_df = merged_df[merged_df['genres'].str.contains(genre, case=False)]

# Filter movies based on the minimum review threshold (t)
popular_movies = genre_df[genre_df['userId'] >= min_reviews_threshold]

# Sort the remaining movies by ratings in descending order
sorted_movies = popular_movies.sort_values(by='rating', ascending=False)

# Get the top N recommendations
top_recommendations = sorted_movies.head(N)

# Display the recommendations
print(f'\nTop {N} recommended movies in the "{genre}" genre with at least {min_reviews_threshold} reviews:')
print(top_recommendations[['movieId', 'title', 'rating', 'userId']])


Enter the genre: action
Enter the minimum review threshold: 500
Enter the number of recommendations (N): 5

Top 5 recommended movies in the "action" genre with at least 500 reviews:
       movieId                                   title  rating  userId
36279     1374  Star Trek II: The Wrath of Khan (1982)     5.0     560
20805      736                          Twister (1996)     5.0     572
20799      736                          Twister (1996)     5.0     552
97720    68205              Crank: High Voltage (2009)     5.0     599
20678      733                        Rock, The (1996)     5.0     664


In [14]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors


# Step 1: Create a user-item matrix
user_item_matrix = merged_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Step 2: Create a similarity matrix between users using cosine similarity
user_similarity = cosine_similarity(user_item_matrix)

# Step 3: Create a Nearest Neighbors model
k_neighbors = 10  # Adjust the number of neighbors 'K' as needed
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=k_neighbors)
model_knn.fit(user_item_matrix)

# Step 4: Function to get movie recommendations based on similar users
def get_movie_recommendations(user_id, N=5):
    # Find K similar users
    user_index = user_item_matrix.index.get_loc(user_id)
    distances, indices = model_knn.kneighbors(user_item_matrix.iloc[user_index].values.reshape(1, -1))

    # Get the movies the similar users have liked
    similar_users_movies = user_item_matrix.iloc[indices.flatten()]

    # Aggregate ratings from similar users
    aggregated_ratings = similar_users_movies.mean(axis=0)

    # Filter out movies the target user has already rated
    target_user_ratings = user_item_matrix.loc[user_id]
    unrated_movies = target_user_ratings[target_user_ratings == 0].index

    # Sort and get the top N recommendations
    recommendations = aggregated_ratings[unrated_movies].sort_values(ascending=False).head(N)

    return recommendations

# Step 5: Get user input for the target user (u)
target_user_id = int(input("Enter the target user ID (u): "))

# Step 6: Get recommendations based on similar users
recommendations = get_movie_recommendations(target_user_id, N=5)

# Step 7: Display recommendations
print(f"\nTop 5 movie recommendations for user {target_user_id}:")
print(recommendations)


Enter the target user ID (u): 200

Top 5 movie recommendations for user 200:
movieId
590    2.85
150    2.65
110    2.45
349    2.10
592    1.90
dtype: float64
