# User Based Recommender

    1. Preparation of the Data Set

    2. Determining the Movies Watched by the User Who Will Receive Suggestions

    3. Accessing the Data and Ids of Other Users Who Watched the Same Movies

    4. Identifying the Users Whose Behavior Is Most Similar to That of the Target User

    5. Calculating the Weighted Average Recommendation Score

## Import Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
# Display settings: show every column (no column limit) and allow output up to
# 500 characters wide when DataFrames are printed.
pd.set_option("display.max_columns",None)
pd.set_option("display.width",500)

## Create the user_movie_df

In [2]:
def create_user_mvoie_df(
    movie_path="C://Users//Tohid//Downloads//recommender_systems//datasets//movie_lens_dataset//movie.csv",
    rating_path="C://Users//Tohid//Downloads//recommender_systems//datasets//movie_lens_dataset//rating.csv",
    rare_threshold=10000,
):
    """Build the user x movie rating matrix used by the recommender.

    Args:
        movie_path: CSV file providing at least ``movieId`` and ``title``.
        rating_path: CSV file providing at least ``userId``, ``movieId`` and
            ``rating``.
        rare_threshold: Titles appearing in at most this many rows of the
            merged movie/rating table are treated as rare and dropped.

    Returns:
        A pivot table indexed by ``userId``. Because ``values`` is passed as a
        list, the columns are a two-level index of ``("rating", title)`` and
        each cell is the mean rating the user gave that title (NaN when the
        user did not rate it).
    """
    movie = pd.read_csv(movie_path)
    rating = pd.read_csv(rating_path)
    df = movie.merge(rating, how="left", on="movieId")

    # Filter the counts as a Series. Wrapping value_counts() in a DataFrame and
    # selecting the "title" column breaks on pandas >= 2.0, where that column
    # is named "count" instead.
    rating_counts = df["title"].value_counts()
    rare_movies = rating_counts[rating_counts <= rare_threshold].index
    common_movies = df[~df["title"].isin(rare_movies)]

    user_movie_df = common_movies.pivot_table(
        index=["userId"], columns=["title"], values=["rating"]
    )
    return user_movie_df

In [3]:
# Build the user x movie rating matrix once; the cells below reuse it.
user_movie_df = create_user_mvoie_df()

## Create User Based Recommender

In [19]:
def user_based_recommender(random_user, user_movie_df, ratio=60, corr_th=0.65, score=3.5):
    random_user_df = user_movie_df[user_movie_df.index==random_user]
    movies_watched = random_user_df.columns[random_user_df.notna().any()].tolist()
    movies_watched_df = user_movie_df[movies_watched]
    user_movie_count = movies_watched_df.T.notnull().sum()
    user_movie_count = user_movie_count.reset_index()
    user_movie_count.columns = ["userId","movie_count"]
    prec = len(movies_watched)*ratio/100
    user_same_movies = user_movie_count[user_movie_count["movie_count"]>prec]["userId"]
    final_df = pd.concat([movies_watched_df[movies_watched_df.index.isin(user_same_movies)],random_user_df[movies_watched]])
    corr_df = final_df.T.corr().unstack().sort_values().drop_duplicates()
    corr_df = pd.DataFrame(corr_df,columns=["corr"])
    corr_df.index.names = ["user_id_1","user_id_2"]
    corr_df = corr_df.reset_index()
    top_users = corr_df[(corr_df["user_id_1"]==random_user) & (corr_df["corr"]>=corr_th)][["user_id_2","corr"]].reset_index(drop=True)
    top_users = top_users.sort_values(by="corr",ascending=False)
    top_users.rename(columns={"user_id_2":"userId"},inplace=True)
    rating = pd.read_csv("C://Users//Tohid//Downloads//recommender_systems//datasets//movie_lens_dataset//rating.csv")
    top_users_ratings = top_users.merge(rating[["userId","movieId","rating"]],how="inner")
    top_users_ratings["weighted_rating"] = top_users_ratings["corr"] * top_users_ratings["rating"]
    recommendation_df = top_users_ratings.groupby(["movieId"]).agg({"weighted_rating":"mean"})
    recommendation_df = recommendation_df.reset_index()
    movies_to_be_rocommend = recommendation_df[recommendation_df["weighted_rating"]>score].sort_values("weighted_rating",ascending=False)
    movie = pd.read_csv("C://Users//Tohid//Downloads//recommender_systems//datasets//movie_lens_dataset//movie.csv")
    return movies_to_be_rocommend.merge(movie[["movieId","title"]])

## An example of a movie recommendation system

In [20]:
# Draw one user id at random from the matrix index. The scalar is extracted
# with .iloc[0] because calling int() on a one-element array is deprecated in
# recent NumPy releases.
random_user = int(pd.Series(user_movie_df.index).sample(1).iloc[0])

In [21]:
# Recommend for the sampled user with the default thresholds
# (ratio=60, corr_th=0.65, score=3.5); the cell output is shown below.
user_based_recommender(random_user,user_movie_df)

Unnamed: 0,movieId,weighted_rating,title
0,2280,5.0,Clay Pigeons (1998)
1,3865,5.0,"Original Kings of Comedy, The (2000)"
2,81,4.0,Things to Do in Denver When You're Dead (1995)
3,1620,4.0,Kiss the Girls (1997)
4,1674,4.0,Witness (1985)
5,1711,4.0,Midnight in the Garden of Good and Evil (1997)
6,1717,4.0,Scream 2 (1997)
7,1960,4.0,"Last Emperor, The (1987)"
8,3146,4.0,Deuce Bigalow: Male Gigolo (1999)
9,3424,4.0,Do the Right Thing (1989)
