# Collaborative Filtering

In [16]:
import pandas as pd 
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

## Prepare Using Dummy Data

In [37]:
# Load the toy user x movie rating matrix; the first CSV column is used as the row index.
ratings = pd.read_csv(
    filepath_or_buffer='dataset/toy_ratings1.csv',
    index_col=0,
)
ratings

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,4.0,5.0,3.0,,2.0,1.0
user 2,5.0,3.0,3.0,2.0,2.0,
user 3,1.0,,,4.0,5.0,4.0
user 4,,2.0,1.0,4.0,,3.0
user 5,1.0,,2.0,3.0,3.0,4.0


In [38]:
# For a long-format dataset (one row per rating) the user x movie matrix would first be built
# with something like:
#   dataFrame.pivot_table(values=['rating'], index='userId', columns=['movieId'])
# The toy CSV is already in matrix form, so only the missing ratings need handling.
# NOTE(review): after this step an unrated movie is indistinguishable from a rating of 0 and
# contributes to the per-column mean used by `standardize` - confirm this is intended.
ratings = ratings.fillna(value=0)
ratings.head()

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,4.0,5.0,3.0,0.0,2.0,1.0
user 2,5.0,3.0,3.0,2.0,2.0,0.0
user 3,1.0,0.0,0.0,4.0,5.0,4.0
user 4,0.0,2.0,1.0,4.0,0.0,3.0
user 5,1.0,0.0,2.0,3.0,3.0,4.0


In [39]:
def standardize(row):
    """Mean-centre a Series and scale it by its value range.

    Computes ``(row - row.mean()) / (row.max() - row.min())``.

    Parameters
    ----------
    row : pandas.Series
        The values to normalise. Despite the name, this is whatever Series
        ``DataFrame.apply`` hands over (a column when ``axis`` is left at its
        default).

    Returns
    -------
    pandas.Series
        The centred and range-scaled values. When every value is identical
        (range of 0) the centred values, which are all 0, are returned as-is.
    """
    centered = row - row.mean()
    span = row.max() - row.min()
    if span == 0:
        # Constant input: dividing 0 by a zero range would produce NaN, which
        # would then flow into the similarity matrix. The centred values are
        # already all 0, so return them unscaled.
        return centered
    return centered / span

# DataFrame.apply passes one column at a time by default, so each movie's
# ratings are normalised across users.
ratings_std = ratings.apply(standardize)

# cosine_similarity compares rows, so transpose to put one movie per row and
# obtain an item-item similarity matrix.
item_similarity = cosine_similarity(ratings_std.T)
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=ratings_std.columns,
    columns=ratings_std.columns,
)
item_similarity_df

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
action1,1.0,0.706689,0.813682,-0.799411,-0.025392,-0.914106
action2,0.706689,1.0,0.723102,-0.845154,-0.518999,-0.843374
action3,0.813682,0.723102,1.0,-0.847946,-0.37998,-0.802181
romantic1,-0.799411,-0.845154,-0.847946,1.0,0.148039,0.723747
romantic2,-0.025392,-0.518999,-0.37998,0.148039,1.0,0.393939
romantic3,-0.914106,-0.843374,-0.802181,0.723747,0.393939,1.0


In [42]:
# Let's make some recommendations using one movie and the user's rating of it

def get_similar_movie(movie_name, user_rating, neutral_rating=2.5, similarity_df=None):
    """Score every movie by its similarity to one movie the user has rated.

    Each similarity to ``movie_name`` is multiplied by
    ``user_rating - neutral_rating``. A rating above the neutral point keeps
    the sign of the similarity, while a rating below it flips the sign, so
    movies similar to a disliked title end up with negative scores.

    Parameters
    ----------
    movie_name : label
        Column of the similarity matrix to look up. An unknown label raises
        ``KeyError`` from the pandas column lookup.
    user_rating : number
        The rating the user gave to ``movie_name``.
    neutral_rating : number, default 2.5
        Rating treated as "no preference"; 2.5 is the midpoint of a 0-5 scale.
    similarity_df : pandas.DataFrame, optional
        Item-item similarity matrix to use. Defaults to the notebook-level
        ``item_similarity_df``.

    Returns
    -------
    pandas.Series
        Scores indexed by movie, sorted from highest to lowest. The rated
        movie itself is included.
    """
    if similarity_df is None:
        # Fall back to the similarity matrix computed earlier in the notebook.
        similarity_df = item_similarity_df
    weight = user_rating - neutral_rating
    similarity_scores = similarity_df[movie_name] * weight
    return similarity_scores.sort_values(ascending=False)


get_similar_movie('romantic3',1)
## A rating below the 2.5 midpoint makes the multiplier negative, so the movies most similar
## to the low-rated one (including the movie itself) are sorted to the bottom of the list

action1      1.371159
action2      1.265061
action3      1.203271
romantic2   -0.590909
romantic1   -1.085620
romantic3   -1.500000
Name: romantic3, dtype: float64

In [46]:
# In case the user has rated multiple movies
action_lover = [("action1", 5), ("romantic2", 1), ("romantic3", 1)]

# One weighted similarity row per rated movie (2.5 is the neutral midpoint of the 0-5 scale).
# The rows are collected in a list and the frame is built once: DataFrame.append was removed
# in pandas 2.0, and growing a frame inside a loop copies it on every iteration.
weighted_rows = [
    item_similarity_df[movie] * (rating - 2.5)
    for movie, rating in action_lover
]
# reset_index(drop=True) gives the plain 0..n-1 row labels that ignore_index=True used to produce.
similar_movies = pd.DataFrame(weighted_rows).reset_index(drop=True)
print(similar_movies)
# Sum the per-movie contributions and rank the candidates from best to worst.
similar_movies.sum().sort_values(ascending=False)
    

    action1   action2   action3  romantic1  romantic2  romantic3
0  2.500000  1.766722  2.034204  -1.998527  -0.063480  -2.285265
1  0.038088  0.778499  0.569970  -0.222059  -1.500000  -0.590909
2  1.371159  1.265061  1.203271  -1.085620  -0.590909  -1.500000


action1      3.909247
action2      3.810282
action3      3.807445
romantic2   -2.154389
romantic1   -3.306206
romantic3   -4.376174
dtype: float64