In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity

In [176]:
data = pd.read_csv('/kaggle/input/dataset/Dataset.csv')[['item_id', 'user_id', 'rating']]
data

Unnamed: 0,item_id,user_id,rating
0,50,0,5
1,172,0,5
2,133,0,1
3,242,196,3
4,302,186,3
...,...,...,...
99998,476,880,3
99999,204,716,5
100000,1090,276,1
100001,225,13,2


In [279]:
data.groupby(by='item_id').rating.mean()

item_id
1       3.878319
2       3.206107
3       3.033333
4       3.550239
5       3.302326
          ...   
1678    1.000000
1679    3.000000
1680    2.000000
1681    3.000000
1682    3.000000
Name: rating, Length: 1682, dtype: float64

In [280]:
data.groupby(by='user_id').rating.mean()

user_id
0      3.666667
1      3.610294
2      3.709677
3      2.796296
4      4.333333
         ...   
939    4.265306
940    3.457944
941    4.045455
942    4.265823
943    3.410714
Name: rating, Length: 944, dtype: float64

In [171]:
metadata = pd.read_csv('/kaggle/input/dataset/Movie_Id_Titles.csv').set_index('item_id')
metadata

Unnamed: 0_level_0,title
item_id,Unnamed: 1_level_1
1,Toy Story (1995)
2,GoldenEye (1995)
3,Four Rooms (1995)
4,Get Shorty (1995)
5,Copycat (1995)
...,...
1678,Mat' i syn (1997)
1679,B. Monkey (1998)
1680,Sliding Doors (1998)
1681,You So Crazy (1994)


In [244]:
user = data[data.user_id == 0]
user

Unnamed: 0,item_id,user_id,rating
0,50,0,5
1,172,0,5
2,133,0,1


In [253]:
user = data[data.user_id == 344]
user

Unnamed: 0,item_id,user_id,rating
5255,479,344,4
5321,306,344,5
5423,278,344,3
5444,39,344,3
5521,462,344,2
...,...,...,...
90061,319,344,1
93602,715,344,4
98611,173,344,5
98920,756,344,2


In [245]:
movie = data[data.item_id == 50]
movie

Unnamed: 0,item_id,user_id,rating
0,50,0,5
502,50,290,5
860,50,79,4
1055,50,2,5
1093,50,8,5
...,...,...,...
99570,50,749,5
99646,50,886,5
99856,50,739,4
99931,50,903,5


In [251]:
((movie[movie.rating == 5].count() / 584) * 100)[0]

55.821917808219176

In [281]:
data[data.item_id == 5]

Unnamed: 0,item_id,user_id,rating
81,5,293,3
1461,5,43,4
1995,5,311,3
3311,5,109,3
10260,5,344,3
...,...,...,...
96917,5,643,3
97217,5,577,4
98214,5,267,3
99747,5,814,3


In [274]:
class Collaborative_Recommendation_System():
    """
    """
    
    def __init__(self, dataset, metadataset):
        """
        """
        if list(dataset.columns) != ['item_id', 'user_id', 'rating']:
            raise("Dataset columns should be ['item_id', 'user_id', 'rating']")
            pass
        if metadataset.index.name != 'item_id': raise("Meta Dataset index sould be item_id")
        
        self.data = dataset
        self.metadata = metadataset
        self.similarity_matrix = []
        
        if len(dataset.user_id.unique()) < len(dataset.item_id.unique()):
            self.similarity_matrix = self.similarity(
                data.pivot_table(values='rating', index='user_id', columns='item_id').fillna(0))
            pass
        else: 
            self.similarity_matrix = self.similarity(
                data.pivot_table(values='rating', index='item_id', columns='user_id').fillna(0))
            pass
        pass
    
    def similarity(self, pivot_table):
        return pd.DataFrame(data= cosine_similarity(pivot_table, pivot_table),
                            index= pivot_table.index, columns=pivot_table.index)

    def user_based(self, user_id, top_users=10, top_items=10):
        # get the similar users
        users = self.similarity_matrix[user_id].sort_values(ascending=False)[1: top_users+1]
        # get movies that user not watch and the similar users were waching to recommand them
        unseen_items = self.data[(self.data.user_id.apply(
            lambda x: x in users.index.values)) & (self.data.user_id != 2)]
        
        # Calculate the rating for the movies that the user is expected to give.
        # in case he watches it based on his closest users.
        items = []
        for item in unseen_items.item_id.unique():
            # watching percentage to to make popularity play role in recommendation.
            prec_watch = self.data[data.item_id == item].count()[0] / data.shape[0]
            item_ratings = unseen_items[unseen_items.item_id == item].set_index('user_id')
            # weighted rating, add the similarity score of user as facctor
            rating = ((item_ratings.rating * users[item_ratings.index]).sum() / users.sum())
            items.append([item, rating, rating * prec_watch]) 
            pass
              
        return pd.DataFrame(data= items, columns= ['item_id', 'rating', 'rating with watching percentage'])\
            .set_index('item_id').sort_values(by='rating', ascending=False)[:top_items].join(metadata)
    
    def item_based(self, item_id, top_items=10): 
        return pd.DataFrame(
            data= self.similarity_matrix[item_id].sort_values(ascending=False)\
            [1: top_items+1]).set_index('item_id').join(metadata)
        

In [275]:
rec = Collaborative_Recommendation_System(dataset=data, metadataset=metadata)

In [276]:
rec.user_based(0)

Unnamed: 0_level_0,rating,rating with watching percentage,title
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
50,4.795069,0.028002,Star Wars (1977)
172,4.795069,0.017645,"Empire Strikes Back, The (1980)"
181,3.690025,0.018708,Return of the Jedi (1983)
174,1.652027,0.006938,Raiders of the Lost Ark (1981)
210,1.472896,0.004875,Indiana Jones and the Last Crusade (1989)
258,1.455743,0.00741,Contact (1997)
64,1.299574,0.003678,"Shawshank Redemption, The (1994)"
127,1.278131,0.005279,"Godfather, The (1972)"
100,1.2611,0.006406,Fargo (1996)
234,1.200756,0.003362,Jaws (1975)


In [277]:
rec.user_based(1)

Unnamed: 0_level_0,rating,rating with watching percentage,title
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
50,5.0,0.029199,Star Wars (1977)
174,4.902232,0.020589,Raiders of the Lost Ark (1981)
172,4.801553,0.017669,"Empire Strikes Back, The (1980)"
98,4.703498,0.018343,"Silence of the Lambs, The (1991)"
176,4.698682,0.013344,Aliens (1986)
56,4.603402,0.018137,Pulp Fiction (1994)
96,4.590189,0.013541,Terminator 2: Judgment Day (1991)
181,4.494908,0.022789,Return of the Jedi (1983)
173,4.494727,0.014562,"Princess Bride, The (1987)"
183,4.493423,0.013075,Alien (1979)


In [230]:
rec.user_based(5)

Unnamed: 0_level_0,rating,title
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1
173,4.803076,"Princess Bride, The (1987)"
50,4.605782,Star Wars (1977)
168,4.508587,Monty Python and the Holy Grail (1974)
174,4.500854,Raiders of the Lost Ark (1981)
172,4.410465,"Empire Strikes Back, The (1980)"
181,4.405004,Return of the Jedi (1983)
153,4.398212,"Fish Called Wanda, A (1988)"
89,4.20195,Blade Runner (1982)
195,4.195862,"Terminator, The (1984)"
228,4.007674,Star Trek: The Wrath of Khan (1982)
