In [1]:
import pandas as pd
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

class RecommendationSystem:
    def __init__(self, data):
        self.df = pd.read_csv(data)
        self.df = self.df.iloc[: , 1:]
        self.all_movies = self.df.movie.unique()
        self.model = None
        
    def fit(self):
        data = Dataset.load_from_df(self.df,Reader())
        trainset = data.build_full_trainset()
        
        self.model = SVD()
        self.model.fit(trainset)
        
    def recommend(self, user_id, topk=10):
        watched = self.df[self.df.userId == user_id].movie
        not_watched = [movie for movie in self.all_movies if movie not in watched]
        score = [self.model.predict(user_id, movie).est for movie in not_watched]
        
        result = pd.DataFrame({"movie" : not_watched, "pred_score": score})
        result.sort_values("pred_score", ascending=False, inplace=True)
        return result.head(topk)
        

In [2]:
df = pd.read_csv("collaborative_filtering.csv",)
df = df.iloc[: , 1:]
df

Unnamed: 0,userId,movie,rating
0,1,One Flew Over the Cuckoo's Nest (1975),5
1,1,James and the Giant Peach (1996),3
2,1,My Fair Lady (1964),3
3,1,Erin Brockovich (2000),4
4,1,"Bug's Life, A (1998)",5
...,...,...,...
1000204,6040,Weekend at Bernie's (1989),1
1000205,6040,"Crying Game, The (1992)",5
1000206,6040,Welcome to the Dollhouse (1995),5
1000207,6040,Sophie's Choice (1982),4


In [3]:
df.to_csv("collaborative_filtering.csv")

In [4]:
recsys = RecommendationSystem("collaborative_filtering.csv")
recsys.fit()

In [5]:
recsys.recommend(user_id=1)

Unnamed: 0,movie,pred_score
2617,Sanjuro (1962),4.920704
23,Schindler's List (1993),4.880068
420,Casablanca (1942),4.820502
648,Roman Holiday (1953),4.8006
693,It's a Wonderful Life (1946),4.776097
629,Rear Window (1954),4.764472
723,"Philadelphia Story, The (1940)",4.759327
661,Citizen Kane (1941),4.754352
70,Yojimbo (1961),4.736807
2231,Paths of Glory (1957),4.721966
