# Importing Libraries & Data

In [None]:
# !pip3 install surprise

import pandas as pd
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 4.1 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1630164 sha256=c6544752ee103f50156a68c8ac40e0b66f3bbb512af5e5a7bf1c0c34bf071d9e
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


In [None]:
# Remote CSV sources for the ratings, movie metadata and user tables.
ratings_url = 'https://raw.githubusercontent.com/nchichilidze/RS-with-GE/main/preprocessed_movielens_1m/ratings.csv'
movies_url = 'https://raw.githubusercontent.com/nchichilidze/RS-with-GE/main/the_movies_dataset/movies_filled.csv'
users_url = 'https://raw.githubusercontent.com/nchichilidze/RS-with-GE/main/preprocessed_movielens_1m/users.csv'

# For ratings and users the first column is discarded right after loading
# (presumably a saved index column -- confirm against the CSV files).
ratings = pd.read_csv(ratings_url).iloc[:, 1:]
movies = pd.read_csv(movies_url)
users = pd.read_csv(users_url).iloc[:, 1:]

In [None]:
# Independent (deep) copy of the ratings; the feature columns are added to this
# frame so that `ratings` itself stays unchanged.
main = ratings.copy()

## Feature 1: Content Based Filtering Based Recommendation

For each movie in the dataframe I have generated a list of 50 similar movies. Each rating's CBF label is set to 1 if content-based filtering would have recommended the rated movie to that user, based on their favourite (positively rated) movies, and to 0 if it would not have.

In [None]:
# Load the precomputed content-based-filtering recommendations from the remote CSV.
cbf_url = 'https://raw.githubusercontent.com/nchichilidze/RS-with-GE/main/content_based_filtering/cbf_recommendations.csv'
cbf_recommendations = pd.read_csv(cbf_url)

In [None]:
# Cast the user id to int64 and the recommendation list to pandas' string dtype.
cbf_dtypes = {'user': 'int64', 'CBF_recommendations': 'string'}
cbf_recommendations = cbf_recommendations.astype(cbf_dtypes)

In [None]:
# Discard the favourite_movies column; only `user` and `CBF_recommendations`
# are read further down.
cbf_recommendations = cbf_recommendations.drop('favourite_movies', axis='columns')

In [None]:
# Preview the first five rows of the recommendation table.
cbf_recommendations.head(5)

Unnamed: 0,user,CBF_recommendations
0,1,"[905, 1940, 1958, 1961, 590, 1942, 1944, 1939,..."
1,2,"[654, 25, 1733, 2885, 2415, 469, 779, 2969, 13..."
2,3,"[1220, 643, 651, 856, 2923, 3617, 2795, 1001, ..."
3,4,"[2474, 770, 824, 826, 1155, 1666, 1234, 1519, ..."
4,5,"[1198, 589, 480, 1200, 260, 2099, 356, 1339, 3..."


In [None]:
# Label every rating with CBF = 1 when the rated movie is in that user's
# content-based recommendation list, and CBF = 0 otherwise.
#
# The lists are stored as text such as "[905, 1940, 1958]". Searching the str()
# of a filtered Series is unreliable: the Series repr shortens long strings to
# pandas' display width, so most of each list is never inspected, and substring
# matching lets movie id 19 match "[1940". Parsing every list once into a set of
# integer ids gives exact membership tests and avoids re-filtering the
# recommendation table for each rating.

# user id -> set of recommended movie ids (the digit runs found in the list text).
recommended_by_user = {}
parsed_id_lists = cbf_recommendations['CBF_recommendations'].astype(str).str.findall(r'\d+')
for rec_user, id_strings in zip(cbf_recommendations['user'], parsed_id_lists):
  recommended_by_user.setdefault(int(rec_user), set()).update(int(s) for s in id_strings)

# Users without a recommendation row get an empty set, i.e. CBF = 0 for all their ratings.
no_recommendations = frozenset()

main['CBF'] = [
  int(int(movie_id) in recommended_by_user.get(int(user_id), no_recommendations))
  for user_id, movie_id in zip(main['user_id'], main['movie_id'])
]


In [None]:
# Count how many ratings received each CBF label.
main.loc[:, 'CBF'].value_counts()

0    982147
1     18062
Name: CBF, dtype: int64

## Feature 2: Collaborative Filtering Based Prediction

Using the SVD collaborative filtering algorithm, I generate a predicted rating for every (user, movie) rating in the dataframe.

In [None]:
# Fit an SVD collaborative-filtering model on the ratings and store its
# estimated rating for every (user, movie) pair of `main` in the CF column.
reader = Reader()
data = Dataset.load_from_df(ratings[['user_id', 'movie_id', 'rating']], reader)

# A fixed seed makes the fitted model, and therefore the CF column, reproducible.
svd = SVD(random_state=42)

# 10-fold cross-validation serves only as a quality report (RMSE / MAE per fold).
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv = 10, verbose=True)

# Refit on all ratings. The anti-testset (every unrated user/movie pair) is no
# longer built and scored here: its predictions were never used.
# NOTE(review): the model is fit on the same ratings it predicts below, so the
# CF estimates are in-sample.
trainset = data.build_full_trainset()
svd.fit(trainset)

def predict_rating_CF(user_id, movie_id, true_rating):
  """Return the prediction of the fitted `svd` model for one (user, movie) pair.

  `true_rating` is forwarded to `svd.predict` as the known rating; the
  estimated rating is the `est` field of the returned prediction.
  """
  return svd.predict(user_id, movie_id, true_rating)


# Build the column directly from the float estimates. Initialising the column
# with the integer 0 and filling it row by row stored the estimates in an
# integer column, which dropped their fractional part.
main['CF'] = [
    predict_rating_CF(user, movie, rating).est
    for user, movie, rating in zip(main['user_id'], main['movie_id'], main['rating'])
]

Evaluating RMSE, MAE of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8684  0.8674  0.8648  0.8649  0.8636  0.8669  0.8623  0.8666  0.8695  0.8688  0.8663  0.0022  
MAE (testset)     0.6806  0.6801  0.6789  0.6780  0.6779  0.6800  0.6763  0.6807  0.6815  0.6813  0.6795  0.0016  
Fit time          62.42   55.50   54.90   54.62   55.05   54.81   55.08   54.68   54.97   54.55   55.66   2.27    
Test time         1.67    1.25    1.17    1.16    1.68    1.20    1.13    1.14    1.16    1.29    1.28    0.20    


In [None]:
# Preview the first five rows of the feature table.
main.head(5)

Unnamed: 0,user_id,movie_id,rating,CBF,CF
0,1,1193,5,0,4
1,1,661,3,0,3
2,1,914,3,0,4
3,1,3408,4,0,4
4,1,2355,5,0,4


In [None]:
# Write the feature table to disk, leaving out the dataframe index.
output_path = 'main.csv'
main.to_csv(output_path, index=False)