# Collaborative-Filtering Based Recommandation

In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD, KNNBasic
from surprise.model_selection import train_test_split

In [13]:
movies = pd.read_csv("dataset/movie.csv")

In [14]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [2]:
rating = pd.read_csv('dataset/rating.csv')

In [3]:
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [4]:
rating.duplicated().sum()

0

In [5]:
rating['movieId'].duplicated().sum()

19973519

In [6]:
reader = Reader(rating_scale=(rating.rating.min(), rating.rating.max()))
reader

<surprise.reader.Reader at 0x227521bd750>

In [7]:
data = Dataset.load_from_df(rating[['userId', 'movieId', 'rating']], reader)

In [8]:
data

<surprise.dataset.DatasetAutoFolds at 0x227521be350>

##### using SVD Algorithm

In [9]:
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [10]:
trainset

<surprise.trainset.Trainset at 0x227521bf4f0>

In [11]:
testset

[(130202, 1393, 3.5),
 (24914, 62999, 4.0),
 (51507, 89804, 4.0),
 (69683, 7004, 2.5),
 (135504, 2657, 2.0),
 (15021, 616, 3.0),
 (76509, 593, 5.0),
 (46350, 3858, 1.0),
 (88827, 589, 3.0),
 (108172, 1954, 4.0),
 (48747, 1210, 5.0),
 (92781, 435, 5.0),
 (82789, 25977, 1.5),
 (71287, 4124, 2.0),
 (70391, 3017, 3.0),
 (71336, 368, 3.0),
 (21996, 837, 3.0),
 (22693, 61646, 3.5),
 (22674, 7254, 3.5),
 (35735, 3911, 4.5),
 (92822, 68358, 4.0),
 (35215, 1247, 5.0),
 (46926, 2324, 4.0),
 (24740, 2001, 3.0),
 (6624, 3703, 2.5),
 (83728, 1073, 5.0),
 (1155, 1722, 2.0),
 (39556, 8528, 3.5),
 (51827, 708, 4.0),
 (81487, 2555, 1.0),
 (114658, 590, 5.0),
 (17996, 6898, 3.0),
 (44633, 40815, 4.0),
 (98834, 2348, 3.0),
 (45553, 502, 2.0),
 (109287, 410, 2.0),
 (12973, 1641, 2.0),
 (127955, 293, 4.5),
 (110981, 150, 4.0),
 (110285, 533, 1.0),
 (26955, 1, 4.0),
 (21423, 8361, 2.5),
 (63787, 2067, 2.0),
 (15974, 596, 4.0),
 (6233, 1219, 4.5),
 (40617, 3398, 3.0),
 (21901, 7445, 2.0),
 (55848, 268, 4.0),

In [13]:
model = SVD(
    verbose=True,
    n_factors=100,
    n_epochs=20,
)

In [14]:
model.fit(trainset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x227521beb00>

In [25]:
model.test(testset)

[Prediction(uid=130202, iid=1393, r_ui=3.5, est=3.771068216980255, details={'was_impossible': False}),
 Prediction(uid=24914, iid=62999, r_ui=4.0, est=3.980017799105638, details={'was_impossible': False}),
 Prediction(uid=51507, iid=89804, r_ui=4.0, est=3.8949408722012, details={'was_impossible': False}),
 Prediction(uid=69683, iid=7004, r_ui=2.5, est=2.5163149556391877, details={'was_impossible': False}),
 Prediction(uid=135504, iid=2657, r_ui=2.0, est=3.2123610246621355, details={'was_impossible': False}),
 Prediction(uid=15021, iid=616, r_ui=3.0, est=2.8201870398124673, details={'was_impossible': False}),
 Prediction(uid=76509, iid=593, r_ui=5.0, est=4.348133969574109, details={'was_impossible': False}),
 Prediction(uid=46350, iid=3858, r_ui=1.0, est=2.2574779064173405, details={'was_impossible': False}),
 Prediction(uid=88827, iid=589, r_ui=3.0, est=3.4672363703346445, details={'was_impossible': False}),
 Prediction(uid=108172, iid=1954, r_ui=4.0, est=4.225647297049734, details={'w

In [None]:
model.predict()

#### Predict Top-n movies

In [4]:
rating['movieId'].unique().sum()

1572269395

In [8]:
import pickle
model = pickle.load(open("C_filtering_model.pkl", "rb"))


In [41]:
def C_recommand(user_id, top_n=10):
    all_movies_ids = rating['movieId'].unique()
    # print(len(all_movies_ids))
    # print(all_movies_ids)
    # print()
    
    rated_movies = rating[rating['userId'] == user_id]['movieId'].tolist()
    # print(len(rated_movies))
    # print(rated_movies)

    unseen_movies = [movies_id for movies_id in all_movies_ids if movies_id not in rated_movies]
    # print(len(unseen_movies))
    # print(unseen_movies)

    # predict rating for unseen movies
    prediction = []
    for movies_id in unseen_movies:
        pred = model.predict(user_id, movies_id)
        # print(pred)
        # print()
        prediction.append((movies_id, pred.est))

    prediction.sort(key=lambda x: x[1], reverse=True)
    # print(prediction)
    
    top_preds = prediction[:top_n]

    temp_df = pd.DataFrame(top_preds, columns=['movieId', 'estimated_rating'])


    return movies[movies['movieId'].isin(temp_df['movieId'])]['title'].tolist()
    

In [39]:
C_recommand(1)

['Unknown Soldier, The (Tuntematon sotilas) (1955)',
 'Serenity (2005)',
 "Dr. Horrible's Sing-Along Blog (2008)",
 'Harry Potter and the Deathly Hallows: Part 1 (2010)',
 'Harry Potter and the Deathly Hallows: Part 2 (2011)',
 'Dylan Moran: Like, Totally (2006)',
 'Frozen Planet (2011)',
 'Tracks (2013)',
 'Zero Motivation (Efes beyahasei enosh) (2014)',
 'The Imitation Game (2014)']

In [37]:
movies[rating['userId'] == 1]['title'].tolist()

  movies[rating['userId'] == 1]['title'].tolist()


['Toy Story (1995)',
 'Jumanji (1995)',
 'Grumpier Old Men (1995)',
 'Waiting to Exhale (1995)',
 'Father of the Bride Part II (1995)',
 'Heat (1995)',
 'Sabrina (1995)',
 'Tom and Huck (1995)',
 'Sudden Death (1995)',
 'GoldenEye (1995)',
 'American President, The (1995)',
 'Dracula: Dead and Loving It (1995)',
 'Balto (1995)',
 'Nixon (1995)',
 'Cutthroat Island (1995)',
 'Casino (1995)',
 'Sense and Sensibility (1995)',
 'Four Rooms (1995)',
 'Ace Ventura: When Nature Calls (1995)',
 'Money Train (1995)',
 'Get Shorty (1995)',
 'Copycat (1995)',
 'Assassins (1995)',
 'Powder (1995)',
 'Leaving Las Vegas (1995)',
 'Othello (1995)',
 'Now and Then (1995)',
 'Persuasion (1995)',
 'City of Lost Children, The (Cité des enfants perdus, La) (1995)',
 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)',
 'Dangerous Minds (1995)',
 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)',
 'Wings of Courage (1995)',
 'Babe (1995)',
 'Carrington (1995)',
 'Dead Man Walking (1995)',
 'Across the Sea of 

In [11]:
import pickle

In [20]:
pickle.dump(model, open('C_filtering_model.pkl', 'wb'))

In [21]:
pickle.dump(rating, open('C_rating.pkl', 'wb'))

In [22]:
movies = pd.read_csv('dataset/movie.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [23]:
pickle.dump(movies, open('C_movies.pkl', 'wb'))

In [24]:
movies = pickle.load(open("C_movies.pkl", "rb"))