# Collaborative-Filtering Based Recommandation

In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

In [2]:
movies = pd.read_csv("dataset/movie.csv")

In [3]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
rating = pd.read_csv('dataset/rating.csv')

In [5]:
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [6]:
rating.duplicated().sum()

0

In [7]:
rating['movieId'].duplicated().sum()

19973519

In [8]:
reader = Reader(rating_scale=(rating.rating.min(), rating.rating.max()))
reader

<surprise.reader.Reader at 0x23a73bc7a00>

In [9]:
data = Dataset.load_from_df(rating[['userId', 'movieId', 'rating']], reader)

In [10]:
data

<surprise.dataset.DatasetAutoFolds at 0x23a73bc6350>

##### using SVD Algorithm

In [11]:
trainset, _ = train_test_split(data, test_size=0.1, random_state=42)

In [12]:
trainset

<surprise.trainset.Trainset at 0x23a5c6ac340>

In [13]:
model = SVD(
    verbose=True,
    n_factors=100,
    n_epochs=20,
)

In [14]:
model.fit(trainset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x23a94db85b0>

#### Predict Top-n movies

In [15]:
rating['movieId'].unique().sum()

1572269395

In [16]:
def C_recommand(user_id, top_n=10):
    all_movies_ids = rating['movieId'].unique()
    # print(len(all_movies_ids))
    # print(all_movies_ids)
    # print()
    
    rated_movies = rating[rating['userId'] == user_id]['movieId'].tolist()
    # print(len(rated_movies))
    # print(rated_movies)

    unseen_movies = [movies_id for movies_id in all_movies_ids if movies_id not in rated_movies]
    # print(len(unseen_movies))
    # print(unseen_movies)

    # predict rating for unseen movies
    prediction = []
    for movies_id in unseen_movies:
        pred = model.predict(user_id, movies_id)
        # print(pred)
        # print()
        prediction.append((movies_id, pred.est))

    prediction.sort(key=lambda x: x[1], reverse=True)
    # print(prediction)
    
    top_preds = prediction[:top_n]

    temp_df = pd.DataFrame(top_preds, columns=['movieId', 'estimated_rating'])


    return movies[movies['movieId'].isin(temp_df['movieId'])]['title'].tolist()
    

In [None]:
C_recommand(1)

In [18]:
movies[rating['userId'] == 1]['title'].tolist()

  movies[rating['userId'] == 1]['title'].tolist()


['Toy Story (1995)',
 'Jumanji (1995)',
 'Grumpier Old Men (1995)',
 'Waiting to Exhale (1995)',
 'Father of the Bride Part II (1995)',
 'Heat (1995)',
 'Sabrina (1995)',
 'Tom and Huck (1995)',
 'Sudden Death (1995)',
 'GoldenEye (1995)',
 'American President, The (1995)',
 'Dracula: Dead and Loving It (1995)',
 'Balto (1995)',
 'Nixon (1995)',
 'Cutthroat Island (1995)',
 'Casino (1995)',
 'Sense and Sensibility (1995)',
 'Four Rooms (1995)',
 'Ace Ventura: When Nature Calls (1995)',
 'Money Train (1995)',
 'Get Shorty (1995)',
 'Copycat (1995)',
 'Assassins (1995)',
 'Powder (1995)',
 'Leaving Las Vegas (1995)',
 'Othello (1995)',
 'Now and Then (1995)',
 'Persuasion (1995)',
 'City of Lost Children, The (Cité des enfants perdus, La) (1995)',
 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)',
 'Dangerous Minds (1995)',
 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)',
 'Wings of Courage (1995)',
 'Babe (1995)',
 'Carrington (1995)',
 'Dead Man Walking (1995)',
 'Across the Sea of 

In [19]:
import pickle

In [20]:
pickle.dump(model, open('C_filtering_model.pkl', 'wb'))

In [21]:
pickle.dump(rating, open('C_rating.pkl', 'wb'))

In [22]:
movies = pd.read_csv('dataset/movie.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [23]:
pickle.dump(movies, open('C_movies.pkl', 'wb'))