# Movie Recommendation System

### Part 2: Recommendation Models

In this project, I used Content-Based Filtering model, Collaborative Filtering and Hybrid models to recommend movies.

### 2-1. Data split

For cross validation, I split data. Also, I needed to define the function to filter items already reviewed as recommended items shouldn't be in train set.

In [15]:
rating_train_df, rating_test_df = train_test_split(rating_full_df, stratify =rating_full_df['person_id'], test_size=0.3)
rating_indexed_train = rating_train_df.set_index('person_id')
rating_indexed_test = rating_test_df.set_index('person_id')

In [16]:
def get_items_reviewed(person_id, rating_full_df):
    return rating_train_df[rating_train_df['person_id']==person_id].movie_id.tolist()

### 2-2. Content-Based Filtering Model

In [17]:
class CBF_Recommender:
    
    MODEL_NAME='Content-Based Filtering'
    
    def __init__(self, normalized_movies, rating_train_df):
        self.normalized_movies = normalized_movies
        self.rating_train_df = rating_train_df
    
    def get_model_name(self):
        return self.MODEL_NAME
    
    
    def recommend_items(self, person_id, topn):
        user_df = rating_train_df[rating_train_df['person_id']==person_id]
        user_norm = []
        
        for i in range(user_df.shape[0]):
            r = normalized_movies[user_df['movie_id']][i]*user_df['rating_norm'].values[i]
            user_norm.append(r)
        user_profile = np.sum(user_norm, axis=0)
        
        dists = np.dot(normalized_movies, user_profile)
        similar_items = np.argsort(dists)[-topn:]
        
        items_to_ignore = get_items_reviewed(person_id, rating_full_df)
        similar_items_filtered = list(filter(lambda x: x not in items_to_ignore, similar_items))
                
        recommend = [(x,dists[x]) for x in similar_items_filtered]
        cbf_recs_df = pd.DataFrame(recommend, columns=['movie_id','cbf_recStrength'])
        
        return cbf_recs_df.sort_values('cbf_recStrength', ascending=False)


In [18]:
cbf_rec = CBF_Recommender(normalized_movies, rating_train_df)
cbf_rec.recommend_items(0,10)

Unnamed: 0,movie_id,cbf_recStrength
8,8952,0.305348
7,8901,0.305338
6,7559,0.305255
5,1798,0.304793
4,7049,0.304742
3,6397,0.304657
2,5071,0.30456
1,9557,0.304554
0,6515,0.30454


### 2-3. Collaborative Filtering Model

In [36]:
NUMBER_OF_FACTORS=32

class CF_Recommender:
    
    MODEL_NAME = 'Collaborative Filtering'
    
    def __init__(self, normalized_movies, rating_train_df):
        self.normalized_movies = normalized_movies
        self.rating_train_df = rating_train_df
        self.all_prediction = self.get_user_item(rating_train_df)
        
    def get_model_name(self):
        return self.MODEL_NAME
    
    def get_user_item(self, rating_train_df):
        user_item = pd.pivot(rating_train_df, index='person_id', columns='movie_id', values='rating_norm').fillna(0)
        user_item_matrix = user_item.values
        U, s, Vt = svds(user_item_matrix, k=NUMBER_OF_FACTORS)
        sigma = np.diag(s)
        all_prediction = np.dot(np.dot(U, sigma), Vt)
        return all_prediction
    
    def recommend_items(self, person_id, topn):
        person_items = self.all_prediction[person_id]
        best_indices = np.argsort(person_items)[-topn:]
        items_to_ignore = get_items_reviewed(person_id, rating_train_df)
        
        recommended_filtered = list(filter(lambda x: x not in items_to_ignore, best_indices))
        recommend = [(x,person_items[x]) for x in recommended_filtered]
        
        cf_recs_df = pd.DataFrame(recommend, columns=['movie_id','cf_recStrength'])
        
        return cf_recs_df.sort_values('cf_recStrength', ascending=False)
    

In [38]:
cf_rec = CF_Recommender(normalized_movies, rating_full_df)
cf_rec.recommend_items(2,10)

Unnamed: 0,movie_id,cf_recStrength
8,13,0.000796
7,2241,0.000734
6,252,0.000645
5,1082,0.000471
4,81,0.00044
3,6946,0.000423
2,880,0.000386
1,1619,0.000381
0,8848,0.000377


### 2-4. Hybrid Model

In [63]:
class Hybrid_Recommender:
    
    MODEL_NAME = 'Hybrid Model'
    
    
    def __init__(self, cf_rec_model, cbf_rec_model):
        self.cf_rec_model = cf_rec_model
        self.cbf_rec_model = cbf_rec_model
        
    def get_model_name(self):
        return self.MODEL_NAME
        
    def recommend_items(self, person_id, topn):
        
        cf_recs_df = self.cf_rec_model.recommend_items(person_id, 4000)
        cbf_recs_df = self.cbf_rec_model.recommend_items(person_id, 4000)

        
        hybrid_df = cf_recs_df.merge(cbf_recs_df, on='movie_id', how='inner')
        hybrid_df['hb_recStrength'] = hybrid_df['cf_recStrength']*hybrid_df['cbf_recStrength']
        
        recommended_items = hybrid_df.sort_values('hb_recStrength', ascending=False)[:10]
        items_to_ignore = get_items_reviewed(person_id, rating_train_df)
        
        recommended_filtered = recommended_items[~recommended_items.movie_id.isin(items_to_ignore)]

        return recommended_filtered[['hb_recStrength','movie_id']]

In [65]:
hb_rec = Hybrid_Recommender(cf_rec, cbf_rec)
hb_rec.recommend_items(0,1000)

Unnamed: 0,hb_recStrength,movie_id
0,0.000187,4873
1,0.00014,7578
2,0.000115,3896
4,8.5e-05,8296
3,8.4e-05,2611
5,8.3e-05,9778
7,8.3e-05,7554
6,8.3e-05,6795
9,7.8e-05,7985
8,7.7e-05,5829
