<a href="https://colab.research.google.com/github/myrondza/Data-Science-Machine-Learning-Deep-Learning-AI-Guide-Algorithms/blob/master/Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Recommendation System

In [0]:
import numpy as np
import pandas

#Class for Popularity based Recommender System model
class Recommender():
    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.popularity_recommendations = None
        
    #Create the popularity based recommender system model
    def create(self, train_data, user_id, item_id):
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id
        #Get a count of user_ids for each unique item as recommendation score
        train_data_grouped = train_data.groupby([self.item_id]).agg({self.user_id: 'count'}).reset_index()
        train_data_grouped.rename(columns = {'user_id': 'score'},inplace=True)
        #Sort the items based upon recommendation score
        train_data_sort = train_data_grouped.sort_values(['score', self.item_id], ascending = [0,1])
        #Generate a recommendation rank based upon score
        train_data_sort['Rank'] = train_data_sort['score'].rank(ascending=0, method='first')
        #Get the top 10 recommendations
        self.popularity_recommendations = train_data_sort.head(10)

    #Use the popularity based recommender system model to make recommendations
    
    def recommend(self, user_id):    
        user_recommendations = self.popularity_recommendations
        user_recommendations['user_id'] = user_id
        cols = user_recommendations.columns.tolist()
        cols = cols[-1:] + cols[:-1]
        user_recommendations = user_recommendations[cols]
        return user_recommendations
    

#Class for Item similarity based Recommender System model
class item_similarity_recommender_py():
    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.cooccurence_matrix = None
        self.items_dict = None
        self.rev_items_dict = None
        self.item_similarity_recommendations = None
        
    #Get unique items (items) corresponding to a given user
    def get_user_items(self, user):
        user_data = self.train_data[self.train_data[self.user_id] == user]
        user_items = list(user_data[self.item_id].unique())        
        return user_items
        
    #Get unique users for a given item (item)
    def get_item_users(self, item):
        item_data = self.train_data[self.train_data[self.item_id] == item]
        item_users = set(item_data[self.user_id].unique())       
        return item_users
        
    #Get unique items (items) in the data
    def get_all_items_train_data(self):
        all_items = list(self.train_data[self.item_id].unique())    
        return all_items
        
    #Construct cooccurence matrix
    def construct_cooccurence_matrix(self, user_items, all_items):  
        user_items_users = []        
        for i in range(0, len(user_items)):
            user_items_users.append(self.get_item_users(user_items[i]))

        cooccurence_matrix = np.matrix(np.zeros(shape=(len(user_items), len(all_items))), float)

        for i in range(0,len(all_items)):
            items_i_data = self.train_data[self.train_data[self.item_id] == all_items[i]]
            users_i = set(items_i_data[self.user_id].unique())
            for j in range(0,len(user_items)):       
                users_j = user_items_users[j]
                users_intersection = users_i.intersection(users_j)
                if len(users_intersection) != 0:
                    users_union = users_i.union(users_j)
                    cooccurence_matrix[j,i] = float(len(users_intersection))/float(len(users_union))
                else:
                    cooccurence_matrix[j,i] = 0
        return cooccurence_matrix

    
    #Use the cooccurence matrix to make top recommendations
    
    def generate_top_recommendations(self, user, cooccurence_matrix, all_items, user_items):
        print("Non zero values in cooccurence_matrix :%d" % np.count_nonzero(cooccurence_matrix))
        
        user_sim_scores = cooccurence_matrix.sum(axis=0)/float(cooccurence_matrix.shape[0])
        user_sim_scores = np.array(user_sim_scores)[0].tolist()
        sort_index = sorted(((e,i) for i,e in enumerate(list(user_sim_scores))), reverse=True)
    
        columns = ['user_id', 'item', 'score', 'rank']
        df = pandas.DataFrame(columns=columns)
        rank = 1 
        for i in range(0,len(sort_index)):
            if ~np.isnan(sort_index[i][0]) and all_items[sort_index[i][1]] not in user_items and rank <= 10:
                df.loc[len(df)]=[user,all_items[sort_index[i][1]],sort_index[i][0],rank]
                rank = rank+1
        
        if df.shape[0] == 0:
            print("The current user has no items in the item similarity based recommendation model.")
            return -1
        else:
            return df
    
    def create(self, train_data, user_id, item_id):
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

    def recommend(self, user):
        user_items = self.get_user_items(user)    
        print("No. of unique items for the user: %d" % len(user_items))
        all_items = self.get_all_items_train_data()
        print("no. of unique items in the training set: %d" % len(all_items))
        cooccurence_matrix = self.construct_cooccurence_matrix(user_items, all_items)
        df_recommendations = self.generate_top_recommendations(user, cooccurence_matrix, all_items, user_items)
        return df_recommendations
    
    
    def get_similar_items(self, item_list):
        user_items = item_list
        all_items = self.get_all_items_train_data()
        print("no. of unique items in the training set: %d" % len(all_items)) 
        cooccurence_matrix = self.construct_cooccurence_matrix(user_items, all_items)
        user = ""
        df_recommendations = self.generate_top_recommendations(user, cooccurence_matrix, all_items, user_items)
        return df_recommendations

In [0]:
import pandas as pd
item_df =  pd.read_csv("recomm.csv")

In [0]:
item_df.head(5)

Unnamed: 0,user_id,item_id,item_count,title
0,101,1,1,Pizza
1,102,2,2,Momo
2,103,3,4,Burger
3,104,4,1,French Fries
4,105,5,1,Roll


In [0]:
item_grouped = item_df.groupby(['title']).agg({'item_count': 'count'}).reset_index()
grouped_sum = item_grouped['item_count'].sum()
item_grouped['percentage']  = item_grouped['item_count'].div(grouped_sum)*100
item_grouped.sort_values(['item_count', 'title'], ascending = [0,1])

Unnamed: 0,title,item_count,percentage
5,Ice Cream,5,12.195122
9,Roll,5,12.195122
0,Burger,4,9.756098
6,Momo,4,9.756098
7,Noodles,4,9.756098
8,Pizza,4,9.756098
12,Wraps,3,7.317073
1,Chicken Wings,2,4.878049
2,Cold Drink,2,4.878049
3,French Fries,2,4.878049


In [0]:
pm = Recommender()
pm.create(item_df,'user_id', 'title')

In [0]:
users = item_df['user_id'].unique()

##  User based Collaborative filtering

In [0]:
item_df[item_df["user_id"]==111]

Unnamed: 0,user_id,item_id,item_count,title
10,111,11,2,Chicken Wings
24,111,25,1,Noodles


### Recommendations based on Popular Items Purchased

In [0]:
user_id = users[10]
pm.recommend(user_id)

Unnamed: 0,user_id,title,score,Rank
5,111,Ice Cream,5,1.0
9,111,Roll,5,2.0
0,111,Burger,4,3.0
6,111,Momo,4,4.0
7,111,Noodles,4,5.0
8,111,Pizza,4,6.0
12,111,Wraps,3,7.0
1,111,Chicken Wings,2,8.0
2,111,Cold Drink,2,9.0
3,111,French Fries,2,10.0


### Recommendations based on Similar Items Purchased

In [0]:
model = item_similarity_recommender_py() 
model.create(item_df, 'user_id', 'title')

In [0]:
#Print the items for the user in data
user_id = users[10]
user_items = model.get_user_items(user_id)
#
print("------------------------------------------------------------------------------------")
print("Data items for the user userid: %s:" % user_id)
print("------------------------------------------------------------------------------------")

for user_item in user_items:
    print(user_item)

print("----------------------------------------------------------------------")
print("Recommendation process going on:")
print("----------------------------------------------------------------------")

#Recommend items for the user using personalized model
model.recommend(user_id)

------------------------------------------------------------------------------------
Data items for the user userid: 111:
------------------------------------------------------------------------------------
Chicken Wings
Noodles
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique items for the user: 2
no. of unique items in the training set: 13
Non zero values in cooccurence_matrix :8


Unnamed: 0,user_id,item,score,rank
0,111,Subway,0.1,1
1,111,Momo,0.1,2
2,111,Ice Cream,0.083333,3
3,111,Roll,0.0625,4
4,111,Wraps,0.0,5
5,111,Cold Drink,0.0,6
6,111,Fried Rice,0.0,7
7,111,Thai Food,0.0,8
8,111,French Fries,0.0,9
9,111,Burger,0.0,10


In [0]:
model.get_similar_items(['Chicken Wings'])

no. of unique items in the training set: 13
Non zero values in cooccurence_matrix :4


Unnamed: 0,user_id,item,score,rank
0,,Noodles,0.2,1
1,,Momo,0.2,2
2,,Ice Cream,0.166667,3
3,,Wraps,0.0,4
4,,Cold Drink,0.0,5
5,,Fried Rice,0.0,6
6,,Thai Food,0.0,7
7,,Subway,0.0,8
8,,Roll,0.0,9
9,,French Fries,0.0,10


## Item based Collaborative filtering

In [0]:
from scipy.sparse import csr_matrix 

item_data = item_df[["user_id","title","item_count"]]
wide_item_data = item_data.pivot(index = 'title', columns = 'user_id', values = 'item_count').fillna(0)
wide_item_sparse = csr_matrix(wide_item_data.values)

from sklearn.neighbors import NearestNeighbors

model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
model_knn.fit(wide_item_sparse)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [0]:
wide_item_data

user_id,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
Burger,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,1.0,0.0
Chicken Wings,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cold Drink,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
French Fries,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Fried Rice,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ice Cream,0.0,1.0,4.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
Momo,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Noodles,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
Pizza,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0
Roll,0.0,0.0,0.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
query_index = np.random.choice(wide_item_data.shape[0])

distances, indices = model_knn.kneighbors(wide_item_data.iloc[query_index, :].values.reshape(1,-1), n_neighbors = 6)

for i in range(0, len(distances.flatten())):
    if i == 0:
        print ('Recommendations for {0}:\n'.format(wide_item_data.index[query_index]))
    else:
        print ('{0}:{1}, with distance of {2}:'.format(i, wide_item_data.index[indices.flatten()[i]], distances.flatten()[i]))


Recommendations for Cold Drink:

1:Wraps, with distance of 0.7113248654051871:
2:Ice Cream, with distance of 0.8804771390665607:
3:Thai Food, with distance of 1.0:
4:Pizza, with distance of 1.0:
5:Subway, with distance of 1.0:


## Binary Count Data

Another approach would be convert each vector into a binary (1 or 0) : We can do this by applying the sign function in numpy to each column in the dataframe.


In [0]:
wide_item_data_zero_one = wide_item_data.apply(np.sign)
wide_item_data_zero_one_sparse = csr_matrix(wide_item_data_zero_one.values)

In [0]:
wide_item_data_zero_one

user_id,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
Burger,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
Chicken Wings,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cold Drink,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
French Fries,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Fried Rice,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ice Cream,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
Momo,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Noodles,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Pizza,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
Roll,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
model_nn_binary = NearestNeighbors(metric='cosine', algorithm='brute')
model_nn_binary.fit(wide_item_data_zero_one_sparse)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [0]:
distances, indices = model_nn_binary.kneighbors(wide_item_data_zero_one.iloc[query_index, :].values.reshape(1, -1), n_neighbors = 6)

for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations with binary purchased data with {0}:\n'.format(wide_item_data_zero_one.index[query_index]))
    else:
        print('{0}: {1}, with distance of {2}:'.format(i, wide_item_data_zero_one.index[indices.flatten()[i]], distances.flatten()[i]))


Recommendations with binary purchased data with Cold Drink:

1: Wraps, with distance of 0.591751709536137:
2: Ice Cream, with distance of 0.6837722339831621:
3: Thai Food, with distance of 1.0:
4: Pizza, with distance of 1.0:
5: Subway, with distance of 1.0:


In [0]:
item_it = item_df[["item_id","title"]]
item_rat = item_df[["user_id","item_id","item_count"]]

## Matrix Factorization via Singular Value Decomposition¶

Matrix factorization is the breaking down of one matrix in a product of multiple matrices. It's extremely well studied in mathematics, and it's highly useful. There are many different ways to factor matrices, but singular value decomposition is particularly useful for making recommendations.

So what is singular value decomposition (SVD)? At a high level, SVD is an algorithm that decomposes a matrix RR into the best lower rank (i.e. smaller/simpler) approximation of the original matrix RR. Mathematically, it decomposes R into a two unitary matrices and a diagonal matrix:
R=UΣVT
R=UΣVT

where R is users's ratings matrix, UU is the user "features" matrix, ΣΣ is the diagonal matrix of singular values (essentially weights), and VTVT is the movie "features" matrix. UU and VTVT are orthogonal, and represent different things. UU represents how much users "like" each feature and VTVT represents how relevant each feature is to each movie.

To get the lower rank approximation, we take these matrices and keep only the top kk features, which we think of as the underlying tastes and preferences vectors.

In [0]:
item_it.head(5)

Unnamed: 0,item_id,title
0,1,Pizza
1,2,Momo
2,3,Burger
3,4,French Fries
4,5,Roll


In [0]:
item_rat.head(5)

Unnamed: 0,user_id,item_id,item_count
0,101,1,1
1,102,2,2
2,103,3,4
3,104,4,1
4,105,5,1


In [0]:
R_df = item_rat.pivot(columns ='item_id', values = 'item_count').fillna(0)
R_df.head()

item_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
R = R_df.as_matrix()
user_ratings_mean = np.mean(R, axis = 1)
R_demeaned = R - user_ratings_mean.reshape(-1, 1)

  """Entry point for launching an IPython kernel.


In [0]:
from scipy.sparse.linalg import svds
U, sigma, Vt = svds(R_demeaned, k = 10)

sigma = np.diag(sigma)

all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)

preds_df = pd.DataFrame(all_user_predicted_ratings, columns = R_df.columns)
preds_df.head()
preds_df.index = R_df.index

preds_df.head()

item_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25
0,0.058665,0.007144,0.001007,0.058665,0.058665,0.058665,0.007144,0.058665,0.058665,0.058665,0.007144,0.001007,0.000609,0.058665,0.007144,0.058665,0.066894,0.066894,0.066894,0.056502,0.002384,0.001349,0.056502,0.066894,0.056502
1,0.014288,1.993653,-0.001055,0.014288,0.014288,0.014288,-0.006347,0.014288,0.014288,0.014288,-0.006347,-0.001055,-0.000649,0.014288,-0.006347,0.014288,-0.045462,-0.045462,-0.045462,0.02839,-0.002371,-0.001393,0.02839,-0.045462,0.02839
2,0.004028,-0.002109,3.999646,0.004028,0.004028,0.004028,-0.002109,0.004028,0.004028,0.004028,-0.002109,-0.000354,-0.000218,0.004028,-0.002109,0.004028,-0.014797,-0.014797,-0.014797,0.011188,-0.000793,-0.000467,0.011188,-0.014797,0.011188
3,0.058665,0.007144,0.001007,0.058665,0.058665,0.058665,0.007144,0.058665,0.058665,0.058665,0.007144,0.001007,0.000609,0.058665,0.007144,0.058665,0.066894,0.066894,0.066894,0.056502,0.002384,0.001349,0.056502,0.066894,0.056502
4,0.058665,0.007144,0.001007,0.058665,0.058665,0.058665,0.007144,0.058665,0.058665,0.058665,0.007144,0.001007,0.000609,0.058665,0.007144,0.058665,0.066894,0.066894,0.066894,0.056502,0.002384,0.001349,0.056502,0.066894,0.056502


In [0]:
def recommend_items(preds_df, user_id, item_it, item_rat, num_recommendations=5):
    
    # Get and sort the user's predictions
    sorted_user_predictions = preds_df.iloc[user_id].sort_values(ascending=False) # UserID starts at 1
    tuser_id = user_id + 100
    # Get the user's data and merge in the movie information.
    user_data = item_rat[item_rat.user_id == (tuser_id)]
    
    user_full = (user_data.merge(item_it, how = 'left', left_on = 'item_id', right_on = 'item_id').
                     sort_values(['item_count'], ascending=False)
                 )

    print ('User {0} has already bought {1} items.'.format(user_id, user_full.shape[0]))
    print ('Recommending highest {0} predicted count item not already bought.'.format(num_recommendations))
    
    #Recommend the highest predicted rating movies that the user hasn't seen yet.
    recommendations = (item_it[~item_it["item_id"].isin(user_full["item_id"])].merge(pd.DataFrame(sorted_user_predictions).reset_index(), how = 'left',
                   left_on = 'item_id',
                   right_on = 'item_id')).iloc[:num_recommendations, :-1]


    return user_full , recommendations

In [0]:
already_rated, predictions = recommend_items(preds_df,11, item_it, item_rat, 10)

User 11 has already bought 3 items.
Recommending highest 10 predicted count item not already bought.


In [0]:
already_rated.head(10)

Unnamed: 0,user_id,item_id,item_count,title
0,111,11,2,Chicken Wings
1,111,11,2,Chicken Wings
2,111,25,1,Noodles


In [0]:
predictions

Unnamed: 0,item_id,title
0,1,Pizza
1,2,Momo
2,3,Burger
3,4,French Fries
4,5,Roll
5,6,Ice Cream
6,7,Subway
7,8,Thai Food
8,9,Fried Rice
9,10,Cold Drink


### Conclusion

We've seen that we can make good recommendations with raw data based collaborative filtering methods (neighborhood models) and latent features from low-rank matrix factorization methods (factorization models).

Low-dimensional matrix recommenders try to capture the underlying features driving the raw data (which we understand as tastes and preferences). From a theoretical perspective, if we want to make recommendations based on people's tastes, this seems like the better approach. This technique also scales significantly better to larger datasets.
