# Collaborative Filtering

**Item Based**: uses similarities between items’ consumption histories

**User Based**: considers similarities between users’ consumption histories, combined with item similarities

In [1]:
#Import libraries
import pandas as pd
from scipy.spatial.distance import cosine

In [2]:
data = pd.read_csv("datasets/groceries.csv")

In [3]:
data.head()

Unnamed: 0,Person,item
0,1,citrus fruit
1,1,semi-finished bread
2,1,margarine
3,1,ready soups
4,2,tropical fruit


In [4]:
data.Person.nunique()

9835

In [5]:
data.shape

(43367, 2)

In [6]:
#Assume that for all items only one quantity was bought 

**Exercise 1** Add a column `Quantity` to `data` that has the value 1 in every row

In [7]:
data["Quantity"] = 1

In [8]:
data.head()

Unnamed: 0,Person,item,Quantity
0,1,citrus fruit,1
1,1,semi-finished bread,1
2,1,margarine,1
3,1,ready soups,1
4,2,tropical fruit,1


In [9]:
len(pd.unique(data.item))

169

In [10]:
#This particular view isn't very helpful for us for analysis.
#This way of data being arranged is called LONG
#We need it in wide format

In [11]:
# Converting data from long to wide format: one row per Person, one column per
# item, with Quantity as the cell value (NaN where a person did not buy the item).
# The arguments are passed by keyword because pandas 2.0+ no longer accepts
# them positionally.
dataWide = data.pivot(index="Person", columns="item", values="Quantity")

In [12]:
dataWide.shape

(9835, 169)

In [13]:
dataWide.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,1.0,
3,,,,,,,,,,,...,,,,,,,,1.0,,
4,,,,,,,,,,,...,,,,,,,,,1.0,
5,,,,,,,,,,,...,,,,,,,,1.0,,


In [14]:
dataWide

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,1.0,
3,,,,,,,,,,,...,,,,,,,,1.0,,
4,,,,,,,,,,,...,,,,,,,,,1.0,
5,,,,,,,,,,,...,,,,,,,,1.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9831,,,,,,,,,,1.0,...,,,,1.0,,,,1.0,,
9832,,,,,,,,,,,...,,,,,,,,,,
9833,,,,,,,,,,,...,,,,,,,,,1.0,
9834,,,,,,,,,,,...,,,,,,,,,,


**Exercise 2**
Print the data for Person number 2

In [15]:
dataWide[dataWide.index==2]

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,,,,,,,,,,...,,,,,,,,,1.0,


In [16]:
# dataWide.iloc[1:2,:]

In [17]:
# dataWide.loc[2,:]

**Exercise 3** Print the data for row number 2

In [18]:
# dataWide.iloc[1,:]

In [19]:
# Replace NA with 0: an empty cell means the person did not buy that item.
# Rebind the name instead of mutating in place (inplace=True is discouraged in
# pandas and brings no speed benefit); re-running this cell stays harmless.
dataWide = dataWide.fillna(0)

In [20]:
dataWide.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


# Item-based Collaborative Filtering

In item based collaborative filtering we do not care about the user column

In [21]:
# Work on a copy so that dataWide (still indexed by Person) is left untouched.
# This cell only copies; the Person index is removed from the copy in the
# cells that follow (reset_index, then drop).
data_ib = dataWide.copy()

In [22]:
data_ib.shape

(9835, 169)

In [23]:
data_ib.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [24]:
data_ib = data_ib.reset_index()

In [25]:
data_ib.head()


item,Person,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [26]:
#Drop the Person column
#data_ib = data_ib.iloc[:,1:]
data_ib = data_ib.drop("Person", axis=1)

In [27]:
data_ib.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [28]:
# Create a placeholder dataframe listing item vs. item
data_ibs = pd.DataFrame(index=data_ib.columns,
                        columns=data_ib.columns)

In [29]:
data_ibs.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Instant food products,,,,,,,,,,,...,,,,,,,,,,
UHT-milk,,,,,,,,,,,...,,,,,,,,,,
abrasive cleaner,,,,,,,,,,,...,,,,,,,,,,
artif. sweetener,,,,,,,,,,,...,,,,,,,,,,
baby cosmetics,,,,,,,,,,,...,,,,,,,,,,


## Similarity Measure 

We will now find similarities.

We will use `cosine similarity`

<img src="img/cosine.png" >

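The resulting similarity ranges from −1 meaning exactly opposite, to 1 meaning exactly the same, with 0 indicating orthogonality (decorrelation), and in-between values indicating intermediate similarity or dissimilarity. Because our purchase matrix contains only 0s and 1s, the similarities computed here always fall between 0 and 1.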

*src* https://en.wikipedia.org/wiki/Cosine_similarity

In essence, cosine similarity takes the sum-product of two columns and divides it by the product of the two columns' norms, where a column's norm is the square root of its sum of squares: $\text{sim}(a, b) = \dfrac{\sum_i a_i b_i}{\sqrt{\sum_i a_i^2}\,\sqrt{\sum_i b_i^2}}$.

In [30]:
data_ib.iloc[:,1]

0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
9830    0.0
9831    0.0
9832    0.0
9833    0.0
9834    0.0
Name: UHT-milk, Length: 9835, dtype: float64

In [31]:
# Fill the item-vs-item frame with cosine similarities.
# All pairs are computed with one matrix product instead of one scipy call per
# (i, j) pair, and the result is a float frame rather than an object one.
item_matrix = data_ib.to_numpy(dtype=float)       # rows: persons, columns: items

# Entry (i, j) is the sum-product of item columns i and j; the diagonal
# therefore holds each column's sum of squares.
dot_products = item_matrix.T @ item_matrix
squared_norms = dot_products.diagonal()

# sqrt(|a|^2 * |b|^2) for every pair of columns.
# NOTE(review): a column that is entirely zero would give 0/0 = NaN here;
# every item in this dataset is expected to have at least one purchase.
norm_products = (squared_norms[:, None] * squared_norms[None, :]) ** 0.5

data_ibs = pd.DataFrame(dot_products / norm_products,
                        index=data_ib.columns,
                        columns=data_ib.columns)

In [32]:
data_ibs.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Instant food products,1.0,0.0248112,0.0,0.0,0,0,0,0.0255878,0.0,0.0396234,...,0.0251577,0.0140636,0.0173605,0.0296613,0,0.044236,0.0,0.0673304,0.0425243,0.0409311
UHT-milk,0.0248112,1.0,0.0,0.0389841,0,0,0,0.0376158,0.0212202,0.0339786,...,0.0308196,0.0275659,0.0595491,0.0622915,0,0.0352245,0.0120949,0.0428914,0.108655,0.0401143
abrasive cleaner,0.0,0.0,1.0,0.0298807,0,0,0,0.0,0.03253,0.044647,...,0.0,0.0422577,0.017388,0.0509286,0,0.0249222,0.0123608,0.0539498,0.045634,0.0
artif. sweetener,0.0,0.0389841,0.0298807,1.0,0,0,0,0.0402042,0.0,0.00778216,...,0.0,0.0220971,0.00909241,0.0066578,0,0.0173762,0.0,0.0387901,0.0524977,0.0
baby cosmetics,0.0,0.0,0.0,0.0,1,0,0,0.0309492,0.0,0.0,...,0.0,0.0,0.020998,0.0153755,0,0.0200643,0.0,0.0244315,0.0,0.0


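With our similarity matrix filled out, we can look for each item’s “neighbours” by looping through `data_ibs`, sorting each column in descending order, and grabbing the names of the top 3 products. Note that the first "neighbour" of every item is the item itself, since its similarity with itself is 1.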

In [39]:
# data_neighbours = pd.DataFrame(index=data_ibs.columns,columns=range(1,4))
 
# # Loop through our similarity dataframe and fill in neighbouring item names
# for i in range(0,len(data_ibs.columns)):
#     data_neighbours.iloc[i,:3] = data_ibs.iloc[0:,i].sort_values(ascending=False)[:3].index

In [40]:
# data_neighbours

**Exercise 4** Modify the above code to print the top 10 similar products for each product

In [38]:
# One row per item; columns 1..10 hold the names of its most similar items,
# best match first.
neighbour_ranks = range(1, 11)
data_neighbours = pd.DataFrame(index=data_ibs.columns, columns=neighbour_ranks)

# Walk over the similarity columns, rank each one from most to least similar,
# and keep the names of the ten best matches.
for position in range(data_ibs.shape[1]):
    ranked = data_ibs.iloc[:, position].sort_values(ascending=False)
    data_neighbours.iloc[position, :10] = ranked.index[:10]

data_neighbours

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Instant food products,Instant food products,hamburger meat,canned fish,other vegetables,whole milk,root vegetables,curd,rolls/buns,butter,kitchen utensil
UHT-milk,UHT-milk,bottled water,yogurt,other vegetables,soda,coffee,margarine,domestic eggs,brown bread,citrus fruit
abrasive cleaner,abrasive cleaner,preservation products,cleaner,curd cheese,root vegetables,dish cleaner,other vegetables,salad dressing,rice,berries
artif. sweetener,artif. sweetener,potato products,salad dressing,skin care,candles,flour,rum,yogurt,frankfurter,mustard
baby cosmetics,baby cosmetics,cream,dish cleaner,cookware,syrup,kitchen towels,soups,butter milk,oil,sweet spreads
...,...,...,...,...,...,...,...,...,...,...
white bread,white bread,whole milk,processed cheese,ham,other vegetables,fruit/vegetable juice,tropical fruit,soda,yogurt,pip fruit
white wine,white wine,bottled water,shopping bags,soda,sausage,bottled beer,prosecco,canned beer,rolls/buns,fruit/vegetable juice
whole milk,whole milk,other vegetables,yogurt,root vegetables,rolls/buns,tropical fruit,whipped/sour cream,domestic eggs,butter,curd
yogurt,yogurt,whole milk,other vegetables,tropical fruit,rolls/buns,root vegetables,whipped/sour cream,citrus fruit,curd,fruit/vegetable juice


# User Based collaborative Filtering

The process for creating a User Based recommendation system is as follows:

1. Have Item-Based similarity matrix
2. Check which items the user has consumed
3. For each item the user has consumed, get the top X neighbours
4. Get the consumption record of the user for each neighbour.
5. Compute similarity score
6. Recommend the items with the highest score

In [41]:
#Helper function to get similarity scores
def getScore(history, similarities):
    """Return the similarity-weighted average of a purchase history.

    Parameters
    ----------
    history : pandas.Series
        The user's purchase flags (0/1) for a set of neighbouring items.
    similarities : pandas.Series
        Similarity of each of those neighbours to the candidate item. The
        element-wise product aligns the two Series on their index labels.

    Returns
    -------
    float
        sum(history * similarities) / sum(similarities), or 0.0 when the
        similarities add up to zero (no neighbour carries any weight, so
        there is no evidence for recommending the item).
    """
    total_similarity = sum(similarities)
    if total_similarity == 0:
        # Avoid 0/0: without this guard the score would be NaN (or raise).
        return 0.0
    return sum(history * similarities) / total_similarity

#Understand what this function does ! 

In [42]:
data_sims1 = dataWide.reset_index()

In [43]:
data_sims1.head()

item,Person,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [46]:
# Create a placeholder frame for the user-vs-product scores. It has the same
# index and columns as data_sims1, and every cell starts out empty.
data_sims = pd.DataFrame(index=data_sims1.index,columns=data_sims1.columns)
# Copy over only the first column (Person) so each row stays labelled with its
# user; the product columns are filled in later by the scoring loop.
data_sims.iloc[:,:1] = data_sims1.iloc[:,:1]

In [47]:
#This is the same as our original data but with nothing filled in except the headers
data_sims.head()

item,Person,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,1,,,,,,,,,,...,,,,,,,,,,
1,2,,,,,,,,,,...,,,,,,,,,,
2,3,,,,,,,,,,...,,,,,,,,,,
3,4,,,,,,,,,,...,,,,,,,,,,
4,5,,,,,,,,,,...,,,,,,,,,,


In [48]:
# Purchase history (Person plus one 0/1 column per item) for the first 500
# rows only; the scoring loop further down is run on this subset.
data_sims12 = data_sims1.iloc[:500,:]

In [49]:
# The same first 500 rows of the (still empty) score placeholder, matching
# the rows selected from the purchase history.
data_sims11 = data_sims.iloc[:500,:]

In [57]:
data_neighbours.loc['UHT-milk']

1             UHT-milk
2        bottled water
3               yogurt
4     other vegetables
5                 soda
6               coffee
7            margarine
8        domestic eggs
9          brown bread
10        citrus fruit
Name: UHT-milk, dtype: object

In [72]:
data_neighbours.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Instant food products,Instant food products,hamburger meat,canned fish,other vegetables,whole milk,root vegetables,curd,rolls/buns,butter,kitchen utensil
UHT-milk,UHT-milk,bottled water,yogurt,other vegetables,soda,coffee,margarine,domestic eggs,brown bread,citrus fruit
abrasive cleaner,abrasive cleaner,preservation products,cleaner,curd cheese,root vegetables,dish cleaner,other vegetables,salad dressing,rice,berries
artif. sweetener,artif. sweetener,potato products,salad dressing,skin care,candles,flour,rum,yogurt,frankfurter,mustard
baby cosmetics,baby cosmetics,cream,dish cleaner,cookware,syrup,kitchen towels,soups,butter milk,oil,sweet spreads


In [63]:
#Need to run this for only 500 users. Might be slow beyond that.
#Subset it to 500 users before running this
#
# For every (user, product) pair in the subset:
#   - if the user already bought the product, the score is 0 (nothing to recommend);
#   - otherwise the score is the similarity-weighted average of the user's
#     purchases over the product's nearest neighbours (see getScore).
#
# Scores are written into data_sims by label with .at. Chained indexing such as
# frame.iloc[i][j] = value writes through a temporary object and is not
# guaranteed to reach the frame at all (it never does under pandas copy-on-write).
for product in data_sims11.columns[1:]:            # position 0 is the Person column
    # These lookups depend only on the product, so do them once per product
    # instead of once per (user, product) pair.
    # Position 0 of the neighbour list is the product itself; keep the next 9.
    product_top_names = data_neighbours.loc[product].iloc[1:10]
    # Look the similarities up by neighbour name so they always carry the same
    # labels as user_purchases (getScore multiplies the two label by label).
    product_top_sims = data_ibs.loc[product, product_top_names]

    for user in data_sims11.index:
        if data_sims12.at[user, product] == 1:
            data_sims.at[user, product] = 0
        else:
            user_purchases = data_ib.loc[user, product_top_names]
            data_sims.at[user, product] = getScore(user_purchases, product_top_sims)

# Refresh the subset so that it shows the freshly computed scores as well.
data_sims11 = data_sims.loc[data_sims11.index]

In [65]:
# Get the top products: one row per user, a Person column followed by six
# recommendation slots named '1'..'6'.
recommendation_columns = ['Person'] + [str(rank) for rank in range(1, 7)]
data_recommend = pd.DataFrame(index=data_sims.index, columns=recommendation_columns)
# Carry the user ids over from the score frame.
data_recommend.iloc[:, 0] = data_sims.iloc[:, 0]


In [66]:
# Instead of top product scores, we want to see names
n_recommendations = data_recommend.shape[1] - 1      # every column except Person
for i in range(0, len(data_sims.index)):
    # Rank the product scores only: leave out the Person column (position 0)
    # rather than sorting the user id together with the scores, and ignore
    # products that have no score for this user.
    user_scores = data_sims.iloc[i, 1:].dropna().astype(float)
    if user_scores.empty:
        # This user was never scored: keep NaN instead of arbitrary product names.
        continue
    top_products = user_scores.sort_values(ascending=False).index[:n_recommendations]
    data_recommend.iloc[i, 1:1 + len(top_products)] = list(top_products)

In [68]:
# Print a sample
print(data_recommend.iloc[:10,:4])

  Person                  1               2                  3
0      1            candles      hair spray                oil
1      2  seasonal products       pip fruit          beverages
2      3            cereals            curd      domestic eggs
3      4        butter milk  tropical fruit        canned fish
4      5             coffee         cereals          chocolate
5      6               curd         cleaner      domestic eggs
6      7        frankfurter         sausage      spread cheese
7      8     red/blush wine          dishes      bottled water
8      9           prosecco        dog food  finished products
9     10               soap            curd      domestic eggs


This case/code was inspired by
http://www.salemmarafi.com/code/collaborative-filtering-with-python/

Look into that link for more information

More links:
http://blogs.gartner.com/martin-kihn/how-to-build-a-recommender-system-in-python/