# LUSH: Recommendation system

#### Libraries & dependencies

In [304]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from faker import Faker
import random
from sklearn.metrics.pairwise import cosine_similarity 

## i. Lush synthetic  Database

### Transactions

In [305]:
T = pd.read_csv('Lush_transactions.csv')

In [422]:
T

Unnamed: 0,Transaction ID,Item ID,Customer ID,Purchase Date,Quantity,Rating
0,aSPCv0AGUW,2e809516-0451-49ac-84da-41154df0012f,e6a39996-d2f3-4713-a417-083f19002944,2022-05-18,1,3.0
1,iRza4mz7BC,7acfe326-8535-4eef-be41-6943e4f776ff,18d46cd2-f847-4b68-b678-659e8ce80ef0,2022-06-13,2,4.7
2,MYBd66rYsX,fc29a8c3-e8a2-41ef-9313-cd664db81f74,6c8c7479-a7af-4ac1-8559-86e359bfa96e,2022-09-17,2,2.3
3,oVBCsl9tZc,3c5b28de-a03f-44a2-9705-6a50ff712411,bc643b49-4220-41ad-b2d5-f2bce4e6192f,2022-09-13,5,1.3
4,UiRhFxTGS3,2d925dfa-0bb2-4a6b-9e2f-1dc04aaa2fb8,55b54e66-203d-4e0a-a1ba-50a50c836ca1,2023-08-03,4,4.7
...,...,...,...,...,...,...
995,NK9zESjU6i,87ba7bb7-5c1d-4446-99ff-e65b0e58e931,a425aff4-872a-43ec-b31e-565ef129bbd3,2021-11-17,2,4.1
996,xnNb68iShT,1e96b906-4ea2-44c6-add8-04f9a583d7be,94cd3c7b-9ac4-4e3d-add8-1b369d10c469,2022-06-24,5,1.4
997,2MzwwD2Vib,20d3ddfb-ceab-40d1-bae2-7982df53c84e,30fdfd38-300a-4f75-a9d0-19184c52746c,2022-05-14,3,3.4
998,Fbf6MCpedz,3d0d5c7d-5d68-4094-8d95-c097473b709a,1da0d5ae-5e96-4f79-80be-7016f6839ae9,2022-06-12,3,0.6


### Customers

In [307]:
C = pd.read_csv('Lush_customers.csv')

In [421]:
C

Unnamed: 0,Customer ID,Phone Number,Pronouns,Name,Email
0,cbbbfe03-08a5-453c-9544-af17bcb31b00,581-943-2571x750,Not listed Above,Donald Ortiz,timothy53@example.com
1,0494ff8a-297c-45b4-b594-67839f8ec5e2,001-747-561-7184x18435,,Haley Hawkins,tammychen@example.org
2,096abc2e-849f-419c-9911-4bec5e049033,(990)802-2967,,Leslie Anderson,matthewsrobert@example.net
3,70abf380-d8c3-43ae-8015-be5dfa8e8b9e,001-609-203-8484x92971,He/Him/His,Emily Peterson,jeremy83@example.org
4,4e64984f-bcaf-4bb2-b364-7d36f3f26886,382-338-5658,,Candace Phillips,imorgan@example.net
...,...,...,...,...,...
995,082b84d5-e0f6-439a-83b6-835c14ccc27f,(798)340-9825x72575,He/Him/His,Carly Bullock,grivera@example.net
996,699008b0-0e3c-4858-af58-9eeb3420c5aa,+1-609-780-1490x51973,Not listed Above,Joseph Sullivan,vancelaura@example.net
997,b75a0368-ff55-4953-999c-33c798c6366b,711-561-5440x9850,He/Him/His,Lee Farrell,qcervantes@example.com
998,6e773ab7-0602-4e3d-ac83-7d97473a800e,847-839-7167x598,They/Them/Theirs,Stephanie Armstrong,mhernandez@example.com


### Inventory

In [309]:
I = pd.read_csv('Lush_inventory.csv')

In [423]:
I

Unnamed: 0,Item ID,Name of the Item,Categories,Price
0,8aa63377-3c25-46a1-afef-05dfd30f600c,Lavender Dream,Bath Bombs,9.56
1,db0249fc-9693-4f54-9707-5b4ad75fe865,Rose Bliss,Bath Bombs,8.05
2,f2e11ea9-862f-4ccd-8099-94be05031aa4,Citrus Burst,Bath Bombs,15.70
3,900d7870-6ecd-4ae1-9065-100108ac6c7d,Eucalyptus Breeze,Bath Bombs,20.07
4,4919b896-07b3-4cf6-803c-aae3ebb54025,Ocean Mist,Bath Bombs,17.74
...,...,...,...,...
160,793224c3-7caf-4835-95a3-5a64f8435470,Fresh Citrus,Deodorants and Dusting Powders,23.69
161,1bd66c7a-169f-4c4d-8e64-10bf0959f4f9,Lavender Calm,Deodorants and Dusting Powders,19.69
162,d1e8f1b8-fe08-44e3-9d0b-c41448df4fc7,Rose Bliss,Deodorants and Dusting Powders,20.31
163,97ebb357-2eaf-4842-8f4d-32f43d35772f,Mint Fresh,Deodorants and Dusting Powders,6.46


### Consolidating into one dataframe

In [393]:
# Merging transactions dataframe (LT) with inventory dataframe (LI) on "Item ID"
TI = pd.merge(T, I, on="Item ID", how="left")
df = pd.merge(TI, C, on="Customer ID", how="left")

In [394]:
df.head()

Unnamed: 0,Transaction ID,Item ID,Customer ID,Purchase Date,Quantity,Rating,Name of the Item,Categories,Price,Phone Number,Pronouns,Name,Email
0,aSPCv0AGUW,2e809516-0451-49ac-84da-41154df0012f,e6a39996-d2f3-4713-a417-083f19002944,2022-05-18,1,3.0,Charcoal Detox,Bar Soap,20.94,(293)820-2077x75865,Not listed Above,Jesse Morgan,allison55@example.com
1,iRza4mz7BC,7acfe326-8535-4eef-be41-6943e4f776ff,18d46cd2-f847-4b68-b678-659e8ce80ef0,2022-06-13,2,4.7,Rose Revival,Shower Bombs,21.74,247-357-4983,,Ashlee Carpenter,cblackwell@example.com
2,MYBd66rYsX,fc29a8c3-e8a2-41ef-9313-cd664db81f74,6c8c7479-a7af-4ac1-8559-86e359bfa96e,2022-09-17,2,2.3,Energizing Peppermint,Bath Oils,16.15,966.600.3906,,Connor Meyer,wagnerjill@example.net
3,oVBCsl9tZc,3c5b28de-a03f-44a2-9705-6a50ff712411,bc643b49-4220-41ad-b2d5-f2bce4e6192f,2022-09-13,5,1.3,Shea Smooth,Body Butters and Conditioners,11.98,399.219.9406,They/Them/Theirs,Lisa White,stephenszachary@example.org
4,UiRhFxTGS3,2d925dfa-0bb2-4a6b-9e2f-1dc04aaa2fb8,55b54e66-203d-4e0a-a1ba-50a50c836ca1,2023-08-03,4,4.7,Lavender Calm,Toners,7.04,+1-351-441-6232x7340,,Lisa Smith,jimenezandrew@example.org


## 1) A User Independent System
We'll start with building recommendations for the case where we have no knowledge about a user. In this case, we probably would find the most popular items, and recommend them to our user.

The simplest system would just sort the items based on their average score and present a list in that order:

In [566]:
rating_df = df.groupby(['Item ID','Name of the Item', 'Categories']).agg({'Rating': ['mean', 'count']}).reset_index()
# Rename columns with two levels to a single-level column name
rating_df.columns = ['_'.join(col).strip() if col[1] != '' else col[0] for col in rating_df.columns]

In [567]:
threshold = 4
product_with_many_reviews_df = rating_df[rating_df['Rating_count'] >= threshold]

In [568]:
top_rated = product_with_many_reviews_df.sort_values(by=['Rating_mean'], ascending=False)
top_rated.head(5)

Unnamed: 0,Item ID,Name of the Item,Categories,Rating_mean,Rating_count
141,e0c5ad34-0fcf-4e3e-955a-169bf44e52b6,Lifting Serum,Eye Skincare,4.025,4
85,846e0162-8bea-46c2-bed4-47c8ac6ca64c,Lavender Calm,Body Lotions,4.0,5
112,aa36d271-2e0e-4918-b848-127bec1cfbcf,Nourish Plus,Moisturizers,3.94,5
136,daee7362-acc9-4fda-8118-3dfd15d28937,Chamomile Soothe,Shampoo Bars,3.8,5
162,fc29a8c3-e8a2-41ef-9313-cd664db81f74,Energizing Peppermint,Bath Oils,3.733333,9


In [571]:
quantity_df = df.groupby(['Item ID','Name of the Item', 'Categories']).agg({'Quantity': ['sum']}).reset_index()
# Rename columns with two levels to a single-level column name
quantity_df.columns = ['_'.join(col).strip() if col[1] != '' else col[0] for col in quantity_df.columns]

In [572]:
top_seller = quantity_df.sort_values(by=['Quantity_sum'], ascending=False)
top_seller.head(5)

Unnamed: 0,Item ID,Name of the Item,Categories,Quantity_sum
32,36820ff6-74a3-4125-aa9e-4d956d0f9be7,Copper Radiance,Henna Hair Dyes,41
68,67998094-e6b3-403c-915a-1ed2243f9f5a,Lavender Calm,Shaving Creams,40
57,59d22c7a-a7a8-4828-ace4-46fb3d2ce394,Damage Repair,Shampoo,36
148,e9f21936-be64-437e-837b-00eadb4a68c9,Coffee Revive,Body Scrubs,35
138,dcc11d19-c063-4851-945e-f23041a78e01,Hydrating Mask,Hair Treatments,35


## 2) Similar Users
What makes two users similar? The simplest way to measure if user $a$ and $b$ are similar is to look at the items both of them reviewed. For example consider the following two users:

In [429]:
similar_df = df.groupby(['Customer ID','Item ID']).agg({'Rating': ['mean']}).reset_index()
# Rename columns with two levels to a single-level column name
similar_df.columns = ['_'.join(col).strip() if col[1] != '' else col[0] for col in similar_df.columns]

In [430]:
similar_df

Unnamed: 0,Customer ID,Item ID,Rating_mean
0,014d31fe-aed7-4548-a85c-cae27576df3c,41ed7e23-487e-4500-9621-8d46a494162e,2.3
1,014d31fe-aed7-4548-a85c-cae27576df3c,adc86607-5031-4026-b37e-76dc48584114,2.4
2,0211a20d-7192-4d33-9dd2-523380c2cea4,20aea34e-68eb-48b7-910a-098dcad72cfe,1.8
3,0211a20d-7192-4d33-9dd2-523380c2cea4,509a15a2-b9f2-4a12-8389-33e52f2f149f,3.9
4,0211a20d-7192-4d33-9dd2-523380c2cea4,cebc0561-e72d-43e1-beb6-e05885a9a230,2.6
...,...,...,...
990,fbe08ad4-3a72-4e92-87d6-c7edd0996f29,a673aec6-fe72-47ba-886e-7af2c760d04d,0.9
991,fbe08ad4-3a72-4e92-87d6-c7edd0996f29,fc29a8c3-e8a2-41ef-9313-cd664db81f74,3.9
992,fce278ab-08fa-4fa5-95a5-998632fb04ae,969c790a-bb7f-4295-9374-e978871d0829,2.2
993,feebd102-2e6a-4199-b28a-07e8072e63b0,f47d09b9-06fe-48b5-bb7f-b39eb83fb64b,3.3


In [460]:
# Create dictionaries to map item IDs and customer IDs to unique integers
item_id_to_int = {item_id: idx for idx, item_id in enumerate(similar_df['Item ID'].unique(), start=1)}
customer_id_to_int = {customer_id: idx for idx, customer_id in enumerate(similar_df['Customer ID'].unique(), start=1)}

# Assign unique integers to each item and customer in the DataFrame
similar_df['product_id'] = similar_df['Item ID'].map(item_id_to_int)
similar_df['customer_id'] = similar_df['Customer ID'].map(customer_id_to_int)

In [479]:
similar_unique_user_df=similar_df.drop_duplicates(subset=['Customer ID'])

In [464]:
customer_id = similar_df['customer_id'].unique()
product_id = similar_df['product_id'].unique()

num_customer = len(customer_id)
num_product = len(product_id)

# np.nan means they user has not reviewed the movie
R = np.full((num_customer, num_product), np.nan)

#Build the user-item matrix
for row in similar_df.itertuples(): # same as zip(df.index, df["user"], df["movie"], df["score"])
    customer = row[4]
    product = row[5]
    rating = row[3]
    R[customer-1, product-1] = rating
    
R_df = pd.DataFrame(data=R, index=range(1, num_customer+1), columns=range(1, num_product+1))

In [465]:
R_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,156,157,158,159,160,161,162,163,164,165
1,2.3,2.4,,,,,,,,,...,,,,,,,,,,
2,,,1.8,3.9,2.6,,,,,,...,,,,,,,,,,
3,,,,,,4.2,,,,,...,,,,,,,,,,
4,,,,,,,4.1,3.2,,,...,,,,,,,,,,
5,,,,,,,,,4.9,0.9,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
633,,,,,,,,,,,...,,,,,,,,,,
634,,,,,,,,,,,...,,,,,,,,,,
635,,,,,,,,,,,...,,,2.2,,,,,,,
636,,,,,,,,,,,...,,,,,,,,,,


In [473]:
# Remove duplicate rows based on 'Customer ID'
unique_customers_df = df.drop_duplicates(subset=['Customer ID'])

# Select only the 'Customer ID' and 'Pronouns' columns
result_df = unique_customers_df[['Customer ID', 'Pronouns']]

# Create dummy variables for user_pronoun and user_gender
result_df = pd.get_dummies(result_df, columns=['Pronouns'])

In [480]:
# Merging transactions dataframe (LT) with inventory dataframe (LI) on "Item ID"
resultmerged_df = pd.merge(result_df, similar_unique_user_df, on="Customer ID", how="left")

In [508]:
# Select only the dummy columns related to 'pronoun' and 'gender'
columns_to_keep = ['customer_id'] + [col for col in unique_users_df.columns if col.startswith('Pronouns_')]

# Create a new DataFrame with only the selected dummy columns
dummies_df = resultmerged_df[columns_to_keep]

# Set 'customer_id' as the index
dummies_df.set_index('customer_id', inplace=True)

In [509]:
dummies_df = dummies_df.sort_values(by='customer_id', ascending = True)

# Get a list of columns that start with "Pronouns_"
pronouns_columns = [col for col in dummies_df.columns if col.startswith('Pronouns_')]

# Create a mapping from original column names to new integer names
column_mapping = {col: str(idx) for idx, col in enumerate(pronouns_columns, start=num_product+1)}

# Rename the columns using the mapping
dummies_df.rename(columns=column_mapping, inplace=True)

# Replace zeros with NaN
dummies_df.replace(0, np.nan, inplace=True)

In [511]:
# Merge the DataFrames on 'customer_id'
merged_df = pd.concat([R_df, dummies_df], axis=1)

In [536]:
def find_customer_similarity(customer_1, customer_2, R_df):
    
    # Define the mask which finds all items they rated together
    feature_1st_customer_rated = ~R_df.loc[customer_1, :].isna()
    feature_2nd_customer_rated = ~R_df.loc[customer_2, :].isna()

    feature_both_customers_rated = feature_1st_customer_rated & feature_2nd_customer_rated

    # Sum boolean to get the counts
    number_of_features_rated_together = feature_both_customers_rated.sum()
    
    # Check if there are no features rated together
    if number_of_features_rated_together == 0:
        return 0, 0  # Return 0 similarity and 0 rated features
    
    # Find the ratings of both customers for items they both watched
    ratings_of_customer1 = R_df.loc[customer_1, feature_both_customers_rated].values.reshape(1, -1)
    ratings_of_customer2 = R_df.loc[customer_2, feature_both_customers_rated].values.reshape(1, -1)
    
    # Finally, calculate the similarity between them
    similarity = cosine_similarity(ratings_of_customer1, ratings_of_customer2)[0][0]
    
    return similarity, number_of_features_rated_together

In [537]:
R_df2 = merged_df[~merged_df.iloc[:, 3].isna()].copy()

In [538]:
R_df2 

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,160,161,162,163,164,165,166,167,168,169
2,,,1.8,3.9,2.6,,,,,,...,,,,,,,,,,
17,,,,0.9,,,,,,,...,,,,,,,,,,1.0
51,,,,1.6,,,,,,,...,,,,,,,,,1.0,
275,,,,2.0,,,,,,,...,,,,,,,,,,
363,,,,4.8,,,,,,,...,,,,,,,,,,
470,,,,4.8,,,,,,,...,,,,,,,1.0,,,
603,,,,1.9,,,,,,,...,,,,,,,1.0,,,


In [539]:
current_customer = 1 # we will only do this to customer 1 for demonstration purposes
customer_predicted_rating_per_feature=[]

for feature in range(num_product):
    current_feature = feature
    similarities_to_customer = []
    ratings_given_to_feature = []

    # Find only the customers who rated same feature (rows)
    R_df2 = merged_df[~merged_df.iloc[:, feature].isna()].copy()

    for other_customer in R_df2.index:

        similarity, number_of_features_rated_together = find_customer_similarity(current_customer, other_customer, merged_df)
        similarities_to_customer.append(similarity)
        ratings_given_to_feature.append(merged_df.loc[other_customer, current_feature+1])

    # Finally, let's turn these into numpy arrays so life is easier
    similarities_to_customer = np.array(similarities_to_customer)
    ratings_given_to_feature = np.array(ratings_given_to_feature)
    
    predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarities_to_customer)
    customer_predicted_rating_per_feature.append(predicted_rating)

  predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarities_to_customer)
  predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarities_to_customer)
  predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarities_to_customer)
  predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarities_to_customer)
  predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarities_to_customer)
  predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarities_to_customer)
  predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarities_to_customer)
  predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarities_to_customer)
  predicted_rating = np.dot(ratings_given_to_feature, similarities_to_customer)/np.sum(similarit

In [540]:
# Create a DataFrame
customer_product_prediction_df = pd.DataFrame({'product_id': range(1, num_product+1), 'customer_predicted_rating_per_feature': customer_predicted_rating_per_feature})

In [541]:
customer_product_prediction_df

Unnamed: 0,product_id,customer_predicted_rating_per_feature
0,1,3.009094
1,2,3.433333
2,3,
3,4,3.350000
4,5,
...,...,...
160,161,
161,162,
162,163,
163,164,3.700000


In [548]:
# Sort the DataFrame by 'user_predicted_rating_per_feature' in ascending order
sorted_df = customer_product_prediction_df.sort_values(by='customer_predicted_rating_per_feature', ascending = False)
sorted_df.head(5)

Unnamed: 0,product_id,customer_predicted_rating_per_feature
81,82,4.9
101,102,4.9
61,62,4.9
25,26,4.8
140,141,4.8


## 3) Similar Items

What makes two items similar? As with measuring if users are similar there are many ways of measuring item similarity. For our simple method to find the similarity of two items, $x$ and $y$, we look at every user who reviewed both items. For example let us consider the items in columns 15 and 18.

In [549]:
def find_item_similarity(item_1, item_2, R_df):
    
    # Define the mask which finds users who rated both items
    users_who_reviewed_1st_item = R_df.loc[:, item_1].notna()
    users_who_reviewed_2nd_item = R_df.loc[:, item_2].notna()

    users_who_reviewed_both_items = users_who_reviewed_1st_item & users_who_reviewed_2nd_item
    
    # Find how many users rated both items
    number_of_users = users_who_reviewed_both_items.sum()
    
    # Check if there are no features rated together
    if number_of_users == 0:
        return 0, 0  # Return 0 similarity and 0 rated features
    
    # Find the ratings of both users for movies they both watched
    ratings_of_item1 = R_df.loc[users_who_reviewed_both_items, item_1].values.reshape(1, -1)
    ratings_of_item2 = R_df.loc[users_who_reviewed_both_items, item_2].values.reshape(1, -1)
    
    # Finally, calculate the similarity between them
    similarity = cosine_similarity(ratings_of_item1, ratings_of_item2)[0][0]
    
    return similarity, number_of_users

In [550]:
customer= 1 # we will only do this to customer 1 for demonstration purposes
item_predicted_rating=[]

for item in range(num_product):
    current_item = item
    
    similarities_to_item = []
    ratings_given_by_customer = []

    # Find only the items rated by customer 
    R_df3 = R_df.loc[:,~R_df.iloc[customer-1, :].isna()].copy()

    for other_item in R_df3.columns:

        similarity, number_of_customer_rated = find_item_similarity(current_item+1, other_item, R_df)
        similarities_to_item.append(similarity)
        ratings_given_by_customer.append(R_df.loc[customer, other_item])

    # Finally, let's turn these into numpy arrays so life is easier
    similarities_to_item = np.array(similarities_to_item)
    ratings_given_by_customer = np.array(ratings_given_by_customer)
    
    predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
    item_predicted_rating.append(predicted_rating)

  predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
  predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
  predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
  predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
  predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
  predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
  predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
  predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
  predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
  predicted_rating = np.dot(ratings_given_by_c

In [551]:
# Create a DataFrame
item_customer_prediction_df = pd.DataFrame({'product_id': range(1, num_product+1),  'item_predicted_rating': item_predicted_rating})

In [554]:
item_customer_prediction_df.sort_values(by='item_predicted_rating', ascending = False)

Unnamed: 0,product_id,item_predicted_rating
6,7,2.4
60,61,2.4
77,78,2.4
78,79,2.4
89,90,2.4
...,...,...
160,161,
161,162,
162,163,
163,164,


In [553]:
# Sort the DataFrame by 'user_predicted_rating_per_feature' in ascending order
sorted2_df = item_customer_prediction_df.sort_values(by='item_predicted_rating', ascending = False)
sorted2_df.head(5)

Unnamed: 0,product_id,item_predicted_rating
6,7,2.4
60,61,2.4
77,78,2.4
78,79,2.4
89,90,2.4


## i) Appendix in case we need this in the future

In [None]:
def find_item_similarity(item_1, item_2, R_df):
    
    # Define the mask which finds users who rated both items
    users_who_reviewed_1st_item = R_df.loc[:, item_1].notna()
    users_who_reviewed_2nd_item = R_df.loc[:, item_2].notna()

    users_who_reviewed_both_items = users_who_reviewed_1st_item & users_who_reviewed_2nd_item
    
    # Find how many users rated both items
    number_of_users = users_who_reviewed_both_items.sum()
    
    # Find the ratings of both users for movies they both watched
    ratings_of_item1 = R_df.loc[users_who_reviewed_both_items, item_1].values.reshape(1, -1)
    ratings_of_item2 = R_df.loc[users_who_reviewed_both_items, item_2].values.reshape(1, -1)
    
    # Finally, calculate the similarity between them
    similarity = cosine_similarity(ratings_of_item1, ratings_of_item2)[0][0]
    
    return similarity, number_of_users

In [None]:
current_item = 1 # we will only do this to customer 1 for demonstration purposes
item_predicted_rating_per_customer=[]

for customer in range(num_customer):
    current_customer = customer
    
    similarities_to_item = []
    ratings_given_by_customer = []

    # Find only the items rated by customer 
    R_df3 = R_df.loc[:,~R_df.iloc[customer, :].isna()].copy()

    for other_item in R_df3.columns:

        similarity, number_of_customer_rated = find_item_similarity(current_item, other_item, R_df)
        similarities_to_item.append(similarity)
        ratings_given_by_customer.append(R_df.loc[current_customer+1, other_item])

    # Finally, let's turn these into numpy arrays so life is easier
    similarities_to_item = np.array(similarities_to_item)
    ratings_given_by_customer = np.array(ratings_given_by_customer)
    
    predicted_rating = np.dot(ratings_given_by_customer, similarities_to_item)/np.sum(similarities_to_item)
    item_predicted_rating_per_customer.append(predicted_rating)

In [None]:
# Create a DataFrame
item_customer_prediction_df = pd.DataFrame({'customer_id': range(1, num_customer + 1), 'item_predicted_rating_per_customer': item_predicted_rating_per_customer})

In [None]:
item_customer_prediction_df