### Loading Libraries

In [1]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
import matplotlib.pyplot as plt
import seaborn as sns

### Data Input

In [2]:
# Load the review data from "reviews.csv" (path is relative to the working directory).
data = pd.read_csv("reviews.csv")
# Preview the first rows to sanity-check the columns that were read.
data.head()

Unnamed: 0,Reviewer,Recommend,Stars,Date,Product,Category,Date Scrape,Url
0,Ayuika,No,3,5 days ago,Perfect 3D Gel,Night Cream,2020-02-24 13:35:12.875052,https://reviews.femaledaily.com/products/moist...
1,yustinalupit,Yes,4,16 Feb 2020,Perfect 3D Gel,Night Cream,2020-02-24 13:35:12.876053,https://reviews.femaledaily.com/products/moist...
2,evikdanny,No,3,13 Feb 2020,Perfect 3D Gel,Night Cream,2020-02-24 13:35:12.878054,https://reviews.femaledaily.com/products/moist...
3,daniskhoirun,Yes,3,08 Feb 2020,Perfect 3D Gel,Night Cream,2020-02-24 13:35:12.880038,https://reviews.femaledaily.com/products/moist...
4,hulahup19,Yes,5,08 Feb 2020,Perfect 3D Gel,Night Cream,2020-02-24 13:35:12.882056,https://reviews.femaledaily.com/products/moist...


### Checking for missing values

In [3]:
# Count the missing (NaN) entries in each column.
data.isna().sum()

Reviewer       0
Recommend      0
Stars          0
Date           0
Product        0
Category       0
Date Scrape    0
Url            0
dtype: int64

### Checking for duplicated values

In [4]:
# Count the rows that are exact duplicates of an earlier row.
data.duplicated().sum()

0

In [5]:
# Tally how many reviews each reviewer wrote, ordered from the least to the
# most active reviewer (sort_values sorts ascending by default).
id_count = (
    pd.crosstab(index=data['Reviewer'], columns='count')
    .sort_values(by='count')
)
id_count

col_0,count
Reviewer,Unnamed: 1_level_1
Tipee,2
celindriana,2
muna19,2
murinaarg,2
murti123,2
...,...
fitput,102
SathyaTika,103
mailaul,107
rency,113


In [6]:
# Keep only the reviews written by reviewers with more than MIN_REVIEWS reviews.
MIN_REVIEWS = 1
name_r = id_count[id_count['count'] > MIN_REVIEWS]
name_u = name_r.index.to_list()
# .copy() makes the filtered frame independent of the frame it was sliced from,
# so later modifications do not act on a view (avoids SettingWithCopyWarning).
data = data[data['Reviewer'].isin(name_u)].copy()

### Dropping unused columns and renaming the rest

In [7]:
# Drop the columns the recommender does not use and switch to lowercase names.
# A new frame is built instead of mutating in place, and errors='ignore' lets
# the cell be re-run after the columns have already been removed.
data = (
    data
    .drop(columns=['Recommend', 'Date', 'Date Scrape', 'Url', 'Category'], errors='ignore')
    .rename(columns={'Reviewer': 'reviewer', 'Product': 'product', 'Stars': 'rating'})
)
# Discard rows whose reviewer name is a single blank character.
data = data[data['reviewer'] != ' ']
data.head()

Unnamed: 0,reviewer,rating,product
0,Ayuika,3,Perfect 3D Gel
1,yustinalupit,4,Perfect 3D Gel
2,evikdanny,3,Perfect 3D Gel
3,daniskhoirun,3,Perfect 3D Gel
4,hulahup19,5,Perfect 3D Gel


### Creating the pivot matrix

In [8]:
# Reviewer x product rating matrix. pivot_table averages the ratings when a
# reviewer rated the same product more than once (its default aggregation);
# reviewer/product pairs with no rating are filled with 0.
matrix_pivot = (
    data
    .pivot_table(index='reviewer', columns='product', values='rating')
    .fillna(0)
)
matrix_pivot.head()

product,"0,2 mm Therapy Air Mask Sheet",10% Lactic Acid (Infused with Rose),100% Cold-Pressed Virgin Marula Oil,100% Organic Cold-Pressed Borage Seed Oil,100% Organic Cold-Pressed Rose Hip Seed Oil,100% Plant-Derived Squalane,100% Pure Camellia Dry Oil,100% Snail mucus Soothing Gel,14% AHA Swipes,2 Minutes Brightening Gel,...,wardah seaweed intensive night cream,wardah suncare,watermelon glow,white beauty aqua facial wash with plant extract,whitening plus complex,whitening soap,wonder pore,zaitun oil,“Buffet”,“B” oil
reviewer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01lely,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
01putrisalma,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
01sary,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
123hayoapa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15ayusafitri,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


### Normalizing rating values

In [9]:
# Centre every reviewer's row on that reviewer's mean.
# The mean is taken across all product columns of the pivot matrix.
matrix_pivot_ = matrix_pivot.to_numpy()
user_ratings_mean = matrix_pivot_.mean(axis=1)
# Add a trailing axis so the per-row mean broadcasts across the columns.
user_rating = matrix_pivot_ - user_ratings_mean[:, np.newaxis]

### Singular Value Decomposition

In [10]:
# Truncated SVD of the mean-centred rating matrix.
# `svds` is already imported in the notebook's import cell, so it is not
# imported again here.
N_FACTORS = 50  # number of latent factors (singular values) to keep
# svds needs k to be strictly smaller than both matrix dimensions, so clamp it
# to keep the cell working on small datasets.
k = min(N_FACTORS, min(user_rating.shape) - 1)
U, sigma, Vt = svds(user_rating, k=k)

In [11]:
# svds returns the singular values as a 1-D array; turn them into a diagonal
# matrix for the matrix products below. The ndim guard keeps the cell safe to
# re-run: np.diag applied to a 2-D array would extract the diagonal again and
# silently undo the conversion.
if sigma.ndim == 1:
    sigma = np.diag(sigma)

### Predicted ratings

In [12]:
# Rebuild the low-rank approximation of the centred matrix and add each
# reviewer's mean back (broadcast down the rows) to obtain predicted ratings.
all_user_predicted_ratings = U @ sigma @ Vt + user_ratings_mean[:, np.newaxis]

In [13]:
# Wrap the predicted-rating array in a DataFrame that carries the same
# reviewer index and product columns as the pivot matrix.
preds_df = pd.DataFrame(
    data=all_user_predicted_ratings,
    index=matrix_pivot.index,
    columns=matrix_pivot.columns,
)
preds_df

product,"0,2 mm Therapy Air Mask Sheet",10% Lactic Acid (Infused with Rose),100% Cold-Pressed Virgin Marula Oil,100% Organic Cold-Pressed Borage Seed Oil,100% Organic Cold-Pressed Rose Hip Seed Oil,100% Plant-Derived Squalane,100% Pure Camellia Dry Oil,100% Snail mucus Soothing Gel,14% AHA Swipes,2 Minutes Brightening Gel,...,wardah seaweed intensive night cream,wardah suncare,watermelon glow,white beauty aqua facial wash with plant extract,whitening plus complex,whitening soap,wonder pore,zaitun oil,“Buffet”,“B” oil
reviewer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01lely,-0.033156,-0.004630,-0.003819,-0.009635,0.049237,0.014808,-0.006243,0.008375,-0.006995,-0.016372,...,0.051089,0.095433,-0.023493,-0.006377,-0.011017,-0.006941,-0.016992,-0.016342,0.024577,-4.931434e-03
01putrisalma,0.010625,0.000792,0.002254,0.000406,0.019684,0.011667,-0.000244,-0.000047,-0.000289,0.007549,...,-0.005508,-0.006456,0.005506,0.000164,-0.000872,-0.002055,-0.000035,0.005095,0.025665,7.925640e-04
01sary,0.001551,-0.001476,-0.000705,0.000370,0.005365,-0.010139,-0.000186,0.000662,-0.001281,-0.000038,...,-0.001220,0.000716,-0.003138,-0.000447,0.000854,-0.005362,0.007784,-0.002074,0.006341,-1.701105e-03
123hayoapa,0.015527,0.003253,-0.000977,0.002198,0.016848,0.000153,0.001836,0.003370,0.003455,0.015547,...,0.001470,-0.005247,0.002946,-0.006080,-0.002993,-0.000281,-0.008534,0.002337,0.013906,5.204980e-07
15ayusafitri,0.003480,0.002537,0.003241,0.002244,0.001346,-0.003409,0.002856,0.002452,0.002358,-0.000510,...,0.005033,0.011756,0.001465,0.007603,0.004751,0.015957,0.006410,0.011011,-0.009111,1.148843e-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zvnazole,-0.005587,0.001457,0.004361,0.001318,0.015749,0.006207,0.001661,0.001480,0.000950,0.001608,...,-0.000520,0.006899,0.002039,0.004319,0.001492,0.009487,-0.003006,0.007397,0.005029,3.074178e-03
zyshalu,-0.013294,-0.001248,-0.001276,-0.002949,0.003175,-0.008640,-0.002109,0.004431,-0.002174,-0.007665,...,0.011587,0.027956,-0.003444,0.000228,-0.004142,-0.005065,0.012997,0.002577,0.011154,2.718208e-04
zzarahs,-0.003167,0.001298,0.004880,0.002201,0.007945,0.011677,0.001442,0.003583,0.000730,0.001116,...,0.001149,0.000714,0.003517,0.002938,0.002578,-0.000124,0.005001,0.000291,0.017409,4.559316e-03
zzfatimah,0.002338,0.005823,0.013274,0.007428,0.037032,0.034630,0.004491,0.010501,0.004569,0.012420,...,0.005850,0.001047,0.008600,0.005341,0.004740,0.005966,-0.000579,0.006010,0.059716,1.161053e-02


### Creating recommendations

In [14]:
 def recommend_product(predictions_df, user, data_, num_recommendations):
     """Recommend products a reviewer has not rated yet.

     Parameters
     ----------
     predictions_df : pandas.DataFrame
         Predicted ratings with one row per reviewer (index) and one column
         per product.
     user : str
         Reviewer id; must be a label of ``predictions_df.index``.
     data_ : pandas.DataFrame
         Review table with at least the columns ``reviewer`` and ``product``.
     num_recommendations : int
         Maximum number of products to return.

     Returns
     -------
     tuple of (pandas.DataFrame, pandas.DataFrame)
         ``user_full`` holds the rows of ``data_`` written by ``user``.
         ``recommendations`` holds one row per recommended product, ordered
         from the highest to the lowest predicted rating. Its columns are the
         columns of ``data_``; values other than ``product`` come from the
         last review row of that product in ``data_``, not from ``user``.

     Raises
     ------
     KeyError
         If ``user`` has no row in ``predictions_df``.
     """
     if user not in predictions_df.index:
         raise KeyError('User {0} has no predicted ratings'.format(user))

     # Use the arguments that were passed in rather than notebook globals, and
     # name the axis explicitly so the merge key does not depend on how the
     # prediction frame's columns were labelled.
     user_predictions = (
         predictions_df.loc[user]
         .rename('Predictions')
         .rename_axis('product')
         .reset_index()
     )

     user_full = data_[data_['reviewer'] == user]

     print('User {0} has already rated {1} product'.format(user, user_full.shape[0]))

     # One row per distinct product, keeping the last review row seen for it.
     catalogue = data_.drop_duplicates(subset='product', keep='last')
     not_yet_rated = catalogue[~catalogue['product'].isin(user_full['product'])]

     recommendations = (
         not_yet_rated
         .merge(user_predictions, how='left', on='product')
         .sort_values('Predictions', ascending=False)
         .iloc[:num_recommendations]
         # The predicted score is only needed for ranking; callers receive the
         # same columns as before.
         .drop(columns='Predictions')
     )

     return user_full, recommendations

In [15]:
# Ask for the reviewer id. input() already returns a string; surrounding
# whitespace is stripped so a stray space does not make the lookup fail.
user = input("Enter the user id to whom you want to recommend : ").strip()
# Top 5 products for that reviewer, plus the reviews they have already written.
already_rated, predictions = recommend_product(preds_df, user, data, 5)

Enter the user id to whom you want to recommend : zzulia
User zzulia has already rated 3 product


In [16]:
# Reviews the selected reviewer has already written.
already_rated

Unnamed: 0,reviewer,rating,product
70013,zzulia,3,Facial Mask
88179,zzulia,2,Facial Mask
115840,zzulia,5,Pembersih Two In One Bengkoang Whitening


In [18]:
# Names of the recommended products, in the order returned by recommend_product.
prod_pred = predictions['product']
prod_pred

2040    Prominent Essence Facial Mask
2225         Facial Mask Bedak Dingin
1988                 Oil Control Mask
2000      White Aqua Serum Sheet Mask
1661                Essential Vitamin
Name: product, dtype: object