In [1]:
#import necessary libraries and load the dataset

import pandas as pd
import numpy as np

# NOTE(review): absolute, machine-specific path - point it at wherever the CSV lives.
ratings_csv_path = r"C:\Users\hp\Downloads\cloths-rating.csv"
rating = pd.read_csv(ratings_csv_path)
rating.head()

Unnamed: 0,ProductID,UserID,Rating,Text
0,777,AV1YnR7wglJLPUi8IJmi,4,Great taffy at a great price.
1,767,AVpfpK8KLJeJML43BCuD,4,Absolutely wonderful - silky and sexy and comf...
2,1080,AVqkIdntQMlgsOJE6fuB,5,Love this dress! it's sooo pretty.
3,1077,AVpfpK8KLJeJML43BCuD,3,I had such high hopes for this dress and reall...
4,1049,AVpfpK8KLJeJML43BCuD,5,"I love, love, love this jumpsuit. it's fun, fl..."


In [2]:
# Number of distinct users, then number of distinct products, in the ratings table.
for id_column in ("UserID", "ProductID"):
    print(rating[id_column].nunique())

42
27


In [3]:
# Missing-value count for every column (isna is the same check as isnull).
rating.isna().sum()

ProductID    0
UserID       0
Rating       0
Text         0
dtype: int64

In [4]:
# Score each review's text with TextBlob (sentiment polarity) and store the result in a new "Sentiment" column

from textblob import TextBlob

def sentiment_score(text):
    """Return the TextBlob sentiment polarity of ``text``.

    Parameters
    ----------
    text : object
        The review text. It is converted with ``str()`` before scoring, so
        non-string values are scored on their string representation.

    Returns
    -------
    float or None
        The polarity reported by TextBlob, or ``None`` when scoring fails.
    """
    try:
        return TextBlob(str(text)).sentiment.polarity
    except Exception:
        # Best-effort scoring: an unscorable review yields None instead of
        # aborting the whole column. Only Exception is caught so that
        # KeyboardInterrupt / SystemExit can still stop a long-running apply.
        return None

# Run the scorer over every review text and keep the result as a new column.
rating["Sentiment"] = rating["Text"].map(sentiment_score)
rating.head()

Unnamed: 0,ProductID,UserID,Rating,Text,Sentiment
0,777,AV1YnR7wglJLPUi8IJmi,4,Great taffy at a great price.,0.8
1,767,AVpfpK8KLJeJML43BCuD,4,Absolutely wonderful - silky and sexy and comf...,0.633333
2,1080,AVqkIdntQMlgsOJE6fuB,5,Love this dress! it's sooo pretty.,0.4375
3,1077,AVpfpK8KLJeJML43BCuD,3,I had such high hopes for this dress and reall...,0.12
4,1049,AVpfpK8KLJeJML43BCuD,5,"I love, love, love this jumpsuit. it's fun, fl...",0.55


In [5]:
# Weight each rating by its review's sentiment score (Rating * Sentiment)

# Element-wise product of the star rating and the sentiment score.
rating["updated_score"] = rating["Rating"].mul(rating["Sentiment"])
rating.head()

Unnamed: 0,ProductID,UserID,Rating,Text,Sentiment,updated_score
0,777,AV1YnR7wglJLPUi8IJmi,4,Great taffy at a great price.,0.8,3.2
1,767,AVpfpK8KLJeJML43BCuD,4,Absolutely wonderful - silky and sexy and comf...,0.633333,2.533333
2,1080,AVqkIdntQMlgsOJE6fuB,5,Love this dress! it's sooo pretty.,0.4375,2.1875
3,1077,AVpfpK8KLJeJML43BCuD,3,I had such high hopes for this dress and reall...,0.12,0.36
4,1049,AVpfpK8KLJeJML43BCuD,5,"I love, love, love this jumpsuit. it's fun, fl...",0.55,2.75


In [6]:
# Encode the string UserID values as integer labels

from sklearn import preprocessing
# Learn the label mapping first, then swap the raw IDs for their integer codes.
le = preprocessing.LabelEncoder()
le.fit(rating["UserID"])
rating["UserID"] = le.transform(rating["UserID"])
rating.head()

Unnamed: 0,ProductID,UserID,Rating,Text,Sentiment,updated_score
0,777,0,4,Great taffy at a great price.,0.8,3.2
1,767,3,4,Absolutely wonderful - silky and sexy and comf...,0.633333,2.533333
2,1080,13,5,Love this dress! it's sooo pretty.,0.4375,2.1875
3,1077,3,3,I had such high hopes for this dress and reall...,0.12,0.36
4,1049,3,5,"I love, love, love this jumpsuit. it's fun, fl...",0.55,2.75


In [7]:
# make function to classify updated score

# Score grid for each class label. fun() only reads the first value of each
# list (the lower edge of the bin) and the last value of the final list (the
# overall maximum); the values in between are listed for readability.
class_dict = {"1": [-5.0, -4.75, -4.5, -4.25, -4, -3.75, -3.5, -3.25],
       "2": [-3, -2.75, -2.5, -2.25, -2, -1.75, -1.5, -1.25],
       "3": [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75],
       "4":[1, 1.25, 1.5, 1.75, 2, 2.25, 2.5, 2.75],
       "5": [3, 3.25, 3.5, 3.75, 4, 4.25, 4.5, 4.75, 5]}

def fun(val):
    """Map a weighted score to its class label (1-5).

    Each class covers the half-open interval from its own first grid value up
    to (but excluding) the first grid value of the next class; the last class
    also includes its final grid value. The bins are therefore contiguous, so
    scores that fall between grid points (for example 2.8) are classified
    instead of being dropped.

    Parameters
    ----------
    val : float
        The score to classify.

    Returns
    -------
    int or None
        The class label, or ``None`` when ``val`` lies outside the overall
        range of the grid or is NaN.
    """
    labels = list(class_dict)  # dicts keep insertion order: "1" .. "5"
    for position, label in enumerate(labels):
        lower = class_dict[label][0]
        if position + 1 < len(labels):
            # Upper edge is the start of the next bin, exclusive.
            upper = class_dict[labels[position + 1]][0]
            if lower <= val < upper:
                return int(label)
        elif lower <= val <= class_dict[label][-1]:
            # Last bin: include the maximum value itself.
            return int(label)
    return None

print(fun(4))

5


In [8]:
# apply function to updated score and create new column new score

# Bucket every weighted score with fun() and store the class as a numeric column.
score_classes = rating["updated_score"].apply(fun)
rating["new_score"] = pd.to_numeric(score_classes)
rating.head()

Unnamed: 0,ProductID,UserID,Rating,Text,Sentiment,updated_score,new_score
0,777,0,4,Great taffy at a great price.,0.8,3.2,5.0
1,767,3,4,Absolutely wonderful - silky and sexy and comf...,0.633333,2.533333,4.0
2,1080,13,5,Love this dress! it's sooo pretty.,0.4375,2.1875,4.0
3,1077,3,3,I had such high hopes for this dress and reall...,0.12,0.36,3.0
4,1049,3,5,"I love, love, love this jumpsuit. it's fun, fl...",0.55,2.75,4.0


In [9]:
# Pivot table of ProductID, UserID and New_score

# Products as rows, users as columns, new_score as the cell value (pivot_table's
# default aggregation averages repeated product/user pairs); pairs without a
# review are filled with 0.
rating_pivot = rating.pivot_table(index = "ProductID", columns = "UserID", values = "new_score")
rating_pivot = rating_pivot.fillna(0)
rating_pivot

UserID,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
ProductID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89,0.0,0.0,0.0,4.0,0.0,0.0,5.0,3.0,3.0,0.0,...,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.25
333,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,4.666667,3.0,...,0.0,2.0,3.666667,0.0,0.0,3.333333,0.0,0.0,0.0,0.0
369,5.0,3.0,0.0,4.0,0.0,4.0,5.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.666667,0.0,5.0,0.0
444,3.0,3.0,0.0,0.0,3.0,0.0,4.0,5.0,3.0,0.0,...,4.0,0.0,4.0,0.0,0.0,3.666667,0.0,0.0,4.333333,0.0
684,0.0,0.0,4.0,4.5,3.0,0.0,3.0,4.0,3.0,0.0,...,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,3.666667
697,0.0,3.0,3.0,0.0,0.0,0.0,5.0,0.0,3.0,3.0,...,5.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
767,3.0,0.0,0.0,4.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,4.0,0.0,3.0,3.0,0.0,0.0
777,5.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0
823,0.0,0.0,0.0,5.0,0.0,0.0,5.0,3.0,0.0,0.0,...,0.0,5.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,3.0
847,0.0,0.0,0.0,4.0,0.0,0.0,4.0,4.0,5.0,0.0,...,0.0,4.0,0.0,0.0,5.0,0.0,0.0,4.0,0.0,3.0


In [10]:
# Import SciPy's CSR (compressed sparse row) matrix type for the sparse pivot table

from scipy.sparse import csr_matrix

In [11]:
# Store the pivot table's values in compressed sparse row form and show its entries.
rating_pivot_matrix = csr_matrix(rating_pivot.to_numpy())
print(rating_pivot_matrix)

  (0, 3)	4.0
  (0, 6)	5.0
  (0, 7)	3.0
  (0, 8)	3.0
  (0, 11)	4.0
  (0, 16)	4.0
  (0, 18)	4.25
  (0, 20)	3.0
  (0, 22)	5.0
  (0, 28)	4.0
  (0, 33)	4.0
  (0, 37)	4.0
  (0, 41)	4.25
  (1, 1)	3.5
  (1, 8)	4.666666666666667
  (1, 9)	3.0
  (1, 10)	3.0
  (1, 13)	3.0
  (1, 17)	3.0
  (1, 22)	5.0
  (1, 25)	3.25
  (1, 29)	3.5
  (1, 33)	2.0
  (1, 34)	3.6666666666666665
  (1, 37)	3.3333333333333335
  :	:
  (25, 17)	5.0
  (25, 21)	3.0
  (25, 22)	3.0
  (25, 25)	3.3333333333333335
  (25, 30)	4.333333333333333
  (25, 38)	3.6666666666666665
  (25, 40)	3.0
  (26, 0)	3.0
  (26, 1)	4.0
  (26, 3)	4.0
  (26, 6)	3.0
  (26, 7)	4.0
  (26, 8)	3.0
  (26, 9)	3.0
  (26, 11)	4.0
  (26, 16)	4.0
  (26, 17)	3.4
  (26, 18)	4.0
  (26, 19)	3.0
  (26, 22)	3.0
  (26, 23)	3.0
  (26, 25)	4.333333333333333
  (26, 28)	4.0
  (26, 33)	5.0
  (26, 37)	3.25


In [12]:
# fitting data into NearestNeighbor model

from sklearn.neighbors import NearestNeighbors

# Neighbour-search settings, kept together so they are easy to tune.
knn_settings = {"metric": "cosine", "n_neighbors": 20, "radius": 1}
model_knn = NearestNeighbors(**knn_settings)
model_knn.fit(rating_pivot_matrix)

NearestNeighbors(metric='cosine', n_neighbors=20, radius=1)

In [13]:
# matrix of cosine similarity

from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between the rows of rating_pivot. The pivot table is
# indexed by ProductID, so entry [i, j] compares the i-th and j-th products.
similarity_matrix = cosine_similarity(rating_pivot)
# Leave the matrix as the last expression so the notebook displays it.
similarity_matrix

array([[1.        , 0.34481804, 0.33609835, 0.45307418, 0.66588781,
        0.41508482, 0.37222129, 0.39188913, 0.77623954, 0.42303009,
        0.23668506, 0.34270171, 0.51789441, 0.40491548, 0.2962993 ,
        0.09845251, 0.30352758, 0.10696092, 0.21098881, 0.15835598,
        0.43972422, 0.39507832, 0.19065099, 0.        , 0.56647747,
        0.17754288, 0.7351269 ],
       [0.34481804, 1.        , 0.35080221, 0.41221865, 0.24502557,
        0.54218653, 0.24715382, 0.04202322, 0.22597343, 0.26207695,
        0.11910935, 0.30053142, 0.45362555, 0.54466372, 0.31546149,
        0.25233658, 0.16516564, 0.11185922, 0.37405616, 0.        ,
        0.32042446, 0.25854048, 0.24376259, 0.        , 0.4892192 ,
        0.42035471, 0.51714313],
       [0.33609835, 0.35080221, 1.        , 0.49029491, 0.27574146,
        0.35941285, 0.53389146, 0.40623879, 0.34368008, 0.42288594,
        0.14392888, 0.27490441, 0.31820725, 0.19772705, 0.26771578,
        0.34757433, 0.10691904, 0.45699659, 0.3648

In [14]:
product_Id = int(input("Enter product ID according to dataset: "))
# Product IDs in the same order as the rows of the pivot table.
data = rating_pivot.index.tolist()
print(data)

Enter product ID according to dataset: 777
[89, 333, 369, 444, 684, 697, 767, 777, 823, 847, 853, 858, 862, 910, 949, 1002, 1003, 1049, 1060, 1065, 1077, 1080, 1095, 1120, 6969, 8001, 9696]


In [15]:
# Row position of the requested product in the pivot table. Fail with a clear
# message (still a ValueError, as list.index would raise) when the ID is unknown.
if product_Id not in data:
    raise ValueError(
        f"Product ID {product_Id} is not in the dataset; choose one of: {data}"
    )
query_index = data.index(product_Id)
print(query_index)

7


In [16]:
# The chosen product's row, reshaped to a single-sample 2-D array for the model.
query_vector = rating_pivot.iloc[query_index].values.reshape(1, -1)
similarity, indices = model_knn.kneighbors(query_vector, n_neighbors = 8)
# Despite its name, `similarity` holds the distances returned by kneighbors
# (the model uses the cosine metric), so smaller values mean closer products.
print(similarity)
print()
# Row positions (into rating_pivot) of the returned neighbours.
print(indices)

[[0.         0.38617007 0.5235722  0.56637357 0.57412872 0.59376121
  0.59584587 0.60811087]]

[[ 7  6 21  3 26  2 20  0]]


In [17]:

# create dictionary and store the recommendation given by model to the user

# Maps each recommended product ID (as a string) to its cosine distance from
# the product the user asked about.
data_dict = {}
print("Recommendation for {} is: \n".format(rating_pivot.index[query_index]))
for distance, neighbor_position in zip(similarity.flatten(), indices.flatten()):
    # Skip the queried product itself by its row position rather than assuming
    # it is always the first neighbour (ties can put another product first).
    if neighbor_position == query_index:
        continue
    neighbor_id = rating_pivot.index[neighbor_position]
    data_dict[str(neighbor_id)] = float(distance)
    print(f"{neighbor_id}, cosine distance = {distance:.20f}")
print()
print(data_dict)

Recommendation for 777 is: 

767, is similarity distance = with 0.38617006952374388629
1080, is similarity distance = with 0.52357220255218694938
444, is similarity distance = with 0.56637356650542436398
9696, is similarity distance = with 0.57412872258472391618
369, is similarity distance = with 0.59376121029888251357
1077, is similarity distance = with 0.59584586854940713163
89, is similarity distance = with 0.60811087118048101630

{'767': 0.3861700695237439, '1080': 0.523572202552187, '444': 0.5663735665054244, '9696': 0.5741287225847239, '369': 0.5937612102988825, '1077': 0.5958458685494071, '89': 0.608110871180481}
