# PRODUCT RECOMMENDATION SYSTEM

#### Product based system for new customers

In [None]:
import numpy as np
import pandas as pd

In [None]:
# read the data from csv file using pandas

In [None]:
# Load the review data from a CSV file located in the working directory.
dataset = pd.read_csv('Musical_instruments_reviews.csv')
# Preview the first 10 rows.
dataset.head(10)

In [None]:
# Check the shape (rows, columns) of the dataset.
dataset.shape

In [None]:
# Summary statistics of the dataset.
dataset.describe()

In [None]:
# Inspect missing values: isnull() only returns a boolean mask and does not
# modify the dataset; the incomplete rows are dropped in the following cell.
dataset.isnull()

In [None]:
# Drop every row that has at least one missing value, modifying dataset in place.
dataset.dropna(inplace=True)

In [None]:
# Shape of the dataset after the incomplete rows were dropped.
dataset.shape

In [None]:
# Scatter plot of the overall rating (y axis) against the product id (x axis).
import matplotlib.pyplot as plt

plt.style.use("ggplot")
plt.scatter(
    x=dataset["asin"],
    y=dataset["overall"],
    c="r",
    marker="o",
    alpha=0.5,
)
plt.xlabel("ASIN")
plt.ylabel("RATINGS")
plt.show()

In [None]:
# Plot a histogram of the overall ratings.

In [None]:
# Distribution of the values in the 'overall' column.
dataset['overall'].hist()

In [None]:
# Count how many ratings each product (asin) received, then rank the products
# from most-rated to least-rated and show the ten at the top.
popular_products = dataset.groupby('asin')[['overall']].count()
most_popular = popular_products.sort_values(by='overall', ascending=False)
most_popular.head(10)

In [None]:
# Bar chart of the 30 products with the most ratings. most_popular is already
# sorted by rating count in descending order, so the bars go from the
# most-rated product downwards.
most_popular.head(30).plot(kind = "bar")

#### System based on the customer's purchase history and the ratings provided by other users who bought similar items

In [None]:
# ratings is another name for the same cleaned DataFrame (no copy is made).
ratings = dataset

In [None]:
# Build the utility matrix for the recommendation system: one row per
# reviewer, one column per product (asin), cells hold the 'overall' rating.
# Reviewer/product pairs without a rating are NaN at this point.
ratings_utility_matrix = ratings.pivot_table(values='overall', index='reviewerID', columns='asin')
ratings_utility_matrix.head()

In [None]:
# Keep only the products (columns) that have at least 10 ratings, then
# replace the remaining missing ratings with 0.
ratings_utility_matrix = ratings_utility_matrix.dropna(thresh=10, axis=1).fillna(0)
ratings_utility_matrix.head()

In [None]:
# Shape of the utility matrix (reviewers, products).
ratings_utility_matrix.shape

In [None]:
# Transpose the utility matrix so that each row is a product and each column
# is a reviewer.
X = ratings_utility_matrix.T
X.head()

In [None]:
# Shape of the transposed matrix (products, reviewers).
X.shape

In [None]:
# X1 is just another name for X here (no copy, no de-duplication). It is
# reassigned to the TF-IDF matrix further down before it is ever read.
X1 = X

In [None]:
import sklearn
from sklearn.decomposition import TruncatedSVD

In [None]:
# Decompose the product-by-reviewer matrix into 10 components per product.
# NOTE(review): no random_state is passed; if the solver in use is randomized
# the components may differ slightly between runs — confirm whether
# reproducibility matters here.
SVD = TruncatedSVD(n_components=10)
decomposed_matrix = SVD.fit_transform(X)
decomposed_matrix.shape

In [None]:
# Correlation matrix between products: np.corrcoef treats each row of
# decomposed_matrix (one row per product) as a variable, so the result is a
# square product-by-product matrix.
correlation_matrix = np.corrcoef(decomposed_matrix)
correlation_matrix.shape

In [None]:
# Look at the product id stored at position 50 of the matrix index.
X.index[50]

In [None]:
# Position (row number in X) of the product (asin) purchased by the customer.
# The id is read interactively; surrounding whitespace is stripped so that an
# id pasted with a stray space or newline still matches.
i = input("Enter product id:").strip()
product_names = list(X.index)
if i not in product_names:
    # Fail with an actionable message instead of list.index's bare
    # "... is not in list" error. Products with fewer than 10 ratings were
    # removed from the matrix, so a real product id can legitimately be absent.
    raise ValueError(
        "Unknown product id %r: it is not one of the %d products in the "
        "utility matrix (products with fewer than 10 ratings are excluded)"
        % (i, len(product_names))
    )
product_ID = product_names.index(i)
product_ID

In [None]:
# Correlation of every product with the product purchased by this customer,
# derived from the ratings given by other customers: this is the row of the
# correlation matrix at the purchased product's position.
correlation_product_ID = correlation_matrix[product_ID]
correlation_product_ID.shape

In [None]:
# Recommend the top 10 products, most strongly correlated first.
# A stable descending sort keeps tied products in their original index order
# and pushes NaN correlations to the end.
ranked_positions = np.argsort(-correlation_product_ID, kind="stable")
# Keep only strongly correlated products (> 0.90) and skip the item the
# customer already bought. Filtering here, rather than calling list.remove
# afterwards, cannot raise when that item is not among the candidates.
Recommend = [
    X.index[position]
    for position in ranked_positions
    if correlation_product_ID[position] > 0.90 and X.index[position] != i
]
print("The recommendations are:")
# Slice [0:10] yields ten items; the previous [0:9] returned only nine.
Recommend[0:10]

#### Product recommendations based on textual clustering analysis of the product descriptions

In [None]:
# Importing libraries
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

In [None]:
# Build the product-description table: wrap the ratings in a DataFrame and
# discard the columns listed below, keeping whatever remains (presumably the
# product id and the review text).
df = pd.DataFrame(ratings)
product_descriptions = df.drop(
    columns=[
        'reviewerID',
        'reviewerName',
        'helpful',
        'overall',
        'summary',
        'unixReviewTime',
        'reviewTime',
    ]
)

In [None]:
# Shape of the description table before removing incomplete rows.
product_descriptions.shape

In [None]:
# Drop rows with missing values (dropna returns a new frame, which replaces
# the old one), then report the new shape and preview the result.
product_descriptions = product_descriptions.dropna()
print(product_descriptions.shape)
product_descriptions.head()

In [None]:
# Work on the first 500 rows only.
product_descriptions1 = product_descriptions.head(500)
# product_descriptions1.iloc[:,1]

# Preview the review text of the first 10 rows of that subset.
product_descriptions1["reviewText"].head(10)

In [None]:
# Convert the review text of the 500-row subset into TF-IDF features,
# ignoring English stop words. X1 is rebound here to the resulting matrix.
vectorizer = TfidfVectorizer(stop_words='english')
X1 = vectorizer.fit_transform(product_descriptions1["reviewText"])
X1

In [None]:
# Visualize the clusters found in the subset: fit K-Means with 10 clusters on
# the TF-IDF matrix and plot the cluster label assigned to each row.
# NOTE(review): this rebinds X, which earlier held the product-by-reviewer
# rating matrix; cells that index X by product id will see the TF-IDF matrix
# instead if they are re-run after this one.
X=X1
kmeans = KMeans(n_clusters = 10, init = 'k-means++')
y_kmeans = kmeans.fit_predict(X)
plt.plot(y_kmeans, "^")
plt.show()

In [None]:
def print_cluster(i, centroid_order=None, vocabulary=None):
    """Print the ten highest-ranked terms of cluster ``i`` on a single line.

    The output has the form ``Cluster <i>: term1 term2 ...`` followed by a
    newline.

    Args:
        i: Index of the cluster to print.
        centroid_order: Per-cluster sequences of term indices, best term
            first. Defaults to the module-level ``order_centroids``.
        vocabulary: Sequence mapping a term index to its text. Defaults to
            the module-level ``terms``.
    """
    if centroid_order is None:
        centroid_order = order_centroids
    if vocabulary is None:
        vocabulary = terms
    # Build the whole line first and print it once. The previous Python 2
    # style ("print(...)," and a bare "print") put every term on its own
    # line under Python 3 and never emitted the intended line break.
    top_terms = " ".join(str(vocabulary[ind]) for ind in centroid_order[i][:10])
    print("Cluster %d: %s" % (i, top_terms))

In [None]:
#Recommendation of products based on the product currently selected by the user.
#Recommends related products, in the style of "frequently bought together".

In [None]:
# Number of clusters to fit.
true_k = 10

# Fit K-Means on the TF-IDF matrix of the review text.
model = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1)
model.fit(X1)

print("Top terms per cluster:")
# For every centroid, the term indices ordered from highest to lowest weight.
order_centroids = model.cluster_centers_.argsort()[:, ::-1]
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# and fall back to the old name only on releases that predate it.
try:
    terms = vectorizer.get_feature_names_out()
except AttributeError:
    terms = vectorizer.get_feature_names()
# Use a dedicated loop name: `i` holds the product id entered by the user in
# an earlier cell and must not be overwritten here.
for cluster_id in range(true_k):
    print_cluster(cluster_id)

In [None]:
#Predicting clusters based on key search words

def show_recommendations(product):
    """Print the top terms of the cluster that the text ``product`` falls into.

    The text is vectorized with the module-level ``vectorizer``, the
    module-level ``model`` predicts a cluster for it, and that cluster is
    printed with ``print_cluster``.

    NOTE(review): if none of the words in ``product`` are known to the
    vectorizer, the predicted cluster is presumably not meaningful — confirm
    whether such queries should be reported instead.
    """
    query_vector = vectorizer.transform([product])
    cluster_id = model.predict(query_vector)[0]
    print_cluster(cluster_id)

In [None]:
# Try the keyword-based cluster lookup on a handful of sample search words,
# one per cell.
show_recommendations("tabla")

In [None]:
show_recommendations("water")

In [None]:
show_recommendations("piano")

In [None]:
show_recommendations("keyboard")

In [None]:
show_recommendations("pedal")

In [None]:
show_recommendations("drum")

In [None]:
show_recommendations("pop")

In [None]:
show_recommendations("xlr")

In [None]:
show_recommendations("guitar")