# Recommender System


### Project Description:
Using a sample dataset of 20,000 products from an e-commerce store, create a system that when given a specific product, will recommend similar products for the user.
### Model Used:
KNearest Neighbour used to recommend closest neigbours to specific product.
### Date:
October 2020
### Author:
Marek Brzozowki

In [1]:
#Importing packages and libraries
import numpy as np
import pandas as pd
import sklearn

import re
import nltk
import random

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors


In [2]:
def lower_all(dataframe):
    ### Converts all characters to lowercase
    return dataframe.applymap(lambda s:s.lower() if type(s) == str else s)

In [4]:
def remove_special(dataframe):
    ### Keeps only spacing, letters and numbers
    for column in dataframe:
        dataframe[column] = dataframe[column].str.strip()
        
        # Removes possession term
        dataframe[column] = dataframe[column].str.replace("'s",'')

        #Keeps all alpha-numeric and spacing terms
        dataframe[column] = dataframe[column].str.replace(r"[^a-zA-Z\d\_]+", " ") 

        #Removes extras term in the last column
        dataframe[column] = dataframe[column].str.replace("product_specification","")     

    return dataframe

In [5]:
def flatten(x):
    ### Flattens nested loops
    result = []
    for i in x:
        if hasattr(i, "__iter__") and not isinstance(i, str):
            result.extend(flatten(i))
        else:
            result.append(i)
    return result

In [3]:
def Convert(string):
    ### Converts string to array 
    li = list(string.split(" ")) 
    return li

In [33]:
def random_item(length):
    return (random.randint(0, length))

In [6]:
# Loading raw data into editor
raw_data = pd.read_csv('flipkart_com-ecommerce_sample.csv')
raw_data.head(2)

Unnamed: 0,uniq_id,crawl_timestamp,product_url,product_name,product_category_tree,pid,retail_price,discounted_price,image,is_FK_Advantage_product,description,product_rating,overall_rating,brand,product_specifications
0,c2d766ca982eca8304150849735ffef9,2016-03-25 22:59:23 +0000,http://www.flipkart.com/alisha-solid-women-s-c...,Alisha Solid Women's Cycling Shorts,"[""Clothing >> Women's Clothing >> Lingerie, Sl...",SRTEH2FF9KEDEFGF,999.0,379.0,"[""http://img5a.flixcart.com/image/short/u/4/a/...",False,Key Features of Alisha Solid Women's Cycling S...,No rating available,No rating available,Alisha,"{""product_specification""=>[{""key""=>""Number of ..."
1,7f7036a6d550aaa89d34c77bd39a5e48,2016-03-25 22:59:23 +0000,http://www.flipkart.com/fabhomedecor-fabric-do...,FabHomeDecor Fabric Double Sofa Bed,"[""Furniture >> Living Room Furniture >> Sofa B...",SBEEH3QGU7MFYJFY,32157.0,22646.0,"[""http://img6a.flixcart.com/image/sofa-bed/j/f...",False,FabHomeDecor Fabric Double Sofa Bed (Finish Co...,No rating available,No rating available,FabHomeDecor,"{""product_specification""=>[{""key""=>""Installati..."


In [7]:
# Data profiling
print(raw_data.dtypes,end='\n\n')
print(raw_data.isnull().sum(),end='\n\n')
print(raw_data.describe(),end='\n\n')
print(raw_data.columns)

uniq_id                     object
crawl_timestamp             object
product_url                 object
product_name                object
product_category_tree       object
pid                         object
retail_price               float64
discounted_price           float64
image                       object
is_FK_Advantage_product       bool
description                 object
product_rating              object
overall_rating              object
brand                       object
product_specifications      object
dtype: object

uniq_id                       0
crawl_timestamp               0
product_url                   0
product_name                  0
product_category_tree         0
pid                           0
retail_price                 78
discounted_price             78
image                         3
is_FK_Advantage_product       0
description                   2
product_rating                0
overall_rating                0
brand                      5864
product_spec

In [8]:
# Removing unneccesary columns
clean_v1 = raw_data.drop(['uniq_id', 'crawl_timestamp', 'product_url',
       'pid', 'retail_price', 'discounted_price',
       'image', 'is_FK_Advantage_product', 'product_rating',
       'overall_rating'],axis =1)
clean_v1.head(2)

Unnamed: 0,product_name,product_category_tree,description,brand,product_specifications
0,Alisha Solid Women's Cycling Shorts,"[""Clothing >> Women's Clothing >> Lingerie, Sl...",Key Features of Alisha Solid Women's Cycling S...,Alisha,"{""product_specification""=>[{""key""=>""Number of ..."
1,FabHomeDecor Fabric Double Sofa Bed,"[""Furniture >> Living Room Furniture >> Sofa B...",FabHomeDecor Fabric Double Sofa Bed (Finish Co...,FabHomeDecor,"{""product_specification""=>[{""key""=>""Installati..."


In [9]:
# Lowercase transformation
clean_v2 = lower_all(clean_v1)
clean_v2.head(2)

Unnamed: 0,product_name,product_category_tree,description,brand,product_specifications
0,alisha solid women's cycling shorts,"[""clothing >> women's clothing >> lingerie, sl...",key features of alisha solid women's cycling s...,alisha,"{""product_specification""=>[{""key""=>""number of ..."
1,fabhomedecor fabric double sofa bed,"[""furniture >> living room furniture >> sofa b...",fabhomedecor fabric double sofa bed (finish co...,fabhomedecor,"{""product_specification""=>[{""key""=>""installati..."


In [10]:
# Special character removal
clean_v3 = remove_special(clean_v2)
clean_v3.head(2)

Unnamed: 0,product_name,product_category_tree,description,brand,product_specifications
0,alisha solid women cycling shorts,clothing women clothing lingerie sleep swimwe...,key features of alisha solid women cycling sho...,alisha,key number of contents in sales package valu...
1,fabhomedecor fabric double sofa bed,furniture living room furniture sofa beds fut...,fabhomedecor fabric double sofa bed finish col...,fabhomedecor,key installation demo details value installa...


In [11]:
# Cleansed data
final_data = clean_v3.copy()

In [12]:
# Tags to be used a dictionary for TFIDF
tags_raw = [item.split() for item in clean_v3['product_category_tree']]
tags = list(set(flatten(tags_raw)))

print('Number of tags used in TFIDF:', len(tags))

Number of tags used in TFIDF: 8015


In [13]:
# Product category array need to transform TFIDF
product_category = clean_v3['product_category_tree'].values
product_category[0:2]

array([' clothing women clothing lingerie sleep swimwear shorts alisha shorts alisha solid women cycling shorts ',
       ' furniture living room furniture sofa beds futons fabhomedecor fabric double sofa bed finish colo '],
      dtype=object)

In [14]:
# Performing Term Frequency-Inverse Document Frequency
tfidf = TfidfVectorizer(stop_words='english',vocabulary=tags)
transform_data = tfidf.fit_transform(product_category)

In [28]:
# Model: KNearest Neighbours
# Calculating 10 nearest neighbours
model_Knn= NearestNeighbors(metric='cosine',algorithm= 'brute', n_neighbors= 10).fit(transform_data)

In [34]:
# Randomly picks one item to put into the recommender model
# Determines the score of all selections.
rand_item =random_item(len(raw_data))
input_text = Convert(final_data.iloc[rand_item]['product_name'])
result_ratings = model_Knn.kneighbors(tfidf.transform(input_text))

In [42]:
# Nearest Neighbour Results
print('Original Item: \n', raw_data.iloc[rand_item]['product_name'],'\n')

for item in result_ratings[1][0]:
    print('Recommended Item')
    print('Product Number:', item)
    print('Brand: ', raw_data.iloc[item]['brand'])
    print('Product Name:', raw_data.iloc[item]['product_name'])
    print('Product Retail Price:',raw_data.iloc[item]['retail_price'], ('(INR)'))
    print('Product URL:', raw_data.iloc[item]['product_url'])
    print('Tags associated with Brand: ', list(set(product_category[item].split(' '))) ,'\n')

Original Item: 
 Favourite BikerZ FBZ 6LED 59 Cast Iron Front Fog Lamp Unit 

Recommended Item
Product Number: 7644
Brand:  Favourite BikerZ
Product Name: Favourite BikerZ 3514 RAD air filter Ionic Air Filters For Hero HF Dawn
Product Retail Price: 999.0 (INR)
Product URL: http://www.flipkart.com/favourite-bikerz-3514-rad-air-filter-ionic-filters-hero-hf-dawn/p/itmehqzykrhuhnar?pid=VAFEHQZYGH5HRJJP
Tags associated with Brand:  ['', 'f', 'filter', 'rad', 'favourite', 'ionic', 'air', 'bikerz', '3514'] 

Recommended Item
Product Number: 15597
Brand:  Favourite BikerZ
Product Name: Favourite BikerZ FBZ WIPER BLADE 05 Windshield Wiper For Maruti Alto
Product Retail Price: 699.0 (INR)
Product URL: http://www.flipkart.com/favourite-bikerz-fbz-wiper-blade-05-windshield-maruti-alto/p/itmegkpgymkzbv8t?pid=CWIEGKPGR69BBHZ8
Tags associated with Brand:  ['', '05', 'spares', 'performance', 'spare', 'favourite', 'fbz', 'automotive', 'wiper', 'wipers', 'w', 'bikerz', 'windshield', 'parts', 'accessorie