# Setting up

In [1]:
# Prepare dependencies
import numpy as np
import scipy.stats as stats
import pandas as pd
import seaborn as sns

import matplotlib
import matplotlib.pyplot as plt

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

plt.style.use('seaborn')

# # ML dependencies
# from sklearn.datasets import make_regression
# from sklearn.datasets import make_s_curve
# from sklearn.linear_model import LinearRegression
# from sklearn.metrics import mean_squared_error, r2_score
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.linear_model import LogisticRegression
# from sklearn.neighbors import KNeighborsClassifier

In [7]:
# Import dataset
dfLego = pd.read_csv('lego_cleaned_set.csv')
dfLego.head(3)

Unnamed: 0,setid,number,variant,theme,subtheme,year,name,pieces,price,image_url,owned_by,wanted_by,num_reviews,play_star_rating,prod_desc,review_difficulty,set_name,star_rating,val_star_rating
0,29682,75267,1,Star Wars,The Mandalorian,2020,Mandalorian Battle Pack,102.0,14.99,https://images.brickset.com/sets/images/75267-...,2794,3080,0.0,0.0,Not Available,Not Available,Not Available,0.0,0.0
1,29491,76895,1,Speed Champions,Not Available,2020,Ferrari F8 Tributo,275.0,19.99,https://images.brickset.com/sets/images/76895-...,857,2055,0.0,0.0,Not Available,Not Available,Not Available,0.0,0.0
2,29669,80105,1,Seasonal,Chinese New Year,2020,Chinese New Year Temple Fair,1664.0,119.99,https://images.brickset.com/sets/images/80105-...,983,1830,0.0,0.0,Not Available,Not Available,Not Available,0.0,0.0


# Peeking at the dataset

In [4]:
dfLego.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
setid,1896.0,28049.233122,1071.934247,24893.0,27235.5,27959.5,29055.25,29965.0
variant,1896.0,2.024789,3.441316,0.0,1.0,1.0,1.0,23.0
year,1896.0,2018.205169,0.991039,2017.0,2017.0,2018.0,2019.0,2020.0
pieces,1896.0,291.780063,546.540136,0.0,28.75,120.0,333.0,7541.0
price,1896.0,30.878966,50.399194,0.0,3.99,14.99,39.99,799.99
owned_by,1896.0,2010.380802,2012.589338,0.0,425.0,1282.0,3206.0,15558.0
wanted_by,1896.0,672.995781,621.724365,9.0,245.75,499.5,910.75,5897.0
num_reviews,1896.0,3.216245,12.650265,0.0,0.0,0.0,1.0,367.0
play_star_rating,1896.0,1.173154,1.952983,0.0,0.0,0.0,3.5,5.0
star_rating,1896.0,1.23539,2.028659,0.0,0.0,0.0,4.0,5.0


In [11]:
# Calculate the total rating counts based on set names
setNameRatingCount = dfLego.groupby('set_name')['play_star_rating'].count()
setNameRatingCount = pd.DataFrame(setNameRatingCount).reset_index().rename(columns={'play_star_rating': 'totalRatingCount'})
setNameRatingCount

Unnamed: 0,set_name,totalRatingCount
0,1968 Ford Mustang Fastback,1
1,2016 Ford GT & 1966 Ford GT40,1
2,4 x 4 Response Unit,1
3,6x6 All Terrain Tow Truck,1
4,A-Wing™ vs. TIE Silencer™ Microfighters,1
...,...,...
510,Yoda's Hut,1
511,Yoda's Jedi Starfighter™,1
512,Zane - Spinjitzu Master,1
513,Zane's Ninja Boat Pursuit,1


In [12]:
# Merge total rating count to 'dfLego'
dfLego = pd.merge(dfLego, setNameRatingCount, how='left', on='set_name')
dfLego.head()

Unnamed: 0,setid,number,variant,theme,subtheme,year,name,pieces,price,image_url,owned_by,wanted_by,num_reviews,play_star_rating,prod_desc,review_difficulty,set_name,star_rating,val_star_rating,totalRatingCount
0,29682,75267,1,Star Wars,The Mandalorian,2020,Mandalorian Battle Pack,102.0,14.99,https://images.brickset.com/sets/images/75267-...,2794,3080,0.0,0.0,Not Available,Not Available,Not Available,0.0,0.0,1304
1,29491,76895,1,Speed Champions,Not Available,2020,Ferrari F8 Tributo,275.0,19.99,https://images.brickset.com/sets/images/76895-...,857,2055,0.0,0.0,Not Available,Not Available,Not Available,0.0,0.0,1304
2,29669,80105,1,Seasonal,Chinese New Year,2020,Chinese New Year Temple Fair,1664.0,119.99,https://images.brickset.com/sets/images/80105-...,983,1830,0.0,0.0,Not Available,Not Available,Not Available,0.0,0.0,1304
3,29396,76140,1,Marvel Super Heroes,Avengers,2020,Iron Man Mech,148.0,9.99,https://images.brickset.com/sets/images/76140-...,518,930,0.0,0.0,Not Available,Not Available,Not Available,0.0,0.0,1304
4,29397,76142,1,Marvel Super Heroes,Avengers,2020,Avengers Speeder Bike Attack,226.0,19.99,https://images.brickset.com/sets/images/76142-...,219,914,0.0,0.0,Not Available,Not Available,Not Available,0.0,0.0,1304


# Building the recommendation engine

## 1. k-Nearest Neighbors (kNN)

In [None]:
# Prepare dependencies
