# Sorting Products Using Python

In this section, we sort products using Python.

We tried three different methods:

  * Weighted Sorting Score
  * Bayesian Average Rating
  * Hybrid Sorting: Weighted Sorting Score + Bayesian Average Rating

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler
# Display options: show every column and every row, widen the text repr,
# keep wide frames on a single line, and print floats with five decimals.
pd.set_option("display.max_columns",None)
pd.set_option("display.max_rows",None)
pd.set_option("display.width",500)
pd.set_option("display.expand_frame_repr",False)
pd.set_option("display.float_format",lambda x: '%.5f' % x)

# NOTE(review): hard-coded absolute path (looks like a Colab Google Drive
# mount) — confirm the location or make it configurable before sharing.
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/datasets/product_sorting.csv")
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,course_name,instructor_name,purchase_count,rating,commment_count,5_point,4_point,3_point,2_point,1_point
0,(50+ Saat) Python A-Z™: Veri Bilimi ve Machine...,Veri Bilimi Okulu,17380,4.8,4621,3466,924,185,46,6
1,Python: Yapay Zeka ve Veri Bilimi için Python ...,Veri Bilimi Okulu,48291,4.6,4488,2962,1122,314,45,45
2,5 Saatte Veri Bilimci Olun (Valla Billa),Instructor_1,18693,4.4,2362,1582,567,165,24,24
3,R ile Veri Bilimi ve Machine Learning (35 Saat),Veri Bilimi Okulu,6626,4.6,1027,688,257,51,10,21
4,(2020) Python ile Makine Öğrenmesi (Machine Le...,Veri Bilimi Okulu,11314,4.6,969,717,194,38,10,10


# Weighted Sorting Score

In [9]:
def weighted_sorting_score(dataframe, w1=32, w2=26, w3=42):
    """Blend comment volume, purchase volume and rating into a single score.

    Side effect: two helper columns are written onto ``dataframe``:
    ``purchased_count_scaled`` and ``comment_count_scaled``. They hold
    ``purchase_count`` and ``commment_count`` (column name as spelled in the
    data) min-max scaled to the range 1-5.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``purchase_count``, ``commment_count`` and ``rating``.
    w1, w2, w3 : numeric
        Percentage weights for the scaled comment count, the scaled purchase
        count and the rating, respectively. Each is divided by 100.

    Returns
    -------
    pandas.Series
        The weighted sum of the three components, one value per row.
    """
    # Purchases are scaled first, then comments, so the helper columns are
    # appended to the frame in that order.
    for source_col, scaled_col in (
        ("purchase_count", "purchased_count_scaled"),
        ("commment_count", "comment_count_scaled"),
    ):
        scaler = MinMaxScaler(feature_range=(1, 5))
        dataframe[scaled_col] = scaler.fit_transform(dataframe[[source_col]])

    comment_term = dataframe["comment_count_scaled"] * w1 / 100
    purchase_term = dataframe["purchased_count_scaled"] * w2 / 100
    rating_term = dataframe["rating"] * w3 / 100
    return comment_term + purchase_term + rating_term

In [11]:
# Store the weighted score as a column. The call also adds the two scaled
# count columns to df as a side effect.
df["weighted_sorting_score"] = weighted_sorting_score(df)

In [12]:
# Preview the frame, now including the scaled columns and the weighted score.
df.head()

Unnamed: 0,course_name,instructor_name,purchase_count,rating,commment_count,5_point,4_point,3_point,2_point,1_point,purchased_count_scaled,comment_count_scaled,weighted_sorting_score
0,(50+ Saat) Python A-Z™: Veri Bilimi ve Machine...,Veri Bilimi Okulu,17380,4.8,4621,3466,924,185,46,6,2.43801,5.0,4.24988
1,Python: Yapay Zeka ve Veri Bilimi için Python ...,Veri Bilimi Okulu,48291,4.6,4488,2962,1122,314,45,45,5.0,4.8847,4.7951
2,5 Saatte Veri Bilimci Olun (Valla Billa),Instructor_1,18693,4.4,2362,1582,567,165,24,24,2.54684,3.04161,3.48349
3,R ile Veri Bilimi ve Machine Learning (35 Saat),Veri Bilimi Okulu,6626,4.6,1027,688,257,51,10,21,1.54669,1.88427,2.93711
4,(2020) Python ile Makine Öğrenmesi (Machine Le...,Veri Bilimi Okulu,11314,4.6,969,717,194,38,10,10,1.93525,1.83398,3.02204


In [13]:
# Courses whose name contains "Veri Bilimi", ranked by weighted score (highest first).
df[df["course_name"].str.contains("Veri Bilimi")].sort_values("weighted_sorting_score",ascending=False).head()

Unnamed: 0,course_name,instructor_name,purchase_count,rating,commment_count,5_point,4_point,3_point,2_point,1_point,purchased_count_scaled,comment_count_scaled,weighted_sorting_score
1,Python: Yapay Zeka ve Veri Bilimi için Python ...,Veri Bilimi Okulu,48291,4.6,4488,2962,1122,314,45,45,5.0,4.8847,4.7951
0,(50+ Saat) Python A-Z™: Veri Bilimi ve Machine...,Veri Bilimi Okulu,17380,4.8,4621,3466,924,185,46,6,2.43801,5.0,4.24988
3,R ile Veri Bilimi ve Machine Learning (35 Saat),Veri Bilimi Okulu,6626,4.6,1027,688,257,51,10,21,1.54669,1.88427,2.93711
7,Veri Bilimi için İstatistik: Python ile İstati...,Veri Bilimi Okulu,929,4.5,126,88,26,9,0,3,1.07451,1.10316,2.52239


# Bayesian Average Rating

Sorting Products by 5-Star Ratings

Sorting Products According to the Distribution of Their 5-Star Ratings

In [16]:
import math
def bayesian_average_rating(n, confidence=0.95):
    """Conservative average star rating computed from per-star vote counts.

    Each star level gets one pseudo-vote (add-one smoothing). The function
    then returns the smoothed mean rating minus ``z`` times the square root
    of the smoothed variance divided by ``N + K + 1``, where ``z`` is the
    two-sided normal quantile for ``confidence``.

    Parameters
    ----------
    n : iterable of numbers
        Vote counts ordered from the lowest star level to the highest
        (position 0 is 1 star, position 1 is 2 stars, ...). Lists, tuples,
        generators and pandas Series are all accepted; only the order of the
        values matters, never their labels.
    confidence : float, default 0.95
        Two-sided confidence level used to derive ``z``.

    Returns
    -------
    float
        The conservative score, or ``0`` when there are no votes at all.
    """
    # Materialise once so the input is consumed positionally. Indexing a
    # label-indexed Series with an integer (``n[k]``) relies on pandas'
    # deprecated positional fallback, so the values are iterated instead.
    counts = list(n)
    N = sum(counts)
    if N == 0:
        return 0
    K = len(counts)
    z = st.norm.ppf(1 - (1 - confidence) / 2)
    first_part = 0.0   # smoothed first moment of the rating
    second_part = 0.0  # smoothed second moment of the rating
    for star, n_k in enumerate(counts, start=1):
        first_part += star * (n_k + 1) / (N + K)
        second_part += star * star * (n_k + 1) / (N + K)
    score = first_part - z * math.sqrt((second_part - first_part * first_part) / (N + K + 1))
    return score

In [17]:
# Star-count columns ordered from 1 star to 5 stars, as the scorer expects.
star_columns = ["1_point", "2_point", "3_point", "4_point", "5_point"]
df["bar_score"] = df.apply(
    lambda row: bayesian_average_rating(row[star_columns]),
    axis=1,
)

In [19]:
# Five rows with the highest bar_score.
df.sort_values("bar_score",ascending=False).head()

Unnamed: 0,course_name,instructor_name,purchase_count,rating,commment_count,5_point,4_point,3_point,2_point,1_point,purchased_count_scaled,comment_count_scaled,weighted_sorting_score,bar_score
19,Alıştırmalarla SQL Öğreniyorum,Veri Bilimi Okulu,3155,4.8,235,200,31,4,0,0,1.25901,1.19766,2.72659,4.72913
0,(50+ Saat) Python A-Z™: Veri Bilimi ve Machine...,Veri Bilimi Okulu,17380,4.8,4621,3466,924,185,46,6,2.43801,5.0,4.24988,4.66586
10,İleri Düzey Excel|Dashboard|Excel İp Uçları,Veri Bilimi Okulu,9554,4.8,2266,1654,499,91,22,0,1.78937,2.95839,3.42792,4.64168
5,Course_1,Instructor_2,4601,4.8,213,164,45,4,0,0,1.37886,1.17859,2.75165,4.63448
4,(2020) Python ile Makine Öğrenmesi (Machine Le...,Veri Bilimi Okulu,11314,4.6,969,717,194,38,10,10,1.93525,1.83398,3.02204,4.59567


# Hybrid Sorting: Weighted Sorting Score + Bayesian Average Rating

In [20]:
def hybrid_sorting_score(dataframe, bar_w=60, wss_w=40):
    """Combine the Bayesian average rating with the weighted sorting score.

    The Bayesian score is computed row by row from the ``1_point`` to
    ``5_point`` columns. The weighted sorting score comes from
    ``weighted_sorting_score(dataframe)``, which also writes its scaled
    helper columns onto ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the star-count columns plus whatever
        ``weighted_sorting_score`` requires.
    bar_w, wss_w : numeric
        Percentage weights for the Bayesian score and the weighted sorting
        score, respectively. Each is divided by 100.

    Returns
    -------
    pandas.Series
        ``bar * bar_w / 100 + wss * wss_w / 100`` for every row.
    """
    star_columns = ["1_point", "2_point", "3_point", "4_point", "5_point"]

    def _row_bar(row):
        # Pass the counts ordered from 1 star to 5 stars.
        return bayesian_average_rating(row[star_columns])

    bayesian_part = dataframe.apply(_row_bar, axis=1)
    weighted_part = weighted_sorting_score(dataframe)
    return bayesian_part * bar_w / 100 + weighted_part * wss_w / 100

In [21]:
# Store the hybrid score using the default weights (60% Bayesian average, 40% weighted sorting score).
df["hybrid_sorting_score"] = hybrid_sorting_score(df)

In [23]:
# Five rows with the highest hybrid score.
df.sort_values("hybrid_sorting_score",ascending=False).head()

Unnamed: 0,course_name,instructor_name,purchase_count,rating,commment_count,5_point,4_point,3_point,2_point,1_point,purchased_count_scaled,comment_count_scaled,weighted_sorting_score,bar_score,hybrid_sorting_score
1,Python: Yapay Zeka ve Veri Bilimi için Python ...,Veri Bilimi Okulu,48291,4.6,4488,2962,1122,314,45,45,5.0,4.8847,4.7951,4.51604,4.62766
0,(50+ Saat) Python A-Z™: Veri Bilimi ve Machine...,Veri Bilimi Okulu,17380,4.8,4621,3466,924,185,46,6,2.43801,5.0,4.24988,4.66586,4.49947
20,Course_9,Instructor_3,12946,4.5,3371,2191,877,203,33,67,2.07051,3.91634,3.68156,4.48063,4.161
10,İleri Düzey Excel|Dashboard|Excel İp Uçları,Veri Bilimi Okulu,9554,4.8,2266,1654,499,91,22,0,1.78937,2.95839,3.42792,4.64168,4.15618
14,Uçtan Uca SQL Server Eğitimi,Veri Bilimi Okulu,12893,4.7,2425,1722,510,145,24,24,2.06612,3.09623,3.50198,4.56816,4.14169
