In [21]:
# Hybrid Sorting
# Combines the two scoring functions from the earlier sections into a single hybrid score and ranks the rows by it.
import pandas as pd
import math
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler
# Load the table that every later cell reads and extends; the path is relative to the working directory.
df = pd.read_csv("product_sorting.csv")

In [22]:
def bayesian_average_rating(n,confidence=0.95):
    """Return a conservative (lower-bound) estimate of the average star rating.

    Each rating level receives one pseudo-count, the mean and variance of the
    resulting rating distribution are computed, and ``z`` standard errors are
    subtracted from the mean, where ``z`` is the two-sided standard-normal
    quantile for ``confidence``.

    Parameters
    ----------
    n : iterable of numbers
        Vote counts ordered by rating level: position 0 holds the count of
        1-star votes, position 1 the count of 2-star votes, and so on.
        Only the iteration order matters; any index labels (for example on a
        pandas Series) are ignored.
    confidence : float, default 0.95
        Two-sided confidence level used to pick the normal quantile.

    Returns
    -------
    float
        The lower-bound score, or ``0`` when there are no votes at all.
    """
    # Materialise the counts once and work purely by position. Indexing a
    # label-indexed pandas Series with n[k] relies on pandas' deprecated
    # integer-key fallback and does a *label* lookup on integer-indexed Series.
    counts = list(n)
    N = sum(counts)
    if N == 0:
        return 0
    K = len(counts)
    z = st.norm.ppf(1-(1-confidence) / 2)
    first_part = 0.0   # running mean of the smoothed rating distribution
    second_part = 0.0  # running second moment of the same distribution
    for star, n_k in enumerate(counts, start=1):
        # (n_k + 1) / (N + K): share of this rating level after adding one
        # pseudo-vote to each of the K levels.
        first_part += star * (n_k + 1) / (N + K)
        second_part += star * star * (n_k + 1) / (N + K)

    # Mean minus z standard errors; the variance is second moment - mean^2.
    score = first_part - z * math.sqrt((second_part-first_part * first_part) / (N+K+1))
    return score

In [23]:
# Min-max rescale the raw purchase counts into the range 1-5
# (presumably so they are comparable with the "rating" column -- confirm).
df["purchase_count_scaled"] = MinMaxScaler(feature_range=(1, 5)).fit_transform(
    df[["purchase_count"]]
)

# Same rescaling for the comment counts. NOTE: the source column really is
# spelled "commment_count" (three m's) in the data; only the derived column
# uses the corrected spelling.
df["comment_count_scaled"] = MinMaxScaler(feature_range=(1, 5)).fit_transform(
    df[["commment_count"]]
)

def weighted_sorting_score(df,w1=32,w2=26,w3=42):
    """Blend scaled comment count, scaled purchase count and rating.

    Parameters
    ----------
    df : mapping of column name -> values (e.g. a pandas DataFrame)
        Must provide "comment_count_scaled", "purchase_count_scaled" and
        "rating".
    w1, w2, w3 : numbers
        Percentage weights for the comment, purchase and rating terms
        respectively. The defaults add up to 100.

    Returns
    -------
    The weighted sum, with the same shape as the input columns.
    """
    comment_term = df["comment_count_scaled"] * w1 / 100
    purchase_term = df["purchase_count_scaled"] * w2 / 100
    rating_term = df["rating"] * w3 / 100
    return comment_term + purchase_term + rating_term

In [24]:
# "wss" is the three-variable weighted sorting score; "bar" is the Bayesian average rating.
def hybrid_sorting_score(df,bar_w=60,wss_w=40):
    """Combine the Bayesian average rating with the weighted sorting score.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the vote-count columns "1_point" .. "5_point" plus every column
        that ``weighted_sorting_score`` reads.
    bar_w, wss_w : numbers
        Percentage weights for the Bayesian average rating and the weighted
        sorting score respectively.

    Returns
    -------
    One hybrid score per row of ``df``.
    """
    # Ascending star order: bayesian_average_rating treats position k as the
    # count of (k+1)-star votes.
    point_columns = ["1_point", "2_point", "3_point", "4_point", "5_point"]

    def _row_bar(row):
        return bayesian_average_rating(row[point_columns])

    bar_score = df.apply(_row_bar, axis=1)
    wss_score = weighted_sorting_score(df)

    return bar_score*bar_w/100 + wss_score*wss_w/100

In [25]:
# Store the hybrid score (default 60/40 weighting) as a new column on df.
df["hybrid_sorting_score"] = hybrid_sorting_score(df)

In [27]:
# Display the 20 rows with the highest hybrid score, best first.
(
    df
    .sort_values(by="hybrid_sorting_score", ascending=False)
    .head(20)
)

Unnamed: 0,course_name,instructor_name,purchase_count,rating,commment_count,5_point,4_point,3_point,2_point,1_point,purchase_count_scaled,comment_count_scaled,hybrid_sorting_score
1,Python: Yapay Zeka ve Veri Bilimi için Python ...,Veri Bilimi Okulu,48291,4.6,4488,2962,1122,314,45,45,5.0,4.884699,4.627664
0,(50+ Saat) Python A-Z™: Veri Bilimi ve Machine...,Veri Bilimi Okulu,17380,4.8,4621,3466,924,185,46,6,2.438014,5.0,4.499468
20,Course_9,Instructor_3,12946,4.5,3371,2191,877,203,33,67,2.070512,3.916342,4.161001
10,İleri Düzey Excel|Dashboard|Excel İp Uçları,Veri Bilimi Okulu,9554,4.8,2266,1654,499,91,22,0,1.789374,2.958388,4.156176
14,Uçtan Uca SQL Server Eğitimi,Veri Bilimi Okulu,12893,4.7,2425,1722,510,145,24,24,2.06612,3.096229,4.141691
2,5 Saatte Veri Bilimci Olun (Valla Billa),Instructor_1,18693,4.4,2362,1582,567,165,24,24,2.546839,3.041612,4.102524
15,Uygulamalarla SQL Öğreniyorum,Veri Bilimi Okulu,11397,4.5,2353,1435,705,165,24,24,1.942127,3.03381,4.019195
4,(2020) Python ile Makine Öğrenmesi (Machine Le...,Veri Bilimi Okulu,11314,4.6,969,717,194,38,10,10,1.935248,1.833984,3.96622
19,Alıştırmalarla SQL Öğreniyorum,Veri Bilimi Okulu,3155,4.8,235,200,31,4,0,0,1.259008,1.197659,3.928114
5,Course_1,Instructor_2,4601,4.8,213,164,45,4,0,0,1.378857,1.178587,3.881346


In [28]:
# Same ranking, restricted to rows whose course name contains "Veri Bilimi".
(
    df.loc[df["course_name"].str.contains("Veri Bilimi")]
    .sort_values(by="hybrid_sorting_score", ascending=False)
    .head(20)
)

Unnamed: 0,course_name,instructor_name,purchase_count,rating,commment_count,5_point,4_point,3_point,2_point,1_point,purchase_count_scaled,comment_count_scaled,hybrid_sorting_score
1,Python: Yapay Zeka ve Veri Bilimi için Python ...,Veri Bilimi Okulu,48291,4.6,4488,2962,1122,314,45,45,5.0,4.884699,4.627664
0,(50+ Saat) Python A-Z™: Veri Bilimi ve Machine...,Veri Bilimi Okulu,17380,4.8,4621,3466,924,185,46,6,2.438014,5.0,4.499468
3,R ile Veri Bilimi ve Machine Learning (35 Saat),Veri Bilimi Okulu,6626,4.6,1027,688,257,51,10,21,1.546694,1.884265,3.86409
7,Veri Bilimi için İstatistik: Python ile İstati...,Veri Bilimi Okulu,929,4.5,126,88,26,9,0,3,1.074512,1.103164,3.614267
