In [None]:
# SORTING REVIEWS

In [1]:
import pandas as pd
import math
import scipy.stats as st

pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

In [None]:
# Up-Down Diff Score = (up ratings) − (down ratings)

In [None]:
# Review 1: 600 up 400 down total 1000
# Review 2: 5500 up 4500 down total 10000

In [3]:
def score_up_down_diff(up,down):
return up - down 

# Review 1 Score:
score_up_down_diff(600, 400)

# Review 2 Score
score_up_down_diff(5500, 4500)

1000

In [None]:
# Score = Average rating = (up ratings) / (all ratings)

In [4]:
def score_average_rating(up, down):
    if up + down == 0:
        return 0
    return up / (up + down)

score_average_rating(600, 400)
score_average_rating(5500, 4500)

0.55

In [5]:
# Review 1: 2 up 0 down total 2
# Review 2: 100 up 1 down total 101

score_average_rating(2, 0)
score_average_rating(100, 1)

0.9900990099009901

In [None]:
# Wilson Lower Bound Score

    """
    Wilson Lower Bound Score hesapla

    - Bernoulli parametresi p için hesaplanacak güven aralığının alt sınırı WLB skoru olarak kabul edilir.
    - Hesaplanacak skor ürün sıralaması için kullanılır.
    - Not:
    Eğer skorlar 1-5 arasıdaysa 1-3 negatif, 4-5 pozitif olarak işaretlenir ve bernoulli'ye uygun hale getirilebilir.
    Bu beraberinde bazı problemleri de getirir. Bu sebeple bayesian average rating yapmak gerekir.

    Parameters
    ----------
    up: int
        up count
    down: int
        down count
    confidence: float
        confidence

    Returns
    -------
    wilson score: float

In [9]:
# 600-400
# 0.6
# 0.5 0.7
# 0.5

def wilson_lower_bound(up, down, confidence=0.95):
    n = up + down
    if n == 0:
        return 0
    z = st.norm.ppf(1 - (1 - confidence) / 2)
    phat = 1.0 * up / n
    return (phat + z * z / (2 * n) - z * math.sqrt((phat * (1 - phat) + z * z / (4 * n)) / n)) / (1 + z * z / n)


wilson_lower_bound(600, 400)
wilson_lower_bound(5500, 4500)

wilson_lower_bound(2, 0)
wilson_lower_bound(100, 1)


0.9460328420055449

In [None]:
# Case Study

In [10]:
up = [15, 70, 14, 4, 2, 5, 8, 37, 21, 52, 28, 147, 61, 30, 23, 40, 37, 61, 54, 18, 12, 68]
down = [0, 2, 2, 2, 15, 2, 6, 5, 23, 8, 12, 2, 1, 1, 5, 1, 2, 6, 2, 0, 2, 2]

comments = pd.DataFrame({"up": up, "down": down})

In [15]:
# score_pos_neg_diff
comments["score_pos_neg_diff"] = comments.apply(lambda x: score_up_down_diff(x["up"],
                                            x["down"]), axis=1)
comments["score_pos_neg_diff"]

0     15.00000
1     68.00000
2     12.00000
3      2.00000
4    -13.00000
5      3.00000
6      2.00000
7     32.00000
8     -2.00000
9     44.00000
10    16.00000
11   145.00000
12    60.00000
13    29.00000
14    18.00000
15    39.00000
16    35.00000
17    55.00000
18    52.00000
19    18.00000
20    10.00000
21    66.00000
Name: score_pos_neg_diff, dtype: float64

In [13]:
# score_average_rating
comments["score_average_rating"] = comments.apply(lambda x: score_average_rating(x["up"], x["down"]), axis=1)

comments["score_average_rating"] 

0    1.00000
1    0.97222
2    0.87500
3    0.66667
4    0.11765
5    0.71429
6    0.57143
7    0.88095
8    0.47727
9    0.86667
10   0.70000
11   0.98658
12   0.98387
13   0.96774
14   0.82143
15   0.97561
16   0.94872
17   0.91045
18   0.96429
19   1.00000
20   0.85714
21   0.97143
Name: score_average_rating, dtype: float64

In [16]:
# wilson_lower_bound
comments["wilson_lower_bound"] = comments.apply(lambda x: wilson_lower_bound(x["up"], x["down"]), axis=1)

comments["wilson_lower_bound"]

0    0.79612
1    0.90426
2    0.63977
3    0.29999
4    0.03288
5    0.35893
6    0.32591
7    0.75000
8    0.33755
9    0.75835
10   0.54570
11   0.95238
12   0.91413
13   0.83806
14   0.64409
15   0.87405
16   0.83114
17   0.81807
18   0.87881
19   0.82412
20   0.60059
21   0.90168
Name: wilson_lower_bound, dtype: float64

In [17]:
comments.sort_values("wilson_lower_bound", ascending=False)

Unnamed: 0,up,down,score_pos_neg_diff,score_average_rating,wilson_lower_bound
11,147,2,145.0,0.98658,0.95238
12,61,1,60.0,0.98387,0.91413
1,70,2,68.0,0.97222,0.90426
21,68,2,66.0,0.97143,0.90168
18,54,2,52.0,0.96429,0.87881
15,40,1,39.0,0.97561,0.87405
13,30,1,29.0,0.96774,0.83806
16,37,2,35.0,0.94872,0.83114
19,18,0,18.0,1.0,0.82412
17,61,6,55.0,0.91045,0.81807
