In [15]:
# IMDB Movie Scoring and Sorting
import pandas as pd
import math
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler
# Load the raw movie metadata. low_memory=False asks pandas not to parse the
# file in chunks, which avoids mixed-dtype inference within a column.
df = pd.read_csv(filepath_or_buffer="movies_metadata.csv", low_memory=False)

In [16]:
# Keep only the three columns the scoring steps below read.
df = df.loc[:, ["title", "vote_average", "vote_count"]]

In [9]:
# Rank by vote_average alone, highest first, and show the top 20 rows.
df.sort_values(by="vote_average", ascending=False).iloc[:20]

Unnamed: 0,title,vote_average,vote_count
21642,Ice Age Columbus: Who Were the First Americans?,10.0,1.0
15710,If God Is Willing and da Creek Don't Rise,10.0,1.0
22396,Meat the Truth,10.0,1.0
22395,Marvin Hamlisch: What He Did For Love,10.0,1.0
35343,Elaine Stritch: At Liberty,10.0,1.0
186,Reckless,10.0,1.0
45047,The Human Surge,10.0,1.0
22377,The Guide,10.0,1.0
22346,هیچ کجا هیچ کس,10.0,1.0
1634,Other Voices Other Rooms,10.0,1.0


In [17]:
# Summarise the vote_count distribution (mean, std, min/max) at a custom set
# of percentiles that emphasises the upper tail.
df["vote_count"].describe(
    percentiles=[0.10, 0.25, 0.50, 0.70, 0.80, 0.90, 0.95, 0.99]
).T

count    45460.000000
mean       109.897338
std        491.310374
min          0.000000
10%          1.000000
25%          3.000000
50%         10.000000
70%         25.000000
80%         50.000000
90%        160.000000
95%        434.000000
99%       2183.820000
max      14075.000000
Name: vote_count, dtype: float64

In [11]:
# Restrict to movies with more than 400 votes, then rank those by
# vote_average (highest first) and show the top 20.
(
    df.loc[df["vote_count"] > 400]
    .sort_values(by="vote_average", ascending=False)
    .head(20)
)

Unnamed: 0,title,vote_average,vote_count
10309,Dilwale Dulhania Le Jayenge,9.1,661.0
40251,Your Name.,8.5,1030.0
834,The Godfather,8.5,6024.0
314,The Shawshank Redemption,8.5,8358.0
1152,One Flew Over the Cuckoo's Nest,8.3,3001.0
1176,Psycho,8.3,2405.0
1178,The Godfather: Part II,8.3,3418.0
292,Pulp Fiction,8.3,8670.0
1184,Once Upon a Time in America,8.3,1104.0
5481,Spirited Away,8.3,3968.0


In [18]:
# Rescale vote_count onto the range [1, 10] so it can be combined with the
# rating; the scaler is fitted and applied on the same single column.
df["vote_count_score"] = MinMaxScaler(feature_range=(1, 10)).fit_transform(
    df[["vote_count"]]
)

In [19]:
# Combine the two signals: rating multiplied by the 1-10 scaled vote count.
df["average_count_score"] = df["vote_average"].mul(df["vote_count_score"])

In [21]:
# Rank by the combined score, highest first, and show the top 20 rows.
df.sort_values(by="average_count_score", ascending=False).iloc[:20]

Unnamed: 0,title,vote_average,vote_count,vote_count_score,average_count_score
15480,Inception,8.1,14075.0,10.0,81.0
12481,The Dark Knight,8.3,12269.0,8.845187,73.415048
22879,Interstellar,8.1,11187.0,8.153321,66.041904
17818,The Avengers,7.4,12000.0,8.673179,64.181528
14551,Avatar,7.2,12114.0,8.746075,62.971737
26564,Deadpool,7.4,11444.0,8.317655,61.55065
2843,Fight Club,8.3,9678.0,7.188419,59.663879
20051,Django Unchained,7.8,10297.0,7.584227,59.156973
23753,Guardians of the Galaxy,7.9,10014.0,7.403268,58.485819
292,Pulp Fiction,8.3,8670.0,6.543872,54.314139


In [22]:
# Topic 2
# IMDB Weighted Rating
# IMDB's own formula

In [24]:
# weighted_rating = (v/(v+M) * r) + (M/(v+M) * C)
# r = average rating of the movie
# v = vote count: number of votes the movie received
# M = minimum votes required to be listed in the Top 250
# C = the mean vote across the whole report (currently 7.0), i.e. the population mean

In [25]:
# M: vote-count threshold plugged into the weighted-rating formula.
# C: mean of vote_average over every row currently in df.
M = 2500
C = df.loc[:, "vote_average"].mean()

In [26]:
def weight_rating(r, v, M, C):
    """Blend an item's own rating with a prior mean, weighted by vote count.

    Evaluates ``v/(v+M) * r + M/(v+M) * C``.

    Args:
        r: Average rating of the item.
        v: Number of votes the item received.
        M: Vote-count constant; the larger it is relative to ``v``, the more
            the result is pulled toward ``C``.
        C: Mean rating used as the prior.

    Returns:
        The weighted rating. The arithmetic is plain ``+``, ``/`` and ``*``,
        so the result type follows from the operand types.
    """
    total_votes = v + M
    own_share = v / total_votes
    prior_share = M / total_votes
    return (own_share * r) + (prior_share * C)

In [27]:
# Apply the weighted-rating formula to the rating and vote-count columns and
# store the result as a new column.
df["weighted_rating"] = weight_rating(
    r=df["vote_average"],
    v=df["vote_count"],
    M=M,
    C=C,
)

In [30]:
# Rank by the weighted rating, highest first, and show the top 10 rows.
df.sort_values(by="weighted_rating", ascending=False).iloc[:10]

Unnamed: 0,title,vote_average,vote_count,vote_count_score,average_count_score,weighted_rating
12481,The Dark Knight,8.3,12269.0,8.845187,73.415048,7.846044
314,The Shawshank Redemption,8.5,8358.0,6.344369,53.92714,7.836482
2843,Fight Club,8.3,9678.0,7.188419,59.663879,7.74946
15480,Inception,8.1,14075.0,10.0,81.0,7.725672
292,Pulp Fiction,8.3,8670.0,6.543872,54.314139,7.699778
834,The Godfather,8.5,6024.0,4.851936,41.241456,7.6548
22879,Interstellar,8.1,11187.0,8.153321,66.041904,7.646688
351,Forrest Gump,8.2,8147.0,6.209449,50.917485,7.593775
7000,The Lord of the Rings: The Return of the King,8.1,8226.0,6.259964,50.705712,7.521547
4863,The Lord of the Rings: The Fellowship of the Ring,8.0,8892.0,6.685826,53.486607,7.47731


In [None]:
# Topic 3
# Bayesian Average Rating (BAR) score
# The ranking method in use since 2015

In [31]:
def bayesian_average_rating(n, confidence=0.95):
    """Lower confidence bound of the Bayesian average rating of a star histogram.

    Args:
        n: Iterable of vote counts read strictly by position: the item at
            0-based position ``k`` is the number of votes for ``k + 1`` stars.
            Any index labels on the container are ignored.
        confidence: Two-sided confidence level used to pick the standard
            normal quantile ``z``.

    Returns:
        ``0`` when there are no votes at all. Otherwise the expected rating,
        computed with one pseudo-vote added to every star level, minus ``z``
        standard deviations of that estimate.
    """
    # Materialise once so counts are consumed by position. Indexing ``n[k]``
    # with an integer would be a *label* lookup on label-indexed containers
    # (e.g. a pandas Series keyed by "one".."ten").
    counts = list(n)
    N = sum(counts)
    if N == 0:
        return 0
    K = len(counts)
    z = st.norm.ppf(1 - (1 - confidence) / 2)
    first_part = 0.0   # expected star value
    second_part = 0.0  # expected squared star value
    for k, n_k in enumerate(counts):
        first_part += (k + 1) * (n_k + 1) / (N + K)
        second_part += (k + 1) * (k + 1) * (n_k + 1) / (N + K)

    # Guard against a tiny negative value from floating-point rounding, which
    # would make math.sqrt raise.
    variance = max(second_part - first_part * first_part, 0.0)
    score = first_part - z * math.sqrt(variance / (N + K + 1))
    return score

In [33]:
# The per-star rating distribution comes from a separate file.
# Keep every row and drop the first column.
df = pd.read_csv("imdb_ratings.csv").iloc[:, 1:]

In [37]:
# Score every movie from its star-count distribution. The column order
# (one ... ten) matters: bayesian_average_rating treats position k as the
# count of (k + 1)-star votes.
star_columns = ["one","two","three","four","five","six","seven","eight","nine","ten"]
df["bar_score"] = df.apply(
    lambda row: bayesian_average_rating(row[star_columns]),
    axis=1,
)

In [38]:
# Rank by the BAR score, highest first, and show the top 10 rows.
df.sort_values(by="bar_score", ascending=False).iloc[:10]

Unnamed: 0,id,movieName,rating,ten,nine,eight,seven,six,five,four,three,two,one,bar_score
0,111161,1. The Shawshank Redemption (1994),9.2,1295382,600284,273091,87368,26184,13515,6561,4704,4355,34733,9.145389
1,68646,2. The Godfather (1972),9.1,837932,402527,199440,78541,30016,16603,8419,6268,5879,37128,8.940016
3,468569,4. The Dark Knight (2008),9.0,1034863,649123,354610,137748,49483,23237,11429,8082,7173,30345,8.895962
2,71562,3. The Godfather: Part II (1974),9.0,486356,324905,175507,70847,26349,12657,6210,4347,3892,20469,8.812499
4,50083,5. 12 Angry Men (1957),8.9,246765,225437,133998,48341,15773,6278,2866,1723,1478,8318,8.767934
6,167260,7. The Lord of the Rings: The Return of ...,8.9,703093,433087,270113,117411,44760,21818,10873,7987,6554,28990,8.752038
5,108052,6. Schindler's List (1993),8.9,453906,383584,220586,82367,27219,12922,6234,4572,4289,19328,8.743609
11,109830,12. Forrest Gump (1994),8.8,622104,553654,373644,151284,51140,22720,11692,7647,5941,12110,8.699152
12,1375666,13. Inception (2010),8.7,724798,627987,408686,174229,60668,26910,13436,8703,6932,17621,8.693148
10,137523,11. Fight Club (1999),8.8,637087,572654,371752,152295,53059,24755,12648,8606,6948,17435,8.674475
