In [2]:
import pandas as pd
import numpy as np

# Read the cleaned Netflix TV table, then work on a separate copy so the
# frame as loaded from disk (df_raw) is not modified by later cells.
df_raw = pd.read_csv("../data/netflix_tv_clean.csv")
df = df_raw.copy(deep=True)

# (rows, columns) of the working frame.
df.shape

(2020, 19)

In [3]:
# Summary statistics for the three numeric columns used to define a "hit".
summary_cols = ["popularity", "vote_average", "vote_count"]
df[summary_cols].describe()

Unnamed: 0,popularity,vote_average,vote_count
count,2020.0,2020.0,2020.0
mean,5.821791,6.360776,228.4
std,32.098094,2.191729,1035.580796
min,0.0143,0.0,0.0
25%,1.328275,6.1,8.0
50%,2.79805,6.9495,30.0
75%,5.2424,7.56475,111.0
max,1393.7498,10.0,20079.0


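A “hit” is a title whose popularity is in the top 10% (at or above the 90th percentile) and whose vote_count is at least the median. The vote-count floor excludes titles whose popularity spike is backed by only a handful of votes.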

In [4]:
# Cut-offs for the "hit" label: the 90th percentile of popularity and the
# median of vote_count, both computed over the whole working frame.
popularity_col = df["popularity"]
vote_count_col = df["vote_count"]

pop_threshold = popularity_col.quantile(q=0.90)
vc_threshold = vote_count_col.median()

(pop_threshold, vc_threshold)

(np.float64(10.992040000000003), np.float64(30.0))

In [None]:
# A row is a hit (1) only when it clears BOTH cut-offs; otherwise it is 0.
is_popular = df["popularity"].ge(pop_threshold)
has_enough_votes = df["vote_count"].ge(vc_threshold)
df["hit"] = (is_popular & has_enough_votes).astype(int)

# Class balance of the new label.
df["hit"].value_counts()

hit
0    1837
1     183
Name: count, dtype: int64

In [9]:
# Show the first few rows of each class, hits first, restricted to the
# columns that drive the label.
preview_cols = ["name", "popularity", "vote_count"]
for banner, hit_flag in (
    ("==========HIT==========", 1),
    ("========NOT HIT========", 0),
):
    print(banner)
    display(df[df["hit"] == hit_flag][preview_cols].head())



Unnamed: 0,name,popularity,vote_count
0,Stranger Things,1393.7498,20079
1,Cashero,127.2497,42
2,Raw,120.4042,347
3,Lucifer,96.9141,15239
4,The Witcher,88.908,6470




Unnamed: 0,name,popularity,vote_count
5,Land of Sin,74.8078,24
10,CID,59.1553,27
14,Run Away,54.6964,24
36,Time Flies,30.5668,2
57,Pen Tor,24.3957,10


In [10]:
# Persist the labelled frame (including the new "hit" column) without
# writing the row index as an extra column.
hit_csv_path = "../data/netflix_tv_with_hit.csv"
df.to_csv(hit_csv_path, index=False)

In [11]:
# Full-width look at the first rows labelled as hits.
hit_mask = df["hit"].eq(1)
df[hit_mask].head()

Unnamed: 0,adult,backdrop_path,genre_ids,id,origin_country,original_language,original_name,overview,popularity,poster_path,first_air_date,name,vote_average,vote_count,genres,year,month,day,primary_country,hit
0,False,/8zbAoryWbtH0DKdev8abFAjdufy.jpg,"[10765, 9648, 10759]",66732,['US'],en,Stranger Things,"When a young boy vanishes, a small town uncove...",1393.7498,/uOOtwVbSr4QDjAGIifLDwpb2Pdl.jpg,2016-07-15,Stranger Things,8.588,20079,"['Sci-Fi & Fantasy', 'Mystery', 'Action & Adve...",2016.0,7.0,15.0,US,1
1,False,/lr0h0OBj7QOEvkaX5g3JWoaKZsI.jpg,"[10759, 18]",238458,['KR'],ko,캐셔로,An ordinary man who inherits super strength di...,127.2497,/dm6RoaKgkGUxckamMDzsbqtLhFv.jpg,2025-12-26,Cashero,7.274,42,"['Action & Adventure', 'Drama']",2025.0,12.0,26.0,KR,1
2,False,/n69QxkZ0RRc6PMNnxD9U0MZnLzl.jpg,[10764],4656,['US'],en,Raw,"A regularly scheduled, live, year-round progra...",120.4042,/pv5WNnLUo7mpT8k901Lo8UovrqI.jpg,1993-01-11,Raw,6.787,347,['Reality'],1993.0,1.0,11.0,US,1
3,False,/ncftkNAjIz2PBbUMY7T0CHVJP8d.jpg,"[80, 10765]",63174,['US'],en,Lucifer,"Bored and unhappy as the Lord of Hell, Lucifer...",96.9141,/ekZobS8isE6mA53RAiGDG93hBxL.jpg,2016-01-25,Lucifer,8.437,15239,"['Crime', 'Sci-Fi & Fantasy']",2016.0,1.0,25.0,US,1
4,False,/foGkPxpw9h8zln81j63mix5B7m8.jpg,"[18, 10759]",71912,['US'],en,The Witcher,"Geralt of Rivia, a mutated monster-hunter for ...",88.908,/AoGsDM02UVt0npBA8OvpDcZbaMi.jpg,2019-12-20,The Witcher,7.958,6470,"['Drama', 'Action & Adventure']",2019.0,12.0,20.0,US,1
