# Library

In [114]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import ast

# Import Data

In [115]:
# Load the raw Netflix titles table. The file is semicolon-separated and is
# decoded as Latin-1.
raw_csv_path = '../data/data-raw/netflix_titles.csv'
df_data = pd.read_csv(raw_csv_path, sep=';', encoding='latin1')
df_data

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
...,...,...,...,...,...,...,...,...,...,...,...,...
8804,s8803,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",2007,R,158,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a..."
8805,s8804,TV Show,Zombie Dumb,,,,"July 1, 2019",2018,TV-Y7,2,"Kids' TV, Korean TV Shows, TV Comedies","While living alone in a spooky town, a young g..."
8806,s8805,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",2009,R,88,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
8807,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."


# Simple EDA

In [116]:
# Show the dtype pandas inferred for each column of the raw table.
# release_year and duration come in as object here; they are converted to
# integers later in the Data Manipulation section.
df_data.dtypes

show_id         object
type            object
title           object
director        object
cast            object
country         object
date_added      object
release_year    object
rating          object
duration        object
listed_in       object
description     object
dtype: object

In [117]:
# (rows, columns) of the raw table.
df_data.shape

(8809, 12)

In [118]:
# Count missing values per column of the raw table.
df_data.isna().sum()

show_id            0
type               1
title              2
director        2636
cast             826
country          833
date_added        12
release_year       2
rating             6
duration           5
listed_in          3
description        3
dtype: int64

In [119]:
# Count rows that are exact duplicates of an earlier row across ALL columns.
# NOTE(review): show_id is still part of the frame at this point, so two rows
# with the same content but different ids would not be counted here.
# Consider re-checking with subset=[...] if content duplicates matter.
df_data.duplicated().sum()

np.int64(0)

# Data Manipulation

In [120]:
# Remove the columns that are not carried into the processed dataset.
columns_to_drop = ['show_id', 'date_added', 'rating']
df_data = df_data.drop(columns_to_drop, axis=1)

# Confirm the new shape and list the remaining columns.
print(df_data.shape)
df_data.columns

(8809, 9)


Index(['type', 'title', 'director', 'cast', 'country', 'release_year',
       'duration', 'listed_in', 'description'],
      dtype='object')

In [121]:
# Replace missing director / cast / country values with a text placeholder so
# these rows survive the later dropna().
# NOTE(review): 'Unknow' looks like a typo for 'Unknown'. It is kept unchanged
# because the value ends up in the exported file; fix it together with any
# downstream consumer that matches on it.
placeholder_columns = ['director', 'cast', 'country']
df_data[placeholder_columns] = df_data[placeholder_columns].fillna('Unknow')

# Remaining missing values per column.
df_data.isna().sum()

type            1
title           2
director        0
cast            0
country         0
release_year    2
duration        5
listed_in       3
description     3
dtype: int64

In [122]:
# Drop every row that still has at least one missing value. The defaults are
# spelled out here for clarity: rows (axis=0), any missing cell (how='any').
df_data.dropna(axis=0, how='any', inplace=True)

# Every column should now report zero missing values.
df_data.isna().sum()

type            0
title           0
director        0
cast            0
country         0
release_year    0
duration        0
listed_in       0
description     0
dtype: int64

In [124]:
# Convert release_year to integer.
# pd.to_numeric parses the raw strings directly (no need to evaluate them as
# Python literals) and raises a ValueError if a value is not numeric.
df_data['release_year'] = pd.to_numeric(df_data['release_year']).astype(int)

# Convert duration to integer.
# NOTE(review): Movie rows hold values such as 90 or 158 while TV Show rows
# hold values such as 1 or 2, so this column is presumably minutes for movies
# and a season count for shows - confirm against the data source.
df_data['duration'] = pd.to_numeric(df_data['duration']).astype(int)

# Split listed_in into a list of trimmed category names.
# Values that are already lists are kept unchanged so that re-running this
# cell does not replace the column with empty lists; anything else becomes [].
df_data['listed_in'] = df_data['listed_in'].apply(
    lambda x: [i.strip() for i in x.split(',')] if isinstance(x, str)
    else x if isinstance(x, list)
    else []
)


df_data.dtypes

type            object
title           object
director        object
cast            object
country         object
release_year     int64
duration         int64
listed_in       object
description     object
dtype: object

# Final Preview

In [126]:
# Display the cleaned table (pandas truncates the middle rows) as a last
# visual check before exporting.
df_data

Unnamed: 0,type,title,director,cast,country,release_year,duration,listed_in,description
0,Movie,Dick Johnson Is Dead,Kirsten Johnson,Unknow,United States,2020,90,[Documentaries],"As her father nears the end of his life, filmm..."
1,TV Show,Blood & Water,Unknow,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,2021,2,"[International TV Shows, TV Dramas, TV Mysteries]","After crossing paths at a party, a Cape Town t..."
2,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",Unknow,2021,1,"[Crime TV Shows, International TV Shows, TV Ac...",To protect his family from a powerful drug lor...
3,TV Show,Jailbirds New Orleans,Unknow,Unknow,Unknow,2021,1,"[Docuseries, Reality TV]","Feuds, flirtations and toilet talk go down amo..."
4,TV Show,Kota Factory,Unknow,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,2021,2,"[International TV Shows, Romantic TV Shows, TV...",In a city of coaching centers known to train I...
...,...,...,...,...,...,...,...,...,...
8804,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,2007,158,"[Cult Movies, Dramas, Thrillers]","A political cartoonist, a crime reporter and a..."
8805,TV Show,Zombie Dumb,Unknow,Unknow,Unknow,2018,2,"[Kids' TV, Korean TV Shows, TV Comedies]","While living alone in a spooky town, a young g..."
8806,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,2009,88,"[Comedies, Horror Movies]",Looking to survive in a world taken over by zo...
8807,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,2006,88,"[Children & Family Movies, Comedies]","Dragged from civilian life, a former superhero..."


# Export

In [127]:
# Export the cleaned Netflix titles table without the index column.
# The file name now matches the dataset; the earlier name
# 'spotify_playlist_2010_2023.xlsx' was unrelated to this data, so any
# downstream reader that still opens the old name must be updated.
# NOTE(review): listed_in holds Python lists at this point - verify how they
# are serialized in the workbook and parsed back by consumers.
df_data.to_excel('../data/data-processed/netflix_titles.xlsx', index=False)