In [46]:
import pandas as ps
from slugify import slugify

# Importar Dados de Títulos do IMDB

In [47]:
df_imdb_titles = ps.read_csv('./data/title.basics.tsv', 
                             sep='\t', 
                             na_values='\\N', 
                             encoding='utf8', 
                             dtype=str)
df_imdb_titles.head(10)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,,5,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,,1,"Comedy,Short"
5,tt0000006,short,Chinese Opium Den,Chinese Opium Den,0,1894,,1,Short
6,tt0000007,short,Corbett and Courtney Before the Kinetograph,Corbett and Courtney Before the Kinetograph,0,1894,,1,"Short,Sport"
7,tt0000008,short,Edison Kinetoscopic Record of a Sneeze,Edison Kinetoscopic Record of a Sneeze,0,1894,,1,"Documentary,Short"
8,tt0000009,movie,Miss Jerry,Miss Jerry,0,1894,,45,Romance
9,tt0000010,short,Leaving the Factory,La sortie de l'usine Lumière à Lyon,0,1895,,1,"Documentary,Short"


## Filtrar apenas filmes e séries

In [48]:
types = df_imdb_titles['titleType'].unique()
types
df_imdb_titles = df_imdb_titles[df_imdb_titles['titleType'].isin(['movie', 'tvSeries'])]

## Converter dados
- `startYear` de float para inteiro
- `endYear` de float para inteiro
- `isAdult` de float para booleano

In [49]:
df_imdb_titles['startYear'] = ps.to_numeric(df_imdb_titles['startYear'], errors='coerce', downcast='integer')
df_imdb_titles['endYear'] = ps.to_numeric(df_imdb_titles['endYear'], errors='coerce', downcast='integer')
df_imdb_titles['isAdult'] = df_imdb_titles['isAdult'].apply(lambda x: False if x == 0 else True)
df_imdb_titles['primaryTitle'] = df_imdb_titles['primaryTitle'].astype(str)

## Criar slug do título

In [50]:
df_imdb_titles.dtypes
df_imdb_titles['primaryTitleSlug'] = df_imdb_titles['primaryTitle'].fillna('').apply(lambda x: slugify(str(x)))

## Criar slug dos gêneros

In [51]:
df_imdb_titles['genres'] = df_imdb_titles['genres'].fillna('').str.split(',').apply(lambda x:[slugify(value) for value in x])

In [52]:
df_imdb_titles.head(10)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,primaryTitleSlug
8,tt0000009,movie,Miss Jerry,Miss Jerry,True,1894.0,,45.0,[romance],miss-jerry
144,tt0000147,movie,The Corbett-Fitzsimmons Fight,The Corbett-Fitzsimmons Fight,True,1897.0,,100.0,"[documentary, news, sport]",the-corbett-fitzsimmons-fight
498,tt0000502,movie,Bohemios,Bohemios,True,1905.0,,100.0,[],bohemios
570,tt0000574,movie,The Story of the Kelly Gang,The Story of the Kelly Gang,True,1906.0,,70.0,"[action, adventure, biography]",the-story-of-the-kelly-gang
587,tt0000591,movie,The Prodigal Son,L'enfant prodigue,True,1907.0,,90.0,[drama],the-prodigal-son
610,tt0000615,movie,Robbery Under Arms,Robbery Under Arms,True,1907.0,,,[drama],robbery-under-arms
625,tt0000630,movie,Hamlet,Amleto,True,1908.0,,,[drama],hamlet
668,tt0000675,movie,Don Quijote,Don Quijote,True,1908.0,,,[drama],don-quijote
672,tt0000679,movie,The Fairylogue and Radio-Plays,The Fairylogue and Radio-Plays,True,1908.0,,120.0,"[adventure, fantasy]",the-fairylogue-and-radio-plays
828,tt0000838,movie,A Cultura do Cacau,A Cultura do Cacau,True,1909.0,,,[],a-cultura-do-cacau


## Normalizar tipo

In [53]:
df_imdb_titles['titleType'] = df_imdb_titles['titleType'].fillna('').apply(lambda x: slugify(str(x)))
df_imdb_titles.head(10)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,primaryTitleSlug
8,tt0000009,movie,Miss Jerry,Miss Jerry,True,1894.0,,45.0,[romance],miss-jerry
144,tt0000147,movie,The Corbett-Fitzsimmons Fight,The Corbett-Fitzsimmons Fight,True,1897.0,,100.0,"[documentary, news, sport]",the-corbett-fitzsimmons-fight
498,tt0000502,movie,Bohemios,Bohemios,True,1905.0,,100.0,[],bohemios
570,tt0000574,movie,The Story of the Kelly Gang,The Story of the Kelly Gang,True,1906.0,,70.0,"[action, adventure, biography]",the-story-of-the-kelly-gang
587,tt0000591,movie,The Prodigal Son,L'enfant prodigue,True,1907.0,,90.0,[drama],the-prodigal-son
610,tt0000615,movie,Robbery Under Arms,Robbery Under Arms,True,1907.0,,,[drama],robbery-under-arms
625,tt0000630,movie,Hamlet,Amleto,True,1908.0,,,[drama],hamlet
668,tt0000675,movie,Don Quijote,Don Quijote,True,1908.0,,,[drama],don-quijote
672,tt0000679,movie,The Fairylogue and Radio-Plays,The Fairylogue and Radio-Plays,True,1908.0,,120.0,"[adventure, fantasy]",the-fairylogue-and-radio-plays
828,tt0000838,movie,A Cultura do Cacau,A Cultura do Cacau,True,1909.0,,,[],a-cultura-do-cacau


## Renomear colunas

In [54]:
df_imdb_titles.rename(
    {'tconst': 'imdb_id', 'titleType': 'type', 'primaryTitle': 'primary_title', 'originalTitle': 'original_title', 'isAdult': 'is_adult', 'startYear': 'start_year', 'endYear': 'end_year', 'runtimeMinutes': 'runtime_minutes', 'primaryTitleSlug': 'title_slug'},
    axis=1,
    inplace=True)
df_imdb_titles.head(10)

Unnamed: 0,imdb_id,type,primary_title,original_title,is_adult,start_year,end_year,runtime_minutes,genres,title_slug
8,tt0000009,movie,Miss Jerry,Miss Jerry,True,1894.0,,45.0,[romance],miss-jerry
144,tt0000147,movie,The Corbett-Fitzsimmons Fight,The Corbett-Fitzsimmons Fight,True,1897.0,,100.0,"[documentary, news, sport]",the-corbett-fitzsimmons-fight
498,tt0000502,movie,Bohemios,Bohemios,True,1905.0,,100.0,[],bohemios
570,tt0000574,movie,The Story of the Kelly Gang,The Story of the Kelly Gang,True,1906.0,,70.0,"[action, adventure, biography]",the-story-of-the-kelly-gang
587,tt0000591,movie,The Prodigal Son,L'enfant prodigue,True,1907.0,,90.0,[drama],the-prodigal-son
610,tt0000615,movie,Robbery Under Arms,Robbery Under Arms,True,1907.0,,,[drama],robbery-under-arms
625,tt0000630,movie,Hamlet,Amleto,True,1908.0,,,[drama],hamlet
668,tt0000675,movie,Don Quijote,Don Quijote,True,1908.0,,,[drama],don-quijote
672,tt0000679,movie,The Fairylogue and Radio-Plays,The Fairylogue and Radio-Plays,True,1908.0,,120.0,"[adventure, fantasy]",the-fairylogue-and-radio-plays
828,tt0000838,movie,A Cultura do Cacau,A Cultura do Cacau,True,1909.0,,,[],a-cultura-do-cacau


## Dicionário do DataFrame

|Coluna|Tipo|Descrição|
|--|--|--|
| id | string | alphanumeric unique identifier of the title. |
| title_type | string | the type/format of the title (e.g. movie, short,tvseries, tvepisode, video, etc). |
| primary_title | string | the more popular title / the title used by the filmmakers on promotional materials at the point of release. |
| original_title | string | original title, in the original language. |
| is_adult | boolean | 0: non-adult title; 1: adult title. |
| start_year | int | represents the release year of a title. In the case of TV Series, it is the series start year. |
| end_year | int | TV Series end year. for all other title types. |
| runtime_minutes | – | primary runtime of the title, in minutes. |
| genres | string array | includes up to three genres associated with the title. |
| title_slug | string | title slug


# Importar Dados de Notas do IMDB

In [55]:
df_imdb_title_ratings = ps.read_csv('./data/title.ratings.tsv', 
                             sep='\t', 
                             na_values='\\N', 
                             encoding='utf8', 
                             dtype=str)
df_imdb_title_ratings.head(10)

Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,2045
1,tt0000002,5.7,273
2,tt0000003,6.5,2003
3,tt0000004,5.4,178
4,tt0000005,6.2,2763
5,tt0000006,5.0,183
6,tt0000007,5.4,855
7,tt0000008,5.4,2189
8,tt0000009,5.3,210
9,tt0000010,6.8,7542


## Converter Dados
- `averageRating` para float
- `numVotes` para int

In [56]:
df_imdb_title_ratings['tconst'] = df_imdb_title_ratings['tconst'].astype(str)
df_imdb_title_ratings['averageRating'] = df_imdb_title_ratings['averageRating'].astype(float)
df_imdb_title_ratings['numVotes'] = df_imdb_title_ratings['numVotes'].astype(int)
df_imdb_title_ratings.dtypes

tconst            object
averageRating    float64
numVotes           int64
dtype: object

## Renomar colunas

In [57]:
df_imdb_title_ratings.rename(
    {'tconst': 'imdb_id', 'averageRating': 'average_rating', 'numVotes': 'num_votes'},
    axis=1,
    inplace=True)
df_imdb_title_ratings.head(10)

Unnamed: 0,imdb_id,average_rating,num_votes
0,tt0000001,5.7,2045
1,tt0000002,5.7,273
2,tt0000003,6.5,2003
3,tt0000004,5.4,178
4,tt0000005,6.2,2763
5,tt0000006,5.0,183
6,tt0000007,5.4,855
7,tt0000008,5.4,2189
8,tt0000009,5.3,210
9,tt0000010,6.8,7542


# Combinando Dados de Filmes e Notas do IMDB

In [58]:
df_imdb = ps.merge(left=df_imdb_titles, right=df_imdb_title_ratings, how='inner', on='imdb_id')
df_imdb.head(10)

Unnamed: 0,imdb_id,type,primary_title,original_title,is_adult,start_year,end_year,runtime_minutes,genres,title_slug,average_rating,num_votes
0,tt0000009,movie,Miss Jerry,Miss Jerry,True,1894.0,,45.0,[romance],miss-jerry,5.3,210
1,tt0000147,movie,The Corbett-Fitzsimmons Fight,The Corbett-Fitzsimmons Fight,True,1897.0,,100.0,"[documentary, news, sport]",the-corbett-fitzsimmons-fight,5.2,510
2,tt0000502,movie,Bohemios,Bohemios,True,1905.0,,100.0,[],bohemios,4.4,17
3,tt0000574,movie,The Story of the Kelly Gang,The Story of the Kelly Gang,True,1906.0,,70.0,"[action, adventure, biography]",the-story-of-the-kelly-gang,6.0,886
4,tt0000591,movie,The Prodigal Son,L'enfant prodigue,True,1907.0,,90.0,[drama],the-prodigal-son,5.4,24
5,tt0000615,movie,Robbery Under Arms,Robbery Under Arms,True,1907.0,,,[drama],robbery-under-arms,4.3,25
6,tt0000630,movie,Hamlet,Amleto,True,1908.0,,,[drama],hamlet,2.9,27
7,tt0000675,movie,Don Quijote,Don Quijote,True,1908.0,,,[drama],don-quijote,4.2,20
8,tt0000679,movie,The Fairylogue and Radio-Plays,The Fairylogue and Radio-Plays,True,1908.0,,120.0,"[adventure, fantasy]",the-fairylogue-and-radio-plays,5.2,73
9,tt0000862,movie,Faldgruben,Faldgruben,True,1909.0,,,[],faldgruben,4.4,17


# Importar Dados de Títulos do Netflix

In [59]:
df_netflix_titles = ps.read_csv('./data/netflix_titles.csv', sep=',', encoding='utf8', dtype=str)
df_netflix_titles.head(20)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
8,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...
9,s10,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...


## Criar slug do título

In [60]:
df_netflix_titles['title_slug'] = df_netflix_titles['title'].fillna('').apply(lambda x: slugify(str(x)))
df_netflix_titles.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,title_slug
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",dick-johnson-is-dead
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",blood-water
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,ganglands
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",jailbirds-new-orleans
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,kota-factory
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,midnight-mass
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...,my-little-pony-a-new-generation
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s...",sankofa
8,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...,the-great-british-baking-show
9,s10,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,the-starling


In [61]:
types = df_netflix_titles['type'].unique()
types

array(['Movie', 'TV Show'], dtype=object)

## Normalizar tipo

In [62]:
def normalize_netflix_type(value):
    if value == 'Movie':
        return 'movie'
    elif value == 'TV Show':
        return 'tvseries'
    else:
        return None

In [63]:
df_netflix_titles['type'] = df_netflix_titles['type'].apply(lambda x: normalize_netflix_type(x))
df_netflix_titles.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,title_slug
0,s1,movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",dick-johnson-is-dead
1,s2,tvseries,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",blood-water
2,s3,tvseries,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,ganglands
3,s4,tvseries,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",jailbirds-new-orleans
4,s5,tvseries,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,kota-factory
5,s6,tvseries,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,midnight-mass
6,s7,movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...,my-little-pony-a-new-generation
7,s8,movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s...",sankofa
8,s9,tvseries,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...,the-great-british-baking-show
9,s10,movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,the-starling


## Normalizar país

In [64]:
df_netflix_titles['country'] = df_netflix_titles['country'].fillna('').str.split(',')

In [65]:
df_netflix_titles.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,title_slug
0,s1,movie,Dick Johnson Is Dead,Kirsten Johnson,,[United States],"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",dick-johnson-is-dead
1,s2,tvseries,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",[South Africa],"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",blood-water
2,s3,tvseries,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",[],"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,ganglands
3,s4,tvseries,Jailbirds New Orleans,,,[],"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",jailbirds-new-orleans
4,s5,tvseries,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",[India],"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,kota-factory
5,s6,tvseries,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",[],"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,midnight-mass
6,s7,movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",[],"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...,my-little-pony-a-new-generation
7,s8,movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","[United States, Ghana, Burkina Faso, United...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s...",sankofa
8,s9,tvseries,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",[United Kingdom],"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...,the-great-british-baking-show
9,s10,movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",[United States],"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,the-starling


# Combinando Dados do IMDB com Dados da Netflix

In [66]:
# Nova variável = pd.merge(Planilha que recebe os dados, 
#                          Planilha que passa os dados[[‘Coluna em comum’, ‘Coluna com os dados que quero’]], 
#                         unir apartir da=[‘Coluna em comum’], Use as referências do lado= ‘Esquerdo’)

In [67]:
df_netflix_merge = ps.merge(df_netflix_titles, df_imdb[['title_slug', 'average_rating']], on=['title_slug'], how='right')
df_netflix_titles['rating_imdb'] = df_imdb['average_rating']
df_netflix_titles.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,title_slug,rating_imdb
0,s1,movie,Dick Johnson Is Dead,Kirsten Johnson,,[United States],"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",dick-johnson-is-dead,5.3
1,s2,tvseries,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",[South Africa],"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",blood-water,5.2
2,s3,tvseries,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",[],"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,ganglands,4.4
3,s4,tvseries,Jailbirds New Orleans,,,[],"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",jailbirds-new-orleans,6.0
4,s5,tvseries,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",[India],"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,kota-factory,5.4
5,s6,tvseries,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",[],"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,midnight-mass,4.3
6,s7,movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",[],"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...,my-little-pony-a-new-generation,2.9
7,s8,movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","[United States, Ghana, Burkina Faso, United...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s...",sankofa,4.2
8,s9,tvseries,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",[United Kingdom],"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...,the-great-british-baking-show,5.2
9,s10,movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",[United States],"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,the-starling,4.4


In [68]:
df_netflix_titles.drop(columns=['show_id', 'rating'], inplace=True)
df_netflix_titles.head(10)


Unnamed: 0,type,title,director,cast,country,date_added,release_year,duration,listed_in,description,title_slug,rating_imdb
0,movie,Dick Johnson Is Dead,Kirsten Johnson,,[United States],"September 25, 2021",2020,90 min,Documentaries,"As her father nears the end of his life, filmm...",dick-johnson-is-dead,5.3
1,tvseries,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",[South Africa],"September 24, 2021",2021,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",blood-water,5.2
2,tvseries,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",[],"September 24, 2021",2021,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,ganglands,4.4
3,tvseries,Jailbirds New Orleans,,,[],"September 24, 2021",2021,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",jailbirds-new-orleans,6.0
4,tvseries,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",[India],"September 24, 2021",2021,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,kota-factory,5.4
5,tvseries,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",[],"September 24, 2021",2021,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,midnight-mass,4.3
6,movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",[],"September 24, 2021",2021,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...,my-little-pony-a-new-generation,2.9
7,movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","[United States, Ghana, Burkina Faso, United...","September 24, 2021",1993,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s...",sankofa,4.2
8,tvseries,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",[United Kingdom],"September 24, 2021",2021,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...,the-great-british-baking-show,5.2
9,movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",[United States],"September 24, 2021",2021,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,the-starling,4.4


## Objetivos da análise:
- Quantidade de lançamentos por ano
- Países que mais lançaram filmes
- Filmes com maior e menor tempo de duração
- Média de rating por diretor **
- Qual o tipo de filme que agrada mais o público?
- Há relação entre rating e duração? **
- O tamanho da sinopse influencia no rating? **
- Qual o gênero mais recorrente na base?
- Separe a base por quartis e analise em grupos de ano de lançamento
- Inserir dicionário explicando o que é cada coluna
- Tratar coluna type

### Quantidade de lançamentos por Diretores por Ano

In [97]:
df_directors = df_netflix_titles.dropna(subset=['director'])
df_directors.head(10)
df_directors.groupby(['director', 'release_year']).size().sort_values(ascending=False)

director                                                   release_year
Raúl Campos, Jan Suter                                     2018            12
Marcus Raboy                                               2017             6
Rajiv Chilaka                                              2013             6
Hidenori Inoue                                             2017             5
Suhas Kadav                                                2017             4
                                                                           ..
Hiroshi Katagiri                                           2016             1
Hiroshi Aoyama, Kazumi Fukushima, Jim Stenstrum            1998             1
Hiromasa Yonebayashi, Yoshiyuki Momose, Akihiko Yamashita  2018             1
Hiromasa Yonebayashi                                       2017             1
Şenol Sönmez                                               2019             1
Length: 5905, dtype: int64

### Países que mais lançaram filmes

In [136]:
df_exploded = df_netflix_titles.explode(column='country')
df_exploded['country'] = df_exploded['country'].apply(lambda x: x.strip())
df_filtered = df_exploded[df_exploded['country'] != '']
df_grouped = df_filtered.groupby(['country']).size().sort_values(ascending=False)
df_grouped

Unnamed: 0,type,title,director,cast,country,date_added,release_year,duration,listed_in,description,title_slug,rating_imdb
0,movie,Dick Johnson Is Dead,Kirsten Johnson,,[United States],"September 25, 2021",2020,90 min,Documentaries,"As her father nears the end of his life, filmm...",dick-johnson-is-dead,5.3
1,tvseries,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",[South Africa],"September 24, 2021",2021,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",blood-water,5.2
2,tvseries,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",[],"September 24, 2021",2021,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,ganglands,4.4
3,tvseries,Jailbirds New Orleans,,,[],"September 24, 2021",2021,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",jailbirds-new-orleans,6.0
4,tvseries,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",[India],"September 24, 2021",2021,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,kota-factory,5.4
...,...,...,...,...,...,...,...,...,...,...,...,...
8802,movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",[United States],"November 20, 2019",2007,158 min,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a...",zodiac,4.6
8803,tvseries,Zombie Dumb,,,[],"July 1, 2019",2018,2 Seasons,"Kids' TV, Korean TV Shows, TV Comedies","While living alone in a spooky town, a young g...",zombie-dumb,7.2
8804,movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",[United States],"November 1, 2019",2009,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...,zombieland,6.7
8805,movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",[United States],"January 11, 2020",2006,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero...",zoom,6.4
