In [40]:
import pandas as ps
from slugify import slugify

# Importar Dados de Títulos do IMDB

In [41]:
df_imdb_titles = ps.read_csv('./data/title.basics.tsv', 
                             sep='\t', 
                             na_values='\\N', 
                             encoding='utf8', 
                             dtype=str)
df_imdb_titles.head(10)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,,5,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,,1,"Comedy,Short"
5,tt0000006,short,Chinese Opium Den,Chinese Opium Den,0,1894,,1,Short
6,tt0000007,short,Corbett and Courtney Before the Kinetograph,Corbett and Courtney Before the Kinetograph,0,1894,,1,"Short,Sport"
7,tt0000008,short,Edison Kinetoscopic Record of a Sneeze,Edison Kinetoscopic Record of a Sneeze,0,1894,,1,"Documentary,Short"
8,tt0000009,movie,Miss Jerry,Miss Jerry,0,1894,,45,Romance
9,tt0000010,short,Leaving the Factory,La sortie de l'usine Lumière à Lyon,0,1895,,1,"Documentary,Short"


## Filtrar apenas filmes e séries

In [42]:
types = df_imdb_titles['titleType'].unique()
types
df_imdb_titles = df_imdb_titles[df_imdb_titles['titleType'].isin(['movie', 'tvSeries'])]

## Converter dados
- `startYear` de float para inteiro
- `endYear` de float para inteiro
- `isAdult` de float para booleano

In [43]:
df_imdb_titles['startYear'] = ps.to_numeric(df_imdb_titles['startYear'], errors='coerce', downcast='integer')
df_imdb_titles['endYear'] = ps.to_numeric(df_imdb_titles['endYear'], errors='coerce', downcast='integer')
df_imdb_titles['isAdult'] = df_imdb_titles['isAdult'].apply(lambda x: False if x == 0 else True)
df_imdb_titles['primaryTitle'] = df_imdb_titles['primaryTitle'].astype(str)

## Criar slug do título

In [44]:
df_imdb_titles.dtypes
df_imdb_titles['primaryTitleSlug'] = df_imdb_titles['primaryTitle'].fillna('').apply(lambda x: slugify(str(x)))

## Criar slug dos gêneros

In [45]:
df_imdb_titles['genres'] = df_imdb_titles['genres'].fillna('').str.split(',').apply(lambda x:[slugify(value) for value in x])

In [46]:
df_imdb_titles.head(10)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,primaryTitleSlug
8,tt0000009,movie,Miss Jerry,Miss Jerry,True,1894.0,,45.0,[romance],miss-jerry
144,tt0000147,movie,The Corbett-Fitzsimmons Fight,The Corbett-Fitzsimmons Fight,True,1897.0,,100.0,"[documentary, news, sport]",the-corbett-fitzsimmons-fight
498,tt0000502,movie,Bohemios,Bohemios,True,1905.0,,100.0,[],bohemios
570,tt0000574,movie,The Story of the Kelly Gang,The Story of the Kelly Gang,True,1906.0,,70.0,"[action, adventure, biography]",the-story-of-the-kelly-gang
587,tt0000591,movie,The Prodigal Son,L'enfant prodigue,True,1907.0,,90.0,[drama],the-prodigal-son
610,tt0000615,movie,Robbery Under Arms,Robbery Under Arms,True,1907.0,,,[drama],robbery-under-arms
625,tt0000630,movie,Hamlet,Amleto,True,1908.0,,,[drama],hamlet
668,tt0000675,movie,Don Quijote,Don Quijote,True,1908.0,,,[drama],don-quijote
672,tt0000679,movie,The Fairylogue and Radio-Plays,The Fairylogue and Radio-Plays,True,1908.0,,120.0,"[adventure, fantasy]",the-fairylogue-and-radio-plays
828,tt0000838,movie,A Cultura do Cacau,A Cultura do Cacau,True,1909.0,,,[],a-cultura-do-cacau


## Normalizar tipo

In [47]:
df_imdb_titles['titleType'] = df_imdb_titles['titleType'].fillna('').apply(lambda x: slugify(str(x)))
df_imdb_titles.head(10)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,primaryTitleSlug
8,tt0000009,movie,Miss Jerry,Miss Jerry,True,1894.0,,45.0,[romance],miss-jerry
144,tt0000147,movie,The Corbett-Fitzsimmons Fight,The Corbett-Fitzsimmons Fight,True,1897.0,,100.0,"[documentary, news, sport]",the-corbett-fitzsimmons-fight
498,tt0000502,movie,Bohemios,Bohemios,True,1905.0,,100.0,[],bohemios
570,tt0000574,movie,The Story of the Kelly Gang,The Story of the Kelly Gang,True,1906.0,,70.0,"[action, adventure, biography]",the-story-of-the-kelly-gang
587,tt0000591,movie,The Prodigal Son,L'enfant prodigue,True,1907.0,,90.0,[drama],the-prodigal-son
610,tt0000615,movie,Robbery Under Arms,Robbery Under Arms,True,1907.0,,,[drama],robbery-under-arms
625,tt0000630,movie,Hamlet,Amleto,True,1908.0,,,[drama],hamlet
668,tt0000675,movie,Don Quijote,Don Quijote,True,1908.0,,,[drama],don-quijote
672,tt0000679,movie,The Fairylogue and Radio-Plays,The Fairylogue and Radio-Plays,True,1908.0,,120.0,"[adventure, fantasy]",the-fairylogue-and-radio-plays
828,tt0000838,movie,A Cultura do Cacau,A Cultura do Cacau,True,1909.0,,,[],a-cultura-do-cacau


## Renomear colunas

In [48]:
df_imdb_titles.rename(
    {'tconst': 'imdb_id', 'titleType': 'type', 'primaryTitle': 'primary_title', 'originalTitle': 'original_title', 'isAdult': 'is_adult', 'startYear': 'start_year', 'endYear': 'end_year', 'runtimeMinutes': 'runtime_minutes', 'primaryTitleSlug': 'title_slug'},
    axis=1,
    inplace=True)
df_imdb_titles.head(10)

Unnamed: 0,imdb_id,type,primary_title,original_title,is_adult,start_year,end_year,runtime_minutes,genres,title_slug
8,tt0000009,movie,Miss Jerry,Miss Jerry,True,1894.0,,45.0,[romance],miss-jerry
144,tt0000147,movie,The Corbett-Fitzsimmons Fight,The Corbett-Fitzsimmons Fight,True,1897.0,,100.0,"[documentary, news, sport]",the-corbett-fitzsimmons-fight
498,tt0000502,movie,Bohemios,Bohemios,True,1905.0,,100.0,[],bohemios
570,tt0000574,movie,The Story of the Kelly Gang,The Story of the Kelly Gang,True,1906.0,,70.0,"[action, adventure, biography]",the-story-of-the-kelly-gang
587,tt0000591,movie,The Prodigal Son,L'enfant prodigue,True,1907.0,,90.0,[drama],the-prodigal-son
610,tt0000615,movie,Robbery Under Arms,Robbery Under Arms,True,1907.0,,,[drama],robbery-under-arms
625,tt0000630,movie,Hamlet,Amleto,True,1908.0,,,[drama],hamlet
668,tt0000675,movie,Don Quijote,Don Quijote,True,1908.0,,,[drama],don-quijote
672,tt0000679,movie,The Fairylogue and Radio-Plays,The Fairylogue and Radio-Plays,True,1908.0,,120.0,"[adventure, fantasy]",the-fairylogue-and-radio-plays
828,tt0000838,movie,A Cultura do Cacau,A Cultura do Cacau,True,1909.0,,,[],a-cultura-do-cacau


## Dicionário do DataFrame

|Coluna|Tipo|Descrição|
|--|--|--|
| id | string | alphanumeric unique identifier of the title. |
| title_type | string | the type/format of the title (e.g. movie, short,tvseries, tvepisode, video, etc). |
| primary_title | string | the more popular title / the title used by the filmmakers on promotional materials at the point of release. |
| original_title | string | original title, in the original language. |
| is_adult | boolean | 0: non-adult title; 1: adult title. |
| start_year | int | represents the release year of a title. In the case of TV Series, it is the series start year. |
| end_year | int | TV Series end year. for all other title types. |
| runtime_minutes | – | primary runtime of the title, in minutes. |
| genres | string array | includes up to three genres associated with the title. |
| title_slug | string | title slug


# Importar Dados de Notas do IMDB

In [49]:
df_imdb_title_ratings = ps.read_csv('./data/title.ratings.tsv', 
                             sep='\t', 
                             na_values='\\N', 
                             encoding='utf8', 
                             dtype=str)
df_imdb_title_ratings.head(10)

Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,2045
1,tt0000002,5.7,273
2,tt0000003,6.5,2003
3,tt0000004,5.4,178
4,tt0000005,6.2,2763
5,tt0000006,5.0,183
6,tt0000007,5.4,855
7,tt0000008,5.4,2189
8,tt0000009,5.3,210
9,tt0000010,6.8,7542


## Converter Dados
- `averageRating` para float
- `numVotes` para int

In [50]:
df_imdb_title_ratings['tconst'] = df_imdb_title_ratings['tconst'].astype(str)
df_imdb_title_ratings['averageRating'] = df_imdb_title_ratings['averageRating'].astype(float)
df_imdb_title_ratings['numVotes'] = df_imdb_title_ratings['numVotes'].astype(int)
df_imdb_title_ratings.dtypes

tconst            object
averageRating    float64
numVotes           int64
dtype: object

## Renomar colunas

In [51]:
df_imdb_title_ratings.rename(
    {'tconst': 'imdb_id', 'averageRating': 'average_rating', 'numVotes': 'num_votes'},
    axis=1,
    inplace=True)
df_imdb_title_ratings.head(10)

Unnamed: 0,imdb_id,average_rating,num_votes
0,tt0000001,5.7,2045
1,tt0000002,5.7,273
2,tt0000003,6.5,2003
3,tt0000004,5.4,178
4,tt0000005,6.2,2763
5,tt0000006,5.0,183
6,tt0000007,5.4,855
7,tt0000008,5.4,2189
8,tt0000009,5.3,210
9,tt0000010,6.8,7542


# Combinando Dados de Filmes e Notas do IMDB

In [52]:
df_imdb = ps.merge(left=df_imdb_titles, right=df_imdb_title_ratings, how='inner', on='imdb_id')
df_imdb.head(10)

Unnamed: 0,imdb_id,type,primary_title,original_title,is_adult,start_year,end_year,runtime_minutes,genres,title_slug,average_rating,num_votes
0,tt0000009,movie,Miss Jerry,Miss Jerry,True,1894.0,,45.0,[romance],miss-jerry,5.3,210
1,tt0000147,movie,The Corbett-Fitzsimmons Fight,The Corbett-Fitzsimmons Fight,True,1897.0,,100.0,"[documentary, news, sport]",the-corbett-fitzsimmons-fight,5.2,510
2,tt0000502,movie,Bohemios,Bohemios,True,1905.0,,100.0,[],bohemios,4.4,17
3,tt0000574,movie,The Story of the Kelly Gang,The Story of the Kelly Gang,True,1906.0,,70.0,"[action, adventure, biography]",the-story-of-the-kelly-gang,6.0,886
4,tt0000591,movie,The Prodigal Son,L'enfant prodigue,True,1907.0,,90.0,[drama],the-prodigal-son,5.4,24
5,tt0000615,movie,Robbery Under Arms,Robbery Under Arms,True,1907.0,,,[drama],robbery-under-arms,4.3,25
6,tt0000630,movie,Hamlet,Amleto,True,1908.0,,,[drama],hamlet,2.9,27
7,tt0000675,movie,Don Quijote,Don Quijote,True,1908.0,,,[drama],don-quijote,4.2,20
8,tt0000679,movie,The Fairylogue and Radio-Plays,The Fairylogue and Radio-Plays,True,1908.0,,120.0,"[adventure, fantasy]",the-fairylogue-and-radio-plays,5.2,73
9,tt0000862,movie,Faldgruben,Faldgruben,True,1909.0,,,[],faldgruben,4.4,17


# Importar Dados de Títulos do Netflix

In [53]:
df_netflix_titles = ps.read_csv('./data/netflix_titles.csv', sep=',', encoding='utf8', dtype=str)
df_netflix_titles.head(20)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
8,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...
9,s10,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...


## Criar slug do título

In [54]:
df_netflix_titles['title_slug'] = df_netflix_titles['title'].fillna('').apply(lambda x: slugify(str(x)))
df_netflix_titles.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,title_slug
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",dick-johnson-is-dead
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",blood-water
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,ganglands
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",jailbirds-new-orleans
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,kota-factory
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,midnight-mass
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...,my-little-pony-a-new-generation
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s...",sankofa
8,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...,the-great-british-baking-show
9,s10,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,the-starling


In [55]:
types = df_netflix_titles['type'].unique()
types

array(['Movie', 'TV Show'], dtype=object)

## Normalizar tipo

In [56]:
def normalize_netflix_type(value):
    if value == 'Movie':
        return 'movie'
    elif value == 'TV Show':
        return 'tvseries'
    else:
        return None

In [57]:
df_netflix_titles['type'] = df_netflix_titles['type'].apply(lambda x: normalize_netflix_type(x))
df_netflix_titles.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,title_slug
0,s1,movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",dick-johnson-is-dead
1,s2,tvseries,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",blood-water
2,s3,tvseries,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,ganglands
3,s4,tvseries,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",jailbirds-new-orleans
4,s5,tvseries,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,kota-factory
5,s6,tvseries,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,midnight-mass
6,s7,movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...,my-little-pony-a-new-generation
7,s8,movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s...",sankofa
8,s9,tvseries,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...,the-great-british-baking-show
9,s10,movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,the-starling


## Normalizar país

In [58]:
df_netflix_titles['country'] = df_netflix_titles['country'].fillna('').str.split(',')

In [59]:
df_netflix_titles.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,title_slug
0,s1,movie,Dick Johnson Is Dead,Kirsten Johnson,,[United States],"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",dick-johnson-is-dead
1,s2,tvseries,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",[South Africa],"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",blood-water
2,s3,tvseries,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",[],"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,ganglands
3,s4,tvseries,Jailbirds New Orleans,,,[],"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",jailbirds-new-orleans
4,s5,tvseries,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",[India],"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,kota-factory
5,s6,tvseries,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",[],"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,midnight-mass
6,s7,movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",[],"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...,my-little-pony-a-new-generation
7,s8,movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","[United States, Ghana, Burkina Faso, United...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s...",sankofa
8,s9,tvseries,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",[United Kingdom],"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...,the-great-british-baking-show
9,s10,movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",[United States],"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,the-starling


# Combinando Dados do IMDB com Dados da Netflix

## Separando Filmes e Séries do IMDB

In [60]:
df_imdb_movies = df_imdb[df_imdb['type'] == 'movie']
df_imdb_series = df_imdb[df_imdb['type'] == 'tvseries']

## Combinando Filmes

In [61]:
df_movies = ps.merge(left=df_netflix_titles, right=df_imdb_movies, how='inner', on=['title_slug', 'type'])
df_movies.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,...,imdb_id,primary_title,original_title,is_adult,start_year,end_year,runtime_minutes,genres,average_rating,num_votes
0,s1,movie,Dick Johnson Is Dead,Kirsten Johnson,,[United States],"September 25, 2021",2020,PG-13,90 min,...,tt11394180,Dick Johnson Is Dead,Dick Johnson Is Dead,True,2020.0,,89,"[biography, documentary, drama]",7.4,7226
1,s7,movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",[],"September 24, 2021",2021,PG,91 min,...,tt10101702,My Little Pony: A New Generation,My Little Pony: A New Generation,True,2021.0,,90,"[adventure, animation, comedy]",6.8,4505
2,s8,movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","[United States, Ghana, Burkina Faso, United...","September 24, 2021",1993,TV-MA,125 min,...,tt0108041,Sankofa,Sankofa,True,1993.0,,125,[drama],7.0,805
3,s8,movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","[United States, Ghana, Burkina Faso, United...","September 24, 2021",1993,TV-MA,125 min,...,tt4600776,Sankofa,Sankofa,True,2014.0,,77,[sci-fi],6.9,10
4,s10,movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",[United States],"September 24, 2021",2021,PG-13,104 min,...,tt5164438,The Starling,The Starling,True,2021.0,,102,"[comedy, drama]",6.4,15273
5,s13,movie,Je Suis Karl,Christian Schwochow,"Luna Wedler, Jannis Niewöhner, Milan Peschel, ...","[Germany, Czech Republic]","September 23, 2021",2021,TV-MA,127 min,...,tt9205538,Je Suis Karl,Je suis Karl,True,2021.0,,126,"[drama, romance, thriller]",5.5,1560
6,s14,movie,Confessions of an Invisible Girl,Bruno Garotti,"Klara Castanho, Lucca Picon, Júlia Gomes, Marc...",[],"September 22, 2021",2021,TV-PG,91 min,...,tt15204288,Confessions of an Invisible Girl,Confissões de uma Garota Excluída,True,2021.0,,91,"[comedy, drama]",5.3,1377
7,s19,movie,Intrusion,Adam Salky,"Freida Pinto, Logan Marshall-Green, Robert Joh...",[],"September 22, 2021",2021,TV-14,94 min,...,tt5563324,Intrusion,Intrusion,True,2021.0,,92,"[action, drama, mystery]",5.3,21058
8,s24,movie,Go! Go! Cory Carson: Chrissy Takes the Wheel,"Alex Woo, Stanley Moore","Maisie Benson, Paul Killam, Kerry Gudjohnsen, ...",[],"September 21, 2021",2021,TV-Y,61 min,...,tt15296598,Go! Go! Cory Carson: Chrissy Takes the Wheel,Go! Go! Cory Carson: Chrissy Takes the Wheel,True,2021.0,,60,"[animation, family]",6.7,58
9,s25,movie,Jeans,S. Shankar,"Prashanth, Aishwarya Rai Bachchan, Sri Lakshmi...",[India],"September 21, 2021",1998,TV-14,166 min,...,tt0151121,Jeans,Jeans,True,1998.0,,175,"[comedy, drama, romance]",6.4,3959


### Limpando Colunas

In [62]:
df_movies.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description',
       'title_slug', 'imdb_id', 'primary_title', 'original_title', 'is_adult',
       'start_year', 'end_year', 'runtime_minutes', 'genres', 'average_rating',
       'num_votes'],
      dtype='object')

In [63]:
df_movies.drop(columns=['show_id', 'cast', 'rating', 'description', 'is_adult'], inplace=True)

In [64]:
df_movies.head(4)

Unnamed: 0,type,title,director,country,date_added,release_year,duration,listed_in,title_slug,imdb_id,primary_title,original_title,start_year,end_year,runtime_minutes,genres,average_rating,num_votes
0,movie,Dick Johnson Is Dead,Kirsten Johnson,[United States],"September 25, 2021",2020,90 min,Documentaries,dick-johnson-is-dead,tt11394180,Dick Johnson Is Dead,Dick Johnson Is Dead,2020.0,,89,"[biography, documentary, drama]",7.4,7226
1,movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha",[],"September 24, 2021",2021,91 min,Children & Family Movies,my-little-pony-a-new-generation,tt10101702,My Little Pony: A New Generation,My Little Pony: A New Generation,2021.0,,90,"[adventure, animation, comedy]",6.8,4505
2,movie,Sankofa,Haile Gerima,"[United States, Ghana, Burkina Faso, United...","September 24, 2021",1993,125 min,"Dramas, Independent Movies, International Movies",sankofa,tt0108041,Sankofa,Sankofa,1993.0,,125,[drama],7.0,805
3,movie,Sankofa,Haile Gerima,"[United States, Ghana, Burkina Faso, United...","September 24, 2021",1993,125 min,"Dramas, Independent Movies, International Movies",sankofa,tt4600776,Sankofa,Sankofa,2014.0,,77,[sci-fi],6.9,10


## Combinando Séries

In [65]:
df_series = ps.merge(left=df_netflix_titles, right=df_imdb_series, how='inner', on=['title_slug', 'type'])
df_series.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,...,imdb_id,primary_title,original_title,is_adult,start_year,end_year,runtime_minutes,genres,average_rating,num_votes
0,s2,tvseries,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",[South Africa],"September 24, 2021",2021,TV-MA,2 Seasons,...,tt9839146,Blood & Water,Blood & Water,True,2020.0,,53.0,"[drama, mystery]",6.7,4279
1,s3,tvseries,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",[],"September 24, 2021",2021,TV-MA,1 Season,...,tt13278100,Ganglands,Braqueurs,True,2021.0,,44.0,"[action, crime, drama]",7.2,4520
2,s4,tvseries,Jailbirds New Orleans,,,[],"September 24, 2021",2021,TV-MA,1 Season,...,tt15320436,Jailbirds New Orleans,Jailbirds New Orleans,True,2021.0,,,"[documentary, reality-tv]",6.6,301
3,s5,tvseries,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",[India],"September 24, 2021",2021,TV-MA,2 Seasons,...,tt9432978,Kota Factory,Kota Factory,True,2019.0,2021.0,40.0,"[comedy, drama]",9.0,80331
4,s9,tvseries,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",[United Kingdom],"September 24, 2021",2021,TV-14,9 Seasons,...,tt1877368,The Great British Baking Show,The Great British Bake Off,True,2010.0,,60.0,"[game-show, reality-tv]",8.6,12830
5,s12,tvseries,Bangkok Breaking,Kongkiat Komesiri,"Sukollawat Kanarot, Sushar Manaying, Pavarit M...",[],"September 23, 2021",2021,TV-MA,1 Season,...,tt14202282,Bangkok Breaking,Bangkok Breaking,True,2021.0,2021.0,60.0,"[action, crime, drama]",5.9,364
6,s15,tvseries,Crime Stories: India Detectives,,,[],"September 22, 2021",2021,TV-MA,1 Season,...,tt14178956,Crime Stories: India Detectives,Crime Stories: India Detectives,True,2021.0,,47.0,"[crime, documentary, mystery]",7.0,1173
7,s16,tvseries,Dear White People,,"Logan Browning, Brandon P. Bell, DeRon Horton,...",[United States],"September 22, 2021",2021,TV-MA,4 Seasons,...,tt5707802,Dear White People,Dear White People,True,2017.0,2021.0,30.0,"[comedy, drama]",6.4,26406
8,s20,tvseries,Jaguar,,"Blanca Suárez, Iván Marcos, Óscar Casas, Adriá...",[],"September 22, 2021",2021,TV-MA,1 Season,...,tt11698590,Jaguar,Jaguar,True,2021.0,2021.0,50.0,"[action, drama, history]",5.9,2219
9,s26,tvseries,Love on the Spectrum,,Brooke Satchwell,[Australia],"September 21, 2021",2021,TV-14,2 Seasons,...,tt11904786,Love on the Spectrum,Love on the Spectrum,True,2019.0,2021.0,45.0,"[documentary, reality-tv, romance]",8.6,7543


### Limpando Colunas

In [66]:
df_series.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description',
       'title_slug', 'imdb_id', 'primary_title', 'original_title', 'is_adult',
       'start_year', 'end_year', 'runtime_minutes', 'genres', 'average_rating',
       'num_votes'],
      dtype='object')

In [67]:
df_series.drop(columns=['show_id', 'cast', 'rating', 'description', 'is_adult'], inplace=True)

In [None]:
df_movies.head(4)