In [1]:
import pandas as pd
import numpy as np

import plotly.express as px
import cufflinks as cf
import chart_studio.plotly as py
%matplotlib inline

from plotly.offline import download_plotlyjs, init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
#to make everything locally
cf.go_offline()

In [2]:
#loading the dataset
data = pd.read_csv("Netflix Dataset.csv")
data.head()

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,07:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",PG-13,123 min,Dramas,A brilliant group of students become card-coun...


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7789 entries, 0 to 7788
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Show_Id       7789 non-null   object
 1   Category      7789 non-null   object
 2   Title         7789 non-null   object
 3   Director      5401 non-null   object
 4   Cast          7071 non-null   object
 5   Country       7282 non-null   object
 6   Release_Date  7779 non-null   object
 7   Rating        7782 non-null   object
 8   Duration      7789 non-null   object
 9   Type          7789 non-null   object
 10  Description   7789 non-null   object
dtypes: object(11)
memory usage: 669.5+ KB


In [4]:
data.isnull().sum()

Show_Id            0
Category           0
Title              0
Director        2388
Cast             718
Country          507
Release_Date      10
Rating             7
Duration           0
Type               0
Description        0
dtype: int64

# Missing value Treatment
- there could be some relation between Director and Country, Director and Category, 
- H0: There is no relation between Director and Country
- H1: There is some relation between Director and Country

- H0: There is no relation between Director and Category
- H1: There is some relation between Director and Category

In [5]:
data.Director.value_counts()

Raúl Campos, Jan Suter                                                                                                                                                                 18
Marcus Raboy                                                                                                                                                                           16
Jay Karas                                                                                                                                                                              14
Cathy Garcia-Molina                                                                                                                                                                    13
Youssef Chahine                                                                                                                                                                        12
                                                                      

In [6]:
data.Country.value_counts()

United States                                                                          2556
India                                                                                   923
United Kingdom                                                                          397
Japan                                                                                   226
South Korea                                                                             183
                                                                                       ... 
Ireland, France, Iceland, United States, Mexico, Belgium, United Kingdom, Hong Kong       1
United States, Malta, France, United Kingdom                                              1
Canada, United Kingdom, Netherlands                                                       1
Netherlands, Germany, Italy, Canada                                                       1
United States, Greece, United Kingdom                                           

# Task.1. Is there any Duplicate Record in this dataset ? If yes, then remove the duplicate records.

In [7]:
data[data.duplicated()==True]

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description
6300,s684,Movie,Backfire,Dave Patten,"Black Deniro, Byron ""Squally"" Vinson, Dominic ...",United States,"April 5, 2019",TV-MA,97 min,"Dramas, Independent Movies, Thrillers",When two would-be robbers accidentally kill a ...
6622,s6621,Movie,The Lost Okoroshi,Abba T. Makama,"Seun Ajayi, Judith Audu, Tope Tedela, Ifu Enna...",Nigeria,"September 4, 2020",TV-MA,94 min,"Comedies, Dramas, Independent Movies",A disillusioned security guard transforms into...


In [8]:
data.drop(data[data.duplicated()==True].index , inplace =True)

# Task.2. Is there any Null Value present in any column ? Show with Heat-map.

In [9]:
data.isnull().sum()

Show_Id            0
Category           0
Title              0
Director        2388
Cast             718
Country          507
Release_Date      10
Rating             7
Duration           0
Type               0
Description        0
dtype: int64

In [10]:
px.imshow(data.isnull(),title = "Missing Values" )

# Q.1. For 'House of Cards', what is the Show Id and Who is the Director of this show ?

In [11]:
data.loc[data.Title=='House of Cards']

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description
2832,s2833,TV Show,House of Cards,"Robin Wright, David Fincher, Gerald McRaney, J...","Kevin Spacey, Robin Wright, Kate Mara, Corey S...",United States,"November 2, 2018",TV-MA,6 Seasons,"TV Dramas, TV Thrillers",A ruthless politician will stop at nothing to ...


In [12]:
data.loc[data.Title=='House of Cards',['Title', 'Show_Id','Director']]

Unnamed: 0,Title,Show_Id,Director
2832,House of Cards,s2833,"Robin Wright, David Fincher, Gerald McRaney, J..."


# Q.2. In which year highest number of the TV Shows & Movies were released ? Show with Bar Graph.

In [13]:
data.head(1)

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...


In [14]:
data['Release_year'] = pd.to_datetime(data['Release_Date']).dt.year

In [15]:
data['Release_year'].value_counts().sort_values(ascending = False).values

array([2153, 2009, 1685, 1225,  443,  117,   88,   25,   13,   11,    3,
          2,    2,    1], dtype=int64)

In [16]:
fig = px.bar(data_frame=data, x = data['Release_year'].value_counts().index, y = data['Release_year'].value_counts().values, color =data['Release_year'].value_counts().index ,title = "Number of shows Yearwise")
fig

Answer:
    Year ===== 2019 
    total TV shows+Movies ======== 2153

# Q.3. How many Movies & TV Shows are in the dataset ? Show with Bar Graph.

In [17]:
data.head(1)

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020.0


In [18]:
data.groupby(['Category'])['Show_Id'].count().values

array([5377, 2410], dtype=int64)

In [19]:
px.bar(data_frame=data , x =data.groupby(['Category'])['Show_Id'].count().index, 
       y =  data.groupby(['Category'])['Show_Id'].count().values, 
       color = data.groupby(['Category'])['Show_Id'].count().index,
       title = "Movies and TVShows In the Dataset",
       text = data.groupby(['Category'])['Show_Id'].count().values, 
      )

# Q.4. Show all the Movies that were released in year 2000.

In [20]:
data.head(1)

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020.0


In [21]:
data.loc[(data.Release_year==2020) & (data.Category=='Movie'),['Category','Title','Release_year']]

Unnamed: 0,Category,Title,Release_year
4,Movie,21,2020.0
6,Movie,122,2020.0
14,Movie,3022,2020.0
27,Movie,#Alive,2020.0
28,Movie,#AnneFrank - Parallel Stories,2020.0
...,...,...,...
7762,Movie,Zaki Chan,2020.0
7783,Movie,Zoom,2020.0
7784,Movie,Zozo,2020.0
7786,Movie,Zulu Man in Japan,2020.0


# Q.5. Show only the Titles of all TV Shows that were released in India only.

In [22]:
data.head(1)

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020.0


In [23]:
data.loc[(data['Country']=='India') & (data['Category']=='TV Show'),['Title']]

Unnamed: 0,Title
86,21 Sarfarosh: Saragarhi 1897
132,7 (Seven)
340,Agent Raghav
364,Akbar Birbal
533,Anjaan: Rural Myths
...,...
6249,The Creative Indians
6400,The Golden Years with Javed Akhtar
6469,The House That Made Me
7294,Typewriter


# Show Top 10 Directors, who gave the highest number of TV Shows & Movies to Netflix ?

In [24]:
data.Director.value_counts().sort_values(ascending = False).index[:10]

Index(['Raúl Campos, Jan Suter', 'Marcus Raboy', 'Jay Karas',
       'Cathy Garcia-Molina', 'Martin Scorsese', 'Jay Chapman',
       'Youssef Chahine', 'Steven Spielberg', 'David Dhawan', 'Hakan Algül'],
      dtype='object')

In [25]:
px.bar(data, x = data.Director.value_counts().sort_values(ascending = False).index[:10], 
        y= data.Director.value_counts().sort_values(ascending = False).values[:10], 
        color =data.Director.value_counts().sort_values(ascending = False).values[:10], 
        title = 'Top 10 Directors with Highest Number of TV Shows & Movies' )

# Q.7. Show all the Records, where "Category is Movie and Type is Comedies" or "Country is United Kingdom".

In [26]:
data.head(1)

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020.0


In [27]:
data[(data.Type=='Comedies')&(data.Category=='Movie') | (data.Country=='United Kingdom')]

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
19,s20,Movie,'89,,"Lee Dixon, Ian Wright, Paul Merson",United Kingdom,"May 16, 2018",TV-PG,87 min,Sports Movies,"Mixing old footage with interviews, this is th...",2018.0
33,s34,Movie,#realityhigh,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,"September 8, 2017",TV-14,99 min,Comedies,When nerdy high schooler Dani finally attracts...,2017.0
58,s59,TV Show,100% Hotter,,"Daniel Palmer, Melissa Sophia, Karen Williams,...",United Kingdom,"November 1, 2019",TV-14,1 Season,"British TV Shows, International TV Shows, Real...","A stylist, a hair designer and a makeup artist...",2019.0
72,s73,Movie,17 Again,Burr Steers,"Zac Efron, Leslie Mann, Matthew Perry, Thomas ...",United States,"January 1, 2021",PG-13,102 min,Comedies,"Nearing a midlife crisis, thirty-something Mik...",2021.0
82,s83,Movie,2036 Origin Unknown,Hasraf Dulull,"Katee Sackhoff, Ray Fearon, Julie Cox, Steven ...",United Kingdom,"December 20, 2018",TV-14,95 min,Sci-Fi & Fantasy,Working with an artificial intelligence to inv...,2018.0
...,...,...,...,...,...,...,...,...,...,...,...,...
7670,s7669,TV Show,World War II in Colour,,Robert Powell,United Kingdom,"August 1, 2017",TV-MA,1 Season,"British TV Shows, Docuseries, International TV...",Footage of the most dramatic moments from Worl...,2017.0
7671,s7670,TV Show,World's Busiest Cities,,"Anita Rani, Ade Adepitan, Dan Snow",United Kingdom,"February 1, 2019",TV-PG,1 Season,"British TV Shows, Docuseries","From Moscow to Mexico City, three BBC journali...",2019.0
7688,s7687,Movie,XV: Beyond the Tryline,Pierre Deschamps,,United Kingdom,"March 18, 2020",TV-14,91 min,"Documentaries, Sports Movies","Set against the 2015 Rugby World Cup, this doc...",2020.0
7725,s7724,Movie,You Can Tutu,James Brown,"Lily O'Regan, Jeannettsy Enriquez Borges, Joel...",United Kingdom,"December 31, 2017",TV-G,87 min,Children & Family Movies,A gifted young ballet dancer struggles to find...,2017.0


# Q.8. In how many movies/shows, Tom Cruise was cast ?

In [28]:
data.head(1)

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020.0


In [29]:
data[data.Cast.str.contains('Tom Cruise').fillna(False)]

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
3860,s3861,Movie,Magnolia,Paul Thomas Anderson,"Jeremy Blackman, Tom Cruise, Melinda Dillon, A...",United States,"January 1, 2020",R,189 min,"Dramas, Independent Movies","Through chance, human action, past history and...",2020.0
5071,s5071,Movie,Rain Man,Barry Levinson,"Dustin Hoffman, Tom Cruise, Valeria Golino, Ge...",United States,"July 1, 2019",R,134 min,"Classic Movies, Dramas",A fast-talking yuppie is forced to slow down w...,2019.0


# Q.9. What are the different Ratings defined by Netflix ?

In [30]:
print("Total number of different ratings are " ,data['Rating'].nunique() ,"\nThey are ",data['Rating'].unique())

Total number of different ratings are  14 
They are  ['TV-MA' 'R' 'PG-13' 'TV-14' 'TV-PG' 'NR' 'TV-G' 'TV-Y' nan 'TV-Y7' 'PG'
 'G' 'NC-17' 'TV-Y7-FV' 'UR']


# Q.9.1. How many Movies got the 'TV-14' rating, in Canada ?

In [31]:
data.loc[(data.Rating=='TV-14') & (data.Country=='Canada') &  (data.Category=='Movie'), 'Category'].count()

11

# Q.9.2. How many TV Show got the 'R' rating, after year 2018 ?

In [32]:
data.head(1)

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020.0


In [33]:
data.loc[(data.Rating=='R') & (data.Category=='TV Show') & (data.Release_year > 2018),'Show_Id'].count()

1

# Q.10. What is the maximum duration of a Movie/Show on Netflix ?

In [34]:
#Creating a series  and stories all duration which is availale in minutes 
minutes = pd.Series(data.loc[data.Duration.str.contains('min'), 'Duration'])
#Creating a series  and stories all duration which is availale in seasons 
seasons = pd.Series(data.loc[data.Duration.str.contains('Seasons'), 'Duration'])

In [35]:
def clean(series):
    cleanlist = []
    for i in series.unique():
        cleanlist.append(int(list(i.split(" "))[0]))
    cleanlist.sort(reverse = True)
    return cleanlist

In [36]:
maximum_minutes = clean(minutes)[0]
maximum_seasons = clean(seasons)[0]
print("Movie/Show with maximum minutes", maximum_minutes, "\n Movie/Show with maximum Seasons", maximum_seasons)

Movie/Show with maximum minutes 312 
 Movie/Show with maximum Seasons 16


# Q.11. Which individual country has the Highest No. of TV Shows ?

In [37]:
fig=px.bar(data,
       x = data.Country.value_counts().sort_values(ascending = False).index[:20],
       y = data.Country.value_counts().sort_values(ascending = False).values[:20],
    color = data.Country.value_counts().sort_values(ascending = False).index[:20],
        title  = "Country Wise Total Shows",
          text = data.Country.value_counts().sort_values(ascending = False).values[:20])
fig.update_traces(textposition = 'outside')

# Q.12. How can we sort the dataset by Year ?

In [38]:
data.sort_values(by = 'Release_year')

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
7115,s7114,Movie,To and From New York,Sorin Dan Mihalcescu,"Barbara King, Shaana Diya, John Krisiukenas, Y...",United States,"January 1, 2008",TV-MA,81 min,"Dramas, Independent Movies, Thrillers","While covering a story in New York City, a Sea...",2008.0
1765,s1766,TV Show,Dinner for Five,,,United States,"February 4, 2008",TV-MA,1 Season,Stand-Up Comedy & Talk Shows,"In each episode, four celebrities join host Jo...",2008.0
3248,s3249,Movie,Just Another Love Story,Ole Bornedal,"Anders W. Berthelsen, Rebecka Hemse, Nikolaj L...",Denmark,"May 5, 2009",TV-MA,104 min,"Dramas, International Movies",When he causes a car accident that leaves a yo...,2009.0
5766,s5766,Movie,Splatter,Joe Dante,"Corey Feldman, Tony Todd, Tara Leigh, Erin Way...",United States,"November 18, 2009",TV-MA,29 min,Horror Movies,"After committing suicide, a washed-up rocker r...",2009.0
3840,s3841,Movie,Mad Ron's Prevues from Hell,Jim Monaco,"Nick Pawlow, Jordu Schell, Jay Kushwara, Micha...",United States,"November 1, 2010",NR,84 min,"Cult Movies, Horror Movies","This collection cherry-picks trailers, forgott...",2010.0
...,...,...,...,...,...,...,...,...,...,...,...,...
3374,s3375,TV Show,Kikoriki,,Igor Dmitriev,,,TV-Y,2 Seasons,Kids' TV,A wacky rabbit and his gang of animal pals hav...,
3492,s3493,TV Show,La Familia P. Luche,,"Eugenio Derbez, Consuelo Duval, Luis Manuel Áv...",United States,,TV-14,3 Seasons,"International TV Shows, Spanish-Language TV Sh...","This irreverent sitcom featues Ludovico, Feder...",
3946,s3947,TV Show,Maron,,"Marc Maron, Judd Hirsch, Josh Brener, Nora Zeh...",United States,,TV-MA,4 Seasons,TV Comedies,"Marc Maron stars as Marc Maron, who interviews...",
5138,s5138,TV Show,Red vs. Blue,,"Burnie Burns, Jason Saldaña, Gustavo Sorola, G...",United States,,NR,13 Seasons,"TV Action & Adventure, TV Comedies, TV Sci-Fi ...","This parody of first-person shooter games, mil...",


# Q.13. Find all the instances where : Category is 'Movie' and Type is 'Dramas' or Category is 'TV Show' & Type is 'Kids' TV'.

In [39]:

data[((data.Category=='Movie') & (data.Type=='Dramas') )| ((data.Category=='TV Show') & (data.Type.str.contains('Kids')) )]

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_year
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",PG-13,123 min,Dramas,A brilliant group of students become card-coun...,2020.0
7,s8,Movie,187,Kevin Reynolds,"Samuel L. Jackson, John Heard, Kelly Rowan, Cl...",United States,"November 1, 2019",R,119 min,Dramas,After one of his high school students attacks ...,2019.0
108,s109,TV Show,3Below: Tales of Arcadia,,"Tatiana Maslany, Diego Luna, Nick Offerman, Ni...",United States,"July 12, 2019",TV-Y7,2 Seasons,"Kids' TV, TV Action & Adventure, TV Sci-Fi & F...","After crash-landing on Earth, two royal teen a...",2019.0
111,s112,TV Show,44 Cats,,"Sarah Natochenny, Suzy Myers, Simona Berman, E...",Italy,"October 1, 2020",TV-Y7,2 Seasons,Kids' TV,Paw-esome tales abound when singing furry frie...,2020.0
170,s171,Movie,A Family Man,Mark Williams,"Gerard Butler, Gretchen Mol, Alison Brie, Will...","Canada, United States","December 15, 2019",R,110 min,Dramas,A ruthless corporate headhunter battles his ri...,2019.0
...,...,...,...,...,...,...,...,...,...,...,...,...
7758,s7757,TV Show,Z4,,"Apollo Costa, Gabriel Santana, Matheus Lustosa...",Brazil,"February 22, 2019",TV-PG,2 Seasons,Kids' TV,Fading music biz veteran Zé realizes he has ju...,2019.0
7761,s7760,TV Show,Zak Storm,,"Michael Johnston, Jessica Gee-George, Christin...","United States, France, South Korea, Indonesia","September 13, 2018",TV-Y7,3 Seasons,Kids' TV,Teen surfer Zak Storm is mysteriously transpor...,2018.0
7768,s7767,TV Show,Zig & Sharko,,,France,"December 1, 2017",TV-Y7,1 Season,"Kids' TV, TV Comedies","Zig, an island-bound hyena, will do anything t...",2017.0
7777,s7776,TV Show,Zoids Wild,,"Kensho Ono, Takahiro Sakurai, Mikako Komatsu, ...",Japan,"August 14, 2020",TV-Y7,1 Season,"Anime Series, Kids' TV",A quest for freedom and legendary treasure beg...,2020.0
