![Alt text](https://miro.medium.com/v2/resize:fit:960/0*puClFxWSP4buuk08.gif "a title")

In [4]:
# packages
import pandas as pd # for data processing
import seaborn as sns # for visualisation 
import matplotlib.pyplot as plt

In [3]:
netflix_data = pd.read_csv("dataset/cleaned_netflix_data.csv")
netflix_data.head()

Unnamed: 0,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,Movie,Dick Johnson Is Dead,Kirsten Johnson,Unknown,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...
2,TV Show,On the Verge,Unknown,"Julie Delpy, Elisabeth Shue, Sarah Jones, Alex...","France, United States","September 7, 2021",2021,TV-MA,1 Season,"TV Comedies, TV Dramas","Four women — a chef, a single mom, an heiress ..."
3,Movie,Stowaway,Joe Penna,"Anna Kendrick, Toni Collette, Daniel Dae Kim, ...","Germany, United States","April 22, 2021",2021,TV-MA,116 min,"Dramas, International Movies, Thrillers",A three-person crew on a mission to Mars faces...
4,Movie,Wild Dog,Ahishor Solomon,"Nagarjuna Akkineni, Dia Mirza, Saiyami Kher, A...",Unknown,"April 22, 2021",2020,TV-MA,126 min,"Action & Adventure, International Movies",A brash but brilliant Indian intelligence agen...


In [25]:
netflix_data.isna().sum()

type            0
title           0
director        0
cast            0
country         0
date_added      0
release_year    0
rating          0
duration        0
listed_in       0
description     0
dtype: int64

In [26]:
# Shape of the dataset
netflix_data.shape

(8807, 11)

In [27]:
# check non unique values 
netflix_data.nunique()

type               2
title           8807
director        4529
cast            7693
country          749
date_added      1767
release_year      74
rating            14
duration         220
listed_in        514
description     8775
dtype: int64

In [28]:
# check for duplicate values
netflix_data.duplicated().sum()

0

In [29]:
netflix_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   type          8807 non-null   object
 1   title         8807 non-null   object
 2   director      8807 non-null   object
 3   cast          8807 non-null   object
 4   country       8807 non-null   object
 5   date_added    8807 non-null   object
 6   release_year  8807 non-null   int64 
 7   rating        8807 non-null   object
 8   duration      8807 non-null   object
 9   listed_in     8807 non-null   object
 10  description   8807 non-null   object
dtypes: int64(1), object(10)
memory usage: 757.0+ KB


In [30]:
# understanding variables
variables = pd.DataFrame(columns=['Variable','Num of unique values','values'])

for i,var in enumerate(netflix_data.columns):
    variables.loc[i] = [var,netflix_data[var].nunique(),netflix_data[var].unique().tolist()]

variables

Unnamed: 0,Variable,Num of unique values,values
0,type,2,"[Movie, TV Show]"
1,title,8807,"[Dick Johnson Is Dead, The Starling, On the Ve..."
2,director,4529,"[Kirsten Johnson, Theodore Melfi, Unknown, Joe..."
3,cast,7693,"[Unknown, Melissa McCarthy, Chris O'Dowd, Kevi..."
4,country,749,"[United States, France, United States, Germany..."
5,date_added,1767,"[September 25, 2021, September 24, 2021, Septe..."
6,release_year,74,"[2020, 2021, 2015, 2013, 2018, 2017, 2019, 201..."
7,rating,14,"[PG-13, TV-MA, TV-14, TV-Y, TV-Y7, R, PG, TV-P..."
8,duration,220,"[90 min, 104 min, 1 Season, 116 min, 126 min, ..."
9,listed_in,514,"[Documentaries, Comedies, Dramas, TV Comedies,..."


NETFLIX CONTENT RELEASE
----


In [91]:
# Extracting YEAR (2016) from dataset with data_added as 'February 19, 2016'

# Convert 'date_added' to datetime objects
netflix_data['date_added'] = pd.to_datetime(netflix_data['date_added'], errors='coerce')

# Extract the year from the 'date_added' column
netflix_data['year_added'] = netflix_data['date_added'].dt.year

# Count the number of titles added in each year
yearly_counts = netflix_data['year_added'].value_counts()

# Sort the counts by year
yearly_counts = yearly_counts.sort_index()

# Display the counts per year
print(yearly_counts)

year_added
2008.0       2
2009.0       2
2010.0       1
2011.0      13
2012.0       3
2013.0      10
2014.0      23
2015.0      73
2016.0     418
2017.0    1164
2018.0    1625
2019.0    1999
2020.0    1888
2021.0    1498
Name: count, dtype: int64


In [92]:
# Convert 'date_added' to datetime
netflix_data['date_added'] = pd.to_datetime(netflix_data['date_added'], errors='coerce')

# Extract the year from the 'date_added' column
netflix_data['year_added'] = netflix_data['date_added'].dt.year

# Count the number of titles added in each year and categorize them by 'type'
yearly_counts = netflix_data.groupby(['year_added', 'type']).size().unstack(fill_value=0)

# Display the counts per year, categorized as "TV" and "Movies"
print(yearly_counts)

type        Movie  TV Show
year_added                
2008.0          1        1
2009.0          2        0
2010.0          1        0
2011.0         13        0
2012.0          3        0
2013.0          6        4
2014.0         19        4
2015.0         56       17
2016.0        253      165
2017.0        839      325
2018.0       1237      388
2019.0       1424      575
2020.0       1284      604
2021.0        993      505


In [6]:
netflix_data['title'].unique()

array(['Dick Johnson Is Dead', 'The Starling', 'On the Verge', ...,
       'HOMUNCULUS', 'Life in Color with David Attenborough',
       'Searching For Sheela'], dtype=object)

FILTERING 
---
1. FILTERING BASED ON SPECIFIC CONDITIONS LIKE 'type' = Movie/TV Show AND 'country' , SHOW THE TITTLE OF THE TV SHOWS IN UK
---

In [23]:
# export data frame to show the number of shows released in @ country
country_data = netflix_data['country'].value_counts().reset_index()
country_data.columns = ['country', 'Count']
country_data.to_csv('country_data.csv', index=False)

In [24]:
# netflix_data[(netflix_data['type'] == 'TV Show') & (netflix_data['country'].isin(['United Kingdom']))] 

# look into the records
# uk_data = netflix_data[(netflix_data['type'] == 'TV Show') & (netflix_data['country'] == 'United States')] 
# Export the DataFrame to a CSV file
# uk_data.to_csv('us_data.csv', index=False)

netflix_data[(netflix_data['type'] == 'TV Show') & (netflix_data['country'] == 'United Kingdom')] ['title']



142                          Murder Maps
147     Secrets of Great British Castles
187                       The Irregulars
205          Formula 1: Drive to Survive
232                           Zero Chill
                      ...               
8603                         Wild Arabia
8611                  Booba: Food Puzzle
8621              World's Busiest Cities
8696       The Great British Baking Show
8796           Octonauts: Above & Beyond
Name: title, Length: 213, dtype: object

2. WHICH COUNTRY LOVES RELEASING / CONSUMING HORROR TV SHOWS/ MOVIES
---

In [90]:
# filtering all records that have Horror in the listed_in column
horror_list = netflix_data[netflix_data['listed_in'].str.contains('horror',case=False, na=False)]
# Group the data by 'country' and count the number of entries in each country
country_counts = horror_list['country'].str.split(', ').explode().value_counts()

# Find the country with the highest count
# max_country = country_counts.idxmax()
# max_count = country_counts.max()

# print(f"The country with the highest number of TV shows or movies is {max_country} with {max_count} entries.")

print(country_counts)


country
United States           238
Canada                   44
India                    42
United Kingdom           30
Unknown                  20
Thailand                 16
France                   13
Indonesia                11
Spain                    10
Japan                     9
South Korea               8
Mexico                    8
Germany                   7
Taiwan                    6
Philippines               5
Singapore                 5
Turkey                    5
Ireland                   4
Argentina                 4
Belgium                   4
Malaysia                  4
Australia                 4
Egypt                     4
Italy                     4
Norway                    3
Chile                     3
Brazil                    2
United Arab Emirates      2
South Africa              2
Nigeria                   2
Czech Republic            2
China                     2
Israel                    2
Poland                    2
Slovenia                  1
Serbia      

3. SHOW THE TITTLE OF THE MOVIE IN UK
---

In [26]:
netflix_data[(netflix_data['type'] == 'Movie') & (netflix_data['country'] == 'United Kingdom')] ['title']

166                                   Not a Game
182     Elizabeth and Margaret: Love and Loyalty
314                                         Pelé
321         Fate: The Winx Saga - The Afterparty
350                                      Monsoon
                          ...                   
8598                  Why Are We Getting So Fat?
8606                                    Williams
8652                                You Can Tutu
8688                                  Layer Cake
8748                            Sitting in Limbo
Name: title, Length: 206, dtype: object

4. SHOW ALL THE MOVIES UNDER THE CATEGORY MOVIE THAT ARE COMEDIE AND RELEASED IN UK
----

In [43]:
netflix_data[(netflix_data['listed_in'].str.contains('Comedies', case=False))  & (netflix_data['type'] == 'Movie') & (netflix_data['country'] == 'United Kingdom') ]

# Filtering the string Comedies in the column 'listed_in'
# netflix_data[ (netflix_data['listed_in'].str.contains('Comedies', case=False)) ] #2255 rows

# Filtering the string Comedies in the column 'listed_in' and is a  Movie 
# netflix_data[ (netflix_data['listed_in'].str.contains('Comedies', case=False)) & (netflix_data['type'] == 'Movie')] #1674 rows

# Filtering the string Comedies in the column 'listed_in' and is a  Movie  or coutry is UK
# The first conditions before or will be checked if its TRUE in not the OR condition will run
# netflix_data[ (netflix_data['listed_in'].str.contains('Comedies', case=False)) & (netflix_data['type'] == 'Movie')| (netflix_data['country'] == 'United Kingdom')] #2061 rows

Unnamed: 0,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
321,Movie,Fate: The Winx Saga - The Afterparty,Unknown,"David Spade, London Hughes, Fortune Feimster, ...",United Kingdom,"February 20, 2021",2021,TV-14,36 min,"Comedies, Dramas",Stars of the fiery hit discuss the show's magi...
566,Movie,Shaun the Sheep: The Farmer’s Llamas,Jay Grace,"Justin Fletcher, John Sparkes, Sean Connolly, ...",United Kingdom,"December 22, 2020",2020,TV-Y,28 min,"Children & Family Movies, Comedies",A trio of mischievous llamas from the county f...
900,Movie,Fisherman's Friends,Chris Foggin,"Daniel Mays, James Purefoy, David Hayman, Dave...",United Kingdom,"October 22, 2020",2019,PG-13,112 min,"Comedies, Dramas, Music & Musicals",Sea shanties have long united 10 Cornish fishe...
1096,Movie,Horrid Henry's Gross Day Out,Gary Andrews,"Lizzie Waterworth-Santo, Emma Tate, Sue Elliot...",United Kingdom,"September 17, 2020",2020,TV-Y,61 min,"Children & Family Movies, Comedies",Worlds collide when Henry makes plans to spend...
2145,Movie,Quartet,Dustin Hoffman,"Maggie Smith, Tom Courtenay, Billy Connolly, P...",United Kingdom,"August 8, 2021",2012,PG-13,98 min,"Comedies, Dramas, Independent Movies","To save their posh retirement home, former ope..."
2285,Movie,Breaking the Bank,Vadim Jean,"Kelsey Grammer, Tamsin Greig, John Michael Hig...",United Kingdom,"January 1, 2020",2014,TV-PG,105 min,"Comedies, International Movies",Bank chairman Charles Bunbury – who knows next...
2443,Movie,Mahi NRI,Gaurav Bavdankar,"Harrdy Sandhu, Poppy Jabbal, Rameet Kaur, B.N....",United Kingdom,"December 1, 2019",2017,TV-14,134 min,"Comedies, Dramas, International Movies",A Punjabi man attempts to build a life in Lond...
2594,Movie,Christmas Survival,James Dearden,"Julian Ovenden, Gemma Whelan, Joely Richardson...",United Kingdom,"November 1, 2019",2018,TV-MA,101 min,Comedies,"With Christmas just around the corner, sisters..."
2657,Movie,Free Fire,Ben Wheatley,"Sharlto Copley, Armie Hammer, Brie Larson, Cil...",United Kingdom,"October 21, 2019",2016,R,91 min,"Action & Adventure, Comedies, Independent Movies",A sketchy arms transaction in a deserted wareh...
3136,Movie,PATRICK (2019),Mandie Fletcher,"Beattie Edmondson, Ed Skrein, Tom Bennett, Jen...",United Kingdom,"May 16, 2019",2019,TV-PG,94 min,"Children & Family Movies, Comedies","When a schoolteacher inherits an adorable pug,..."


4. FIND THE NUMBER OF TV SHOWS AND MOVIES TOW CRUISE WAS CASTED IN
---

In [42]:
netflix_data[netflix_data['cast'].str.contains('Tom Cruise')]

Unnamed: 0,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
285,Movie,Rain Man,Barry Levinson,"Dustin Hoffman, Tom Cruise, Valeria Golino, Ge...",United States,"March 1, 2021",1988,R,134 min,"Classic Movies, Dramas","Motivated by money, a selfish workaholic seeki..."
2690,Movie,Magnolia,Paul Thomas Anderson,"John C. Reilly, Philip Baker Hall, Tom Cruise,...",United States,"August 1, 2021",1999,R,189 min,"Dramas, Independent Movies","Through chance, history and divine interventio..."


4. Show all the Movies that were released btwn the year 2010 and 2023 by Filtering
---

In [93]:
netflix_data[(netflix_data['type'] == 'Movie')  & ( (netflix_data['release_year'] >= 2010) & (netflix_data['release_year'] <= 2023))]

Unnamed: 0,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Minutes,Units,year_added
0,Movie,Dick Johnson Is Dead,Kirsten Johnson,Unknown,United States,2021-09-25,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",90.0,min,2021.0
1,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,2021-09-24,2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,104.0,min,2021.0
3,Movie,Stowaway,Joe Penna,"Anna Kendrick, Toni Collette, Daniel Dae Kim, ...","Germany, United States",2021-04-22,2021,TV-MA,116 min,"Dramas, International Movies, Thrillers",A three-person crew on a mission to Mars faces...,116.0,min,2021.0
4,Movie,Wild Dog,Ahishor Solomon,"Nagarjuna Akkineni, Dia Mirza, Saiyami Kher, A...",Unknown,2021-04-22,2020,TV-MA,126 min,"Action & Adventure, International Movies",A brash but brilliant Indian intelligence agen...,126.0,min,2021.0
5,Movie,Oloibiri,Curtis Graham,"Olu Jacobs, Richard Mofe-Damijo, William R. Mo...","Canada, Nigeria, United States",2021-04-21,2015,TV-14,86 min,"Dramas, International Movies, Thrillers","After drilling depletes a small village, a cor...",86.0,min,2021.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8799,Movie,One Like It,Marwan Nabil,"Riham Abdel Ghafour, Mohammed Aly Rizk, Hazem ...",Egypt,2021-04-23,2020,TV-G,15 min,"Dramas, Independent Movies, International Movies",This short film follows a day in the life of a...,15.0,min,2021.0
8800,Movie,Perfume Imaginary Museum “Time Warp”,"MIKIKO, Daito Manabe",Perfume,Unknown,2021-04-23,2020,TV-G,54 min,"International Movies, Music & Musicals",This visually stunning special performance was...,54.0,min,2021.0
8802,Movie,This Lady Called Life,Kayode Kasum,"Bisola Aiyeola, Efa Iwara, Molawa Onajobi, Tin...",Nigeria,2021-04-23,2020,TV-14,120 min,"Dramas, International Movies, Romantic Movies","Abandoned by her family, young single mother A...",120.0,min,2021.0
8804,Movie,HOMUNCULUS,Takashi Shimizu,"Go Ayano, Ryo Narita, Yukino Kishii, Anna Ishi...",Japan,2021-04-22,2021,TV-MA,116 min,"Horror Movies, International Movies, Thrillers",Truth and illusion blurs when a homeless amnes...,116.0,min,2021.0


5. WHAT ARE THE DIFFERENT RATING DEFINED BY NETFLIX
----

In [44]:
# Movies in Canada with TV-14 rating
netflix_data[(netflix_data['country'].str.contains('Canada', case=False))  & (netflix_data['type'] == 'Movie') & (netflix_data['rating'] == 'TV-14') ]

Unnamed: 0,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
5,Movie,Oloibiri,Curtis Graham,"Olu Jacobs, Richard Mofe-Damijo, William R. Mo...","Canada, Nigeria, United States","April 21, 2021",2015,TV-14,86 min,"Dramas, International Movies, Thrillers","After drilling depletes a small village, a cor..."
313,Movie,Made You Look: A True Story About Fake Art,Barry Avrich,Unknown,Canada,"February 23, 2021",2020,TV-14,90 min,"Documentaries, International Movies",A woman walks into a New York gallery with a c...
645,Movie,Level 16,Danishka Esterhazy,"Katie Douglas, Celina Martin, Peter Outerbridg...",Canada,"September 1, 2021",2018,TV-14,102 min,"Sci-Fi & Fantasy, Thrillers",In a bleak academy that teaches girls the virt...
802,Movie,We Are All For The Fatherland,Unknown,Unknown,"Lebanon, Canada, France","November 11, 2020",1979,TV-14,74 min,"Documentaries, International Movies","After the 1978 Israeli invasion of Lebanon, ch..."
926,Movie,Whispers,Maroun Baghdadi,"Nadia Tueni, Ziad Rahbani","Lebanon, Canada, France","October 19, 2020",1980,TV-14,93 min,"Documentaries, International Movies","With her home devastated by war, a Lebanese po..."
1465,Movie,The Silence of Others,"Almudena Carracedo, Robert Bahar",Unknown,"Spain, France, Canada, United States","July 1, 2020",2019,TV-14,96 min,"Documentaries, International Movies","Filmed over six years, this documentary captur..."
1968,Movie,There's Something in the Water,"Ellen Page, Ian Daniel",Ellen Page,Canada,"March 27, 2020",2019,TV-14,72 min,"Documentaries, International Movies",This documentary spotlights the struggle of mi...
2696,Movie,"Martin Matte: La vie, la mort...eh la la..!",Jean-François Blais,Martin Matte,Canada,"October 14, 2019",2019,TV-14,62 min,Stand-Up Comedy,Quebecois comedy star Martin Matte serves up e...
5216,Movie,For the Love of Spock,Adam Nimoy,"Leonard Nimoy, William Shatner, George Takei, ...","Canada, United States","December 2, 2016",2016,TV-14,112 min,Documentaries,The son of actor Leonard Nimoy directs this mo...
5265,Movie,I Am the Pretty Thing That Lives in the House,Osgood Perkins,"Ruth Wilson, Bob Balaban, Lucy Boynton, Paula ...","Canada, United States","October 28, 2016",2016,TV-14,89 min,"Horror Movies, International Movies, Thrillers",A nervous nurse who scares easily finds hersel...


6. HOW MANY TV SHOW GOT THE 'R' RATING AFTER 2018  OR HOW MANY MOVIES GOT THE 'R' RATING AFTER 2018
---

In [46]:
netflix_data[(netflix_data['rating']== 'R') & (netflix_data['type']=='TV Show')]

Unnamed: 0,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
3216,TV Show,The Hateful Eight: Extended Version,Quentin Tarantino,"Samuel L. Jackson, Kurt Russell, Jennifer Jaso...",Unknown,"April 25, 2019",2015,R,1 Season,TV Shows,Trapped at a stagecoach stop as a storm rages ...
7327,TV Show,Nobel,Unknown,"Aksel Hennie, Tuva Novotny, Anders Danielsen L...",Norway,"December 10, 2016",2016,R,1 Season,"International TV Shows, TV Dramas, TV Thrillers",A series of incidents in Afghanistan set compl...


In [49]:
netflix_data[(netflix_data['rating'] == 'R') & (netflix_data['type'] =='Movie') & (netflix_data['release_year'] > 2018) ]

Unnamed: 0,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
41,Movie,Synchronic,"Justin Benson, Aaron Moorhead","Anthony Mackie, Jamie Dornan, Katie Aselton, A...",United States,"April 16, 2021",2020,R,102 min,Thrillers,Two paramedics begin to question their realiti...
95,Movie,The Stand-In,Jamie Babbit,"Drew Barrymore, Michael Zegen, T.J. Miller, Ho...",United States,"April 10, 2021",2020,R,102 min,Comedies,"Her career in shambles, a reclusive movie star..."
120,Movie,Concrete Cowboy,Ricky Staub,"Idris Elba, Caleb McLaughlin, Jharrel Jerome, ...","United Kingdom, United States","April 2, 2021",2021,R,112 min,"Dramas, Independent Movies",Sent to live with his estranged father for the...
121,Movie,Cross: Rise of the Villains,"Patrick Durham, Paul G. Volk","Brian Austin Green, Vinnie Jones, Lori Heuring...",United States,"April 2, 2021",2019,R,100 min,Action & Adventure,"Armed with a mighty ancient amulet, a team of ..."
200,Movie,Jiu Jitsu,Dimitri Logothetis,"Alain Moussi, Nicolas Cage, Tony Jaa, Rick Yun...",United States,"March 20, 2021",2020,R,102 min,Action & Adventure,"After the defeat of a celebrated war hero, an ..."
...,...,...,...,...,...,...,...,...,...,...,...
7724,Movie,Saving Zoë,Jeffrey G. Hunt,"Laura Marano, Vanessa Marano, Christopher Tava...",United States,"January 15, 2020",2019,R,95 min,Dramas,A reserved high school freshman acts out when ...
8000,Movie,Kate,Cedric Nicolas-Troyan,"Mary Elizabeth Winstead, Jun Kunimura, Woody H...",United States,"September 10, 2021",2021,R,106 min,Action & Adventure,"Slipped a fatal poison on her final job, a rut..."
8034,Movie,The Wind,Emma Tammi,"Caitlin Gerard, Julia Goldani Telles, Ashley Z...",United States,"June 2, 2021",2019,R,88 min,Horror Movies,Isolated on a wind-ravaged 19th-century homest...
8378,Movie,Army of the Dead,Zack Snyder,"Dave Bautista, Ella Purnell, Omari Hardwick, G...",United States,"May 21, 2021",2021,R,148 min,"Action & Adventure, Horror Movies","After a zombie outbreak in Las Vegas, a group ..."


7. WHAT IS THE MAXIMUM DURATION OF MOVIE AND TV SHOWS IN NETFLIX
---

In [57]:
netflix_data['duration'].unique()

array(['90 min', '104 min', '1 Season', '116 min', '126 min', '86 min',
       '97 min', '2 Seasons', '76 min', '80 min', '81 min', '84 min',
       '87 min', '91 min', '132 min', '3 Seasons', '101 min', '102 min',
       '147 min', '224 min', '141 min', '109 min', '162 min', '142 min',
       '93 min', '60 min', '119 min', '37 min', '96 min', '123 min',
       '127 min', '88 min', '106 min', '117 min', '65 min', '7 Seasons',
       '143 min', '137 min', '75 min', '71 min', '103 min', '128 min',
       '6 Seasons', '130 min', '5 Seasons', '145 min', '77 min', '79 min',
       '78 min', '118 min', '4 Seasons', '108 min', '133 min', '107 min',
       '32 min', '55 min', '92 min', '140 min', '89 min', '94 min',
       '112 min', '100 min', '120 min', '98 min', '113 min', '158 min',
       '164 min', '121 min', '173 min', '182 min', '181 min', '185 min',
       '99 min', '114 min', '149 min', '110 min', '83 min', '111 min',
       '73 min', '21 min', '95 min', '105 min', '115 min', '24 min

In [63]:
# MAX duration of Movies in NETFLIX
netflix_data[['Minutes', 'Units']] = netflix_data['duration'].str.split(' ' , expand=True)
# netflix_data.head(5)

# turn the Minutes column to float
netflix_data['Minutes'] = netflix_data['Minutes'].astype(float)
# netflix_data.dtypes

netflix_data.Minutes.max() 
# netflix_data.Minutes.describe()

312.0

In [68]:
# Max seasons in NETFLIX
netflix_seasons = netflix_data[netflix_data['Units'] == 'Seasons']
# netflix_seasons.Minutes.max() #17

# find the value count of the seasons in NETFLIX
netflix_seasons['Minutes'].value_counts()


Minutes
2.0     425
3.0     199
4.0      95
5.0      65
6.0      33
7.0      23
8.0      17
9.0       9
10.0      7
13.0      3
15.0      2
12.0      2
11.0      2
17.0      1
Name: count, dtype: int64

8. WHICH COUNTRY HAS THE HIGHEST NO. OF TV SHOWS / MOVIES
---

In [71]:
tv_show_data = netflix_data[netflix_data['type'] == 'TV Show']
tv_show_data['country'].value_counts()

country
United States                           760
Unknown                                 391
United Kingdom                          213
Japan                                   169
South Korea                             158
                                       ... 
Austria, Germany                          1
Puerto Rico, United States, Colombia      1
United States, Mexico, Colombia           1
Italy, United Kingdom, France             1
Australia, United Kingdom                 1
Name: count, Length: 197, dtype: int64

In [72]:
tv_show_data = netflix_data[netflix_data['type'] == 'Movie']
tv_show_data['country'].value_counts()

country
United States                                 2058
India                                          893
Unknown                                        440
United Kingdom                                 206
Canada                                         122
                                              ... 
Ireland, Canada                                  1
Norway, Denmark, Netherlands, Sweden             1
United Kingdom, Japan                            1
Canada, Germany, France, United States           1
United States, United Kingdom, New Zealand       1
Name: count, Length: 652, dtype: int64

Analyzing the top 5 Actors on Netflix

In [None]:
# EXAMPLE

df['cast']=df['cast'].fillna('No cast specified')
cast_df = pd.DataFrame()
cast_df = df['cast'].str.split(',',expand=True).stack()
cast_df = cast_df.to_frame()
cast_df.columns = ['Actor']
actors = cast_df.groupby(['Actor']).size().reset_index(name = 'Total Count')
actors = actors[actors.Actor != 'No cast specified']
actors = actors.sort_values(by=['Total Count'], ascending=False)
top5Actors = actors.head()
top5Actors = top5Actors.sort_values(by=['Total Count'])
barChart2 = px.bar(top5Actors, x='Total Count', y='Actor', title='Top 5 Actors on Netflix')
barChart2.show()