**Import Libraries and Dataset**

In [29]:
import numpy as np
import pandas as pd
import plotly.express as px
from textblob import TextBlob

df = pd.read_csv('netflix_titles.csv')

Verify Count of Rows and Columns

In [30]:
df.shape

(8807, 12)

Inspect first 5 Rows of Dataset

In [31]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


List Column Headers

In [32]:
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

Get Count of Individual Ratings

In [33]:
x = df.groupby(['rating']).size().reset_index(name = 'counts')
x

Unnamed: 0,rating,counts
0,66 min,1
1,74 min,1
2,84 min,1
3,G,41
4,NC-17,3
5,NR,80
6,PG,287
7,PG-13,490
8,R,799
9,TV-14,2160


Create a Pie Chart Showing Distribution of Ratings

In [34]:
PieChart = px.pie(x, values = 'counts', names = 'rating', title = 'Distribution of Ratings')
PieChart.show()

Replace Null Values

In [35]:
df['director'] = df['director'].fillna('Not Specified')
df['country'] = df['country'].fillna('Not Specified')
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,Not Specified,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",Not Specified,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,Not Specified,,Not Specified,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,Not Specified,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


Verify Column Does Not Exist before Creating One

In [36]:
director_list = pd.DataFrame()
director_list

In [37]:
director_list = df['director'].str.split(',', expand = True).stack()
director_list

Unnamed: 0,Unnamed: 1,0
0,0,Kirsten Johnson
1,0,Not Specified
2,0,Julien Leclercq
3,0,Not Specified
4,0,Not Specified
...,...,...
8802,0,David Fincher
8803,0,Not Specified
8804,0,Ruben Fleischer
8805,0,Peter Hewitt


In [38]:
director_list = director_list.to_frame()
director_list

Unnamed: 0,Unnamed: 1,0
0,0,Kirsten Johnson
1,0,Not Specified
2,0,Julien Leclercq
3,0,Not Specified
4,0,Not Specified
...,...,...
8802,0,David Fincher
8803,0,Not Specified
8804,0,Ruben Fleischer
8805,0,Peter Hewitt


Rename Column

In [39]:
director_list.columns = ['Director']
director_list

Unnamed: 0,Unnamed: 1,Director
0,0,Kirsten Johnson
1,0,Not Specified
2,0,Julien Leclercq
3,0,Not Specified
4,0,Not Specified
...,...,...
8802,0,David Fincher
8803,0,Not Specified
8804,0,Ruben Fleischer
8805,0,Peter Hewitt


In [40]:
directors = director_list.groupby(['Director']).size().reset_index(name = 'Total Count')
directors

Unnamed: 0,Director,Total Count
0,Aaron Moorhead,2
1,Aaron Woolf,1
2,Abbas Alibhai Burmawalla,1
3,Abdullah Al Noor,1
4,Abhinav Shiv Tiwari,1
...,...,...
5116,Çagan Irmak,1
5117,Ísold Uggadóttir,1
5118,Óskar Thór Axelsson,1
5119,Ömer Faruk Sorak,2


In [41]:
directors = directors[directors.Director != 'Not Specified']
directors

Unnamed: 0,Director,Total Count
0,Aaron Moorhead,2
1,Aaron Woolf,1
2,Abbas Alibhai Burmawalla,1
3,Abdullah Al Noor,1
4,Abhinav Shiv Tiwari,1
...,...,...
5116,Çagan Irmak,1
5117,Ísold Uggadóttir,1
5118,Óskar Thór Axelsson,1
5119,Ömer Faruk Sorak,2


In [42]:
directors = directors.sort_values(by = ['Total Count'], ascending = False)
directors

Unnamed: 0,Director,Total Count
4021,Rajiv Chilaka,22
4068,Raúl Campos,18
261,Jan Suter,18
4652,Suhas Kadav,16
3235,Marcus Raboy,16
...,...,...
2340,J. Davis,1
2341,J. Lee Thompson,1
2342,J. Michael Long,1
609,Smriti Keshari,1


In [43]:
Top5Directors = directors.head()
Top5Directors

Unnamed: 0,Director,Total Count
4021,Rajiv Chilaka,22
4068,Raúl Campos,18
261,Jan Suter,18
4652,Suhas Kadav,16
3235,Marcus Raboy,16


Create Bar Charts

In [44]:
Top5Directors = Top5Directors.sort_values(by = ['Total Count'])
BarChart = px.bar(Top5Directors, x = 'Total Count', y = 'Director', title = 'Top 5 Directors on Netflix')
BarChart.show()

In [48]:
df['cast'] = df['cast'].fillna('Not Specified')
cast_df = pd.DataFrame()
cast_df = df['cast'].str.split(',', expand = True).stack()
cast_df = cast_df.to_frame()
cast_df.columns = ['Actor']
actors = cast_df.groupby(['Actor']).size().reset_index(name = 'Total Count')
actors = actors[actors.Actor != 'Not Specified']
actors = actors.sort_values(by = ['Total Count'], ascending = False)
Top5Actors = actors.head()
Top5Actors = Top5Actors.sort_values(by = ['Total Count'])
BarChart2 = px.bar(Top5Actors, x = 'Total Count', y = 'Actor', title = 'Top 5 Actors on Netflix')
BarChart2.show()

Analyzing the content produced on Netflix by Year

In [54]:
df1 = df[['type', 'release_year']]
df1 = df1.rename(columns = {'release_year': 'Release Year', 'type': 'Type'})
df2 = df1.groupby(['Release Year', 'Type']).size().reset_index(name = 'Total Count')
df2

Unnamed: 0,Release Year,Type,Total Count
0,1925,TV Show,1
1,1942,Movie,2
2,1943,Movie,3
3,1944,Movie,3
4,1945,Movie,3
...,...,...,...
114,2019,TV Show,397
115,2020,Movie,517
116,2020,TV Show,436
117,2021,Movie,277


Create Line Graph

In [57]:
df2 = df2[df2['Release Year']>=2000]
graph = px.line(df2, x = 'Release Year', y = 'Total Count', color = 'Type', title = 'Content Produced on Netflix Annually')
graph.show()

Sentiment Analysis of Netflix Content

In [60]:
df3 = df[['release_year', 'description']]
df3 = df.rename(columns = {'release_year': 'Release Year', 'description': 'Description'})
for index, row in df3.iterrows():
  d = row['Description']
  testimonial = TextBlob(d)
  p = testimonial.sentiment.polarity
  if p == 0:
    sent = 'Neutral'
  elif p > 0:
    sent = 'Positive'
  else:
    sent = 'Negative'
  df3.loc[[index, 2], 'Sentiment'] = sent
df3 = df3.groupby(['Release Year', 'Sentiment']).size().reset_index(name = 'Total Count')
df3 = df3[df3['Release Year'] > 2005]
BarGraph = px.bar(df3, x = 'Release Year', y = 'Total Count', color = 'Sentiment', title = 'Sentiment Analysis of Netflix Content')
BarGraph.show()