# Netflix Sentiment Analysis

**Import the dataset and the libraries needed**

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px 
from textblob import TextBlob

# Load the Netflix titles CSV (path is relative to the working directory) into a DataFrame
df = pd.read_csv('netflix_titles.csv')
# Show the (rows, columns) shape as the cell output
df.shape

(8807, 12)

**Check the columns of the dataset**

In [7]:
# List the column labels of the loaded dataset
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [21]:
# Preview the first rows to see what each column holds (note the missing director/cast/country values)
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


### Top 5 Directors on Netflix

In [40]:
# Create dataframe with one row per (title, director) credit.
# A title with several directors stores them as one comma-separated string.
df['director'] = df['director'].fillna('No Director')
split_directors = (
    df['director']
    .str.split(',', expand=True)
    .stack()
    # "A, B" splits into "A" and " B"; strip the padding so that " B" and "B"
    # are counted as the same person instead of two different directors.
    .str.strip()
    .to_frame(name='Director')
)

# Count credits per director, drop the placeholder used for missing values,
# and keep the five directors with the most titles.
directors = (
    split_directors
    .groupby('Director')
    .size()
    .reset_index(name='Total Content')
)
directors = directors[directors['Director'] != 'No Director']
directors = directors.sort_values(by='Total Content', ascending=False)
top_5_directors = directors.head(5)

# Create Bar Chart
director_bar_chart = px.bar(
    top_5_directors,
    x='Total Content',
    y='Director',
    title='Top 5 Directors on Netflix',
)
director_bar_chart.show()

### Top 5 Actors on Netflix

In [41]:
# Create dataframe with one row per (title, actor) credit.
# The cast of a title is stored as one comma-separated string.
df['cast'] = df['cast'].fillna('No Cast')
split_cast = (
    df['cast']
    .str.split(',', expand=True)
    .stack()
    # "A, B" splits into "A" and " B"; strip the padding so that " B" and "B"
    # are counted as the same person instead of two different actors.
    .str.strip()
    .to_frame(name='Actor')
)

# Count credits per actor, drop the placeholder used for missing values,
# and keep the five actors with the most titles.
# Actor-specific names are used so the director results from the previous
# section are not overwritten.
actors = (
    split_cast
    .groupby('Actor')
    .size()
    .reset_index(name='Total Content')
)
actors = actors[actors['Actor'] != 'No Cast']
actors = actors.sort_values(by='Total Content', ascending=False)
top_5_actors = actors.head(5)

# Create Bar Chart
actor_bar_chart = px.bar(
    top_5_actors,
    x='Total Content',
    y='Actor',
    title='Top 5 Actors on Netflix',
)
actor_bar_chart.show()

### Distribution of Content on Netflix

In [20]:
# Tally how many titles carry each rating (one row per rating, with its count)
pieData = (
    df.groupby('rating')
    .size()
    .reset_index(name='count')
)

# Render the tallies as a pie chart, one slice per rating
ratingsPie = px.pie(
    pieData,
    names='rating',
    values='count',
    title='Count of Content Ratings on Netflix',
)
ratingsPie.show()

**Most of the content on Netflix is TV-MA, or for mature audiences**

In [54]:
# Keep only the content type and release year, with a display-friendly year label
type_release = df[['type', 'release_year']].rename(columns={"release_year": "Release Year"})

# Count titles per (year, type) pair -- type is either TV Show or Movie --
# and keep only release years from 2000 onward
type_release_count = (
    type_release
    .groupby(['Release Year', 'type'])
    .size()
    .reset_index(name='Total Content')
    .loc[lambda counts: counts['Release Year'] >= 2000]
)

# Draw one line per content type across the release years
line_chart = px.line(
    type_release_count,
    x='Release Year',
    y='Total Content',
    color='type',
    title='Trend of Types of Content Produced since 2000',
)
line_chart.show()

### Sentiment Content Analysis

In [73]:
# Create a new dataframe with release year and description of content
year_descr = (
    df[['release_year', 'description']]
    .rename(columns={'release_year': 'Release Year'})
)

# Score every description once with TextBlob, then label it by the sign of its
# polarity: exactly 0 -> Neutral, above 0 -> Positive, otherwise Negative.
# The label is computed for the whole column at once so that each row receives
# only its own sentiment (a row-by-row .loc write that also targets a fixed row
# label would overwrite that row with the sentiment of a different title).
polarity = year_descr['description'].map(
    lambda text: TextBlob(text).sentiment.polarity
)
year_descr = year_descr.assign(
    Sentiment=np.select(
        [polarity == 0, polarity > 0],
        ['Neutral', 'Positive'],
        default='Negative',
    )
)

# Group by Sentiment and Release Year and count different sentiments in each year
year_descr = (
    year_descr
    .groupby(['Release Year', 'Sentiment'])
    .size()
    .reset_index(name='Total Content')
)
year_descr = year_descr[year_descr['Release Year'] >= 2000]

# Create bar chart
bar = px.bar(
    year_descr,
    x="Release Year",
    y="Total Content",
    color="Sentiment",
    title="Distribution of Sentiment in Netflix since 2000",
)
bar.show()

**In any given year, most Netflix content has a description with positive sentiment.**