# **Analyst Netflix Title - Exam**

In [6]:
import pandas as pd
import numpy as np
import plotly.express as px
from textblob import TextBlob

df = pd.read_csv('netflix_titles.csv')

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


## **Checking the null values at each columns**

In [9]:
null_data = df.isna().sum()
print(null_data)

show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64


In [10]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


## **Grouping and visualizing distribution of ratings**

In [11]:
x = df.groupby(['rating']).size().reset_index(name="counts")
print(x)

      rating  counts
0     66 min       1
1     74 min       1
2     84 min       1
3          G      41
4      NC-17       3
5         NR      80
6         PG     287
7      PG-13     490
8          R     799
9      TV-14    2160
10      TV-G     220
11     TV-MA    3207
12     TV-PG     863
13      TV-Y     307
14     TV-Y7     334
15  TV-Y7-FV       6
16        UR       3


In [12]:
pieChart = px.pie(x, values='counts', names='rating', title='Distribution of countent ratings')
pieChart.show()

In [13]:
df['director'] = df['director'].fillna('Unknown director')
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,Unknown director,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,Unknown director,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,Unknown director,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


## **Taking name of each directors**

In [None]:
director_list = pd.DataFrame(df['director'].str.split(',', expand=True).stack())
director_list.columns =['Director']
print(director_list)

                Director
0    0   Kirsten Johnson
1    0  Unknown director
2    0   Julien Leclercq
3    0  Unknown director
4    0  Unknown director
...                  ...
8802 0     David Fincher
8803 0  Unknown director
8804 0   Ruben Fleischer
8805 0      Peter Hewitt
8806 0       Mozez Singh

[9612 rows x 1 columns]


In [None]:
directors = director_list.groupby(['Director']).size().reset_index(name='Count')
print(directors)

                       Director  Count
0                Aaron Moorhead      2
1                   Aaron Woolf      1
2      Abbas Alibhai Burmawalla      1
3              Abdullah Al Noor      1
4           Abhinav Shiv Tiwari      1
...                         ...    ...
5116                Çagan Irmak      1
5117           Ísold Uggadóttir      1
5118        Óskar Thór Axelsson      1
5119           Ömer Faruk Sorak      2
5120               Şenol Sönmez      2

[5121 rows x 2 columns]


## **Delete unknown directors**

In [None]:
directors = directors[directors.Director != 'Unknown director']
directors = directors.sort_values(by=['Count'], ascending=False)
print(directors)

             Director  Count
4020    Rajiv Chilaka     22
4067      Raúl Campos     18
261         Jan Suter     18
4651      Suhas Kadav     16
3235     Marcus Raboy     16
...               ...    ...
2340         J. Davis      1
2341  J. Lee Thompson      1
2342  J. Michael Long      1
609    Smriti Keshari      1
2560    Joaquín Mazón      1

[5120 rows x 2 columns]


## **Visualizing top 5 directors**

In [None]:
barChart = px.bar(directors.head().sort_values(by=['Count']), x='Count', y='Director', title='Top 5 Directors on Netflix')
barChart.show()

## **Top 5 Actors on Netflix**

In [None]:
df['cast'] = df['cast'].fillna('No cast specified')
cast = pd.DataFrame(df['cast'].str.split(',', expand=True).stack())
cast.columns = ['Actor']
actors = cast.groupby(['Actor']).size().reset_index(name='Count')
actors = actors[actors.Actor != 'No cast specified']
actors = actors.sort_values(by='Count', ascending=False)
actorsChart = px.bar(actors.head().sort_values(by=['Count']), x='Count', y='Actor', title='Top 5 Actor on Netflix')
actorsChart.show()

In [None]:
df.info()

## **Trends of Type Contents on Netflix**

In [None]:
df1 =df[['type', 'release_year']]
df2 = df1.groupby(['type', 'release_year']).size().reset_index(name='Count')
df2 = df2.rename(columns = {'type':'Type', 'release_year': 'Release Year'})
line = px.line(df2, x='Release Year', y='Count', color='Type', title='Trends of Content Netflix')
line.show()

## **Sentiment Analysis of Netflix Content**

In [16]:
df3 = df[['release_year', 'description']]
df3 = df3.rename(columns = {
    'release_year' : 'Release Year',
    'description' : 'Description'
})
for index, row in df3.iterrows():
  d = row['Description']
  testimonial = TextBlob(d)
  p = testimonial.sentiment.polarity
  if p == 0:
    sent = 'Neutral'
  elif p > 0:
    sent = 'Positive'
  else:
    sent = 'Negative'
  df3.loc[[index, 2], 'Sentiment'] = sent

df3 = df3.groupby(['Release Year', 'Sentiment']).size().reset_index(name='Count')
df3Graph = px.bar(df3, x='Release Year', y='Count', color='Sentiment', title='Sentiment Analysis of Content on Netflix')
df3Graph.show()