### Open the file and take a look 

In [1]:
import pandas as pd

# Read the ratings CSV (path is relative to the notebook's working directory)
# and preview the first five rows.
data = pd.read_csv(filepath_or_buffer="netflix_ratings.csv")
data.head(n=5)

Unnamed: 0,title,rating,ratingLevel,ratingDescription,release year,user rating score,user rating size
0,White Chicks,PG-13,"crude and sexual humor, language and some drug...",80,2004,82.0,80
1,Lucky Number Slevin,R,"strong violence, sexual content and adult lang...",100,2006,,82
2,Grey's Anatomy,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,98.0,80
3,Prison Break,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2008,98.0,80
4,How I Met Your Mother,TV-PG,Parental guidance suggested. May not be suitab...,70,2014,94.0,80


### Check the overall number of movies

In [2]:
# shape is a (row count, column count) tuple for the frame.
data.shape

(999, 7)

### Check whether any NaN values exist

In [3]:
# notna() marks each cell True when it holds a value and False when the value
# is missing; all(axis=0) then collapses every column to a single flag, so a
# False in the result means that column has at least one missing value.
data.notna().all(axis=0)

title                 True
rating                True
ratingLevel          False
ratingDescription     True
release year          True
user rating score    False
user rating size      True
dtype: bool

### Drop NaNs 

In [4]:
# Keep only the rows in which every column has a value: a row is discarded
# as soon as any one of its cells is NaN.
data = data.dropna(axis=0, how="any")

In [5]:
# (row count, column count) once the rows containing NaN have been dropped.
data.shape

(574, 7)

In [6]:
# Preview the first five remaining rows; the original index labels are kept,
# so gaps in the index mark rows that were dropped.
data.head()

Unnamed: 0,title,rating,ratingLevel,ratingDescription,release year,user rating score,user rating size
0,White Chicks,PG-13,"crude and sexual humor, language and some drug...",80,2004,82.0,80
2,Grey's Anatomy,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,98.0,80
3,Prison Break,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2008,98.0,80
4,How I Met Your Mother,TV-PG,Parental guidance suggested. May not be suitab...,70,2014,94.0,80
5,Supernatural,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,95.0,80


### Select all the TV-14 programs 

In [7]:
# Boolean-mask the frame down to the rows whose rating is exactly "TV-14".
tv_14 = data.loc[data["rating"].eq("TV-14")]
tv_14.head()

Unnamed: 0,title,rating,ratingLevel,ratingDescription,release year,user rating score,user rating size
2,Grey's Anatomy,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,98.0,80
3,Prison Break,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2008,98.0,80
5,Supernatural,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,95.0,80
7,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80
9,Pretty Little Liars,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,96.0,80


### Find the most recently released TV-14 program

In [8]:
# List the column labels of the TV-14 subset (note that some contain spaces).
tv_14.columns

Index(['title', 'rating', 'ratingLevel', 'ratingDescription', 'release year',
       'user rating score', 'user rating size'],
      dtype='object')

In [9]:
# Replace the space in "release year" with an underscore so the column can be
# reached with attribute access (tv_14.release_year).
tv_14 = tv_14.rename({"release year": "release_year"}, axis="columns")

In [10]:
# Confirm the column now appears as release_year.
tv_14.head()

Unnamed: 0,title,rating,ratingLevel,ratingDescription,release_year,user rating score,user rating size
2,Grey's Anatomy,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,98.0,80
3,Prison Break,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2008,98.0,80
5,Supernatural,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,95.0,80
7,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80
9,Pretty Little Liars,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,96.0,80


In [11]:
# Latest release year found among the TV-14 rows.
tv_14["release_year"].max()

2017

In [12]:
# Keep the TV-14 rows released in 2017 and show up to ten of them.
tv_14_in_2017 = tv_14.loc[tv_14["release_year"].eq(2017)]
tv_14_in_2017.head(n=10)

Unnamed: 0,title,rating,ratingLevel,ratingDescription,release_year,user rating score,user rating size
7,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80
61,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80
269,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80
319,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80
358,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80
459,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80


### Perform a simple lookup 

In [13]:
# Membership test: True when at least one row carries this exact title.
"Breaking Bad" in data["title"].tolist()

True

### Take a closer look at the result

In [14]:
# Show every row whose title is exactly "Breaking Bad".
data.loc[data["title"].eq("Breaking Bad")]

Unnamed: 0,title,rating,ratingLevel,ratingDescription,release year,user rating score,user rating size
6,Breaking Bad,TV-MA,For mature audiences. May not be suitable for...,110,2013,97.0,80
58,Breaking Bad,TV-MA,For mature audiences. May not be suitable for...,110,2013,97.0,80
408,Breaking Bad,TV-MA,For mature audiences. May not be suitable for...,110,2013,97.0,80


#### Looks like there are duplicated values, let's remove them 

In [15]:
# Rows count as duplicates when they share a title; the first occurrence of
# each title is kept and the later ones are removed.
data = data.drop_duplicates(subset=["title"], keep="first")

In [16]:
# (row count, column count) after removing rows with a repeated title.
data.shape

(244, 7)

In [17]:
# Re-run the title lookup on the de-duplicated frame; a single row should remain.
breaking_bad = data.loc[data["title"].eq("Breaking Bad")]
breaking_bad

Unnamed: 0,title,rating,ratingLevel,ratingDescription,release year,user rating score,user rating size
6,Breaking Bad,TV-MA,For mature audiences. May not be suitable for...,110,2013,97.0,80


#### Looks like the description in ratingLevel is truncated in the table view, let's extract it for a better look

In [18]:
# The table view shortens long strings with "..."; pulling the raw values out
# of the column shows the full text.
description = breaking_bad["ratingLevel"].values
description

array(['For mature audiences.  May not be suitable for children 17 and under.'],
      dtype=object)

### Let's check the final number of movies left in the dataset

In [19]:
# (row count, column count) of the frame after dropping NaNs and duplicate titles.
data.shape

(244, 7)

### Let's fix the swapped ratingLevel and ratingDescription column names

In [20]:
# The two labels are swapped in the source file: the column named "ratingLevel"
# holds the descriptive text, while "ratingDescription" holds the numeric level.
# Because the rename is a two-way swap, running it a second time would put the
# wrong labels back. Swap only while "ratingDescription" is still the numeric
# column, and assign the result instead of using inplace=True, so re-running
# this cell is harmless.
if pd.api.types.is_numeric_dtype(data["ratingDescription"]):
    data = data.rename(
        columns={
            "ratingLevel": "ratingDescription",
            "ratingDescription": "ratingLevel",
        }
    )
data.head()

Unnamed: 0,title,rating,ratingDescription,ratingLevel,release year,user rating score,user rating size
0,White Chicks,PG-13,"crude and sexual humor, language and some drug...",80,2004,82.0,80
2,Grey's Anatomy,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,98.0,80
3,Prison Break,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2008,98.0,80
4,How I Met Your Mother,TV-PG,Parental guidance suggested. May not be suitab...,70,2014,94.0,80
5,Supernatural,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,95.0,80


In [21]:
# Rebuild the TV-14 subset from the current state of `data`.
tv_14 = data.loc[data["rating"].eq("TV-14")]
tv_14.head()

Unnamed: 0,title,rating,ratingDescription,ratingLevel,release year,user rating score,user rating size
2,Grey's Anatomy,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,98.0,80
3,Prison Break,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2008,98.0,80
5,Supernatural,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,95.0,80
7,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80
9,Pretty Little Liars,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2016,96.0,80


In [22]:
# Keep the TV-14 rows released in 2017 and show up to ten of them. The column
# is still called "release year" here because this subset was rebuilt from `data`.
tv_14_in_2017 = tv_14.loc[tv_14["release year"].eq(2017)]
tv_14_in_2017.head(n=10)

Unnamed: 0,title,rating,ratingDescription,ratingLevel,release year,user rating score,user rating size
7,The Vampire Diaries,TV-14,Parents strongly cautioned. May be unsuitable ...,90,2017,91.0,80
