In [None]:
import re
import requests
import json
import pandas as pd
import urllib

#### 1. Retrieve the data and examine it.

In [88]:
# Walk the paginated /films endpoint, collecting one (director, rt_score)
# tuple per film until the server returns an empty page.
total_results = []
page_num = 1
while True:
    url = f'http://linserv1.cims.nyu.edu:10000/films?_page={page_num}'
    # Bound the wait so an unresponsive server cannot hang the kernel forever.
    res = requests.get(url, timeout=10)
    # Fail with a clear HTTPError instead of trying to parse an error page as JSON.
    res.raise_for_status()
    data = [(p['director'], int(p['rt_score'])) for p in res.json()]
    # An empty page marks the end of the listing.
    if not data:
        break
    # Extend in place; rebuilding the list with `+` on every page is quadratic.
    total_results.extend(data)
    page_num += 1
print(total_results)

[('Hayao Miyazaki', 95), ('Isao Takahata', 97), ('Hayao Miyazaki', 93), ('Hayao Miyazaki', 96), ('Isao Takahata', 100), ('Hayao Miyazaki', 94), ('Isao Takahata', 78), ('Yoshifumi Kondō', 91), ('Hayao Miyazaki', 92), ('Isao Takahata', 75), ('Hayao Miyazaki', 97), ('Hiroyuki Morita', 89), ('Hayao Miyazaki', 87), ('Gorō Miyazaki', 41), ('Hayao Miyazaki', 92), ('Hiromasa Yonebayashi', 95), ('Gorō Miyazaki', 83), ('Hayao Miyazaki', 89), ('Isao Takahata', 100), ('Hiromasa Yonebayashi', 92), ('Michaël Dudok de Wit', 93)]


#### 2. Load the data into a DataFrame

In [89]:
# Tabulate the collected (director, rt_score) tuples, one row per tuple.
film_columns = ['director', 'rt_score']
df = pd.DataFrame(data=total_results, columns=film_columns)
display(df)

Unnamed: 0,director,rt_score
0,Hayao Miyazaki,95
1,Isao Takahata,97
2,Hayao Miyazaki,93
3,Hayao Miyazaki,96
4,Isao Takahata,100
5,Hayao Miyazaki,94
6,Isao Takahata,78
7,Yoshifumi Kondō,91
8,Hayao Miyazaki,92
9,Isao Takahata,75


In [90]:
# Mean rt score per director, indexed by director, in a column named avg_rt_score.
avg_score = (
    df.groupby('director')[['rt_score']]
    .mean()
    .rename(columns={'rt_score': 'avg_rt_score'})
)
display(avg_score)

Unnamed: 0_level_0,avg_rt_score
director,Unnamed: 1_level_1
Gorō Miyazaki,62.0
Hayao Miyazaki,92.777778
Hiromasa Yonebayashi,93.5
Hiroyuki Morita,89.0
Isao Takahata,90.0
Michaël Dudok de Wit,93.0
Yoshifumi Kondō,91.0


In [86]:
# Number of rows (non-null rt scores) recorded for each director.
per_director_scores = df.groupby("director")["rt_score"]
uni_counts = per_director_scores.count().to_frame(name='count')
display(uni_counts)

Unnamed: 0_level_0,count
director,Unnamed: 1_level_1
Gorō Miyazaki,2
Hayao Miyazaki,9
Hiromasa Yonebayashi,2
Hiroyuki Morita,1
Isao Takahata,5
Michaël Dudok de Wit,1
Yoshifumi Kondō,1


In [85]:
# Combine the per-director average and count into one report.
# 'director' is the index level shared by both frames, so a single `on` key suffices.
merged_report = avg_score.merge(uni_counts, how='inner', on='director')
display(merged_report)

Unnamed: 0_level_0,avg_rt_score,count
director,Unnamed: 1_level_1,Unnamed: 2_level_1
Gorō Miyazaki,62.0,2
Hayao Miyazaki,92.777778,9
Hiromasa Yonebayashi,93.5,2
Hiroyuki Morita,89.0,1
Isao Takahata,90.0,5
Michaël Dudok de Wit,93.0,1
Yoshifumi Kondō,91.0,1


In [84]:
# Rank the report from the highest average rt score to the lowest.
final_df = merged_report.sort_values('avg_rt_score', ascending=False)
display(final_df)

Unnamed: 0_level_0,avg_rt_score,count
director,Unnamed: 1_level_1,Unnamed: 2_level_1
Hiromasa Yonebayashi,93.5,2
Michaël Dudok de Wit,93.0,1
Hayao Miyazaki,92.777778,9
Yoshifumi Kondō,91.0,1
Isao Takahata,90.0,5
Hiroyuki Morita,89.0,1
Gorō Miyazaki,62.0,2
