## Get Metacritic Ratings
Using Metacritic API

In [1]:
%matplotlib inline

import configparser
import os

import requests
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse, stats, spatial
import scipy.sparse.linalg
from sklearn import preprocessing, decomposition
import librosa
import IPython.display as ipd
import json

#added by me:
import requests

### Load Dataset ###

In [2]:
# Load the cleaned movie dataset from disk.
all_movies = pd.read_csv(filepath_or_buffer='Saved_Datasets/CleanDataset.csv')

In [3]:
# Preview the first five rows to check the columns that were loaded.
all_movies.head(n=5)

Unnamed: 0,id,budget,genres,imdb_id,overview,production_companies,release_date,revenue,tagline,title,actor1_name,actor2_name,actor3_name,actor4_name,actor5_name,director_name
0,12,94000000,Animation|Family,266543,"Nemo, an adventurous young clownfish, is unexp...",Pixar Animation Studios,2003-05-30,940335536,"There are 3.7 trillion fish in the ocean, they...",Finding Nemo,Albert Brooks,Ellen DeGeneres,Alexander Gould,Willem Dafoe,Brad Garrett,Andrew Stanton
1,16,12800000,Drama|Crime|Music,168629,"Selma, a Czech immigrant on the verge of blind...",Fine Line Features,2000-05-17,40031879,You don't need eyes to see.,Dancer in the Dark,Björk,Catherine Deneuve,David Morse,Peter Stormare,Joel Grey,Lars von Trier
2,22,140000000,Adventure|Fantasy|Action,325980,"Jack Sparrow, a freewheeling 17th-century pira...",Walt Disney Pictures,2003-09-07,655011224,Prepare to be blown out of the water.,Pirates of the Caribbean: The Curse of the Bla...,Johnny Depp,Geoffrey Rush,Orlando Bloom,Keira Knightley,Jack Davenport,Gore Verbinski
3,24,30000000,Action|Crime,266697,An assassin is shot at the altar by her ruthle...,Miramax Films,2003-10-10,180949000,Go for the kill.,Kill Bill: Vol. 1,Uma Thurman,Lucy Liu,Vivica A. Fox,Daryl Hannah,David Carradine,Quentin Tarantino
4,25,72000000,Drama|War,418763,Jarhead is a film about a US Marine Anthony Sw...,Universal Pictures,2005-04-11,96889998,Welcome to the suck.,Jarhead,Jamie Foxx,Scott MacDonald,none,Lucas Black,Peter Sarsgaard,Sam Mendes


### Drop Unused Columns

In [4]:
# Discard, in place, the metadata columns that the rating lookup does not use.
all_movies.drop(
    columns=[
        'budget', 'genres', 'id', 'overview', 'release_date', 'revenue', 'tagline',
        'actor1_name', 'actor2_name', 'actor3_name', 'actor4_name', 'actor5_name',
        'director_name', 'production_companies',
    ],
    inplace=True,
)

In [5]:
# Keep only the rows that have an imdb_id value.
all_movies = all_movies.dropna(subset=['imdb_id'])

### Metacritic API

Ratings are a weighted average of several critics' scores; the best possible rating is 100.

In [6]:
def find_rating(film_name, api_key=None, timeout=30):
    '''Return the Metacritic critic rating for a film, looked up by title.

    The title is sanitised (":", "?", "." and "&" removed; "$" and "/"
    replaced by "-"; titles shorter than 4 characters padded with "----")
    and sent to the Metacritic search endpoint. The critic rating of the
    first search hit is returned.

    Parameters
    ----------
    film_name : str
        Title of the film to search for.
    api_key : str, optional
        Value sent in the "X-Mashape-Key" header. When omitted, the
        MASHAPE_KEY environment variable is used, so the credential does
        not have to be stored in the notebook.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 30).

    Returns
    -------
    The 'CriticRating' value of the first search result, or the string
    'Error' (after printing the offending title) when the title is not a
    string, the request fails, or the response holds no usable result.

    Raises
    ------
    RuntimeError
        If no API key is passed and MASHAPE_KEY is not set.
    '''

    # A missing credential is a configuration problem, not a per-film
    # failure: stop immediately instead of marking every film as 'Error'.
    key = api_key or os.environ.get("MASHAPE_KEY")
    if not key:
        raise RuntimeError(
            "Metacritic API key missing: pass api_key or set the "
            "MASHAPE_KEY environment variable"
        )

    # A non-string title (e.g. NaN for a missing value) cannot be searched.
    if not isinstance(film_name, str):
        print("Error: '"+str(film_name)+"'")
        return 'Error'

    #remove punctuation
    film_name = film_name.translate(str.maketrans({
        ":": None, "?": None, ".": None, "&": None,
        "$": "-", "/": "-",
    }))

    #film name must be at least 4 characters
    # (requirement stated by the original author; not verified here)
    if len(film_name) < 4: film_name = film_name+"----"

    website = "https://api-marcalencc-metacritic-v1.p.mashape.com/search/"
    headers = {
        "X-Mashape-Key": key,
        "Accept": "application/json"
    }

    try:
        # The timeout keeps one hung connection from stalling a long batch run.
        response = requests.get(website+film_name+"/movie", headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()[0]['SearchItems'][0]['Rating']['CriticRating']
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Network/HTTP failure, invalid JSON, or a response without the
        # expected structure: report the title and return the sentinel
        # that the later cells filter on.
        print("Error: '"+film_name+"'")
        return 'Error'

Example:

In [26]:
# Single lookup to check that the API call works.
find_rating(film_name="Memento")

80

### Find Rating for each Movie

In [7]:
# Work on an independent copy so all_movies itself is left untouched.
test = all_movies.copy()
# Alternative starting points: resume from a previously saved ratings file.
#test = pd.read_csv('Saved_Datasets/metacritic_ratings.csv', encoding='cp1252')
#test = pd.read_csv('Saved_Datasets/rating_with_errors.csv', encoding='cp1252') # old one, unused

In [8]:
# Slow cell: approx. 2-3 hours, since find_rating is called once per title.
test['Metacritic'] = test['title'].apply(find_rating)

Error: 'Harry Potter and the Philosopher's Stone'
Error: 'Mulholland Drive'
Error: 'Mr Bean's Holiday'
Error: 'The Grudge 3'
Error: 'Fantastic 4 Rise of the Silver Surfer'
Error: 'Death Proof'
Error: 'The Tulse Luper Suitcases, Part 3 From Sark to the Finish'
Error: '310 to Yuma'
Error: 'Ken Park'
Error: 'Dude, Where’s My Car'
Error: 'What Just Happened'
Error: 'Cry_Wolf'
Error: 'D-Tox'
Error: 'The Death and Life of Bobby Z'
Error: 'WALL·E'
Error: 'Jeepers Creepers 2'
Error: 'National Lampoon’s Van Wilder'
Error: 'Harry Potter and the Deathly Hallows Part 1'
Error: 'Stargate The Ark of Truth'
Error: 'Kidulthood'
Error: 'The Lazarus Project'
Error: 'Justice League The New Frontier'
Error: 'Assembly'
Error: 'Dhoom 2'
Error: 'Virgin Territory'
Error: 'Romanzo criminale'
Error: 'Jaane Tu Ya Jaane Na'
Error: 'Foolproof'
Error: 'The Boy in the Striped Pyjamas'
Error: 'Dil Chahta Hai'
Error: 'Hood of Horror'
Error: 'Fishtales'
Error: 'Phoonk'
Error: 'The Best Two Years'
Error: 'The Car Keys'


### Apply function to Errors only

In [86]:
#Takes approx. 15-30 min. !!!
# Retry only the rows whose lookup failed. The result is written back through
# .loc with the same mask: assigning the filtered result to the whole column
# would align on the index and overwrite every already-fetched rating with NaN.
retry_mask = test['Metacritic'] == 'Error'
test.loc[retry_mask, 'Metacritic'] = test.loc[retry_mask, 'title'].apply(find_rating)

Error: 'Harry Potter and the Philosopher's Stone'
Error: 'Mulholland Drive'
Error: 'Mr Bean's Holiday'
Error: 'The Grudge 3'
Error: 'Fantastic 4 Rise of the Silver Surfer'
Error: 'Death Proof'
Error: 'Hallam Foe'
Error: 'Yo tengo un corazón que quiere hundir mi cuerpo en los mares de ilusión'
Error: 'The Tulse Luper Suitcases, Part 1 The Moab Story'
Error: 'The Tulse Luper Suitcases, Part 2 Vaux to the Sea'
Error: 'The Tulse Luper Suitcases, Part 3 From Sark to the Finish'
Error: '310 to Yuma'
Error: 'Ken Park'
Error: 'Dude, Wheres My Car'
Error: 'What Just Happened'
Error: 'Cry_Wolf'
Error: 'D-Tox'
Error: 'The Death and Life of Bobby Z'
Error: 'Itty Bitty Titty Committee'
Error: 'WALL·E'
Error: 'Jeepers Creepers 2'
Error: 'National Lampoons Van Wilder'
Error: 'Harry Potter and the Deathly Hallows Part 1'
Error: 'Stargate The Ark of Truth'
Error: 'Kidulthood'
Error: 'The Lazarus Project'
Error: 'Justice League The New Frontier'
Error: 'Assembly'
Error: 'Dhoom 2'
Error: 'Dr Horrible's

### Compute error

In [None]:
# Collect the rows whose lookup failed and report their count together with
# their share of all rows (the percentage text is cut to 4 characters).
test_fail = test[test['Metacritic'].eq('Error')]
print("".join([
    "Nb. errors: ", str(len(test_fail)),
    " (", str(len(test_fail)/len(test)*100)[:4], "%)",
]))

### ! Save Dataset ! ###

In [5]:
# Persist the ratings table (without the index) so the slow lookup need not be repeated.
test.to_csv(path_or_buf='Saved_Datasets/metacritic_ratings.csv', index=False)

### Load Dataset

In [2]:
# Read the saved ratings table back from disk.
meta = pd.read_csv(filepath_or_buffer='Saved_Datasets/metacritic_ratings.csv')