In [1]:
'''
CLASS: Getting Data from APIs

What is an API?
- Application Programming Interface
- Structured way to expose specific functionality and data access to users
- Web APIs usually follow the "REST" standard

How to interact with an API:
- Make a "request" to a specific URL (an "endpoint"), and get the data back in a "response"
- Most relevant request method for us is GET (other methods: POST, PUT, DELETE)
- Response is often JSON format
- Web console is sometimes available (allows you to explore an API)
'''

'\nCLASS: Getting Data from APIs\n\nWhat is an API?\n- Application Programming Interface\n- Structured way to expose specific functionality and data access to users\n- Web APIs usually follow the "REST" standard\n\nHow to interact with a REST API:\n- Make a "request" to a specific URL (an "endpoint"), and get the data back in a "response"\n- Most relevant request method for us is GET (other methods: POST, PUT, DELETE)\n- Response is often JSON format\n- Web console is sometimes available (allows you to explore an API)\n'

In [44]:
import pandas as pd
import requests

In [45]:
# read IMDb data into a DataFrame: we want a year column!
movies = pd.read_csv('../data/imdb_1000.csv')
movies.head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


In [46]:
print movies.shape
movies.describe()

(979, 6)


Unnamed: 0,star_rating,duration
count,979.0,979.0
mean,7.889785,120.979571
std,0.336069,26.21801
min,7.4,64.0
25%,7.6,102.0
50%,7.8,117.0
75%,8.1,134.0
max,9.3,242.0


In [48]:
###### exercise #######

# Is the title column unique? If not, what are the non unique names?

movies.title.unique()
set(movies.title)
movies.title.value_counts() > 1



The Girl with the Dragon Tattoo                            True
Dracula                                                    True
Les Miserables                                             True
True Grit                                                  True
Kung Fu Hustle                                            False
The Bridge on the River Kwai                              False
Donnie Brasco                                             False
Running Scared                                            False
The Evil Dead                                             False
My Left Foot                                              False
Escape from Alcatraz                                      False
The Matrix                                                False
Brokeback Mountain                                        False
Hachi: A Dog's Tale                                       False
The Visitor                                               False
Elizabeth                               

In [55]:

from collections import Counter
for title, count in Counter(movies.title).items():
    if count > 1:
        print title
    


The Girl with the Dragon Tattoo
Les Miserables
True Grit
Dracula


In [56]:
Counter(movies.title).items()

[('Smoke', 1),
 ('The Butterfly Effect', 1),
 ('Jusan-nin no shikaku', 1),
 ('8 1/2', 1),
 ('Life of Pi', 1),
 ('Moon', 1),
 ('First Blood', 1),
 ('Snatch.', 1),
 ('RoboCop', 1),
 ('Paths of Glory', 1),
 ('Chungking Express', 1),
 ('Rashomon', 1),
 ('Roman Holiday', 1),
 ('Enemy at the Gates', 1),
 ('Sherlock Holmes', 1),
 ('Metropolis', 1),
 ('The Experiment', 1),
 ('The Purple Rose of Cairo', 1),
 ('The Raid 2', 1),
 ('Sideways', 1),
 ('Frances Ha', 1),
 ('Evil', 1),
 ('Shadow of a Doubt', 1),
 ('Tae Guk Gi: The Brotherhood of War', 1),
 ('Citizen Kane', 1),
 ('We Need to Talk About Kevin', 1),
 ('Doctor Zhivago', 1),
 ('Argo', 1),
 ('Volver', 1),
 ('Rio Bravo', 1),
 ('Dances with Wolves', 1),
 ('The Return', 1),
 ('Begin Again', 1),
 ('The Blind Side', 1),
 ('The Wizard of Oz', 1),
 ("It's a Mad, Mad, Mad, Mad World", 1),
 ('Back to the Future', 1),
 ('L.A. Confidential', 1),
 ('Alice in Wonderland', 1),
 ("Adam's Apples", 1),
 ('Blade Runner', 1),
 ('Wonder Boys', 1),
 ('The Night 

In [57]:
# use requests library to interact with a URL http://www.omdbapi.com
r = requests.get('http://www.omdbapi.com?t=the shawshank redemption&r=json&type=movie')

In [78]:
r1 = requests.get('http://www.omdbapi.com?t=Argo&r=json')

In [83]:
r1.status_code

r1.text
r1.json()['Director']


u'Ben Affleck'

In [58]:
# check the status: 200 means success, 4xx or 5xx means error
r.status_code

200

In [11]:
# view the raw response text
r.text

u'{"Title":"The Shawshank Redemption","Year":"1994","Rated":"R","Released":"14 Oct 1994","Runtime":"142 min","Genre":"Crime, Drama","Director":"Frank Darabont","Writer":"Stephen King (short story \\"Rita Hayworth and Shawshank Redemption\\"), Frank Darabont (screenplay)","Actors":"Tim Robbins, Morgan Freeman, Bob Gunton, William Sadler","Plot":"Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.","Language":"English","Country":"USA","Awards":"Nominated for 7 Oscars. Another 14 wins & 21 nominations.","Poster":"http://ia.media-imdb.com/images/M/MV5BODU4MjU4NjIwNl5BMl5BanBnXkFtZTgwMDU2MjEyMDE@._V1_SX300.jpg","Metascore":"80","imdbRating":"9.3","imdbVotes":"1,626,900","imdbID":"tt0111161","Type":"movie","Response":"True"}'

In [12]:
# decode the JSON response body into a dictionary
r.json()

{u'Actors': u'Tim Robbins, Morgan Freeman, Bob Gunton, William Sadler',
 u'Awards': u'Nominated for 7 Oscars. Another 14 wins & 21 nominations.',
 u'Country': u'USA',
 u'Director': u'Frank Darabont',
 u'Genre': u'Crime, Drama',
 u'Language': u'English',
 u'Metascore': u'80',
 u'Plot': u'Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.',
 u'Poster': u'http://ia.media-imdb.com/images/M/MV5BODU4MjU4NjIwNl5BMl5BanBnXkFtZTgwMDU2MjEyMDE@._V1_SX300.jpg',
 u'Rated': u'R',
 u'Released': u'14 Oct 1994',
 u'Response': u'True',
 u'Runtime': u'142 min',
 u'Title': u'The Shawshank Redemption',
 u'Type': u'movie',
 u'Writer': u'Stephen King (short story "Rita Hayworth and Shawshank Redemption"), Frank Darabont (screenplay)',
 u'Year': u'1994',
 u'imdbID': u'tt0111161',
 u'imdbRating': u'9.3',
 u'imdbVotes': u'1,626,900'}

In [13]:
# extracting the year from the dictionary
r.json()['Year']

u'1994'

In [14]:
# what happens if the movie name is not recognized?
r = requests.get('http://www.omdbapi.com/?t=thebestmovieevermade&r=json&type=movie')
r.status_code
r.json()

{u'Error': u'Movie not found!', u'Response': u'False'}

In [25]:
##### Exercise #####

# define a function to return the year
def get_movie_year(title):
    # use requests to grab the json of the data from omdb
#     if the response was successful:
#         return the year
#     else:
#         return None

In [155]:
def get_movie_year(title):
    r = requests.get('http://www.omdbapi.com/?t='+title+'&r=json&type=movie')
    a = r.json()
    s = r.status_code
    
    if s == 200:
        print a['Year']
       
    return "None"

In [145]:
title = 'Argo'
r = requests.get('http://www.omdbapi.com/?t='+title+'&r=json&type=movie')

r.status_code

a = r.json()['Year']
print a



2012


In [156]:
get_movie_year('Argo')

2012


'None'

In [17]:
def get_movie_year(title):
    r = requests.get('http://www.omdbapi.com/?t=thebestmovieevermade&r=json&type=movie')
    response = r.jason()
    if 'Error' not in response: return response['year']
    
    #if r.status_code == 200:
    #    return title
    #return 'none' # always returns code so no need to write this.

In [18]:
# test the function
print get_movie_year('Finding dorey')
print get_movie_year('blahblahblah')

Finding dorey
blahblahblah


In [27]:
# create a smaller DataFrame for testing
# the copy method makes a carbon copy of the dataframe
top_movies = movies.head().copy()

In [28]:
# write a for loop to build a list of years
from time import sleep # timey wimey stuff
years = []
for title in top_movies.title:
    years.append(get_movie_year(title))
    sleep(1)
    
# the sleep is used to not over hit the API
# this is called "rate limiting"
# Most APIs don't allow you to hit it too much

In [161]:
from time import sleep
top_movies = movies.head().copy()
top_movies.head()
years = []

for title in top_movies.title:
    years.append(get_movie_year(title))
    sleep(1)

1994
1972
1974
2008
1994


In [29]:
# assert will throw an error if the value inside is NOT True

assert(3==4)

AssertionError: 

In [165]:
years

['None', 'None', 'None', 'None', 'None']

In [None]:
# check that the DataFrame and the list of years are the same length
assert(len(top_movies) == len(years))

In [None]:
# save that list as a new column
top_movies['year'] = years

In [None]:
'''
Bonus content: Updating the DataFrame as part of a loop
'''

# enumerate allows you to access the item location while iterating
letters = ['a', 'b', 'c']
for index, letter in enumerate(letters):
    print index, letter

In [None]:
# iterrows method for DataFrames is similar
for index, row in top_movies.iterrows():
    print index, row.title

In [None]:
# create a new column and set a default value
movies['yearsr'] = None
movies.head()

In [None]:
# loc method allows you to access a DataFrame element by 'label'
movies.loc[0, 'year'] = 1994
movies.head()

In [None]:
# write a for loop to update the year for the first three movies
for index, row in movies.iterrows():
    if index < 3:
        movies.loc[index, 'year'] = get_movie_year(row.title)
        sleep(1)
    else:
        break

In [30]:
movies.head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


In [21]:
# this is my own personal twitter api information
# if you could be so kind as to sign up yourself on both twitter and mashape that'd be great :)
# It's FREEEEEEE
api_key = 'g5uPIpw80nULQI1gfklv2zrh4'
api_secret = 'cOWvNWxYvPmEZ0ArZVeeVVvJu41QYHdUS2GpqIKtSQ1isd5PJy'
access_token = '49722956-TWl8J0aAS6KTdcbz3ppZ7NfqZEmrwmbsb9cYPNELG'
access_secret = '3eqrVssF3ppv23qyflyAto8wLEiYRA8sXEPSghuOJWTub'

# Masahpe Key
mashape_key = '0CLvblsJOxmshWkaep4szo9CILOMp1PM3hhjsnDi4k8g8ME14o'

In [22]:
# more complicated request

# HEADERS tell the API (or website) the type of system attempting the request
# DATA tells the API (or website) any pertinent info needed to make the request
# it is up to whoever wrote the API how access keys are passed in

url = "https://japerk-text-processing.p.mashape.com/sentiment/"
headers ={
        "X-Mashape-Key": mashape_key,
        "Content-Type": "application/x-www-form-urlencoded"
        }
data={
        "language": "english",
        "text": "I love it!"
        }

print requests.post(url, headers = headers, data = data).json()

{u'probability': {u'neg': 0.2906053804130494, u'neutral': 0.15976275669851123, u'pos': 0.7093946195869506}, u'label': u'pos'}


In [23]:
'''
Example of API WITH WRAPPER
tweepy is the python wrapper for twitter data
'''

'\nExample of API WITH WRAPPER\ntweepy is the python wrapper for twitter data\n'

In [24]:

import tweepy       # python wrapper for twitter api
import json
import time

In [25]:
tag = 'aapl'

# Documentation is your friend! http://docs.tweepy.org/en/v3.1.0/
auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth) # returns a tweepy authorization handler object
tweets = api.search(q=tag)

In [36]:
# let's take a look at the first one
tweets[0]

Status(contributors=None, truncated=False, text=u'Apple Stock Price: 109.47 #apple $AAPL', is_quote_status=False, in_reply_to_status_id=None, id=718121193916727296, favorite_count=0, _api=<tweepy.api.API object at 0x10f5fb650>, author=User(follow_request_sent=False, has_extended_profile=False, profile_use_background_image=False, _json={u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': False, u'default_profile_image': False, u'id': 3374613730, u'profile_background_image_url_https': u'https://abs.twimg.com/images/themes/theme1/bg.png', u'verified': False, u'profile_text_color': u'000000', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/620690331416178688/lzrH--bM_normal.jpg', u'profile_sidebar_fill_color': u'000000', u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/tPbBd0tAlH', u'indices': [0, 22], u'expanded_url': u'http://www.google.co.uk/finance?q=NASDAQ%3AAAPL', u'display_url': u'google.co.uk/finance?q=NASD\u2026

In [29]:
# wrappers come with built in python attributes and methods!
print tweets[0].created_at
print tweets[0].text

2016-04-08 03:41:09
$SPY 2morrow is going 2B Bear-u-tiful! Bulls BETTER cash out!! $SPX $QQQ  $ES_F $IWM $DJIA $AAPL $GOOGL $GS $USO $FB https://t.co/pJM7TGoHJX


In [38]:
# the author is an object in and of itself
tweets[0].author

User(follow_request_sent=False, has_extended_profile=False, profile_use_background_image=False, _json={u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': False, u'default_profile_image': False, u'id': 3374613730, u'profile_background_image_url_https': u'https://abs.twimg.com/images/themes/theme1/bg.png', u'verified': False, u'profile_text_color': u'000000', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/620690331416178688/lzrH--bM_normal.jpg', u'profile_sidebar_fill_color': u'000000', u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/tPbBd0tAlH', u'indices': [0, 22], u'expanded_url': u'http://www.google.co.uk/finance?q=NASDAQ%3AAAPL', u'display_url': u'google.co.uk/finance?q=NASD\u2026'}]}, u'description': {u'urls': []}}, u'followers_count': 29, u'profile_sidebar_border_color': u'000000', u'id_str': u'3374613730', u'profile_background_color': u'000000', u'listed_count': 8, u'is_translation_enabled': False, u'utc_off

In [31]:
# the author's handle
print tweets[0].author.screen_name
print tweets[0].author.profile_image_url

420WeedStreet
http://pbs.twimg.com/profile_images/590496287658676224/jw7b-HVM_normal.jpg


<img src="http://pbs.twimg.com/profile_images/620690331416178688/lzrH--bM_normal.jpg">

In [85]:
'''
THE BELOW CODE IS OPTIONAl
It creates a stream of a given tag!
'''

# This is the listener, resposible for receiving data
# I will not be covering this in class
class StdOutListener(tweepy.StreamListener):
    def on_data(self, data):
        # Twitter returns data in JSON format - we need to decode it first
        decoded = json.loads(data)
        #print decoded
        time_ =  time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(decoded['timestamp_ms']) / 1000))
        handle = decoded['user']['screen_name']
        tweet_text = decoded['text'].encode('ascii', 'ignore')
        num_followers = int(decoded['user']['followers_count'])
        # Also, we convert UTF-8 to ASCII ignoring all bad characters sent by users
        print '@%s at %s: %s with %d followers' % (handle, time_, tweet_text, num_followers)
        print ''
        return True
    def on_error(self, status):
        print status
def begin_live_feed(tags_to_follow):
    print "beginning live feed...."
    l = StdOutListener()
    auth = tweepy.OAuthHandler(api_key, api_secret)
    auth.set_access_token(access_token, access_secret)
    stream = tweepy.Stream(auth, l)
    stream.filter(track=tags_to_follow)

begin_live_feed(['#rogueone', 'starbucks'])
# this is an example use, if you create a list of words and phrases, 
# a live stream of tweets about them will show up

# INTERUPT THE KERNEL TO STOP THE MADNESS

beginning live feed....
@iParisMD at 2016-04-07 09:16:06: RT @diegoluna_: The teaser is out! So exciting!!  https://t.co/2UjUXus9sc with 90 followers

@vinizviana at 2016-04-07 09:16:07: RT @omelete: [OmeleTV] Agora, a surpresa final e discreta: DARTH VADER EST SIM NO TRAILER! #RogueOne https://t.co/5gFFVGCPSe with 799 followers

@omgcantbreathe at 2016-04-07 09:16:07: any person whining that Star Wars is having awesome female leads can go cuddle a dianoga #rogueone with 626 followers

@potentialKEN at 2016-04-07 09:16:07: #ROGUEONE teaser trailer looks like #Disney is doubling down on the idea that #STARWARS is for #girls too! #feminism https://t.co/ybLmIWZaEL with 1339 followers

@mstaylorjordan at 2016-04-07 09:16:07: RT @starwars: "I rebel." #RogueOne https://t.co/boMhrarHXV with 476 followers

@Krelian29 at 2016-04-07 09:16:07: Star Wars Rogue One main villain ? #starwars #rogueone #starwarsrogueone #empire https://t.co/Cx0VseahDV with 84 followers

@love_celeste1 at 2016-04-07 09

KeyboardInterrupt: 

In [40]:
'''
Other considerations when accessing APIs:
- Most APIs require you to have an access key (which you should store outside your code)
- Most APIs limit the number of API calls you can make (per day, hour, minute, etc.)
- Not all APIs are free
- Not all APIs are well-documented
- Pay attention to the API version

Python wrapper is another option for accessing an API:
- Set of functions that "wrap" the API code for ease of use
- Potentially simplifies your code
- But, wrapper could have bugs or be out-of-date or poorly documented
'''

'\nOther considerations when accessing APIs:\n- Most APIs require you to have an access key (which you should store outside your code)\n- Most APIs limit the number of API calls you can make (per day, hour, minute, etc.)\n- Not all APIs are free\n- Not all APIs are well-documented\n- Pay attention to the API version\n\nPython wrapper is another option for accessing an API:\n- Set of functions that "wrap" the API code for ease of use\n- Potentially simplifies your code\n- But, wrapper could have bugs or be out-of-date or poorly documented\n'

In [42]:
#### TINDER #####
# https://gist.github.com/rtt/10403467

link = 'https://www.facebook.com/dialog/oauth?client_id=464891386855067&redirect_uri=https://www.facebook.com/connect/login_success.html&scope=basic_info,email,public_profile,user_about_me,user_activities,user_birthday,user_education_history,user_friends,user_interests,user_likes,user_location,user_photos,user_relationship_details&response_type=token'
token_req = requests.get(link, allow_redirects = True)
token_req.headers
token_req.url

u'https://www.facebook.com/login.php?skip_api_login=1&api_key=464891386855067&signed_next=1&next=https%3A%2F%2Fwww.facebook.com%2Fv2.0%2Fdialog%2Foauth%3Fredirect_uri%3Dhttps%253A%252F%252Fwww.facebook.com%252Fconnect%252Flogin_success.html%26scope%3Dbasic_info%252Cemail%252Cpublic_profile%252Cuser_about_me%252Cuser_activities%252Cuser_birthday%252Cuser_education_history%252Cuser_friends%252Cuser_interests%252Cuser_likes%252Cuser_location%252Cuser_photos%252Cuser_relationship_details%26response_type%3Dtoken%26client_id%3D464891386855067%26ret%3Dlogin&cancel_url=https%3A%2F%2Fwww.facebook.com%2Fconnect%2Flogin_success.html%3Ferror%3Daccess_denied%26error_code%3D200%26error_description%3DPermissions%2Berror%26error_reason%3Duser_denied%23_%3D_&display=page&locale=en_US'

In [43]:
token = 'EAAGm0PX4ZCpsBAEPhbX0HUZCc095HjwZAMB630ClGGc7FRDaXrZCLyZBReaF8bEvJCITPcx6ZAZAHysG4DLLNEBLafiabCYZARMyY020dwPa7qYq9WJZCGbZC8SETdkGcTuy2NXqecfk2j0Hla6RX2ZACUeRCX8P9CcOOBCitKI8DSZA3BKZAaaXuZAKHj'

In [44]:
facebbok_id = '1342020603'

In [45]:
r = requests.post('https://api.gotinder.com/auth',
                  data = {
                  'facebook_token':token,
                  'facebook_id':facebbok_id}
                  )

In [46]:
r.status_code

200

In [47]:
j = r.json()

In [48]:
h = {'X-Auth-Token': j['token']}

In [49]:
recs = requests.get('https://api.gotinder.com/user/recs', headers = h)
matches = recs.json()


In [50]:
first_match = matches['results'][0]
first_match

{u'_id': u'56dd03b223701bcc5c132e83',
 u'badges': [],
 u'bio': u'I\u2019m not really single, I\u2019m dating myself. I take me out to eat, I buy me clothes. I love me.\n\U0001f1e7\U0001f1f7\U0001f341\U0001f46f\U0001f47d\U0001f480\nFOR THE LOVE OF GOD DO NOT JUST SEND ME "Hey". \nI\'ll hate you for it :/\n',
 u'birth_date': u'1994-03-09T00:00:00.000Z',
 u'birth_date_info': u'fuzzy birthdate active, not displaying real birth_date',
 u'common_friend_count': 0,
 u'common_friends': [],
 u'common_interests': [],
 u'common_like_count': 0,
 u'common_likes': [],
 u'connection_count': 81,
 u'distance_mi': 12,
 u'gender': 1,
 u'jobs': [],
 u'name': u'Sarah',
 u'photos': [{u'id': u'f6a6c9bb-41f9-4705-aab5-5fd0d666cb5a',
   u'processedFiles': [{u'height': 640,
     u'url': u'http://images.gotinder.com/56dd03b223701bcc5c132e83/640x640_f6a6c9bb-41f9-4705-aab5-5fd0d666cb5a.jpg',
     u'width': 640},
    {u'height': 320,
     u'url': u'http://images.gotinder.com/56dd03b223701bcc5c132e83/320x320_f6a6c9b

In [52]:
print first_match['bio']
print first_match['birth_date']
print first_match['common_likes']
print first_match['gender']
print first_match['name']
print first_match['photos'][0]['processedFiles'][0]['url']

I’m not really single, I’m dating myself. I take me out to eat, I buy me clothes. I love me.
🇧🇷🍁👯👽💀
FOR THE LOVE OF GOD DO NOT JUST SEND ME "Hey". 
I'll hate you for it :/

1994-03-09T00:00:00.000Z
[]
1
Sarah
http://images.gotinder.com/56dd03b223701bcc5c132e83/640x640_f6a6c9bb-41f9-4705-aab5-5fd0d666cb5a.jpg
