In [1]:
import numpy as np
import pandas as pd

#tools to help with JSON
import json
from pandas.io.json import json_normalize
#pprint - pretty print - preserves formatting
from pprint import pprint

#urllib / requests - used commonly for apis
import requests
import urllib2


# APIs
- https://www.yelp.com/developers/documentation
- http://www.reddit.com/dev/api
- http://www.publicapis.com
- https://apps.twitter.com (OAUTH nightmares)
- http://developer.rottentomatoes.com/docs
- http://developer.nytimes.com/

### Wikipedia 

- https://en.wikipedia.org/w/api.php?action=help&modules=main (API documentation)
- https://en.wikipedia.org/w/api.php?action=help&modules=query (more documentation)
- https://en.wikipedia.org/w/api.php?action=help&modules=query%2Bextracts (Returns plain-text or limited HTML extracts of the given pages)

#Example: NY Times contributions example

## Using a web api
- Everything after the '?' is called the query string, '&' denotes variables
- API-Key is provided by the api owner and should not be shared (typically)

http://api.nytimes.com/svc/elections/us/v3/finances/2012/contributions/candidate/P80003338.json?api-key=a135ff57e67e72c98171ea7a99d92d5b%3A8%3A72904630"

### API Call

### Let's build our call
NY Times api has a handy query builder that can help us get started using their api. 
http://developer.nytimes.com/io-docs


In [4]:
website = requests.get("http://api.nytimes.com/svc/search/v2/articlesearch.json?q=election&api-key=2c3cd35f0e301971ca704c014eb24ce1%3A4%3A45521562")
website.headers

{'Content-Length': '25249', 'X-Powered-By': 'PHP/5.3.27', 'Vary': 'Accept-Encoding', 'Server': 'nginx/1.4.1', 'Access-Control-Allow-Credentials': 'true', 'Date': 'Tue, 22 Mar 2016 19:31:15 GMT', 'X-Mashery-Responder': 'prod-j-worker-atl-04.mashery.com', 'X-Cached': 'MISS', 'Content-Type': 'application/json; charset=UTF-8', 'Access-Control-Allow-Origin': '*'}

Info on website status can be found here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html

## JSON

- javascript object notation
- comporable to xml
- structured information

In [5]:
website.json()

{u'copyright': u'Copyright (c) 2013 The New York Times Company.  All Rights Reserved.',
 u'response': {u'docs': [{u'_id': u'56c397f3798810739a21368a',
    u'abstract': u"James Poniewozik Critic's Notebook observes difficulty comedians are having in satirizing Donald J Trump, presidential candidate who has attitude of insult comic.",
    u'blog': [],
    u'byline': {u'original': u'By JAMES PONIEWOZIK',
     u'person': [{u'firstname': u'James',
       u'lastname': u'PONIEWOZIK',
       u'organization': u'',
       u'rank': 1,
       u'role': u'reported'}]},
    u'document_type': u'article',
    u'headline': {u'main': u'Donald Trump Is a Conundrum for Political Comedy',
     u'print_headline': u'Comedians, Like Rivals, Find Trump to Be a Difficult Target'},
    u'keywords': [{u'is_major': u'Y',
      u'name': u'persons',
      u'rank': u'1',
      u'value': u'Trump, Donald J'},
     {u'is_major': u'Y',
      u'name': u'subject',
      u'rank': u'2',
      u'value': u'Comedy and Humor'},
 

### Using the urllib2 to convert to a dataframe

In [40]:
request=urllib2.Request("http://api.nytimes.com/svc/search/v2/articlesearch.json?q=election&api-key=2c3cd35f0e301971ca704c014eb24ce1%3A4%3A45521562")
response = urllib2.urlopen(request)
elevations = response.read()
data1 = json.loads(elevations)
print type(data)
data= data1['response']

<type 'dict'>


In [52]:
pres_df = json_normalize(data['docs'])
pres_df.head()
pres_df.describe

<bound method DataFrame.describe of                         _id  \
0  56c397f3798810739a21368a   
1  55a6b3ec7988102dbd7885b3   
2  56e53a377988101860ab281e   
3  56d460247988107c6d9ab625   
4  56b9125a79881038a87bfe82   
5  547949227988105199668a03   
6  56ca06ef7988100e95e43f57   
7  56b555cd79881030baa2502b   
8  4fc2857645c1498b0d6d0e99   
9  569d188f7988100a3479e71b   

                                            abstract blog  \
0  James Poniewozik Critic's Notebook observes di...   []   
1                                               None   []   
2  Release schedules of movie studios reveal numb...   []   
3  The amount — a huge sum for a candidate who ha...   []   
4  A voter-turnout tactic employed by Ted Cruz’s ...   []   
5                                               None   []   
6  Outsider presidential candidates Sen Ted Cruz ...   []   
7                                               None   []   
8                     Eichler, A: Election by Murder   []   
9  Afghan of

In [54]:
pres_df[['abstract', 'byline.original', 'subsection_name']]

Unnamed: 0,abstract,byline.original,subsection_name
0,James Poniewozik Critic's Notebook observes di...,By JAMES PONIEWOZIK,Television
1,,By THE NEW YORK TIMES,Election 2016
2,Release schedules of movie studios reveal numb...,By MICHAEL CIEPLY,Media
3,The amount — a huge sum for a candidate who ha...,By NICHOLAS CONFESSORE,Politics
4,A voter-turnout tactic employed by Ted Cruz’s ...,By NICHOLAS CONFESSORE,Politics
5,,By FRAN SILVERMAN,Connecticut
6,Outsider presidential candidates Sen Ted Cruz ...,By NICHOLAS CONFESSORE and SARAH COHEN,Politics
7,,By NICHOLAS CONFESSORE and SARAH COHEN,Politics
8,"Eichler, A: Election by Murder",I.A,
9,Afghan officials condemn election commission's...,By MUJIB MASHAL,Asia Pacific


#Exercise: NY Times movies

In [9]:
## pull all the movies (use the requests package)
movies = requests.get("http://api.nytimes.com/svc/movies/v2/reviews/all.json?api-key=7acc2215c568722cbb6f1d6a47e747e4%3A9%3A72904630")

In [10]:
## take a look at the headers
movies.headers

{'Content-Length': '35900', 'Via': '1.1 varnish', 'X-Cache': 'MISS', 'X-Mashery-Responder': 'prod-j-worker-atl-04.mashery.com', 'Age': '0', 'Vary': 'Origin', 'Server': 'nginx/1.4.1', 'Last-Modified': 'Thu, 03 Mar 2016 12:00:06 GMT', 'X-Varnish': '1779286052', 'Cache-Control': 'max-age=7200', 'Date': 'Tue, 22 Mar 2016 19:33:47 GMT', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Methods': 'GET, PUT, POST', 'Content-Type': 'application/json; charset=UTF-8', 'Accept-Ranges': 'bytes', 'ETag': '"Thu, 03 Mar 2016 12:00:06 GMT"'}

In [55]:
## rerun the api request with urllib2 andtransform the data to pandas dataframe 
request=urllib2.Request("http://api.nytimes.com/svc/movies/v2/reviews/all.json?api-key=7acc2215c568722cbb6f1d6a47e747e4%3A9%3A72904630")
response = urllib2.urlopen(request)
elevations = response.read()
data = json.loads(elevations)
movies_df = json_normalize(data['results'])
movies_df.describe

<bound method DataFrame.describe of                  byline capsule_review  critics_pick         date_updated  \
0       Neil Genzlinger                            1  2016-03-03 07:00:06   
1        Stephen Holden                            0  2016-02-11 18:53:26   
2        Manohla Dargis                            0  2016-02-11 07:00:05   
3        Manohla Dargis                            0  2016-02-04 15:39:06   
4        Manohla Dargis                            0  2016-02-04 17:33:52   
5        Ben Kenigsberg                            1  2016-02-05 00:31:10   
6        Stephen Holden                            0  2016-02-04 17:13:51   
7       Neil Genzlinger                            0  2016-02-04 17:54:58   
8       Neil Genzlinger                            0  2016-02-04 17:39:23   
9        Nicolas Rapold                            0  2016-02-04 17:47:10   
10  Jeannette Catsoulis                            0  2016-02-04 16:55:21   
11          A. O. Scott                 

In [11]:
## print a data frame with just the title, movie id and critics pick status
movies_df[["display_title", "nyt_movie_id", "critics_pick"]]


Unnamed: 0,display_title,nyt_movie_id,critics_pick
0,I Touched All Your Stuff,481363,0
1,7 Chinese Brothers,480152,0
2,Fever,479630,0
3,Que Horas Ela Volta?,478698,0
4,The Great Man,480680,1
5,Mistress America,478468,1
6,Tom at the Farm,472150,1
7,A Sinner in Mecca,481622,1
8,Contracted: Phase II,481518,0
9,Dirty Weekend,479330,0


## Repeat this process with a new API

## Extra credit: 
There are several ways to convert to a python data frame. Find another!

## New to APIs? What to know more about RESTful APIs?
Check out this great tutorial
https://www.codecademy.com/courses/python-intermediate-en-6zbLp/3/5?curriculum_id=50ecbb00306689057a000188