In [1]:
import numpy as np
import pandas as pd

#tools to help with JSON
import json
from pandas.io.json import json_normalize
#pprint - pretty print - preserves formatting
from pprint import pprint

#urllib / requests - used commonly for apis
import requests
import urllib2


# APIs
- https://www.yelp.com/developers/documentation
- http://www.reddit.com/dev/api
- http://www.publicapis.com
- https://apps.twitter.com (OAUTH nightmares)
- http://developer.rottentomatoes.com/docs
- http://developer.nytimes.com/

### Wikipedia 

- https://en.wikipedia.org/w/api.php?action=help&modules=main (API documentation)
- https://en.wikipedia.org/w/api.php?action=help&modules=query (more documentation)
- https://en.wikipedia.org/w/api.php?action=help&modules=query%2Bextracts (Returns plain-text or limited HTML extracts of the given pages)

#Example: NY Times contributions example

## Using a web api
- Everything after the '?' is called the query string, '&' denotes variables
- API-Key is provided by the api owner and should not be shared (typically)

http://api.nytimes.com/svc/elections/us/v3/finances/2012/contributions/candidate/P80003338.json?api-key=a135ff57e67e72c98171ea7a99d92d5b%3A8%3A72904630"

### API Call

### Let's build our call
NY Times api has a handy query builder that can help us get started using their api. 
http://developer.nytimes.com/io-docs


In [5]:
api_call = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=%22F-22%22&"\
                       "begin_date=19900101&end_date=19981231&sort=oldest&api-"\
                       "key=52576efae1e6ae68fa52aa793d823f24%3A13%3A60973557"
website = requests.get(api_call)
website.headers

{'Content-Length': '17291', 'X-Powered-By': 'PHP/5.3.27', 'Vary': 'Accept-Encoding', 'Server': 'nginx/1.4.1', 'Access-Control-Allow-Credentials': 'true', 'Date': 'Tue, 22 Mar 2016 23:05:00 GMT', 'X-Mashery-Responder': 'prod-j-worker-atl-01.mashery.com', 'X-Cached': 'MISS', 'Content-Type': 'application/json; charset=UTF-8', 'Access-Control-Allow-Origin': '*'}

Info on website status can be found here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html

## JSON

- javascript object notation
- comporable to xml
- structured information

In [4]:
website.json()

{u'copyright': u'Copyright (c) 2013 The New York Times Company.  All Rights Reserved.',
 u'response': {u'docs': [{u'_id': u'4fd1a5148eb7c8105d6bd812',
    u'abstract': None,
    u'blog': [],
    u'byline': {u'original': u'By Andy Grundberg',
     u'person': [{u'firstname': u'Andy',
       u'lastname': u'Grundberg',
       u'organization': u'',
       u'rank': 1,
       u'role': u'reported'}]},
    u'document_type': u'article',
    u'headline': {u'main': u'Camera'},
    u'keywords': [{u'name': u'subject', u'value': u'TERMS NOT AVAILABLE'}],
    u'lead_paragraph': u'A friend recently returned from a photography safari in Africa and last week brought out her album to show me the color pictures she and her husband had taken. Zebras, lions, elephants, rhinoceroses -- I was amazed at the number and variety of wild species that had willingly sat still for their portraits. Having grown up on National Geographic pictures of wildlife, I always thought that capturing fleet and dangerous denizens 

### Using the urllib2 to convert to a dataframe

#### There are other ways that can bring these things to a dataframe. But we use URLLIB

In [13]:
request=urllib2.Request(api_call)
response = urllib2.urlopen(request)
elevations = response.read()
data1 = json.loads(elevations)
print type(data1)
data= data1['response']
print data

<type 'dict'>
{u'docs': [{u'type_of_material': u'News', u'blog': [], u'news_desk': None, u'lead_paragraph': u'A friend recently returned from a photography safari in Africa and last week brought out her album to show me the color pictures she and her husband had taken. Zebras, lions, elephants, rhinoceroses -- I was amazed at the number and variety of wild species that had willingly sat still for their portraits. Having grown up on National Geographic pictures of wildlife, I always thought that capturing fleet and dangerous denizens of jungle and savannah was a task to which only intrepid professionals were equal. The National Geographic helped make it seem that way by recounting the difficult months it took for the photographers to bring home the bacon.', u'headline': {u'main': u'Camera'}, u'abstract': None, u'print_page': u'79', u'word_count': 815, u'_id': u'4fd1a5148eb7c8105d6bd812', u'snippet': u'A friend recently returned from a photography safari in Africa and last week brought o

In [15]:
pres_df = json_normalize(data['docs'])
pres_df.head()
pres_df.describe

<bound method DataFrame.describe of                         _id abstract blog  byline  \
0  4fd1a5148eb7c8105d6bd812     None   []     NaN   
1  4fd1a9188eb7c8105d6c5763     None   []     NaN   
2  4fd1a82f8eb7c8105d6c38fa     None   []     NaN   
3  4fd1c9dc8eb7c8105d6fd803     None   []     NaN   
4  4fd1a91c8eb7c8105d6c5aba     None   []     NaN   
5  4fd198cb8eb7c8105d6a870a     None   []     NaN   
6  4fd1c9e28eb7c8105d6fdc75     None   []     NaN   
7  4fd19c048eb7c8105d6ad3a3     None   []     NaN   
8  4fd1c19b8eb7c8105d6f0f82     None   []     NaN   
9  4fd1a3318eb7c8105d6ba69b     None   []     NaN   

                 byline.original  \
0              By Andy Grundberg   
1        By RICHARD W. STEVENSON   
2               By Joel Kurtzman   
3        By RICHARD W. STEVENSON   
4                            NaN   
5                            NaN   
6           By MALCOLM W. BROWNE   
7  By COL. HARRY G. SUMMERS JR.;   
8                            NaN   
9                   

In [16]:
pres_df[['snippet', 'byline.original', 'type_of_material']]

Unnamed: 0,snippet,byline.original,type_of_material
0,A friend recently returned from a photography ...,By Andy Grundberg,News
1,Ending an intense five-year competition for wh...,By RICHARD W. STEVENSON,News
2,COMPANIES Lockheed's Side Gets a Big Boost Fro...,By Joel Kurtzman,Summary
3,"A year ago, Daniel M. Tellep, the chairman and...",By RICHARD W. STEVENSON,News
4,International A3-9 United Nations officials...,,Summary
5,Peace dividend? The Air Force is acting as if ...,,Editorial
6,THE Air Force's award last month of a huge con...,By MALCOLM W. BROWNE,News
7,"ON AUG. 2, 1990, the very day that Iraqi force...",By COL. HARRY G. SUMMERS JR.;,News
8,"To the Editor: ""Behind the Choosing of the F...",,Letter
9,To the Editor: The Times has an outstanding ...,,Letter


In [17]:
# We can then write to a .CSV

# Exercise: NY Times movies

In [1]:
api_call = "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=%22F-22%22&"\
                       "begin_date=19900101&end_date=19981231&sort=oldest&api-"\
                       "key=52576efae1e6ae68fa52aa793d823f24%3A13%3A60973557"
website = requests.get(api_call)
## take a look at the headers
website.headers

In [None]:
## rerun the api request with urllib2 andtransform the data to pandas dataframe 

In [3]:
## print a data frame with just the title, movie id and critics pick status



#Bonus: 
###Use this data to do NLP pre-processing then model it to predict critics picks based on the text in the review.

## Repeat this process with a new API

## Extra credit: 
There are several ways to convert to a python data frame. Find another!

## New to APIs? What to know more about RESTful APIs?
Check out this great tutorial
https://www.codecademy.com/courses/python-intermediate-en-6zbLp/3/5?curriculum_id=50ecbb00306689057a000188