In [29]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This exercise shows some important concepts that you should be aware of:
- using codecs module to write unicode files
- using authentication with web APIs
- using offset when accessing web APIs

To run this code locally you have to register at the NYTimes developer site 
and get your own API key. You will be able to complete this exercise in our UI
without doing so, as we have provided a sample result. (See the file 
'popular-viewed-1.json' from the tabs above.)

Your task is to modify the article_overview() function to process the saved
file that represents the most popular articles (by view count) from the last
day, and return a tuple of variables containing the following data:
- labels: list of dictionaries, where the keys are the "section" values and
  values are the "title" values for each of the retrieved articles.
- urls: list of URLs for all 'media' entries with "format": "Standard Thumbnail"

All your changes should be in the article_overview() function. See the test() 
function for examples of the elements of the output lists.
The rest of functions are provided for your convenience, if you want to access
the API by yourself.
"""
import json
import codecs
import requests

# Base URL of the NYTimes web services, and the "most popular" (v2) service
# built on top of it. get_popular() appends the rest of the path.
# NOTE(review): these are plain-http URLs; confirm whether the API should be
# called over https instead.
URL_MAIN = "http://api.nytimes.com/svc/"
URL_POPULAR = URL_MAIN + "mostpopular/v2/"
# API keys by service name; query_site() sends API_KEY[target] as the
# "api-key" query parameter and refuses to run if either value is "".
# NOTE(review): real-looking keys are committed here in plain text. Prefer
# loading them from the environment (or a prompt) and rotating these values.
API_KEY = { "popular": "6437898100134bfb9c7a74679e24b6f7",
            "article": "6437898100134bfb9c7a74679e24b6f7"}


def get_from_file(kind, period):
    """Load the saved results for ``kind``/``period`` from disk.

    Reads "popular-<kind>-<period>.json" from the current working directory
    and returns the decoded JSON content.
    """
    path = "popular-{0}-{1}.json".format(kind, period)
    with open(path, "r") as handle:
        # json.load reads the whole handle and parses it in one step.
        parsed = json.load(handle)
    return parsed


def article_overview(kind, period):
    """Summarise the saved most-popular articles for ``kind``/``period``.

    Loads the saved file through get_from_file() and returns a tuple
    ``(labels, urls)``:
    - labels: one single-entry dict ``{section: title}`` per article, in
      file order.
    - urls: the "url" of every "media-metadata" entry whose "format" is
      "Standard Thumbnail", in file order.
    """
    articles = get_from_file(kind, period)
    labels = []
    thumbnail_urls = []
    for entry in articles:
        labels.append({entry['section']: entry['title']})
        # An article whose 'media' is empty (an empty list or an empty
        # string) simply contributes no URLs: iterating it yields nothing.
        thumbnail_urls.extend(
            meta['url']
            for item in entry['media']
            for meta in item["media-metadata"]
            if meta['format'] == "Standard Thumbnail"
        )
    return (labels, thumbnail_urls)


def query_site(url, target, offset, timeout=30):
    """Send one GET request to the API and return the decoded JSON reply.

    Web services often use an offset parameter to return data in small
    chunks. NYTimes returns 20 articles per request; to get the next 20 you
    have to provide the offset parameter.

    Args:
        url: full endpoint URL to request.
        target: key into API_KEY selecting which API key is sent as the
            "api-key" query parameter.
        offset: value sent as the "offset" query parameter.
        timeout: seconds to wait for the server. Without a timeout,
            requests waits indefinitely on a stalled connection.

    Returns:
        The parsed JSON body when the status code is 200. False (after
        printing a hint) when either API key is the empty string. None for
        any other status that raise_for_status() does not treat as an error.

    Raises:
        requests.exceptions.HTTPError: via raise_for_status() for error
            responses.
        requests.exceptions.Timeout: when the server does not answer within
            ``timeout`` seconds.
    """
    if API_KEY["popular"] == "" or API_KEY["article"] == "":
        # print(...) with a single string argument produces the same output
        # under the Python 2 print statement and the Python 3 function.
        print("You need to register for NYTimes Developer account to run this program.")
        print("See Intructor notes for information")
        return False
    params = {"api-key": API_KEY[target], "offset": offset}
    r = requests.get(url, params=params, timeout=timeout)

    if r.status_code == requests.codes.ok:
        return r.json()
    else:
        r.raise_for_status()


def get_popular(url, kind, days, section="all-sections", offset=0):
    """Build the most-popular query for the given arguments and run it.

    Args:
        url: base URL of the most-popular service; the path
            "most<kind>/<section>/<days>.json" is appended to it.
        kind: one of "viewed", "shared" or "emailed".
        days: time period in days; only 1, 7 or 30 are accepted.
        section: section name placed in the URL path.
        offset: passed through to query_site() for paging.

    Returns:
        Whatever query_site() returns for the constructed URL, or False
        (after printing an error message) when ``days`` or ``kind`` is not
        one of the accepted values. No request is made in that case.
    """
    # print(...) with a single string argument behaves identically under the
    # Python 2 print statement and the Python 3 print function.
    if days not in (1, 7, 30):
        print("Time period can be 1,7, 30 days only")
        return False
    if kind not in ("viewed", "shared", "emailed"):
        print("kind can be only one of viewed/shared/emailed")
        return False

    url += "most{0}/{1}/{2}.json".format(kind, section, days)
    return query_site(url, "popular", offset)


def _fetch_popular_page(kind, period, offset):
    """Fetch one page of most-popular results, failing loudly if none came back."""
    page = get_popular(URL_POPULAR, kind, period, offset=offset)
    if not page:
        # get_popular()/query_site() signal problems by returning False or
        # None rather than raising; stop here instead of indexing a non-dict.
        raise ValueError(
            "no data returned for kind={0!r}, period={1!r}, offset={2!r}".format(
                kind, period, offset))
    return page


def save_file(kind, period):
    """Download every most-popular result for ``kind``/``period`` and save it.

    Calls the API repeatedly with an increasing offset, combines the
    "results" lists of all pages, and writes them as indented JSON to
    "popular-<kind>-<period>.json" (UTF-8) in the current working directory.

    The result count is taken from the first page of the requested
    ``kind``/``period`` (not from a fixed "viewed"/1 query), and that first
    page is reused instead of being requested a second time.

    Raises:
        ValueError: if any page request returns no data. Nothing is written
            in that case, so an existing file is left untouched.
    """
    page_size = 20  # NYTimes returns 20 articles per request

    first_page = _fetch_popular_page(kind, period, 0)
    full_data = list(first_page["results"])
    for offset in range(page_size, first_page["num_results"], page_size):
        full_data += _fetch_popular_page(kind, period, offset)["results"]

    # Open the file only after every download succeeded, so a failed run
    # cannot leave a truncated file behind.
    filename = "popular-{0}-{1}.json".format(kind, period)
    with codecs.open(filename, encoding='utf-8', mode='w') as v:
        v.write(json.dumps(full_data, indent=2))


def test():
    """Check article_overview("viewed", 1) against the expected values.

    The expected counts and entries are fixed values; they only hold for
    the specific saved file they were written for.
    """
    expected_label = {'Opinion': 'Professors, We Need You!'}
    expected_thumbnail = 'http://graphics8.nytimes.com/images/2014/02/17/sports/ICEDANCE/ICEDANCE-thumbStandard.jpg'

    labels, thumbnails = article_overview("viewed", 1)

    assert len(labels) == 20
    assert len(thumbnails) == 30
    assert labels[2] == expected_label
    assert thumbnails[20] == expected_thumbnail


#if __name__ == "__main__":
#    test()

In [16]:
# Queries the live API and (over)writes popular-viewed-1.json in the working
# directory with the downloaded results.
# NOTE(review): this replaces any existing file of that name, so the fixed
# expectations in test() (20 titles, 30 URLs) may no longer hold afterwards.
save_file(kind="viewed", period=1)

In [17]:
# Load the saved JSON (popular-viewed-1.json) back into Python objects.
data = get_from_file(kind="viewed", period=1)

In [18]:
# Display the loaded records.
# NOTE(review): this renders the entire list; a slice such as data[:2]
# would keep the output short.
data

[{u'abstract': u'We are seeking candidates who will explore every destination on our list of 52 Places to Go.',
  u'adx_keywords': u'',
  u'asset_id': 100000005505645L,
  u'byline': u'',
  u'column': u'',
  u'des_facet': u'',
  u'geo_facet': u'',
  u'id': 100000005505645L,
  u'media': u'',
  u'org_facet': u'',
  u'per_facet': u'',
  u'published_date': u'2017-10-23',
  u'section': u'Job Market',
  u'source': u'The New York Times',
  u'title': u'The New York Times wants to hire a journalist to travel the world',
  u'type': u'Interactive',
  u'url': u'https://www.nytimes.com/interactive/2017/jobs/nyt-52-places-the-trip.html',
  u'views': 1},
 {u'abstract': u'A secret meeting between Secretary of State Rex W. Tillerson and the Afghan president was said to take place in Kabul. Photographs told another story.',
  u'adx_keywords': u'Tillerson, Rex W;Ghani, Ashraf;Afghanistan War (2001- );United States International Relations;Afghanistan;Bagram Air Base (Afghanistan);State Department;Photograp

In [19]:
# Number of article records loaded from the file.
len(data)

1578

In [20]:
# Check what kind of object a single record is.
type(data[0])

dict

In [21]:
# List the fields available on the first record.
data[0].keys()

[u'asset_id',
 u'byline',
 u'title',
 u'url',
 u'media',
 u'section',
 u'views',
 u'column',
 u'published_date',
 u'abstract',
 u'des_facet',
 u'geo_facet',
 u'adx_keywords',
 u'source',
 u'org_facet',
 u'per_facet',
 u'type',
 u'id']

In [30]:
# Build the (labels, urls) summary from the saved file and display it.
article_overview("viewed", 1)

([{u'Job Market': u'The New York Times wants to hire a journalist to travel the world'},
  {u'World': u'Tillerson in Kabul? Two Photos Lead to Many Questions'},
  {u'Arts': u'Leon Wieseltier Admits \u2018Offenses\u2019 Against Female Colleagues as New Magazine Is Killed'},
  {u'Business Day': u'Lord &amp; Taylor Building, Icon of New York Retail, Will Become WeWork Headquarters'},
  {u'U.S.': u'Jeff Flake, a Fierce Trump Critic, Will Not Seek Re-election for Senate'},
  {u'U.S.': u'Full Transcript: Jeff Flake\u2019s Speech on the Senate Floor'},
  {u'Technology': u'Tech Giants Are Paying Huge Salaries for Scarce A.I. Talent'},
  {u'Opinion': u'How to Engage a Fanatic'},
  {u'Opinion': u'I Accidentally Turned My Dad In to Immigration Services'},
  {u'U.S.': u'\u2018Like Going Back in Time\u2019: Puerto Ricans Put Survival Skills to Use'},
  {u'Opinion': u'The Bone-Spur Bozo at the White House'},
  {u'U.S.': u'Trump and Corker Escalate Battle Over Taxes, in Personal Terms'},
  {u'Busines