# Connecting to NYT API

## Libraries

In [1]:
import json
import requests
import pandas as pd
from pandas.io.json import json_normalize

In [2]:
from IPython.display import display, HTML

display(HTML(data="""
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""))

In [3]:
pd.set_option('display.max_rows', None)

## Calling the API

In [4]:
key = 'kAD5GYrJCPoBK6HAdLQH3TTSFiQ3fRCO'

url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json?q=("election" "trump" "clinton")&facet_field=day_of_week&facet=true&begin_date=20161001&end_date=20161001&api-key='+key

response = requests.get(url)
results = response.json()

results

#need to further refine filtering to only incluse US election (maybe politics section)
#loop by day

{'status': 'OK',
 'copyright': 'Copyright (c) 2019 The New York Times Company. All Rights Reserved.',
 'response': {'docs': [{'abstract': 'In an interview, Mr. Trump said he was reconsidering whether he would back Mrs. Clinton if she became president.',
    'web_url': 'https://www.nytimes.com/2016/10/01/us/politics/donald-trump-interview-bill-hillary-clinton.html',
    'snippet': 'In an interview, Mr. Trump said he was reconsidering whether he would back Mrs. Clinton if she became president.',
    'lead_paragraph': 'Donald J. Trump unleashed a slashing new attack on Hillary Clinton over Bill Clinton’s sexual indiscretions on Friday as he sought to put the Clintons’ relationship at the center of his political argument against her before their next debate.',
    'print_section': 'A',
    'print_page': '10',
    'source': 'The New York Times',
    'multimedia': [{'rank': 0,
      'subtype': 'thumbnail',
      'caption': None,
      'credit': None,
      'type': 'image',
      'url': 'imag

## Exploring the JSON for the Headline

In [5]:
results['response']['docs'][0].keys()

dict_keys(['abstract', 'web_url', 'snippet', 'lead_paragraph', 'print_section', 'print_page', 'source', 'multimedia', 'headline', 'keywords', 'pub_date', 'document_type', 'news_desk', 'section_name', 'subsection_name', 'byline', 'type_of_material', '_id', 'word_count', 'uri'])

#### Publication Date

In [6]:
results['response']['docs'][0]['pub_date'][:10]

'2016-10-01'

#### Headline

In [7]:
results['response']['docs'][0]['headline']['main']

'Donald Trump Opens New Line of Attack on Hillary Clinton: Her Marriage'

#### Snippet

In [8]:
results['response']['docs'][1]['snippet']

'Her campaign and the party have a combined cash reserve of $150 million, setting them up for a final spending splurge on television ads and outreach.'

In [9]:
results['response']['docs'][8]['headline']['main']

'The Monster Sorority of Women Voters'

In [10]:
len(results['response']['docs'])

9

## Loop to gather multiple titles

In [12]:
quant = 1

titles = []

for i in range(quant):
    headline = results['response']['docs'][i]['headline']['main']
    titles.append(headline)

In [13]:
titles

['Donald Trump Opens New Line of Attack on Hillary Clinton: Her Marriage']

## Loop to iterate through multiple days

#### And save it a dataset

In [14]:
months = ['10','11']
days = [str(i) for i in range(1,31)]       
titles = []

for j in months:
    for i in days:
        url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json?q=("election" "trump" "clinton")&facet_field=day_of_week&facet=true&begin_date=2016'+j+i+'&end_date=2016'+j+i+'&api-key='+key
        response = requests.get(url)
        results = response.json()

        try:
            quant = len(results['response']['docs'])

            print (i)

            for y in range(quant):
                pubdate = results['response']['docs'][y]['pub_date'][:10]
                headline = results['response']['docs'][y]['headline']['main']
                snippet = results['response']['docs'][y]['snippet']
                article_dict = {'Date':pubdate, 'Headline': headline, 'Snippet': snippet }
                titles.append(article_dict)

        except KeyError:
            pass
            


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21


KeyboardInterrupt: 

In [195]:
df_titles_trump = pd.DataFrame(titles)


### Let's create seperate datasets for articles about Trump and about Clinton

In [123]:
# due to problems with the connection to the API had to individually pull certain days

#candidate = ["trump", "clinton"]
#months = ['10','11']
days = [str(i) for i in range(1,31)]       
titles_split_clint = []

#for z in candidate:
#for j in months:
for i in days:
    url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json?q=("clinton")&facet_field=day_of_week&facet=true&begin_date=201611'+i+'&end_date=201611'+i+'&api-key='+key
    response = requests.get(url)
    results = response.json()

    try:
        #quant = len(results['response']['docs'])
        quant = 10

        print (i)

        for y in range(quant):
            pubdate = results['response']['docs'][y]['pub_date'][:10]
            candidate = 'clinton'
            headline = results['response']['docs'][y]['headline']['main']
            snippet = results['response']['docs'][y]['snippet']
            article_dict = {'Date':pubdate, 'Candidate':candidate, 'Headline': headline, 'Snippet': snippet }
            titles_split_clint.append(article_dict)

    except KeyError:
        pass
        
df_titles_split_clint = pd.DataFrame(titles_split_clint)
df_titles_split_clint.head()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30


Unnamed: 0,Candidate,Date,Headline,Snippet
0,clinton,2016-11-01,Confidence Even as Hillary Clinton’s Momentum ...,Mrs. Clinton’s campaign was optimistic that sh...
1,clinton,2016-11-01,Clinton Redirects Aim at Trump,After days of talking about the F.B.I. directo...
2,clinton,2016-11-01,Clinton-Connected Consulting Firm Sues Republi...,"The lawsuit, filed in New York State by Teneo,..."
3,clinton,2016-11-01,A Survivor With a Steely Resolve That Can Insp...,Hillary Clinton has endured many punches but s...
4,clinton,2016-11-01,"Hillary Clinton, Moving Past F.B.I. Review, Tu...",Mrs. Clinton’s remarks in Florida signaled the...


## Export dataset

In [188]:
df_titles.to_csv('data/nyt.csv', index=False)
df_titles_split.to_csv('data/nyt_split.csv', index=False)
df_titles_trump.to_csv('data/nyt_trump.csv', index=False)