# NYT API
To use the New York Times API, register for an [API key here](https://developer.nytimes.com/).

For a more detailed explanation of how to use the API, see this resource:
https://nicksubic.medium.com/a-guide-to-querying-the-new-york-times-api-with-python-b621556236f8


In [1]:
import requests
from time import sleep
from secrets import nytAPIkey # either add the secret key variable to your secrets.py file
                              # or save it as an environment variable.


## our parameters

In [2]:
from urllib.parse import urlencode

# API key (imported above as nytAPIkey)
key = nytAPIkey

# search term
query = 'migrant'

# base URL: urlencode percent-escapes the values, so search terms containing
# spaces, '&' or '#' still produce a valid query string
base_url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json?' + urlencode({'q': query, 'api-key': key})

In [3]:
# send the search request; the timeout keeps the cell from hanging forever
# if the server never answers
response = requests.get(base_url, timeout=30)

In [4]:
response

<Response [200]>

In [5]:
dir(response)

['__attrs__',
 '__bool__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__nonzero__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_content',
 '_content_consumed',
 '_next',
 'apparent_encoding',
 'close',
 'connection',
 'content',
 'cookies',
 'elapsed',
 'encoding',
 'headers',
 'history',
 'is_permanent_redirect',
 'is_redirect',
 'iter_content',
 'iter_lines',
 'json',
 'links',
 'next',
 'ok',
 'raise_for_status',
 'raw',
 'reason',
 'request',
 'status_code',
 'text',
 'url']

In [6]:
parsed = response.json()

In [7]:
parsed.keys()

dict_keys(['status', 'copyright', 'response'])

In [9]:
parsed['response'].keys()

dict_keys(['docs', 'meta'])

In [10]:
parsed['response']['meta']

{'hits': 30812, 'offset': 0, 'time': 79}

In [11]:
articles = parsed['response']['docs']

In [13]:
type(articles)

list

In [14]:
articles[0].keys()

dict_keys(['abstract', 'web_url', 'snippet', 'lead_paragraph', 'print_section', 'print_page', 'source', 'multimedia', 'headline', 'keywords', 'pub_date', 'document_type', 'news_desk', 'section_name', 'byline', 'type_of_material', '_id', 'word_count', 'uri'])

In [15]:
len(articles)

10

In [16]:
# collect the first five pages of search results for 'homelessness';
# results ends up as a list of pages, each page being a list of article dicts
results = []
url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json'
for i in range(0, 5):
    # let requests build and percent-encode the query string
    params = {'q': 'homelessness', 'page': i, 'api-key': nytAPIkey}
    response = requests.get(url, params=params, timeout=30)
    # fail with a clear HTTP error (e.g. bad key or rate limit) instead of a
    # confusing KeyError on 'response' when the payload is an error message
    response.raise_for_status()
    parsed = response.json()
    articles = parsed['response']['docs']
    results.append(articles)
    # NYT's developer FAQ advises sleeping 12 seconds between calls
    # (limit of 5 requests per minute)
    sleep(12)

In [17]:
type(results)

list

In [18]:
len(results)

5

In [19]:
type(results[0][0])

dict

In [20]:
article = results[0][0]

In [21]:
article.keys()

dict_keys(['abstract', 'web_url', 'snippet', 'lead_paragraph', 'print_section', 'print_page', 'source', 'multimedia', 'headline', 'keywords', 'pub_date', 'document_type', 'news_desk', 'section_name', 'byline', 'type_of_material', '_id', 'word_count', 'uri'])

In [None]:
# collect the first five pages of search results for our own query;
# results ends up as a list of pages, each page being a list of article dicts
results = []
url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json'
for i in range(0, 5):
    # let requests build and percent-encode the query string
    params = {'q': query, 'page': i, 'api-key': key}
    response = requests.get(url, params=params, timeout=30)
    # fail with a clear HTTP error (e.g. bad key or rate limit) instead of a
    # confusing KeyError on 'response' when the payload is an error message
    response.raise_for_status()
    parsed = response.json()
    articles = parsed['response']['docs']
    results.append(articles)
    # pause between calls so we stay under the API rate limit; NYT's developer
    # FAQ advises at least 12 seconds, and 15 leaves some margin
    sleep(15)

### individual challenge: how do we get just the abstract of the first item in this new object?

In [None]:
type(results)

In [None]:
type(results[0])

In [None]:
# we have a dict within a list within a list
type(results[0][0])

In [None]:
results[0][0].keys()

In [None]:
results[0][0]['abstract']

In [22]:
# results is a list of pages, and each page is a list of article dicts,
# so we need nested loops: the outer loop walks the pages and the inner
# loop walks the articles on that page

for result in results:
    for article in result:
        print(article['abstract'])  # one abstract per line

A majority of the justices appeared skeptical of courts wading into the thorny policy questions around when local governments can punish people for sleeping and camping outdoors.
A group of homeless plaintiffs argue that local laws aimed at banning sleeping outside violated their constitutional rights. The city claims that’s not what the Eighth Amendment means.
As voters’ frustration has grown with sprawling tent encampments in public spaces, Democratic and Republican leaders alike have called for greater authority to ban such camping.
Trevon Murphy, who a family member said had a history of mental health problems, killed one man and injured two others.
A Democrat and a Republican from New York will introduce the “Michelle Go Act,” named after a woman who was killed when a mentally ill homeless man pushed her in front of a subway train.
The evictions are part of an aggressive strategy to force some adult migrants out of city shelters after 30 or 60 days.
He was ensnared in Ithaca’s hom

### group challenge: make a list of abstracts

In [25]:
# flatten the pages of results into one list holding every article's abstract
abstracts = [article['abstract'] for result in results for article in result]

In [26]:
len(abstracts)

50

In [27]:
abstracts[:10]

['A majority of the justices appeared skeptical of courts wading into the thorny policy questions around when local governments can punish people for sleeping and camping outdoors.',
 'A group of homeless plaintiffs argue that local laws aimed at banning sleeping outside violated their constitutional rights. The city claims that’s not what the Eighth Amendment means.',
 'As voters’ frustration has grown with sprawling tent encampments in public spaces, Democratic and Republican leaders alike have called for greater authority to ban such camping.',
 'Trevon Murphy, who a family member said had a history of mental health problems, killed one man and injured two others.',
 'A Democrat and a Republican from New York will introduce the “Michelle Go Act,” named after a woman who was killed when a mentally ill homeless man pushed her in front of a subway train.',
 'The evictions are part of an aggressive strategy to force some adult migrants out of city shelters after 30 or 60 days.',
 'He wa

### individual practice: explore other keys in our data & share

## text analysis with spaCy

In [28]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [29]:
docs = list(nlp.pipe(abstracts))

In [30]:
for doc in docs:
    print(doc)

A majority of the justices appeared skeptical of courts wading into the thorny policy questions around when local governments can punish people for sleeping and camping outdoors.
A group of homeless plaintiffs argue that local laws aimed at banning sleeping outside violated their constitutional rights. The city claims that’s not what the Eighth Amendment means.
As voters’ frustration has grown with sprawling tent encampments in public spaces, Democratic and Republican leaders alike have called for greater authority to ban such camping.
Trevon Murphy, who a family member said had a history of mental health problems, killed one man and injured two others.
A Democrat and a Republican from New York will introduce the “Michelle Go Act,” named after a woman who was killed when a mentally ill homeless man pushed her in front of a subway train.
The evictions are part of an aggressive strategy to force some adult migrants out of city shelters after 30 or 60 days.
He was ensnared in Ithaca’s hom

## tokens

In [31]:
for token in docs[0]:
    print(token) 

A
majority
of
the
justices
appeared
skeptical
of
courts
wading
into
the
thorny
policy
questions
around
when
local
governments
can
punish
people
for
sleeping
and
camping
outdoors
.


In [32]:
type(token)

spacy.tokens.token.Token

In [33]:
# see all the methods for Token objects!
dir(token)

['_',
 '__bytes__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__pyx_vtable__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 'ancestors',
 'check_flag',
 'children',
 'cluster',
 'conjuncts',
 'dep',
 'dep_',
 'doc',
 'ent_id',
 'ent_id_',
 'ent_iob',
 'ent_iob_',
 'ent_kb_id',
 'ent_kb_id_',
 'ent_type',
 'ent_type_',
 'get_extension',
 'has_dep',
 'has_extension',
 'has_head',
 'has_morph',
 'has_vector',
 'head',
 'i',
 'idx',
 'iob_strings',
 'is_alpha',
 'is_ancestor',
 'is_ascii',
 'is_bracket',
 'is_currency',
 'is_digit',
 'is_left_punct',
 'is_lower',
 'is_oov',
 'is_punct',
 'is_quote',
 'is_right_punct',
 'is_sent_end',
 'is_sent_start',
 'is_space',
 'is_stop',
 'is_title',
 'is_upper',
 'lang

In [34]:
for token in docs[0]:
    print(token, token.pos_, token.dep_)    

A DET det
majority NOUN nsubj
of ADP prep
the DET det
justices NOUN pobj
appeared VERB ROOT
skeptical ADJ oprd
of ADP prep
courts NOUN pobj
wading VERB acl
into ADP prep
the DET det
thorny ADJ amod
policy NOUN compound
questions NOUN pobj
around ADP prep
when SCONJ advmod
local ADJ amod
governments NOUN nsubj
can AUX aux
punish VERB advcl
people NOUN dobj
for ADP prep
sleeping VERB pobj
and CCONJ cc
camping NOUN compound
outdoors ADV conj
. PUNCT punct


## NER

In [35]:
# see docs (hah!) on Doc object on this page: https://spacy.io/api/doc

type(doc)

spacy.tokens.doc.Doc

In [None]:
dir(doc)

In [36]:
for doc in docs:
    for ent in doc.ents:
        print(ent.text, ent.label_)

the Eighth Amendment LAW
Democratic NORP
Republican NORP
Trevon Murphy PERSON
one CARDINAL
two CARDINAL
Democrat NORP
Republican NORP
New York GPE
30 or 60 days DATE
Ithaca GPE
New York City’s GPE
Morning After the Revolution WORK_OF_ART
Nellie Bowles PERSON
Midland GPE
Mich. GPE
about a year DATE
the Brooklyn Conservatory of Music ORG
California GPE
Oregon GPE
Supreme Court ORG
Mayday WORK_OF_ART
Nemat Shafik ORG
Hundreds of thousands CARDINAL
Hundreds of thousands CARDINAL
winters DATE
Julio Florencio Teo Gomez PERSON
one CARDINAL
Janna Volz’s PERSON
Jason Volz PERSON
More than a year DATE
Karen Bass PERSON
thousands CARDINAL
Carlton McPherson PERSON
Carlton McPherson PERSON
New York City GPE
more than $1 million MONEY
Proposition 1 LAW
$6.38 billion MONEY
Gavin Newsom PERSON
Three million CARDINAL
more than 400 CARDINAL
6,000 CARDINAL
Gavin Newsom PERSON
Proposition 1 LAW
California GPE
Chicago GPE
California GPE
Two CARDINAL
Democrats NORP
Cook County GPE
Chicago GPE
Democratic NOR

## word frequencies

In [37]:
from collections import Counter

# keep the text of every token that is neither a stop word nor punctuation
words = [
    token.text
    for doc in docs
    for token in doc
    if not (token.is_stop or token.is_punct)
]

# tally the remaining words and show the 20 most frequent
word_freq = Counter(words)
common_words = word_freq.most_common(20)
print(common_words)

[('homeless', 12), ('people', 11), ('city', 8), ('homelessness', 7), ('New', 6), ('York', 6), ('shelters', 6), ('man', 5), ('City', 5), ('shelter', 5), ('family', 4), ('mental', 4), ('subway', 4), ('illness', 4), ('1', 4), ('camping', 3), ('laws', 3), ('public', 3), ('said', 3), ('problems', 3)]


## dependency 

In [38]:
from spacy import displacy
displacy.render(docs[0], style="dep", jupyter=True)

## BONUS: searching text by grammatical dependency
See this tutorial for more info: https://applied-language-technology.mooc.fi/html/notebooks/part_iii/03_pattern_matching.html

In [None]:
# first, join the abstracts into one space-separated string to process with matcher
# then run nlp() again on the combined text (this rebinds 'doc' to one Doc
# covering all abstracts)

abstracts_string = ' '.join(abstracts)
doc = nlp(abstracts_string)

In [None]:
doc[:50]

In [None]:
from spacy.matcher import Matcher

# Create a Matcher and provide model vocabulary; assign result under the variable 'matcher'
matcher = Matcher(nlp.vocab)

# Call the variable to examine the object
matcher

In [None]:
# Define a list with nested dictionaries that contains the pattern to be matched
pronoun_verb = [{'POS': 'PRON'}, {'POS': 'VERB'}]

In [None]:
# Add the pattern to the matcher under the name 'pronoun+verb'
matcher.add("pronoun+verb", patterns=[pronoun_verb])

In [None]:
# Apply the Matcher to the Doc object stored under 'doc'. Called without the
# 'as_spans' argument, it returns (match_id, start, end) tuples; passing
# as_spans=True would return Span objects instead.

matches = matcher(doc)

# The output is stored under 'matches'; call that variable to examine it

In [None]:
matches

In [None]:
# each match is a (match_id, start, end) tuple: turn the numeric id back into
# the pattern name and slice the matched tokens out of the Doc
for match_id, start, end in matches:
    print(doc.vocab.strings[match_id], doc[start:end].text)