In [18]:
from waybacknews.searchapi import SearchApiClient
from mediacloud import api
from datetime import datetime
from IPython.display import display, HTML
import pandas as pd


In [12]:
# Query parameters
query_term = 'police shooting'
start =  datetime(2023, 8, 24)
end = datetime.today()
language = "en"

In [14]:
domains = ['nytimes.com','cnn.com','foxnews.com','nypost.com','washingtonpost.com','usatoday.com','cnbc.com',
              'theguardian.com','breakingnews.com','buzzfeed.com','cbsnews.com','reuters.com','huffingtonpost.com',
              'usnews.com','latimes.com','politico.com','newsweek.com','breitbart.com',]
domains_str = f"domain:({' OR '.join(domains)})"

query = f"{query_term} AND language:{language} AND {domains_str}"
query

'police shooting AND language:en AND domain:(nytimes.com OR cnn.com OR foxnews.com OR nypost.com OR washingtonpost.com OR usatoday.com OR cnbc.com OR theguardian.com OR breakingnews.com OR buzzfeed.com OR cbsnews.com OR reuters.com OR huffingtonpost.com OR usnews.com OR latimes.com OR politico.com OR newsweek.com OR breitbart.com)'

In [15]:
# Instantiate API
api = SearchApiClient("mediacloud")
article_generator = api.all_articles(query, start, end)



In [16]:
# Run Summary Queries
count   = api.count(query, start, end)
sources = api.top_sources(query, start, end)
tlds    = api.top_tlds(query, start, end)

In [19]:
# Display Query
pattern = '%B %d, %Y %H:%M'
display(HTML("<h3>Query</h3>"))
print(query)
print(f"{start.strftime(pattern)} - {end.strftime(pattern)}")

# Display Summaries of Results
n = 25
display(HTML("<h3>Summary of results</h3>"))
print(f"Found {count} articles\n")
display(pd.DataFrame(sources).head(n).style.set_caption(f"Top {n} sources"))
display(pd.DataFrame(tlds).head(n).style.set_caption(f"Top {n} tlds"))

police shooting AND language:en AND domain:(nytimes.com OR cnn.com OR foxnews.com OR nypost.com OR washingtonpost.com OR usatoday.com OR cnbc.com OR theguardian.com OR breakingnews.com OR buzzfeed.com OR cbsnews.com OR reuters.com OR huffingtonpost.com OR usnews.com OR latimes.com OR politico.com OR newsweek.com OR breitbart.com)
August 24, 2023 00:00 - November 07, 2023 11:51


Found 3151 articles



ImportError: Missing optional dependency 'Jinja2'. DataFrame.style requires jinja2. Use pip or conda to install Jinja2.

In [20]:
articles = []
for list_of_articles in article_generator:
    articles += list_of_articles    

print(f"all_articles endpoint: {len(articles)} articles")
print(f"count endpoint: {count} articles")

all_articles endpoint: 3151 articles
count endpoint: 3151 articles


In [22]:
stories = pd.DataFrame(articles)\
        .sort_values(by='publication_date', ascending=False)

stories.to_csv('data_storage/stories_df.csv', index=False)

stories

Unnamed: 0,title,publication_date,capture_time,language,domain,url,original_capture_url,archive_playback_url,article_url
2737,Biden Mourns With Lewiston After Mass Shooting...,2023-11-03,2023-11-04T04:40:34Z,en,nytimes.com,https://www.nytimes.com/2023/11/03/us/politics...,https://web.archive.org/web/20231104044034id_/...,https://web.archive.org/web/20231104044034/htt...,https://wayback-api.archive.org/colsearch/v1/m...
2735,Murder Victim’s Daughter Helps Clear Brooklyn ...,2023-11-03,2023-11-04T03:38:55Z,en,nytimes.com,https://www.nytimes.com/2023/11/03/nyregion/br...,https://web.archive.org/web/20231104033855id_/...,https://web.archive.org/web/20231104033855/htt...,https://wayback-api.archive.org/colsearch/v1/m...
325,"Biden mourns the loss of ""18 precious souls"" i...",2023-11-03,2023-11-04T04:42:18Z,en,cbsnews.com,https://www.cbsnews.com/live-updates/joe-biden...,https://web.archive.org/web/20231104044218id_/...,https://web.archive.org/web/20231104044218/htt...,https://wayback-api.archive.org/colsearch/v1/m...
1943,WI sheriff’s deputy fatally shoots armed fugit...,2023-11-03,2023-11-04T02:10:47Z,en,foxnews.com,https://www.foxnews.com/us/wi-sheriffs-deputy-...,https://web.archive.org/web/20231104021047id_/...,https://web.archive.org/web/20231104021047/htt...,https://wayback-api.archive.org/colsearch/v1/m...
1953,"2 dead, 1 injured in Mexico City-area graveyar...",2023-11-03,2023-11-04T02:21:01Z,en,foxnews.com,https://www.foxnews.com/world/2-dead-1-injured...,https://web.archive.org/web/20231104022101id_/...,https://web.archive.org/web/20231104022101/htt...,https://wayback-api.archive.org/colsearch/v1/m...
...,...,...,...,...,...,...,...,...,...
570,Woman injured in Miami Gardens shooting,2023-08-24,2023-08-26T07:04:35Z,en,cbsnews.com,https://www.cbsnews.com/miami/video/woman-inju...,https://web.archive.org/web/20230826070435id_/...,https://web.archive.org/web/20230826070435/htt...,https://wayback-api.archive.org/colsearch/v1/m...
590,Dispatcher fatally shot in Arkansas ambulance ...,2023-08-24,2023-08-26T05:59:57Z,en,cbsnews.com,https://www.cbsnews.com/news/cassandra-pena-ro...,https://web.archive.org/web/20230826055957id_/...,https://web.archive.org/web/20230826055957/htt...,https://wayback-api.archive.org/colsearch/v1/m...
593,Colorado father killed after confronting alleg...,2023-08-24,2023-08-26T06:11:35Z,en,cbsnews.com,https://www.cbsnews.com/news/colorado-father-k...,https://web.archive.org/web/20230826061135id_/...,https://web.archive.org/web/20230826061135/htt...,https://wayback-api.archive.org/colsearch/v1/m...
1894,Texas man who shot at apartment burglary suspe...,2023-08-24,2023-08-26T06:57:03Z,en,foxnews.com,https://www.foxnews.com/us/texas-man-shot-apar...,https://web.archive.org/web/20230826065703id_/...,https://web.archive.org/web/20230826065703/htt...,https://wayback-api.archive.org/colsearch/v1/m...
