# Washington Post Newswhip analysis

By Ben Welsh

## Import

Pull in our Python tools

In [166]:
import json
import os
import pathlib
import time
from datetime import datetime, timezone

In [167]:
import requests
import pandas as pd
import altair as alt
from dateutil.relativedelta import relativedelta

## Download

Fetch data from the Newswhip API

In [141]:
# Read the API key from the NEWSWHIP_API_KEY environment variable so a real
# credential never has to be saved in the notebook; fall back to the original
# 'DevKey' value when the variable is unset.
api_key = os.environ.get('NEWSWHIP_API_KEY', 'DevKey')
api_endpoint = f'https://api.newswhip.com/v1/stats?key={api_key}'

In [None]:
First get a range of months from the start of the API until this month.

In [142]:
# Take "now" in UTC rather than local time so the year/month read from it agree
# with the UTC-aware datetimes used for the month boundaries in this notebook.
today = datetime.now(timezone.utc)

In [143]:
# Midnight UTC on the first day of the current month.
this_month = datetime(
    year=today.year,
    month=today.month,
    day=1,
    tzinfo=timezone.utc,
)

In [144]:
# First month of the range: midnight UTC on 1 February 2014.
start_date = datetime(year=2014, month=2, day=1, tzinfo=timezone.utc)

In [145]:
# Accumulator for the first-of-month datetimes.
month_range = list()

In [146]:
# Build the list of month starts from start_date through this_month (inclusive).
# A separate cursor is advanced instead of start_date itself, and the list is
# rebuilt from scratch, so re-running this cell always yields the same range.
month_range = []
month_cursor = start_date
while month_cursor <= this_month:
    month_range.append(month_cursor)
    month_cursor += relativedelta(months=1)

Break that into paired start and end dates

In [147]:
# Accumulator for the [start, end] month windows.
month_sets = list()

In [148]:
# Pair each month start with the following month start to get [from, to] windows.
# zip() stops at the shorter slice, so the last month (which has no successor)
# is dropped without needing to catch an IndexError.
# NOTE(review): the slice starts at index 1, so the first entry of month_range is
# never used as a window start; kept as in the original loop, confirm intended.
for month_start, month_end in zip(month_range[1:], month_range[2:]):
    month_sets.append([month_start, month_end])

Download each month's data into a JSON file on disk

In [149]:
def to_unix(x):
    """Convert a datetime to a Unix timestamp expressed in milliseconds.

    The timestamp is truncated to whole seconds before being scaled, so the
    result is always a multiple of 1000.
    """
    return int(x.timestamp()) * 1000

In [150]:
def get_payload(from_date, to_date):
    """Serialize the stats query for one date window as a JSON string.

    The query filters on the washingtonpost.com publisher, aggregates by
    publisher, sorts by fb_total.count, and bounds the window with the
    to_unix() values of from_date and to_date.
    """
    window_start = to_unix(from_date)
    window_end = to_unix(to_date)
    query = {
        'filters': ['publisher:washingtonpost.com'],
        'aggregate_by': 'publisher',
        'sort_by': "fb_total.count",
        'from': window_start,
        'to': window_end,
    }
    return json.dumps(query)

In [174]:
def get_newswhip_totals(from_date, to_date):
    """Return the API stats for one date window, using an on-disk cache.

    The response is cached at ./data/<from_date>.json. If that file already
    exists it is loaded and returned without contacting the API. Otherwise the
    payload from get_payload() is POSTed to api_endpoint, and the decoded JSON
    response is written to the cache file and returned.

    Raises:
        requests.HTTPError: if the API answers with an error status; nothing
            is written to the cache in that case.
    """
    json_path = pathlib.Path(f"./data/{from_date}.json")
    if json_path.exists():
        # Context manager so the handle is closed promptly.
        with open(json_path, 'r') as cached_file:
            return json.load(cached_file)

    data = get_payload(from_date, to_date)
    r = requests.post(url=api_endpoint, data=data)
    # Fail loudly on an error status instead of caching the error body, which
    # would then be served from disk on every later run.
    r.raise_for_status()
    results = r.json()

    # Create the cache directory on first use so the write works on a fresh checkout.
    json_path.parent.mkdir(parents=True, exist_ok=True)
    with open(json_path, "w") as cache_file:
        json.dump(results, cache_file, indent=2)
    return results

In [180]:
# Maps each window's start date to the API results for that window.
results_dict = dict()

In [181]:
# Fetch the totals for every month window and key them by the window's start date.
for month_window in month_sets:
    window_start, window_end = month_window
    results_dict[window_start] = get_newswhip_totals(window_start, window_end)

## Consolidate

Merge all the JSON files into a single dataframe with the values we want to analyze

In [198]:
def parse_results(r):
    """Flatten one (month, results) pair into a flat record.

    r[0] is used as the month; the count and sum are read from the
    'fb_total' stats of the first entry in r[1].
    """
    fb_total = r[1][0]['stats']['fb_total']
    return dict(
        month=r[0],
        article_count=fb_total['count'],
        engagements=fb_total['sum'],
    )

In [202]:
# Accumulator for the flattened per-month records.
result_list = list()

In [203]:
# Turn every (month, results) item into a flat record.
result_list.extend(map(parse_results, results_dict.items()))

In [204]:
# One row per record in result_list.
df = pd.DataFrame(data=result_list)

In [207]:
# Show the last 12 rows.
df.iloc[-12:]

Unnamed: 0,month,article_count,engagements
89,2021-08-01 00:00:00+00:00,6483,10416600.0
90,2021-09-01 00:00:00+00:00,7446,8521194.0
91,2021-10-01 00:00:00+00:00,7821,7437303.0
92,2021-11-01 00:00:00+00:00,8283,7814014.0
93,2021-12-01 00:00:00+00:00,6863,8526550.0
94,2022-01-01 00:00:00+00:00,7415,9288538.0
95,2022-02-01 00:00:00+00:00,7746,7889832.0
96,2022-03-01 00:00:00+00:00,8183,9690655.0
97,2022-04-01 00:00:00+00:00,6135,6837711.0
98,2022-05-01 00:00:00+00:00,6354,8206302.0


## Analyze

In [210]:
# Mean of article_count over a 12-row rolling window.
df['rolling_average'] = df['article_count'].rolling(window=12).mean()

In [212]:
# Shared base chart: month on the x axis, encoded as an ordinal field.
chart = alt.Chart(df).encode(x="month:O")

# article_count as bars.
bars = chart.mark_bar().encode(y="article_count:Q")

# rolling_average as a line on the same x axis.
line = chart.mark_line().encode(y="rolling_average:Q")

# Layer the line over the bars.
alt.layer(bars, line).properties(width=500)

## Export

In [213]:
# Write the dataframe to totals.csv without the index column.
df.to_csv(path_or_buf="./totals.csv", index=False)