# Setup

In [103]:
# Navigate file system
import os

# Access API secrets in .env file
%load_ext dotenv
%dotenv

# Accessing API
import requests

# Datetime utilities
import time
from datetime import date, timedelta
import dateutil
from dateutil.relativedelta import relativedelta

# Dataset exploration
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Output formatting
from pprint import pprint

# Notebook settings
import warnings
warnings.filterwarnings('ignore')

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [105]:
# Base URL of the Guardian content API; endpoint paths are appended to it.
API_ENDPOINT = 'https://content.guardianapis.com/'

# API key read from the environment (populated from .env by the dotenv
# extension loaded in the setup cell); None when the variable is not set.
GUARDIAN_API_KEY = os.environ.get('GUARDIAN_API_KEY')

In [157]:
# First and last day of the period that gets split into monthly ranges below.
start_date = date(year=2021, month=9, day=27)
end_date = date(year=2022, month=9, day=27)

In [177]:
# Accumulates (from, to) string tuples; create_date_ranges appends to it.
date_ranges = []

def create_date_ranges(start_date, end_date):
    """Split the span start_date..end_date into consecutive monthly ranges.

    One ``(from, to)`` tuple of ``'%Y-%m-%d'`` strings is appended to the
    module-level ``date_ranges`` list per range. Each range starts the day
    after the previous one ends. The final range runs from wherever the last
    full month stopped up to ``end_date``, so it can be shorter or longer
    than one month.

    Spans shorter than two calendar months produce a single range covering
    ``start_date`` to ``end_date``.

    Args:
        start_date: First day to cover (object with ``year``/``month``/``day``
            and ``strftime``, e.g. ``datetime.date``).
        end_date: Last day to cover; must not be before ``start_date``.

    Raises:
        ValueError: If ``end_date`` is before ``start_date``.
    """
    # Compare on (year, month, day) so date and datetime inputs can be mixed.
    if (end_date.year, end_date.month, end_date.day) < (
        start_date.year, start_date.month, start_date.day
    ):
        raise ValueError(
            f"end_date ({end_date}) must not be before start_date ({start_date})"
        )

    num_of_months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month)
    print(f"Number of months: {num_of_months}")

    # `range_start` is always defined, even when the loop below never runs
    # (num_of_months <= 1), so the final range can be built for short spans.
    range_start = start_date
    for _ in range(1, num_of_months):
        next_start = range_start + relativedelta(months=1)
        range_end = next_start - timedelta(days=1)
        date_ranges.append((range_start.strftime('%Y-%m-%d'), range_end.strftime('%Y-%m-%d')))
        range_start = next_start

    # The last range absorbs everything that is left up to end_date.
    last_month_start = range_start
    last_month_end = end_date
    date_ranges.append((last_month_start.strftime('%Y-%m-%d'), last_month_end.strftime('%Y-%m-%d')))
    print(f"Last months in date range: {last_month_start}, {last_month_end}")

In [178]:
# Fill `date_ranges` for the configured start/end dates.
create_date_ranges(start_date, end_date)

Number of months: 12
Last months in date range: 2022-08-27, 2022-09-27


In [179]:
# Show the (from, to) string tuples appended by create_date_ranges.
date_ranges

[('2021-09-27', '2021-10-26'),
 ('2021-10-27', '2021-11-26'),
 ('2021-11-27', '2021-12-26'),
 ('2021-12-27', '2022-01-26'),
 ('2022-01-27', '2022-02-26'),
 ('2022-02-27', '2022-03-26'),
 ('2022-03-27', '2022-04-26'),
 ('2022-04-27', '2022-05-26'),
 ('2022-05-27', '2022-06-26'),
 ('2022-06-27', '2022-07-26'),
 ('2022-07-27', '2022-08-26'),
 ('2022-08-27', '2022-09-27')]

In [107]:
# Query-string parameters for a two-day sample request: all fields,
# 50 results per page, first page.
params = dict(
    [
        ('from-date', '2022-09-26'),
        ('to-date', '2022-09-27'),
        ('show-fields', 'all'),
        ('page-size', '50'),
        ('page', '1'),
        ('api-key', GUARDIAN_API_KEY),
    ]
)

In [121]:
# Probe the search endpoint for the whole start_date..end_date period to see
# how many items and pages it holds. The query goes through `params` so that
# requests builds and URL-encodes the query string instead of a hand-written
# f-string.
test_http_response = requests.get(
    f"{API_ENDPOINT}search",
    params={
        'from-date': start_date.isoformat(),
        'to-date': end_date.isoformat(),
        'show-fields': 'all',
        'page-size': 50,
        'api-key': GUARDIAN_API_KEY,
    },
    timeout=30,  # requests has no default timeout and would otherwise wait indefinitely
)
# Fail with a clear HTTP error (bad key, rate limit, ...) instead of a
# KeyError on 'response' further down.
test_http_response.raise_for_status()
test = test_http_response.json()['response']

In [122]:
# Print the paging metadata of the probe response, one "label: value" per line.
summary_fields = (
    ('Total items in date range: ', 'total'),
    ('Current page: ', 'currentPage'),
    ('Page size: ', 'pageSize'),
    ('Number of pages: ', 'pages'),
)
for label, key in summary_fields:
    print(label, test[key])

Total items in date range:  78764
Current page:  1
Page size:  50
Number of pages:  1576


In [97]:
# Download every page of search results published on or after 2022-09-27
# (no 'to-date', so the result set depends on when the cell is run).
search_url = f"{API_ENDPOINT}search"
search_params = {
    'from-date': '2022-09-27',
    'show-fields': 'all',
    'api-key': GUARDIAN_API_KEY,
}

# First page: also tells us how many pages there are in total.
page_http_response = requests.get(search_url, params=search_params, timeout=30)
page_http_response.raise_for_status()  # surface HTTP errors before touching the JSON
search_response = page_http_response.json()['response']

# Copy into a fresh list so extending `results` does not mutate the first
# response's own 'results' list.
results = list(search_response['results'])

for page in range(2, search_response['pages'] + 1):
    page_http_response = requests.get(
        search_url,
        params={**search_params, 'page': page},
        timeout=30,  # avoid hanging forever on a stalled connection
    )
    page_http_response.raise_for_status()
    search_response = page_http_response.json()['response']
    results.extend(search_response['results'])


In [98]:
# Number of items collected across all downloaded pages.
len(results)

326

In [101]:
# Flatten the nested result dicts (e.g. 'fields' -> 'fields.<name>') and list the resulting columns.
pd.json_normalize(results).columns

Index(['id', 'type', 'sectionId', 'sectionName', 'webPublicationDate',
       'webTitle', 'webUrl', 'apiUrl', 'isHosted', 'pillarId', 'pillarName',
       'fields.headline', 'fields.standfirst', 'fields.trailText',
       'fields.byline', 'fields.main', 'fields.body', 'fields.wordcount',
       'fields.commentCloseDate', 'fields.commentable',
       'fields.firstPublicationDate', 'fields.isInappropriateForSponsorship',
       'fields.isPremoderated', 'fields.lastModified',
       'fields.liveBloggingNow', 'fields.productionOffice',
       'fields.publication', 'fields.shortUrl', 'fields.shouldHideAdverts',
       'fields.showInRelatedContent', 'fields.thumbnail',
       'fields.legallySensitive', 'fields.lang', 'fields.isLive',
       'fields.bodyText', 'fields.charCount', 'fields.shouldHideReaderRevenue',
       'fields.showAffiliateLinks', 'fields.bylineHtml',
       'fields.newspaperPageNumber', 'fields.starRating',
       'fields.newspaperEditionDate', 'fields.sensitive',
       'f

In [108]:
# Preview the first rows of the flattened results table.
pd.json_normalize(results).head()

Unnamed: 0,id,type,sectionId,sectionName,webPublicationDate,webTitle,webUrl,apiUrl,isHosted,pillarId,pillarName,fields.headline,fields.standfirst,fields.trailText,fields.byline,fields.main,fields.body,fields.wordcount,fields.commentCloseDate,fields.commentable,fields.firstPublicationDate,fields.isInappropriateForSponsorship,fields.isPremoderated,fields.lastModified,fields.liveBloggingNow,fields.productionOffice,fields.publication,fields.shortUrl,fields.shouldHideAdverts,fields.showInRelatedContent,fields.thumbnail,fields.legallySensitive,fields.lang,fields.isLive,fields.bodyText,fields.charCount,fields.shouldHideReaderRevenue,fields.showAffiliateLinks,fields.bylineHtml,fields.newspaperPageNumber,fields.starRating,fields.newspaperEditionDate,fields.sensitive,fields.displayHint
0,politics/live/2022/sep/28/keir-starmer-mini-bu...,liveblog,politics,Politics,2022-09-28T11:24:57Z,Angela Rayner urges Labour to celebrate its ac...,https://www.theguardian.com/politics/live/2022...,https://content.guardianapis.com/politics/live...,False,pillar/news,News,Angela Rayner urges Labour to celebrate its ac...,<p>Deputy Labour leader’s speech met with appl...,Deputy Labour leader’s speech met with applaus...,Andrew Sparrow,"<figure class=""element element-atom""> \n <gu-a...","<div id=""block-63342dfe8f08ef7e8107bdaa"" class...",4215,2022-10-01T06:30:00Z,True,2022-09-28T06:36:45Z,False,False,2022-09-28T11:24:57Z,True,UK,theguardian.com,https://www.theguardian.com/p/mbgd8,False,True,https://media.guim.co.uk/8437f055cb5ecad45bcd9...,False,en,True,"Rayner turns to Kwasi Kwarteng, the chancellor...",24098,False,False,"<a href=""profile/andrewsparrow"">Andrew Sparrow...",,,,,
1,world/live/2022/sep/28/russia-ukraine-war-live...,liveblog,world,World news,2022-09-28T11:16:47Z,Russia-Ukraine war: Moscow denies attacking No...,https://www.theguardian.com/world/live/2022/se...,https://content.guardianapis.com/world/live/20...,False,pillar/news,News,Russia-Ukraine war: Moscow denies attacking No...,<p>Claims that Russia was behind attack on th...,Claims that Russia was behind attack on the ga...,Léonie Chao-Fong (now); Martin Belam and Tess ...,"<figure class=""element element-atom""> \n <gu-a...","<div id=""block-63342bf08f08ef7e8107bd93"" class...",3990,,,2022-09-28T04:28:50Z,False,False,2022-09-28T11:16:47Z,True,AUS,theguardian.com,https://www.theguardian.com/p/mbgvg,True,True,https://media.guim.co.uk/95b9dcff18cb013b33dff...,False,en,True,The Russian-installed leaders of Ukraine’s Luh...,25661,False,False,"<a href=""profile/leonie-chao-fong"">Léonie Chao...",,,,,
2,sport/live/2022/sep/28/county-cricket-yorkshir...,liveblog,sport,Sport,2022-09-28T11:15:09Z,"County cricket: Yorkshire v Gloucestershire, W...",https://www.theguardian.com/sport/live/2022/se...,https://content.guardianapis.com/sport/live/20...,False,pillar/sport,Sport,"County cricket: Yorkshire v Gloucestershire, W...",<ul><li>Day three of the last round of County ...,Join Tanya Aldred for updates from the last ro...,Tanya Aldred at Headingley,"<figure class=""element element-image"" data-med...","<div id=""block-63342cb28f086841b84c39d5"" class...",1087,2022-10-01T08:54:49Z,True,2022-09-28T08:54:49Z,False,False,2022-09-28T11:15:09Z,True,UK,theguardian.com,https://www.theguardian.com/p/mbgp4,False,True,https://media.guim.co.uk/d14adc985aa52987e830c...,False,en,True,Make that Surrey 190 for nine. A glimpse round...,6411,False,False,"<a href=""profile/tanyaaldred"">Tanya Aldred</a>...",,,,,
3,business/live/2022/sep/28/sterling-slumps-imf-...,liveblog,business,Business,2022-09-28T11:11:14Z,Pound tumbles despite Bank of England interven...,https://www.theguardian.com/business/live/2022...,https://content.guardianapis.com/business/live...,False,pillar/news,News,Pound tumbles despite Bank of England interven...,<p>Bank’s emergency move brings relief to bond...,Bank’s emergency move brings relief to bond ma...,Julia Kollewe,"<figure class=""element element-image"" data-med...","<div id=""block-63342b258f086841b84c39d0"" class...",5181,,,2022-09-28T06:24:04Z,False,False,2022-09-28T11:20:41Z,True,UK,theguardian.com,https://www.theguardian.com/p/mbe4b,False,True,https://media.guim.co.uk/4f53ab951fccc0ef9f634...,False,en,True,Sterling has tumbled 1.5% against the dollar t...,30928,False,False,"<a href=""profile/juliakollewe"">Julia Kollewe</a>",,,,,
4,tv-and-radio/2022/sep/28/the-old-man-review-je...,article,tv-and-radio,Television & radio,2022-09-28T11:09:30Z,The Old Man review – Jeff Bridges can barely p...,https://www.theguardian.com/tv-and-radio/2022/...,https://content.guardianapis.com/tv-and-radio/...,False,pillar/arts,Arts,The Old Man review – Jeff Bridges can barely p...,<p>Bridges plays an ex-CIA agent who has been ...,Bridges plays an ex-CIA agent who has been in ...,Rebecca Nicholson,"<figure class=""element element-image"" data-med...",<p>The first episode of The Old Man (Disney+) ...,749,2022-10-01T11:15:00Z,True,2022-09-28T11:09:30Z,False,False,2022-09-28T11:11:21Z,,UK,The Guardian,https://www.theguardian.com/p/mbeda,False,True,https://media.guim.co.uk/02d208b44fbf742bd24f6...,False,en,True,The first episode of The Old Man (Disney+) is ...,4131,False,False,"<a href=""profile/rebeccanicholson"">Rebecca Nic...",10.0,4.0,2022-09-29T00:00:00Z,,


In [99]:
# Inspect the raw (un-flattened) dict of the first collected item.
results[0]

{'id': 'politics/live/2022/sep/28/keir-starmer-mini-budget-truss-kwarteng-labour-conference-rayner-uk-politics-news-live',
 'type': 'liveblog',
 'sectionId': 'politics',
 'sectionName': 'Politics',
 'webPublicationDate': '2022-09-28T11:24:57Z',
 'webTitle': 'Angela Rayner urges Labour to celebrate its achievements instead of talking ‘endlessly’ about its failures – UK politics live',
 'webUrl': 'https://www.theguardian.com/politics/live/2022/sep/28/keir-starmer-mini-budget-truss-kwarteng-labour-conference-rayner-uk-politics-news-live',
 'apiUrl': 'https://content.guardianapis.com/politics/live/2022/sep/28/keir-starmer-mini-budget-truss-kwarteng-labour-conference-rayner-uk-politics-news-live',
 'fields': {'headline': 'Angela Rayner urges Labour to celebrate its achievements instead of talking ‘endlessly’ about its failures – UK politics live',
  'standfirst': '<p>Deputy Labour leader’s speech met with applause and cheers as she urges delegates to celebrate successes</p>',
  'trailText':