In [None]:
# Import necessary libraries
import json
import os

import pandas as pd
import requests

In [None]:
# API key for The Guardian's API.
# Read from the GUARDIAN_API_KEY environment variable so a real key never has to be
# saved inside the notebook; falls back to the 'your_key' placeholder when unset.
guardian_key = os.environ.get('GUARDIAN_API_KEY', 'your_key')

In [None]:
# API endpoint for The Guardian's search functionality.
# `page` is deliberately not baked into the URL: the download loop supplies it through
# `params`, and requests appends `params` to any query string already present, so
# keeping `page=1` here would send the parameter twice. Without an explicit `page`
# the API serves the first page, so a plain `requests.get(url)` is unaffected.
url = f'https://content.guardianapis.com/search?page-size=200&api-key={guardian_key}'

In [None]:
# Initial API call to test the response.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces a rejected key or bad request right here instead of
# as a confusing KeyError when the payload is indexed later.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [None]:
# Decode the JSON body of the test response into Python objects
guardian_response = response.json()

In [None]:
# Pull the list of article records out of the decoded payload
# (it is nested under the 'response' -> 'results' keys)
result_list = guardian_response['response']['results']

In [None]:
# Accessing the headline of the first result
first_headline = result_list[0]['webTitle']
# Bare last expression: an assignment alone shows nothing, this makes the cell display the value
first_headline

'Burnley v Watford: Premier League – live!'

In [None]:
# Accessing the publication date of the first result
first_publication_date = result_list[0]['webPublicationDate']
# Bare last expression: an assignment alone shows nothing, this makes the cell display the value
first_publication_date

'2020-06-25T18:41:01Z'

In [None]:
# Define a function to make a call to The Guardian API
def guardian_call(params, timeout=30):
    """
    Function to perform an API call to The Guardian API.

    The request is sent to the module-level ``url``; ``params`` is handed to
    ``requests.get`` as additional query-string parameters.

    :param params: Dictionary containing the parameters for the API call.
    :param timeout: Seconds to wait for the server before requests gives up
        (default 30). Without it a stalled connection would block forever.
    :return: The decoded JSON body of the API response.
    :raises requests.exceptions.HTTPError: If the server answers with a 4xx/5xx status.
    :raises requests.exceptions.Timeout: If the server does not answer within ``timeout``.
    :raises ValueError: If the response body is not valid JSON.
    """
    response = requests.get(url, params=params, timeout=timeout)
    # Fail here with the HTTP status rather than later with a KeyError on the payload
    response.raise_for_status()
    return response.json()

# Example use of the 'guardian_call' function
# NOTE(review): `params` was not defined anywhere in the notebook, so this cell raised
# NameError on a fresh kernel. Start from an empty dict (the base `url` already carries
# the page size and API key) -- add the originally intended filters here if there were any.
params = {}
articles = guardian_call(params)

In [None]:
# Extracting the first article from the response
first_article = articles['response']['results'][0]
# Bare last expression: an assignment alone shows nothing, this makes the cell display the record
first_article

{'id': 'science/2020/jun/12/aubrey-burl-obituary',
 'type': 'article',
 'sectionId': 'books',
 'sectionName': 'Books',
 'webPublicationDate': '2020-06-12T16:56:58Z',
 'webTitle': 'Aubrey Burl obituary',
 'webUrl': 'https://www.theguardian.com/science/2020/jun/12/aubrey-burl-obituary',
 'apiUrl': 'https://content.guardianapis.com/science/2020/jun/12/aubrey-burl-obituary',
 'isHosted': False,
 'pillarId': 'pillar/arts',
 'pillarName': 'Arts'}

In [None]:
# Define a function to parse results from the API response
def parse_results(results):
    """
    Reduce raw Guardian API article records to compact [title, date] rows.

    :param results: Iterable of article dicts; each must carry the 'webTitle' and
        'webPublicationDate' keys.
    :return: A list holding one [webTitle, webPublicationDate] list per article,
        in the same order as the input.
    """
    return [
        [entry['webTitle'], entry['webPublicationDate']]
        for entry in results
    ]

In [None]:
# Example use of the 'parse_results' function:
# feed it the article list nested under 'response' -> 'results' in the API reply
parsed_articles = parse_results(articles['response']['results'])

In [None]:
# Preview the parsed rows as a DataFrame with labelled columns;
# .head() keeps the output to the first few rows instead of dumping everything
pd.DataFrame(parsed_articles, columns=['Title', 'Publication Date']).head()

Unnamed: 0,0,1
0,Aubrey Burl obituary,2020-06-12T16:56:58Z
1,Belgian transport minister in row over 34-mile...,2020-06-12T16:48:58Z
2,Amusement arcades furious after No 10 blocks r...,2020-06-12T16:44:33Z
3,JK Rowling: UK domestic abuse adviser writes t...,2020-06-12T16:43:50Z
4,London fashion week drops elitist traditions a...,2020-06-12T16:42:57Z


In [None]:
# Column names used for the scraped dataset
columns = ['text', 'date']
df = pd.DataFrame(columns=columns) # Create an empty DataFrame
# Write the empty frame so the CSV exists with just a header row.
# NOTE: this overwrites any existing file of the same name, so re-running this cell resets the dataset.
df.to_csv('guardian_scraped_titles_nonclickbait.csv') # Save the empty DataFrame to a CSV file

In [None]:
# Define a function to save parsed data to a CSV file
def data_save(parsed_results, csv_filename, columns=None):
    """
    Function to append parsed results to a CSV file.

    Rows already in the file are kept and the new rows are added after them. The
    combined frame is written back with a fresh 0..n-1 index, so successive calls
    do not leave duplicate index labels in the file.

    :param parsed_results: List of parsed article data, one list of values per row.
    :param csv_filename: Name of the CSV file to save the data to. It is created
        when it does not exist yet (or is completely empty).
    :param columns: Optional column names for the new rows. When omitted, the
        columns of the existing file are reused, or ['text', 'date'] if there is
        no usable file.
    """
    try:
        existing = pd.read_csv(csv_filename, index_col=0)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No file yet, or a zero-byte file: treat it as "nothing saved so far"
        existing = None

    if columns is None:
        if existing is not None and len(existing.columns) > 0:
            columns = list(existing.columns)
        else:
            columns = ['text', 'date']

    new_data = pd.DataFrame(parsed_results, columns=list(columns))

    # Only concatenate frames that actually hold rows; this keeps dtypes stable and
    # avoids pandas' deprecation warning about concatenating empty frames.
    frames = [frame for frame in (existing, new_data) if frame is not None and not frame.empty]
    if frames:
        updated_df = pd.concat(frames, ignore_index=True)
    else:
        updated_df = new_data
    updated_df.to_csv(csv_filename)

In [None]:
# Script to call The Guardian API, parse the results, and save them to a CSV file

# Inclusive range of result pages to download
FIRST_PAGE = 16
LAST_PAGE = 18

for current_page in range(FIRST_PAGE, LAST_PAGE + 1):
    # Request the current page of results
    params['page'] = current_page
    api_results = guardian_call(params)

    # Reduce each article to a [title, publication date] row
    parsed_results = parse_results(api_results['response']['results'])

    # Append this page's rows to the data already stored in the CSV file
    data_save(parsed_results, 'guardian_scraped_titles_nonclickbait.csv')

In [None]:
# Load the saved CSV back into a DataFrame
guardian_df = pd.read_csv('guardian_scraped_titles_nonclickbait.csv', index_col=0)
# Bare last expression gives the notebook's rich (truncated) table view instead of plain print() text
guardian_df

Unnamed: 0,text,date
0,Family court judges given power to intervene i...,2020-06-24T23:01:29Z
1,Children at RNIB schools and homes put at risk...,2020-06-24T23:01:29Z
2,Child abuse victims to sue Celtic FC in landma...,2020-06-24T23:01:28Z
3,"Cryptic crossword No 28,169",2020-06-24T23:00:28Z
4,Trent Alexander-Arnold the spark for Liverpool...,2020-06-24T22:55:55Z
...,...,...
195,Jigsy review – Les Dennis sheds tears of a clo...,2020-06-09T13:01:21Z
196,What's the secret to great jollof rice? | Kitc...,2020-06-09T13:00:49Z
197,UK students: what impact is coronavirus having...,2020-06-09T12:39:15Z
198,IBM quits facial-recognition market over polic...,2020-06-09T12:37:27Z
