In [2]:
import requests
import json
import time
import os

# Your Guardian API Key
GUARDIAN_API_KEY = '437d9f3b-d32c-4598-a9e4-cd67f692569e'

# Base URL for the Guardian content API
BASE_URL = 'https://content.guardianapis.com/search'

# Search parameters (last 6 months)
begin_date = '2024-10-25'  # October 25, 2024
end_date = '2025-04-25'    # April 25, 2025

# Artist list
artists = [
    "Kanye West", "Sabrina Carpenter", "Kendrick Lamar", "SZA",
    "Drake", "Chappell Roan", "Post Malone", "Lady Gaga",
    "Benson Boone", "Billie Eilish", "The Weeknd", "Gracie Abrams",
    "Teddy Swims", "Taylor Swift", "Bad Bunny", "Ariana Grande",
    "Travis Scott", "Rihanna", "Ed Sheeran", "Beyoncé",
    "Bruno Mars", "Charli XCX"
]

# Function to fetch articles
def fetch_guardian_articles(artist, from_date, to_date, pages=5):
    all_articles = []
    
    for page in range(1, pages + 1):
        params = {
            'q': artist,
            'from-date': from_date,
            'to-date': to_date,
            'api-key': GUARDIAN_API_KEY,
            'page': page,
            'page-size': 50,  # Guardian allows up to 50 per page
            'show-fields': 'trailText,bodyText',
            'show-tags': 'keyword'
        }
        
        response = requests.get(BASE_URL, params=params)
        
        if response.status_code == 200:
            data = response.json()
            results = data['response']['results']
            
            if not results:
                print(f"No articles found on page {page} for '{artist}'. Stopping further requests.")
                break  # No more results
            
            for item in results:
                tags = [tag['webTitle'] for tag in item.get('tags', [])]

                # Filter: Keep articles where artist name is mentioned in tags or title
                if not any(artist.lower() in tag.lower() for tag in tags) and artist.lower() not in item['webTitle'].lower():
                    continue  # skip if artist isn't clearly referenced

                article = {
                    'headline': item['webTitle'],
                    'trailText': item['fields'].get('trailText', ''),
                    'bodyText': item['fields'].get('bodyText', ''),
                    'web_url': item['webUrl'],
                    'pub_date': item['webPublicationDate'],
                    'section_name': item.get('sectionName', ''),
                    'tags': tags
                }
                all_articles.append(article)
            
            time.sleep(2)  # small polite sleep (Guardian is chill but be nice)
        
        elif response.status_code == 429:
            print("Rate limited. Sleeping for 60 seconds...")
            time.sleep(60)
            continue  # Try again after waiting
        
        else:
            print(f"Error: {response.status_code}")
            break

    return all_articles

# Create a folder to save all Guardian articles
os.makedirs('guardian_artist_articles', exist_ok=True)

# Loop through all artists and fetch/save articles
for artist in artists:
    print(f"Fetching Guardian articles for {artist}...")
    articles = fetch_guardian_articles(artist, begin_date, end_date, pages=5)
    
    # Save to a JSON file
    safe_artist_name = artist.lower().replace(" ", "_")
    filename = f'guardian_artist_articles/{safe_artist_name}.json'
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(articles, f, indent=4)
    
    if not articles:
        print(f"WARNING: No articles found for {artist}!\n")
    else:
        print(f"Saved {len(articles)} articles for {artist}.\n")

print("All Guardian artist articles collected successfully.")


Fetching Guardian articles for Kanye West...
Saved 11 articles for Kanye West.

Fetching Guardian articles for Sabrina Carpenter...
Saved 19 articles for Sabrina Carpenter.

Fetching Guardian articles for Kendrick Lamar...
Error: 400
Saved 34 articles for Kendrick Lamar.

Fetching Guardian articles for SZA...
Error: 400
Saved 7 articles for SZA.

Fetching Guardian articles for Drake...
Error: 400
Saved 15 articles for Drake.

Fetching Guardian articles for Chappell Roan...
Error: 400
Saved 23 articles for Chappell Roan.

Fetching Guardian articles for Post Malone...
Saved 2 articles for Post Malone.

Fetching Guardian articles for Lady Gaga...
Error: 400
Saved 21 articles for Lady Gaga.

Fetching Guardian articles for Benson Boone...
Error: 400

Fetching Guardian articles for Billie Eilish...
Error: 400
Saved 17 articles for Billie Eilish.

Fetching Guardian articles for The Weeknd...
Error: 400
Saved 4 articles for The Weeknd.

Fetching Guardian articles for Gracie Abrams...
Error: 40