In [16]:
%pip install urllib3 requests pandas
import csv 

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [9]:
import requests
import csv
import os
from datetime import datetime
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

#send this to env for security
API_KEY = "MR11Skvb1Q6K1QB2Yqz196qwPT4CMNdPmlHGClj1n27MnXkYf571"
BASE_URL = "https://muckrack.com/api/saved-searches/oI8W/results/"
HEADERS = {"Authorization": f"Bearer {API_KEY}"}

def get_saved_search_results(page_size=100, max_results=None):
    """
    Fetch all results from the Muck Rack API, handling pagination.
    
    Args:
    page_size (int): Number of results per page (default 20)
    max_results (int): Maximum number of results to fetch (optional)
    
    Returns:
    list: All fetched results
    """
    all_results = []
    next_page = BASE_URL
    total_fetched = 0

    while next_page:
        logging.info(f"Fetching page: {next_page}")
        params = {"page_size": page_size}
        response = requests.get(next_page, headers=HEADERS, params=params)
        
        if response.status_code != 200:
            logging.error(f"Error fetching results: {response.status_code}")
            break

        data = response.json()
        if 'results' in data:
            new_results = data['results']
            all_results.extend(new_results)
            total_fetched += len(new_results)
            logging.info(f"Fetched {len(new_results)} results. Total: {total_fetched}")

        if max_results and total_fetched >= max_results:
            logging.info(f"Reached maximum results limit: {max_results}")
            break

        next_page = data.get('next')

    return all_results

def save_results_to_csv(results, filename):
    """
    Save the results to a CSV file.
    
    Args:
    results (list): List of result dictionaries
    filename (str): Name of the CSV file to save
    """
    fields = [
        'link_url', 'link_id', 'link_title', 'link_snippet', 'link_description',
        'key_terms', 'sentiment', 'date_published', 'date_modified', 'authors',
        'audience', 'content_type', 'media_outlet', 'scope', 'language',
        'location', 'domain_authority', 'page_authority'
    ]

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        for result in results:
            row = {field: result.get(field, '') for field in fields}
            # Convert lists and dicts to strings
            for field in ['key_terms', 'authors', 'scope', 'language', 'location']:
                if isinstance(row[field], list):
                    row[field] = ', '.join(map(str, row[field]))
            if isinstance(row['media_outlet'], dict):
                row['media_outlet'] = f"{row['media_outlet'].get('id', '')}: {row['media_outlet'].get('title', '')}"
            writer.writerow(row)

    logging.info(f"Results saved to {filename}")

def main():
    # Set parameters
    page_size = 100
    max_results = 1000  # Set to None for all results

    # Fetch results
    all_results = get_saved_search_results(page_size, max_results)
    
    # Save results
    current_date = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_path = os.path.expanduser("~x/Downloads")
    filename = os.path.join(save_path, f"muck_rack_results_{current_date}.csv")
    save_results_to_csv(all_results, filename)

    print(f"Total results fetched: {len(all_results)}")
    print(f"Results saved to {filename}")

if __name__ == "__main__":
    main()



2025-03-05 15:22:33,742 - INFO - Fetching page: https://muckrack.com/api/saved-searches/oI8W/results/
2025-03-05 15:22:36,192 - INFO - Fetched 100 results. Total: 100
2025-03-05 15:22:36,193 - INFO - Fetching page: https://muckrack.com/api/saved-searches/oI8W/results/?page_size=100&modified_since=2024-07-23T06%3A37%3A53
2025-03-05 15:22:39,402 - INFO - Fetched 100 results. Total: 200
2025-03-05 15:22:39,404 - INFO - Fetching page: https://muckrack.com/api/saved-searches/oI8W/results/?page_size=100&page_size=100&modified_since=2024-09-12T14%3A43%3A08
2025-03-05 15:22:47,536 - INFO - Fetched 100 results. Total: 300
2025-03-05 15:22:47,537 - INFO - Fetching page: https://muckrack.com/api/saved-searches/oI8W/results/?page_size=100&page_size=100&page_size=100&modified_since=2024-10-04T19%3A19%3A02
2025-03-05 15:22:52,177 - INFO - Fetched 100 results. Total: 400
2025-03-05 15:22:52,178 - INFO - Fetching page: https://muckrack.com/api/saved-searches/oI8W/results/?page_size=100&page_size=100&p

Total results fetched: 842
Results saved to /Users/krishna/Downloads/muck_rack_results_20250305_152325.csv
