In [8]:
import asyncio
import csv
import json
import os
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from playwright.async_api import async_playwright

In [4]:
# Page the scraping cell navigates to.
URL = "https://edition.cnn.com/politics"
# User-agent string handed to the Playwright browser context.
# NOTE(review): this string identifies as Edge/Chromium on Windows, while the
# scraping cell launches Firefox — confirm the mismatch is intentional.
USERAGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0'


In [9]:
def save_to_csv(headlines, filename="/data/politics.csv"):
    """
    Saves headlines to a single-column CSV file with a 'Headline' header row.

    Args:
        headlines: Iterable of headline strings. It is materialized once, so
            generators and other one-shot iterables are accepted.
        filename: Destination path of the CSV file. Missing parent
            directories are created; an existing file is overwritten.

    Raises:
        OSError: If the directory or the file cannot be created or written.
    """
    # Materialize once: the rows are both written and counted below.
    rows = list(headlines)

    # open(..., 'w') does not create intermediate directories, so make sure
    # the target folder exists first. A bare file name has no parent to create.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Headline'])  # Write the header
        writer.writerows([row] for row in rows)  # One headline per row
    print(f"Successfully saved {len(rows)} headlines to {filename}")

In [11]:
async with async_playwright() as p:
        try:
            browser = await p.firefox.launch(headless=False)
            context = await browser.new_context(user_agent=USERAGENT)
            page = await context.new_page()

            print(f"Navigating to: {URL}")
            await page.goto(URL, wait_until="domcontentloaded")
            await page.wait_for_selector('.container__headline', state='visible', timeout=60000)

            html_content = await page.content()
            doc = BeautifulSoup(html_content, "html.parser")

            headlines = []
            # Find all elements that contain a headline
            headline_elements = doc.select('span.container__headline-text')

            # Extract headlines until we have 50 or run out of elements
            for element in headline_elements:
                headline_text = element.get_text(strip=True)
                if headline_text:
                    headlines.append(headline_text)
                if len(headlines) >= 50:
                    break

            for i, headline in enumerate(headlines):
                print(f"{i+1}. {headline}")

            print(f"\nSuccessfully scraped {len(headlines)} headlines.")
            
            save_to_csv(headlines)

        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            if 'browser' in locals():
                await browser.close()

Navigating to: https://edition.cnn.com/politics
1. Officials have been planning for weeks to send National Guard to Chicago as Trump seeks to expand crime crackdown
2. ‘Clever and a little bit offensive’: Inside the White House’s norm-breaking social media strategy
3. House Oversight Committee Democrats say most Epstein files turned over by DOJ were already public
4. Justice Department declines to defend grants for Hispanic-serving colleges, calling them unconstitutional
5. Trump administration might deport Kilmar Abrego Garcia to Uganda
6. Privately influential but publicly absent, Melania Trump is picking and choosing her moments this term
7. Trump team keeps giving away the game on its retribution crusade
8. What the 2020 investigation of John Bolton says about the new probe
9. It’s time to redefine the ‘swing voter’
10. Who is John Bolton? What to know about Trump’s former national security adviser
11. A week after Trump embraced Putin, the Ukraine peace effort is going nowhere
12.

In [None]:
# Dump the list of headline strings collected by the scraping cell.
print(headlines)



In [None]:
# Print the results
# Every record is indexed by string keys ('title', 'abstract', ...), so this
# loop has to walk the publication records. Iterating `headlines` (a list of
# plain strings) would raise TypeError on pub['title'].
# NOTE(review): `all_publications_data` is not defined in the visible cells; it
# is the name the JSON-export cell serializes — confirm it is built earlier.
for pub in all_publications_data:
    print("---")
    print(f"Title: {pub['title']}")
    print(f"Title Link: {pub['title_link']}")
    print(f"Published Date: {pub['published_date']}")
    print(f"Abstract: {pub['abstract']}")
    print(f"Authors: {pub['authors']}")
    print(f"Keywords: {pub['keywords']}")
    print(f"Subject Areas: {pub['subject_areas']}")
    print("---")

---
Title: A Qard Hassan (Benevolent Loan) Crowdfunding Model for Refugee Finance
Title Link: https://pureportal.coventry.ac.uk/en/publications/a-qard-hassan-benevolent-loan-crowdfunding-model-for-refugee-fina
Published Date: 11 Feb 2025
Abstract: Access to adequate finance can significantly impact the well-being of refugees. Lack of access to funds can lead to poor living conditions, limited access to education and health care and a lack of economic opportunities. While the refugee finance gap is particularly acute in low- and middle-income countries, refugees in both advanced and emerging countries are affected due to the lack of regulations and relevant financial products. There is a lack of awareness of the financial needs of refugees among financial institutions. Therefore, we adopt a qualitative research design and propose a conceptual Qard Hassan-based crowdfunding model to reduce the global refugee finance gap. Our conceptual model is unique and can provide greater flexibility 

In [20]:
# Save the results to a JSON file
output_path = '../data/publications_data.json'

# Serialize BEFORE opening the destination: if a record is not JSON-serializable
# the error is raised here, and an existing output file is not truncated.
payload = json.dumps(all_publications_data, indent=4)

# open(..., 'w') does not create missing directories.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Explicit encoding so the file does not depend on the platform's locale default.
with open(output_path, 'w', encoding='utf-8') as f:
    f.write(payload)
print("Data saved to publications_data.json")

Data saved to publications_data.json
