In [5]:
import feedparser
import requests
from datetime import datetime, timedelta

def scrape_govexec_past_week(rss_url):
    feed = feedparser.parse(rss_url)
    week_ago = datetime.now() - timedelta(days=7)
    
    articles = []
    for entry in feed.entries:
        pub_date = datetime(*entry.published_parsed[:6])
        
        if pub_date >= week_ago:
            articles.append({
                'title': entry.title,
                'link': entry.link,
                'published': pub_date.strftime('%Y-%m-%d %H:%M'),
                'summary': entry.get('summary', ''),
                'content': entry.get('content', [{}])[0].get('value', '') if entry.get('content') else ''
            })
    
    return articles

rss_url = "https://govexec.com/rss/all/"
articles = scrape_govexec_past_week(rss_url)

print(f"Found {len(articles)} articles from the past week:")
for article in articles:
    print(f"\n{article['title']}")
    print(f"Published: {article['published']}")
    print(f"Link: {article['link']}")

Found 20 articles from the past week:

With DeJoy out, postal stakeholders push for pause in criticized Delivering for America overhaul plan
Published: 2025-06-24 23:00
Link: https://www.govexec.com/management/2025/06/dejoy-out-postal-stakeholders-push-pause-criticized-delivering-america-overhaul-plan/406292/

How shrinking the EPA could make wildfire smoke even more dangerous
Published: 2025-06-24 21:59
Link: https://www.govexec.com/federal-news/2025/06/how-shrinking-epa-could-make-wildfire-smoke-even-more-dangerous/406290/

Funding for further EHR deployments ‘vitally important,’ VA secretary says
Published: 2025-06-24 21:43
Link: https://www.govexec.com/management/2025/06/funding-further-ehr-deployments-vitally-important-va-secretary-says/406286/

Lawmakers spar over DOGE as Republicans look to lock in cuts
Published: 2025-06-24 20:30
Link: https://www.govexec.com/oversight/2025/06/lawmakers-spar-over-doge-republicans-look-lock-cuts/406283/

National Parks scramble to fill top leade

In [7]:
import feedparser
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup

def scrape_govexec_past_week(rss_url):
    feed = feedparser.parse(rss_url)
    week_ago = datetime.now() - timedelta(days=7)
    
    articles = []
    for entry in feed.entries:
        pub_date = datetime(*entry.published_parsed[:6])
        if pub_date >= week_ago:
            articles.append({
                'title': entry.title,
                'link': entry.link,
                'published': pub_date.strftime('%Y-%m-%d %H:%M')
            })
    return articles

# Get articles and extract <p> tags
articles = scrape_govexec_past_week("https://govexec.com/rss/all/")

for article in articles:
    try:
        response = requests.get(article['link'], headers={'User-Agent': 'Mozilla/5.0'})
        soup = BeautifulSoup(response.content, 'html.parser')
        paragraphs = [p.get_text(strip=True) for p in soup.find_all('p') if p.get_text(strip=True)]
        
        print(f"\n=== {article['title']} ===")
        for i, para in enumerate(paragraphs, 1):
            print(f"{i}. {para}")
            
    except Exception as e:
        print(f"Error with {article['title']}: {e}")


=== With DeJoy out, postal stakeholders push for pause in criticized Delivering for America overhaul plan ===
1. Rep. Kweisi Mfume, D-Md., on Capitol Hill on March 8, 2023. The lawmaker questioned the effectiveness of the U.S. Postal Service's "Delivering for America" reform plan at a hearing on Tuesday.Chip Somodevilla / Getty Images
2. Stay Connected
3. Sean Michael Newhouse
4. As a new leader takes the helm of the U.S. Postal Service, a House panel on Tuesday debated the future of the independent entity. While no member of Congress or expert witness offered a new comprehensive vision, there was near universal consensus on pausing and even reversing the recent reforms of former Postmaster General Louis DeJoy.
5. DeJoy’s 10-year plan to promote the financial sustainability of USPS, dubbed Delivering for America, started in 2021 and aimed atslowing some delivery but generate savings. Specifically, it required mail to sit overnight at post offices instead of being collected each evenin

In [11]:
import feedparser
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup

def scrape_govexec_past_week(rss_url):
    feed = feedparser.parse(rss_url)
    week_ago = datetime.now() - timedelta(days=7)
    return [{'title': entry.title, 'link': entry.link} 
            for entry in feed.entries 
            if datetime(*entry.published_parsed[:6]) >= week_ago]

def extract_clean_content(url):
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Title
    title = soup.select_one('h1.content-title')
    title = title.get_text(strip=True) if title else "No title"
    
    # Author  
    author = soup.select_one('span.authors-multiple a, a.gemg-author-link')
    author = author.get_text(strip=True) if author else "No author"
    
    # Content - only main article paragraphs
    content = soup.select('div.content-body p')
    paragraphs = [p.get_text(strip=True) for p in content if len(p.get_text(strip=True)) > 20]
    
    return {
        'title': title,
        'author': author, 
        'content': ' '.join(paragraphs)  # Single string instead of list
    }

# Extract from all articles
articles = scrape_govexec_past_week("https://govexec.com/rss/all/")

for article in articles[:1]:
    try:
        result = extract_clean_content(article['link'])
        print(f"TITLE: {result['title']}")
        print(f"AUTHOR: {result['author']}")
        print(f"CONTENT: {result['content']}...")  # First 200 chars
        print("-" * 50)
    except Exception as e:
        print(f"Error processing {article['title']}: {e}")

TITLE: With DeJoy out, postal stakeholders push for pause in criticized Delivering for America overhaul plan
AUTHOR: Sean Michael Newhouse
CONTENT: As a new leader takes the helm of the U.S. Postal Service, a House panel on Tuesday debated the future of the independent entity. While no member of Congress or expert witness offered a new comprehensive vision, there was near universal consensus on pausing and even reversing the recent reforms of former Postmaster General Louis DeJoy. DeJoy’s 10-year plan to promote the financial sustainability of USPS, dubbed Delivering for America, started in 2021 and aimed atslowing some delivery but generate savings. Specifically, it required mail to sit overnight at post offices instead of being collected each evening to be transported to a processing center and sought to consolidate processing plants into 60 regional distribution centers. Additionally, stamp prices haveincreased six timessince the start of DeJoy’s tenure and that amount is set to ris

In [14]:
import feedparser
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
import re

def scrape_govexec_past_week(rss_url):
    feed = feedparser.parse(rss_url)
    week_ago = datetime.now() - timedelta(days=7)
    return [{'title': entry.title, 'link': entry.link} 
            for entry in feed.entries 
            if datetime(*entry.published_parsed[:6]) >= week_ago]

def extract_clean_content(url):
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Title
    title = soup.select_one('h1.content-title')
    title = title.get_text(strip=True) if title else "No title"
    
    # Author  
    author = soup.select_one('span.authors-multiple a, a.gemg-author-link')
    author = author.get_text(strip=True) if author else "No author"
    
    # Content - handle embedded links properly
    main_content = soup.select_one('div.content-body')
    paragraphs = []
    
    if main_content:
        # Remove unwanted elements
        for element in main_content.find_all(['svg', 'script', 'noscript']):
            element.decompose()
        
        # Process each paragraph
        for p in main_content.find_all('p'):
            # Get clean text content, preserving text from links
            text = p.get_text(separator=' ', strip=True)
            
            # Clean up extra whitespace
            text = re.sub(r'\s+', ' ', text).strip()
            
            # Filter out unwanted content
            if (len(text) > 30 and 
                not any(phrase in text for phrase in [
                    'Share your', 'NEXT STORY:', 'Help us tailor', 
                    'Thank you', 'Stay Connected', 'Newsletter page'
                ])):
                paragraphs.append(text)
    
    return {
        'title': title,
        'author': author, 
        'content': paragraphs
    }

# Process all articles
articles = scrape_govexec_past_week("https://govexec.com/rss/all/")

for article in articles:
    try:
        result = extract_clean_content(article['link'])
        
        print(f"TITLE: {result['title']}")
        print(f"AUTHOR: {result['author']}")
        print("CONTENT:")
        
        for i, para in enumerate(result['content'], 1):
            print(f"{i}. {para}")
        
        print("-" * 80)
        
    except Exception as e:
        print(f"Error processing {article['title']}: {e}")
        print("-" * 80)

TITLE: With DeJoy out, postal stakeholders push for pause in criticized Delivering for America overhaul plan
AUTHOR: Sean Michael Newhouse
CONTENT:
1. As a new leader takes the helm of the U.S. Postal Service, a House panel on Tuesday debated the future of the independent entity. While no member of Congress or expert witness offered a new comprehensive vision, there was near universal consensus on pausing and even reversing the recent reforms of former Postmaster General Louis DeJoy.
2. DeJoy’s 10-year plan to promote the financial sustainability of USPS, dubbed Delivering for America, started in 2021 and aimed at slowing some delivery but generate savings . Specifically, it required mail to sit overnight at post offices instead of being collected each evening to be transported to a processing center and sought to consolidate processing plants into 60 regional distribution centers.
3. Additionally, stamp prices have increased six times since the start of DeJoy’s tenure and that amount 

In [15]:
import feedparser
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
import re
import json
import os

# Output directory
output_path = "/workspaces/langgraph/data/"

def scrape_govexec_past_week(rss_url):
    feed = feedparser.parse(rss_url)
    week_ago = datetime.now() - timedelta(days=7)
    return [{'title': entry.title, 'link': entry.link} 
            for entry in feed.entries 
            if datetime(*entry.published_parsed[:6]) >= week_ago]

def extract_clean_content(url):
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Title
    title = soup.select_one('h1.content-title')
    title = title.get_text(strip=True) if title else "No title"
    
    # Author  
    author = soup.select_one('span.authors-multiple a, a.gemg-author-link')
    author = author.get_text(strip=True) if author else "No author"
    
    # Content - handle embedded links properly
    main_content = soup.select_one('div.content-body')
    paragraphs = []
    
    if main_content:
        # Remove unwanted elements
        for element in main_content.find_all(['svg', 'script', 'noscript']):
            element.decompose()
        
        # Process each paragraph
        for p in main_content.find_all('p'):
            # Get clean text content, preserving text from links
            text = p.get_text(separator=' ', strip=True)
            
            # Clean up extra whitespace
            text = re.sub(r'\s+', ' ', text).strip()
            
            # Filter out unwanted content
            if (len(text) > 30 and 
                not any(phrase in text for phrase in [
                    'Share your', 'NEXT STORY:', 'Help us tailor', 
                    'Thank you', 'Stay Connected', 'Newsletter page'
                ])):
                paragraphs.append(text)
    
    return {
        'title': title,
        'author': author, 
        'content': paragraphs,
        'link': url  # Add the link to the returned data
    }

# Create output directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)

# Process all articles
articles = scrape_govexec_past_week("https://govexec.com/rss/all/")
all_results = []  # Store all results for saving

print(f"Found {len(articles)} articles from the past week\n")

for i, article in enumerate(articles, 1):
    try:
        print(f"[{i}/{len(articles)}] Processing: {article['title']}")
        result = extract_clean_content(article['link'])
        
        # Add to results list
        all_results.append(result)
        
        # Your original console output
        print(f"TITLE: {result['title']}")
        print(f"AUTHOR: {result['author']}")
        print("CONTENT:")
        
        for j, para in enumerate(result['content'], 1):
            print(f"{j}. {para}")
        
        print("-" * 80)
        
    except Exception as e:
        print(f"Error processing {article['title']}: {e}")
        print("-" * 80)

# Also save as "latest" for easy loading
latest_filepath = os.path.join(output_path, "govexec_articles_latest.json")
with open(latest_filepath, 'w', encoding='utf-8') as f:
    json.dump(all_results, f, indent=2, ensure_ascii=False)

Found 20 articles from the past week

[1/20] Processing: With DeJoy out, postal stakeholders push for pause in criticized Delivering for America overhaul plan
TITLE: With DeJoy out, postal stakeholders push for pause in criticized Delivering for America overhaul plan
AUTHOR: Sean Michael Newhouse
CONTENT:
1. As a new leader takes the helm of the U.S. Postal Service, a House panel on Tuesday debated the future of the independent entity. While no member of Congress or expert witness offered a new comprehensive vision, there was near universal consensus on pausing and even reversing the recent reforms of former Postmaster General Louis DeJoy.
2. DeJoy’s 10-year plan to promote the financial sustainability of USPS, dubbed Delivering for America, started in 2021 and aimed at slowing some delivery but generate savings . Specifically, it required mail to sit overnight at post offices instead of being collected each evening to be transported to a processing center and sought to consolidate pro

In [None]:
# Example: How to load the data later
def load_saved_articles(filename="govexec_articles_latest.json"):
    """Load articles from saved JSON file"""
    filepath = os.path.join(output_path, filename)
    
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            articles = json.load(f)
        print(f"Loaded {len(articles)} articles from {filename}")
        return articles
    except FileNotFoundError:
        print(f"File {filename} not found in {output_path}")
        return []

# Uncomment to test loading:
loaded_articles = load_saved_articles()
if loaded_articles:
    print(f"First article: {loaded_articles[0]['title']}")