# Project 7: Load RSS Content and Extract Data from Each Link (Multithreading)

In [5]:
import os
import threading
import requests
import xml.etree.ElementTree as ET

# Input feed: parsed with ET.parse() in the RSS-loading cell.
rss_path = "./content/rss_feed.xml"
# Output file: truncated once before the fetch starts, then appended to by
# fetch_and_write() after each successful download.
output_path = "./content/output.txt"

# --- Thread lock for safe file writing ---
# fetch_and_write() holds this lock while it opens and appends to output_path,
# so the header and body written for one URL are not interleaved with the
# writes of another thread.
write_lock = threading.Lock()

def fetch_and_write(url):
    """Download one URL and append its body to the shared output file.

    Issues a GET request with a 10-second timeout and treats any HTTP error
    status as a failure (``raise_for_status``). On success, a header line
    naming the URL followed by the response text is appended to
    ``output_path`` while ``write_lock`` is held, and a " Fetched" line is
    printed.

    Any exception raised along the way (network, HTTP status, file I/O) is
    caught and reported with a " Failed to fetch" line instead of propagating,
    so one bad link does not affect the other worker threads.

    Args:
        url: Address to request.

    Returns:
        None.
    """
    try:
        reply = requests.get(url, timeout=10)
        reply.raise_for_status()

        banner = f"\n\n--- Content from: {url} ---\n"

        # Take the lock first, then open the file in append mode; both are
        # released together when the block exits.
        with write_lock, open(output_path, "a", encoding="utf-8") as sink:
            sink.write(banner)
            sink.write(reply.text)

        print(f" Fetched: {url}")
    except Exception as err:
        print(f" Failed to fetch {url}: {err}")


In [6]:
# --- Validate RSS file, then fetch every item link on its own thread ---
# Uses rss_path, output_path and fetch_and_write() from the setup cell.
if not os.path.exists(rss_path):
    print(" RSS XML file not found in /content folder.")
else:
    try:
        root = ET.parse(rss_path).getroot()

        # Extract <link> text (RSS 2.0: channel/item/link).
        # findtext() yields None when <link> is missing and "" when it is
        # empty; normalise both to "" and strip surrounding whitespace, since
        # pretty-printed feeds may wrap the URL in newlines/indentation.
        link_texts = (
            (item.findtext("link") or "").strip()
            for item in root.findall("./channel/item")
        )
        # Drop blanks so fetch_and_write() is never handed None or "".
        links = [url for url in link_texts if url]

        if not links:
            print(" RSS file loaded but no links found.")
        else:
            print(f" RSS feed loaded: {len(links)} items")
            print(" Starting multi-threaded fetch...")

            # Clear output file before writing (workers only ever append).
            with open(output_path, "w", encoding="utf-8"):
                pass

            # --- Launch threads: one worker per link ---
            threads = []
            for link in links:
                t = threading.Thread(target=fetch_and_write, args=(link,))
                threads.append(t)
                t.start()

            # Wait for all threads to complete
            for t in threads:
                t.join()

            # Printed once every worker has finished; individual failures are
            # reported by fetch_and_write() itself.
            print(f" Completed. Output saved to {output_path}")

    except ET.ParseError:
        # Raised by ET.parse() for an empty or malformed XML document.
        print(" RSS XML file is empty or invalid.")
    except Exception as e:
        print(f" Unexpected error: {e}")


 RSS feed loaded: 2 items
 Starting multi-threaded fetch...
 Fetched: https://www.w3schools.com/xml/xml_rss.asp
 Fetched: https://www.w3schools.com/xml
 Completed. Output saved to ./content/output.txt
