## Task 1: Web Scraping

In [None]:
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup


base_url = "https://news.ycombinator.com"

# Parallel column lists for the DataFrame. The three values for a post are
# always appended together so the lists can never drift out of alignment.
titles = []
scores = []
urls = []

# Scrape the first two listing pages.
for page in range(1, 3):
    page_url = base_url if page == 1 else f"{base_url}/news?p={page}"

    # Delay to respect the server; only needed between consecutive requests,
    # so nothing is slept before the first page or after the last one.
    if page > 1:
        time.sleep(2)

    # Keep the try body limited to the network call: parsing problems should
    # surface as real errors instead of being reported as fetch failures.
    try:
        # A timeout prevents the cell from hanging forever on a stalled
        # connection; a timeout surfaces as a requests.RequestException.
        response = requests.get(page_url, timeout=10)
        response.raise_for_status()  # Check for HTTP errors
    except requests.RequestException as e:
        print(f"Error fetching page {page}: {e}")
        continue

    soup = BeautifulSoup(response.text, "html.parser")

    # Each post is a <tr class="athing">; its score is looked up in the
    # <tr> that immediately follows it.
    for post in soup.find_all("tr", class_="athing"):
        titleline = post.find("span", class_="titleline")
        if not (titleline and titleline.a):
            continue  # Row without a title link: nothing to record

        anchor = titleline.a
        title = anchor.text
        # Resolve relative hrefs against the site root so every stored URL is
        # absolute; already-absolute links are returned unchanged by urljoin.
        link = urljoin(f"{base_url}/", anchor.get("href", ""))

        # The following row may be missing, or may carry no score span;
        # fall back to "0 points" in both cases.
        subtext_row = post.find_next_sibling("tr")
        score_elem = (
            subtext_row.find("span", class_="score") if subtext_row else None
        )
        score = score_elem.text if score_elem else "0 points"

        titles.append(title)
        urls.append(link)
        scores.append(score)

# Create DataFrame
data = {"Title": titles, "Score": scores, "URL": urls}
df = pd.DataFrame(data)

# Save to CSV
df.to_csv("hacker_news_posts2.csv", index=False)
df.head()  # Display first 5 rows



Unnamed: 0,Title,Score,URL
0,Asbestosis,79 points,https://diamondgeezer.blogspot.com/2025/10/asb...
1,Advent of Code 2025: Number of puzzles reduce ...,109 points,https://adventofcode.com/2025/about#faq_num_days
2,A worker fell into a nuclear reactor pool,441 points,https://www.nrc.gov/reading-rm/doc-collections...
3,Pico-Banana-400k,245 points,https://github.com/apple/pico-banana-400k
4,Clojure Land – Discover open-source Clojure li...,74 points,https://clojure.land/
5,My favorite cult sci-fi and fantasy books you ...,3 points,https://shepherd.com/best-books/cult-sci-fi-an...
6,Writing a RISC-V Emulator in Rust,32 points,https://book.rvemu.app/
7,The Linux Boot Process: From Power Button to K...,274 points,https://www.0xkato.xyz/linux-boot/
8,Eavesdropping on Internal Networks via Unencry...,25 points,https://satcom.sysnet.ucsd.edu/
9,LaserTweezer – Optical Trap,21 points,https://www.gaudi.ch/GaudiLabs/?page_id=578


In [None]:

# Evaluate the DataFrame as the cell's last expression so the notebook renders it.
df

Unnamed: 0,Title,Score,URL
0,Asbestosis,79 points,https://diamondgeezer.blogspot.com/2025/10/asb...
1,Advent of Code 2025: Number of puzzles reduce ...,109 points,https://adventofcode.com/2025/about#faq_num_days
2,A worker fell into a nuclear reactor pool,441 points,https://www.nrc.gov/reading-rm/doc-collections...
3,Pico-Banana-400k,245 points,https://github.com/apple/pico-banana-400k
4,Clojure Land – Discover open-source Clojure li...,74 points,https://clojure.land/
5,My favorite cult sci-fi and fantasy books you ...,3 points,https://shepherd.com/best-books/cult-sci-fi-an...
6,Writing a RISC-V Emulator in Rust,32 points,https://book.rvemu.app/
7,The Linux Boot Process: From Power Button to K...,274 points,https://www.0xkato.xyz/linux-boot/
8,Eavesdropping on Internal Networks via Unencry...,25 points,https://satcom.sysnet.ucsd.edu/
9,LaserTweezer – Optical Trap,21 points,https://www.gaudi.ch/GaudiLabs/?page_id=578
