In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd

# Crawl every listing page of books.toscrape.com by following the "next"
# pagination link, and save one row per book to books.csv.
base = "https://books.toscrape.com/"
star_words = {"One", "Two", "Three", "Four", "Five"}


def _tag_text(tag):
    """Return the tag's text with whitespace collapsed, or "" if the tag is missing."""
    return " ".join(tag.get_text(strip=True).split()) if tag is not None else ""


url = base
rows = []
# A single Session reuses the HTTP connection across all listing pages.
with requests.Session() as session:
    while url:
        r = session.get(url, timeout=30)
        r.raise_for_status()
        # Hand BeautifulSoup the raw bytes so it picks the charset up from the
        # document itself. Decoding through r.text could yield "Â£" prices,
        # which this cell previously had to strip by hand.
        soup = BeautifulSoup(r.content, "html.parser")
        for p in soup.select("article.product_pod"):
            link = p.select_one("h3 a")
            # Prefer the title attribute; fall back to the visible link text.
            title = ((link.get("title") or link.get_text(strip=True)).strip()
                     if link is not None else "")
            price = _tag_text(p.select_one("p.price_color"))
            avail = _tag_text(p.select_one("p.instock.availability"))
            # The rating is encoded as a CSS class word, e.g. class="star-rating Three".
            star_elt = p.select_one("p.star-rating")
            star_classes = (star_elt.get("class") or []) if star_elt is not None else []
            star = next((c for c in star_classes if c in star_words), "")
            rows.append({"Title": title, "Price": price, "Availability": avail, "Star Rating": star})
        # Relative hrefs are resolved against the page that was just fetched;
        # the crawl stops when a page has no "next" link.
        next_link = soup.select_one("li.next a")
        url = urljoin(url, next_link["href"]) if next_link is not None and next_link.get("href") else None
df = pd.DataFrame(rows)
df.to_csv("books.csv", index=False)
df.head()


Unnamed: 0,Title,Price,Availability,Star Rating
0,A Light in the Attic,£51.77,In stock,Three
1,Tipping the Velvet,£53.74,In stock,One
2,Soumission,£50.10,In stock,One
3,Sharp Objects,£47.82,In stock,Four
4,Sapiens: A Brief History of Humankind,£54.23,In stock,Five


In [None]:
!pip install selenium webdriver-manager

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import re
from webdriver_manager.chrome import ChromeDriverManager

# Scrape the IMDb Top 250 chart with headless Chrome and save it to imdb_top250.csv.
# Extra names this cell needs beyond the import lines above.
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service

imdb_top_url = "https://www.imdb.com/chart/top/"


def _parse_heading(text):
    """Split "<rank>. <title> (<year>)" into (rank, title, year).

    rank is an int, or None when the text has no leading "<digits>." prefix.
    year is the four digits of the last "(YYYY)" group, or "" when there is none;
    anything after that group is dropped from the title.
    """
    text = " ".join(text.split())
    rank = None
    # NOTE: the patterns use single backslashes. In a raw string "\\d" means a
    # literal backslash followed by "d", so the earlier doubled-backslash
    # patterns could never match.
    ranked = re.match(r"(\d+)\.\s*(.*)", text)
    if ranked:
        rank, text = int(ranked.group(1)), ranked.group(2)
    year = ""
    dated = re.match(r"(.*)\((\d{4})\)", text)
    if dated:
        text, year = dated.group(1), dated.group(2)
    return rank, text.strip(), year


def _first_text(root, selectors):
    """Return the stripped text of the first selector that matches under root, else ""."""
    for css in selectors:
        try:
            return root.find_element(By.CSS_SELECTOR, css).text.strip()
        except NoSuchElementException:
            continue
    return ""


opt = webdriver.ChromeOptions()
opt.add_argument("--headless=new")
opt.add_argument("--no-sandbox")
opt.add_argument("--disable-dev-shm-usage")
# Selenium 4 takes the chromedriver path through a Service object; recent
# releases no longer accept it as the first positional argument.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=opt)
rows = []
try:
    # Navigation happens inside the try so the browser is closed even if it fails.
    driver.get(imdb_top_url)
    wait = WebDriverWait(driver, 20)
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "main")))
    items = driver.find_elements(By.CSS_SELECTOR, 'ul.ipc-metadata-list li.ipc-metadata-list-summary-item')
    if items:
        # List layout: one <li> per movie.
        for li in items:
            h = _first_text(li, ["h3.ipc-title__text"])
            if not h:
                continue  # no heading, so not a movie entry
            # If the heading has no "(YYYY)" suffix, year stays "".
            rank, title, year = _parse_heading(h)
            rating = _first_text(li, [
                '[data-testid="ratingGroup--imdb-rating"] span.ipc-rating-star--rating',
                "span.ipc-rating-star--rating",
            ])
            rows.append({"Rank": rank, "Movie Title": title, "Year of Release": year, "IMDB Rating": rating})
    else:
        # Table layout fallback: one <tr> per movie.
        table = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "tbody.lister-list")))
        for tr in table.find_elements(By.CSS_SELECTOR, "tr"):
            rank_title = _first_text(tr, ["td.titleColumn"])
            if not rank_title:
                continue
            rank, title, year = _parse_heading(rank_title)
            rating = _first_text(tr, ["td.imdbRating strong"])
            rows.append({"Rank": rank, "Movie Title": title, "Year of Release": year, "IMDB Rating": rating})
finally:
    driver.quit()
# Rows whose rank could not be parsed (Rank is None) sort after the ranked ones.
rows.sort(key=lambda row: (row["Rank"] is None, row["Rank"] or 0))
df = pd.DataFrame(rows)
df.to_csv("imdb_top250.csv", index=False)
df.head()


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the city weather table from timeanddate.com and save it to weather.csv.
weather_url = "https://www.timeanddate.com/weather/"


def _header_cell(cells, idx, name):
    """Return the cell under header `name`, or None if the header or the cell is absent."""
    pos = idx.get(name)
    return cells[pos] if pos is not None and pos < len(cells) else None


def _icon_label(cells):
    """Return the alt/title text of the first labelled <img> in cells, or ""."""
    for cell in cells:
        for img in cell.find_all("img"):
            label = (img.get("alt") or img.get("title") or "").strip()
            if label:
                return label
    return ""


def _city_groups(tds):
    """Split a row's cells into one run per city.

    A new run starts at every cell that contains a link. Rows with zero or one
    such cell are returned as a single run.
    """
    starts = [i for i, td in enumerate(tds) if td.find("a")]
    if len(starts) < 2:
        return [tds]
    bounds = [0] + starts[1:] + [len(tds)]
    return [tds[lo:hi] for lo, hi in zip(bounds, bounds[1:])]


r = requests.get(weather_url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")
# "table.tb-wt" already covers "table.zebra.tb-wt"; otherwise take the first table.
table = soup.select_one("table.tb-wt") or soup.find("table")
if table is None:
    raise RuntimeError(f"No <table> found at {weather_url}")
header_cells = table.select("thead th") or table.select("tr th")
headers = [th.get_text(strip=True) for th in header_cells]
idx = {h: i for i, h in enumerate(headers)}
rows = []
# html.parser does not synthesise a <tbody>, so fall back to every <tr>.
for tr in table.select("tbody tr") or table.select("tr"):
    tds = tr.find_all("td")
    if not tds:
        continue
    groups = _city_groups(tds)
    # Header positions describe a whole row, so they are only trusted when the
    # row holds a single city.
    # NOTE(review): the live page appears to pack several cities into one row,
    # with the condition carried by an icon's alt/title. Confirm against the page.
    single = len(groups) == 1
    for cells in groups:
        link = tr.find("a") if single else next((td.find("a") for td in cells if td.find("a")), None)
        if link is not None:
            city = link.get_text(strip=True)
        else:
            city_cell = _header_cell(cells, idx, "City") if single else None
            city = (city_cell if city_cell is not None else cells[0]).get_text(strip=True)

        temp_cell = _header_cell(cells, idx, "Temp") if single else None
        if temp_cell is None:
            # No usable header: take the first cell that shows a degree sign.
            temp_cell = next((td for td in cells if "°" in td.get_text()), None)
        temp = temp_cell.get_text(strip=True) if temp_cell is not None else ""

        weather_cell = _header_cell(cells, idx, "Weather") if single else None
        if weather_cell is not None:
            weather = weather_cell.get_text(strip=True) or _icon_label([weather_cell])
        else:
            # Prefer an icon label outside the city cell, then the last cell's
            # text, but never report the temperature cell as the condition.
            weather = _icon_label([td for td in cells if td.find("a") is None])
            if not weather and cells[-1] is not temp_cell:
                weather = cells[-1].get_text(strip=True)

        if not (city or temp or weather):
            continue  # padding cells with no content
        rows.append({"City Name": city, "Temperature": temp, "Weather Condition": weather})
df = pd.DataFrame(rows)
df.to_csv("weather.csv", index=False)
df.head()
