In [3]:
import requests
import pandas as pd
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# Scrape Books data

In [4]:
# Scrape title, price, availability and star rating for every book listed on
# catalogue pages 1 through 50, then save the result to books.csv.
base_url = "https://books.toscrape.com/catalogue/page-{}.html"
books = []

for page in range(1, 51):
    url = base_url.format(page)
    # Time out instead of hanging the kernel on a stalled connection, and fail
    # loudly on an HTTP error so an error page is never parsed as catalogue data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Hand BeautifulSoup the raw bytes so it detects the encoding from the
    # document itself. If the response headers carry no charset, requests
    # decodes text/* bodies as ISO-8859-1, which would turn the pound sign in
    # the prices into mojibake.
    soup = BeautifulSoup(response.content, 'html.parser')

    for book in soup.find_all('article', class_='product_pod'):
        title = book.h3.a['title']
        price = book.find('p', class_='price_color').text.strip()
        availability = book.find('p', class_='instock availability').text.strip()
        # The first <p> inside the article is read as the star-rating element;
        # the second entry of its class list is taken as the rating word.
        star_rating = book.p['class'][1]

        books.append([title, price, availability, star_rating])

df_books = pd.DataFrame(books, columns=['Title', 'Price', 'Availability', 'Star Rating'])
df_books.to_csv("books.csv", index=False)
print("books.csv saved")


books.csv saved


# Scrape IMDB Top 250 Movies

In [5]:
# Scrape rank, title, year and rating from the IMDB Top 250 chart using a real
# browser (Selenium + Chrome), then save the result to imdb_top250.csv.
url = "https://www.imdb.com/chart/top/"
movies = []

driver = webdriver.Chrome()
try:
    driver.get(url)
    # Fixed pause to give the page time to render before querying the DOM.
    time.sleep(3)

    rows = driver.find_elements(By.XPATH, '//tbody[@class="lister-list"]/tr')

    for row in rows:
        # Look the title cell up once and query its children relative to it.
        title_cell = row.find_element(By.XPATH, './/td[@class="titleColumn"]')
        rank = title_cell.text.split('.')[0]
        title = title_cell.find_element(By.XPATH, './a').text
        year = title_cell.find_element(By.XPATH, './span').text.strip("()")
        rating = row.find_element(By.XPATH, './/td[@class="ratingColumn imdbRating"]/strong').text
        movies.append([rank, title, year, rating])
finally:
    # Always shut the browser down, even if a lookup above raises; otherwise
    # the Chrome/chromedriver processes are left running.
    driver.quit()

df_imdb = pd.DataFrame(movies, columns=['Rank', 'Title', 'Year', 'Rating'])

if df_imdb.empty:
    # The XPath selectors matched nothing; do not write a header-only CSV and
    # report success. NOTE(review): confirm the selectors against the live page.
    print("IMDB rows not found! Verify the page structure and XPath selectors.")
else:
    df_imdb.to_csv("imdb_top250.csv", index=False)
    print("imdb_top250.csv saved")


imdb_top250.csv saved


# Scrape Weather Data

In [6]:
# Scrape the first three cells of every row in the "zebra" table on the
# timeanddate weather page and save them to weather.csv.
url = "https://www.timeanddate.com/weather/"
# Time out instead of hanging the kernel on a stalled connection, and fail
# loudly on an HTTP error so an error page is never parsed as weather data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "lxml")

cities_data = []
table = soup.find("table", class_="zebra")

if table:
    # Skip the first <tr>, which is treated as the header row.
    for row in table.find_all("tr")[1:]:
        cols = row.find_all("td")
        # Rows with fewer than three cells are ignored.
        if len(cols) >= 3:
            # NOTE(review): the cells are assumed to be city, temperature and
            # condition in that order; confirm against the live table layout.
            city = cols[0].text.strip()
            temp = cols[1].text.strip()
            condition = cols[2].text.strip()
            cities_data.append([city, temp, condition])

    df_weather = pd.DataFrame(cities_data, columns=["City", "Temperature", "Condition"])
    df_weather.to_csv("weather.csv", index=False)
    print("weather.csv saved with", len(df_weather), "rows")
else:
    print("Weather table not found! Verify the table structure and class name.")


weather.csv saved with 70 rows
