In [1]:
# Step 1: Install libraries

import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

In [7]:
# Step 2: Get HTML content from the site
url = "https://books.toscrape.com/catalogue/page-1.html"
response = requests.get(url)

if response.status_code != 200:
    raise Exception(f"Failed to fetch page: {response.status_code}")

html_content = response.text
soup = BeautifulSoup(html_content, "html.parser")

In [8]:

# Step 3: Extract book info
books = []
for article in soup.find_all("article", class_="product_pod"):
    title = article.h3.a["title"]
    price = article.find("p", class_="price_color").text.strip()
    rating = article.p["class"][1]  # e.g., "Three", "Five"

    books.append({
        "title": title,
        "price": price,
        "rating": rating
    })

books

[{'title': 'A Light in the Attic', 'price': 'Â£51.77', 'rating': 'Three'},
 {'title': 'Tipping the Velvet', 'price': 'Â£53.74', 'rating': 'One'},
 {'title': 'Soumission', 'price': 'Â£50.10', 'rating': 'One'},
 {'title': 'Sharp Objects', 'price': 'Â£47.82', 'rating': 'Four'},
 {'title': 'Sapiens: A Brief History of Humankind',
  'price': 'Â£54.23',
  'rating': 'Five'},
 {'title': 'The Requiem Red', 'price': 'Â£22.65', 'rating': 'One'},
 {'title': 'The Dirty Little Secrets of Getting Your Dream Job',
  'price': 'Â£33.34',
  'rating': 'Four'},
 {'title': 'The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull',
  'price': 'Â£17.93',
  'rating': 'Three'},
 {'title': 'The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics',
  'price': 'Â£22.60',
  'rating': 'Four'},
 {'title': 'The Black Maria', 'price': 'Â£52.15', 'rating': 'One'},
 {'title': 'Starving Hearts (Triangular Trade Trilogy, #1)',
  'price': 'Â£13.99',
  'r

In [9]:
df_books = pd.DataFrame(books)
df_books

Unnamed: 0,title,price,rating
0,A Light in the Attic,Â£51.77,Three
1,Tipping the Velvet,Â£53.74,One
2,Soumission,Â£50.10,One
3,Sharp Objects,Â£47.82,Four
4,Sapiens: A Brief History of Humankind,Â£54.23,Five
5,The Requiem Red,Â£22.65,One
6,The Dirty Little Secrets of Getting Your Dream...,Â£33.34,Four
7,The Coming Woman: A Novel Based on the Life of...,Â£17.93,Three
8,The Boys in the Boat: Nine Americans and Their...,Â£22.60,Four
9,The Black Maria,Â£52.15,One


In [15]:
# Step 4: Get mock JSON data (this is just a placeholder API for teaching)
json_api_url = "https://jsonplaceholder.typicode.com/posts"
json_response = requests.get(json_api_url)

if json_response.status_code != 200:
    raise Exception(f"Failed to fetch JSON data: {json_response.status_code}")

json_data = json_response.json()
len(json_data)

100

Using open library

In [27]:
url = "https://openlibrary.org/search.json?q=A+light+in+the+attic"

response = requests.get(url)
response.json()

{'numFound': 36,
 'start': 0,
 'numFoundExact': True,
 'num_found': 36,
 'documentation_url': 'https://openlibrary.org/dev/docs/api/search',
 'q': 'A light in the attic',
 'offset': None,
 'docs': [{'author_key': ['OL548174A'],
   'author_name': ['Shel Silverstein'],
   'cover_edition_key': 'OL24753251M',
   'cover_i': 6806998,
   'ebook_access': 'printdisabled',
   'edition_count': 8,
   'first_publish_year': 1981,
   'has_fulltext': True,
   'ia': ['lightinatticsilv00silv', 'lightinatticsilv00silv'],
   'ia_collection_s': 'americana;internetarchivebooks;openlibrary-d-ol;printdisabled;stmaryscountylibrary',
   'key': '/works/OL15843795W',
   'language': ['spa', 'eng', 'chi'],
   'public_scan_b': False,
   'title': 'A light in the attic'},
  {'author_key': ['OL548174A'],
   'author_name': ['Shel Silverstein'],
   'cover_edition_key': 'OL26342869M',
   'cover_i': 8024958,
   'ebook_access': 'printdisabled',
   'edition_count': 1,
   'first_publish_year': 1996,
   'has_fulltext': True,
 

In [22]:
pd.DataFrame([json_data[i]['title'] for i in range(len(json_data))])

Unnamed: 0,0
0,sunt aut facere repellat provident occaecati e...
1,qui est esse
2,ea molestias quasi exercitationem repellat qui...
3,eum et est occaecati
4,nesciunt quas odio
...,...
95,quaerat velit veniam amet cupiditate aut numqu...
96,quas fugiat ut perspiciatis vero provident
97,laboriosam dolor voluptates
98,temporibus sit alias delectus eligendi possimu...


In [None]:
# Step 5: Merge extra info into our books dataframe (just simulate joining)
for i, book in enumerate(books):
    if i < len(json_data):
        book["extra_info"] = json_data[i]["title"]  

df_books = pd.DataFrame(books)
df_books

Unnamed: 0,title,price,rating,extra_info
0,A Light in the Attic,Â£51.77,Three,sunt aut facere repellat provident occaecati e...
1,Tipping the Velvet,Â£53.74,One,qui est esse
2,Soumission,Â£50.10,One,ea molestias quasi exercitationem repellat qui...
3,Sharp Objects,Â£47.82,Four,eum et est occaecati
4,Sapiens: A Brief History of Humankind,Â£54.23,Five,nesciunt quas odio
5,The Requiem Red,Â£22.65,One,dolorem eum magni eos aperiam quia
6,The Dirty Little Secrets of Getting Your Dream...,Â£33.34,Four,magnam facilis autem
7,The Coming Woman: A Novel Based on the Life of...,Â£17.93,Three,dolorem dolore est ipsam
8,The Boys in the Boat: Nine Americans and Their...,Â£22.60,Four,nesciunt iure omnis dolorem tempora et accusan...
9,The Black Maria,Â£52.15,One,optio molestias id quia eum


In [4]:
# Step 6: Save final dataset
df_books.to_csv("books_data.csv", index=False)
print("Data saved to books_data.csv")

Data saved to books_data.csv


In [5]:
# Step 7: Read the saved CSV
df_loaded = pd.read_csv("books_data.csv")
print("Loaded Data:")
print(df_loaded.head())


Loaded Data:
                                   title    price rating  \
0                   A Light in the Attic  Â£51.77  Three   
1                     Tipping the Velvet  Â£53.74    One   
2                             Soumission  Â£50.10    One   
3                          Sharp Objects  Â£47.82   Four   
4  Sapiens: A Brief History of Humankind  Â£54.23   Five   

                                          extra_info  
0  sunt aut facere repellat provident occaecati e...  
1                                       qui est esse  
2  ea molestias quasi exercitationem repellat qui...  
3                               eum et est occaecati  
4                                 nesciunt quas odio  


In [6]:
df_loaded

Unnamed: 0,title,price,rating,extra_info
0,A Light in the Attic,Â£51.77,Three,sunt aut facere repellat provident occaecati e...
1,Tipping the Velvet,Â£53.74,One,qui est esse
2,Soumission,Â£50.10,One,ea molestias quasi exercitationem repellat qui...
3,Sharp Objects,Â£47.82,Four,eum et est occaecati
4,Sapiens: A Brief History of Humankind,Â£54.23,Five,nesciunt quas odio
5,The Requiem Red,Â£22.65,One,dolorem eum magni eos aperiam quia
6,The Dirty Little Secrets of Getting Your Dream...,Â£33.34,Four,magnam facilis autem
7,The Coming Woman: A Novel Based on the Life of...,Â£17.93,Three,dolorem dolore est ipsam
8,The Boys in the Boat: Nine Americans and Their...,Â£22.60,Four,nesciunt iure omnis dolorem tempora et accusan...
9,The Black Maria,Â£52.15,One,optio molestias id quia eum
