### 1. Write a python program which searches all the product under a particular product from www.amazon.in. The product to be searched will be taken as input from user. For e.g. If user input is ‘guitar’. Then search for guitars.

In [1]:
import requests
from bs4 import BeautifulSoup
import time

product_to_search = input("Enter the product you want to search for on Amazon: ")

search_url = "https://www.amazon.in/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=" + product_to_search

print("Constructed URL:", search_url)
time.sleep(5) 

response = requests.get(search_url)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')
    product_containers = soup.find_all('div', class_='s-result-item')

    if product_containers:
        for container in product_containers:
            product_title = container.find('span', class_='a-size-medium a-color-base a-text-normal').text.strip()
            product_price_element = container.find('span', class_='a-offscreen')
            product_price = product_price_element.text.strip() if product_price_element else "-"
            print("Product:", product_title)
            print("Price:", product_price)
            print("-----------------------------------------")
    else:
        print("No products found for '{}'".format(product_to_search))

Enter the product you want to search for on Amazon:  guitar


Constructed URL: https://www.amazon.in/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=guitar


### 2. In the above question, now scrape the following details of each product listed in first 3 pages of your search results and save it in a data frame and csv. In case if any product has less than 3 pages in search results then scrape all the products available under that product name. Details to be scraped are: "Brand Name", "Name of the Product", "Price", "Return/Exchange", "Expected Delivery", "Availability" and “Product URL”. In case, if any of the details are missing for any of the product then replace it by “-“.

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

product_to_search = input("Enter the product you want to search for on Amazon: ")

search_url = "https://www.amazon.in/s?k=" + product_to_search
print("Constructed URL:", search_url)

brand_names = []
product_names = []
product_prices = []
return_exchanges = []
expected_deliveries = []
availabilities = []
product_urls = []

for page_number in range(1, 4):
    current_page_url = search_url + "&page=" + str(page_number)
    print("Current Page URL:", current_page_url)
    time.sleep(5)  

    response = requests.get(current_page_url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        product_containers = soup.find_all('div', class_='s-result-item')

        if product_containers:
            for container in product_containers:
                product_link = container.find('a', class_='a-link-normal')
                if product_link:
                    product_url = product_link.get('href')
                    if product_url and not product_url.startswith('http'):
                        product_url = "https://www.amazon.in" + product_url

                    product_response = requests.get(product_url)

                    if product_response.status_code == 200:
                        product_soup = BeautifulSoup(product_response.content, 'html.parser')
                        brand_name = product_soup.find('a', class_='a-link-normal').text.strip()
                        product_name_element = product_soup.find('span', id='productTitle')
                        product_name = product_name_element.text.strip() if product_name_element else "-"
                        product_price_element = product_soup.find('span', id='priceblock_ourprice')
                        product_price = product_price_element.text.strip() if product_price_element else "-"
                        return_exchange = product_soup.find('div', class_='a-section a-spacing-mini').text.strip().replace('\n', '').replace('  ', '')
                        expected_delivery_tag = product_soup.find('div', id='ddmDeliveryMessage')
                        expected_delivery = expected_delivery_tag.text.strip() if expected_delivery_tag else "-"
                        availability_tag = product_soup.find('div', id='availability')
                        availability = availability_tag.text.strip() if availability_tag else "-"

                        brand_names.append(brand_name)
                        product_names.append(product_name)
                        product_prices.append(product_price)
                        return_exchanges.append(return_exchange)
                        expected_deliveries.append(expected_delivery)
                        availabilities.append(availability)
                        product_urls.append(product_url)
                    else:
                        print("Failed to retrieve product details for:", product_url)
                else:
                    print("Product link not found on page {} for '{}'".format(page_number, product_to_search))
        else:
            print("No products found on page {} for '{}'".format(page_number, product_to_search))
    else:
        print("Failed to retrieve search results for page {}".format(page_number))

brand_names = [name if name else "-" for name in brand_names]
product_names = [name if name else "-" for name in product_names]
product_prices = [price if price else "-" for price in product_prices]
return_exchanges = [exchange if exchange else "-" for exchange in return_exchanges]
expected_deliveries = [delivery if delivery else "-" for delivery in expected_deliveries]
availabilities = [availability if availability else "-" for availability in availabilities]
product_urls = [url if url else "-" for url in product_urls]

data = {
    'Brand Name': brand_names,
    'Name of the Product': product_names,
    'Price': product_prices,
    'Return/Exchange': return_exchanges,
    'Expected Delivery': expected_deliveries,
    'Availability': availabilities,
    'Product URL': product_urls
}
df = pd.DataFrame(data)

df.to_csv('amazon_products.csv', index=False)

print("Scraping and saving of data completed.")

Enter the product you want to search for on Amazon:  shoes


Constructed URL: https://www.amazon.in/s?k=shoes
Current Page URL: https://www.amazon.in/s?k=shoes&page=1
Failed to retrieve search results for page 1
Current Page URL: https://www.amazon.in/s?k=shoes&page=2
Failed to retrieve search results for page 2
Current Page URL: https://www.amazon.in/s?k=shoes&page=3
Failed to retrieve search results for page 3
Scraping and saving of data completed.


### 4. Write a python program to search for a smartphone(e.g.: Oneplus Nord, pixel 4A, etc.) on www.flipkart.com and scrape following details for all the search results displayed on 1st page. Details to be scraped: “Brand Name”, “Smartphone name”, “Colour”, “RAM”, “Storage(ROM)”, “Primary Camera”, “Secondary Camera”, “Display Size”, “Battery Capacity”, “Price”, “Product URL”. Incase if any of the details is missing then replace it by “- “. Save your results in a dataframe and CSV.

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_flipkart_smartphones(search_query):
    # Construct the search URL
    search_url = f"https://www.flipkart.com/search?q={search_query}&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"

    response = requests.get(search_url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')

        product_containers = soup.find_all('div', class_='_1AtVbE')

        data = {
            'Brand Name': [],
            'Smartphone Name': [],
            'Colour': [],
            'RAM': [],
            'Storage(ROM)': [],
            'Primary Camera': [],
            'Secondary Camera': [],
            'Display Size': [],
            'Battery Capacity': [],
            'Price': [],
            'Product URL': []
        }

        for container in product_containers:
            brand_name_elem = container.find('div', class_='_4rR01T')
            brand_name = brand_name_elem.text.strip() if brand_name_elem else "-"
            data['Brand Name'].append(brand_name)

       
            smartphone_name_elem = container.find('a', class_='IRpwTa')
            smartphone_name = smartphone_name_elem.text.strip() if smartphone_name_elem else "-"
            data['Smartphone Name'].append(smartphone_name)

        
            colour_elem = container.find('a', class_='IRpwTa')
            colour = colour_elem.text.strip() if colour_elem else "-"
            data['Colour'].append(colour)
            ram_elem = container.find(string='RAM')
            ram = ram_elem.find_next('li').text.strip() if ram_elem else "-"
            data['RAM'].append(ram)
            storage_elem = container.find(string='Storage')
            storage = storage_elem.find_next('li').text.strip() if storage_elem else "-"
            data['Storage(ROM)'].append(storage)
            primary_camera_elem = container.find(string='Primary Camera')
            primary_camera = primary_camera_elem.find_next('li').text.strip() if primary_camera_elem else "-"
            data['Primary Camera'].append(primary_camera)
            secondary_camera_elem = container.find(string='Secondary Camera')
            secondary_camera = secondary_camera_elem.find_next('li').text.strip() if secondary_camera_elem else "-"
            data['Secondary Camera'].append(secondary_camera)
            display_size_elem = container.find(string='Display Size')
            display_size = display_size_elem.find_next('li').text.strip() if display_size_elem else "-"
            data['Display Size'].append(display_size)
            battery_capacity_elem = container.find(string='Battery Capacity')
            battery_capacity = battery_capacity_elem.find_next('li').text.strip() if battery_capacity_elem else "-"
            data['Battery Capacity'].append(battery_capacity)
            price_elem = container.find('div', class_='_30jeq3 _1_WHN1')
            price = price_elem.text.strip() if price_elem else "-"
            data['Price'].append(price)
            product_url_elem = container.find('a', class_='IRpwTa')
            product_url = 'https://www.flipkart.com' + product_url_elem['href'] if product_url_elem else "-"
            data['Product URL'].append(product_url)
        df = pd.DataFrame(data)
        df.to_csv('flipkart_smartphones.csv', index=False)

        print("Data scraped and saved successfully.")

    else:
        print("Failed to retrieve search results.")

search_query = input("Enter the smartphone you want to search for on Flipkart: ")
scrape_flipkart_smartphones(search_query)

Enter the smartphone you want to search for on Flipkart:  watch


Data scraped and saved successfully.


### 6. Write a program to scrap all the available details of best gaming laptops from digit.in.

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_gaming_laptops():
    url = 'https://www.digit.in/top-products/best-gaming-laptops-40.html'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        laptops = soup.find_all('div', class_='TopNumbeHeading sticky-footer')
        data = {
            'Name': [],
            'Price': [],
            'Processor': [],
            'RAM': [],
            'OS': [],
            'Display': [],
            'Resolution': [],
            'Battery': []
        }

        for laptop in laptops:
            data['Name'].append(laptop.h3.a.text.strip())
            specs = laptop.find_next('div', class_='Section-center')
            data['Price'].append(specs.find('div', class_='smprice').text.strip())
            data['Processor'].append(specs.find_all('div', class_='value')[0].text.strip())
            data['RAM'].append(specs.find_all('div', class_='value')[1].text.strip())
            data['OS'].append(specs.find_all('div', class_='value')[2].text.strip())
            data['Display'].append(specs.find_all('div', class_='value')[3].text.strip())
            data['Resolution'].append(specs.find_all('div', class_='value')[4].text.strip())
            data['Battery'].append(specs.find_all('div', class_='value')[5].text.strip())

        df = pd.DataFrame(data)
        df.to_csv('gaming_laptops.csv', index=False)
        print("Data scraped and saved successfully.")
    else:
        print("Failed to retrieve data from digit.in")

scrape_gaming_laptops()

Data scraped and saved successfully.


### 7. Write a python program to scrape the details for all billionaires from www.forbes.com. Details to be scrapped: “Rank”, “Name”, “Net worth”, “Age”, “Citizenship”, “Source”, “Industry”.

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_billionaires():
    url = "https://www.forbes.com/billionaires/"
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        billionaires = soup.find_all('div', class_='personName')

        data = {
            'Rank': [],
            'Name': [],
            'Net Worth': [],
            'Age': [],
            'Citizenship': [],
            'Source': [],
            'Industry': []
        }

        for i, billionaire in enumerate(billionaires, start=1):
            data['Rank'].append(i)
            data['Name'].append(billionaire.text.strip())
            details = billionaire.find_next('div', class_='netWorth').text.strip().split('\n')
            data['Net Worth'].append(details[0])
            data['Age'].append(details[1])
            data['Citizenship'].append(details[2])
            data['Source'].append(details[3])
            data['Industry'].append(details[4])

        df = pd.DataFrame(data)
        df.to_csv('billionaires.csv', index=False)
        print("Data scraped and saved successfully.")
    else:
        print("Failed to retrieve data from Forbes.")

scrape_billionaires()

Data scraped and saved successfully.


### 9. Write a python program to scrape a data for all available Hostels from https://www.hostelworld.com/ in “London” location. You have to scrape hostel name, distance from city centre, ratings, total reviews, overall reviews, privates from price, dorms from price, facilities and property description.

In [6]:
import requests
from bs4 import BeautifulSoup

def scrape_hostels_in_london():
    url = "https://www.hostelworld.com/s?q=London,%20England&country=England&city=London&type=city&id=3&from=2022-03-08&to=2022-03-11&guests=1&page=1"
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')

        hostel_listings = soup.find_all('div', class_='fabresult')

        for hostel in hostel_listings:
            name = hostel.find('h2', class_='title-2').text.strip()
            distance = hostel.find('span', class_='label').text.strip()
            ratings = hostel.find('div', class_='score').text.strip()
            total_reviews = hostel.find('div', class_='reviews').text.strip()
            overall_reviews = hostel.find('div', class_='rating').text.strip()
            privates_price = hostel.find('div', class_='price').text.strip()
            dorms_price = hostel.find('div', class_='dorms').text.strip()
            facilities = [item.text.strip() for item in hostel.find_all('span', class_='rating-factors')]
            description = hostel.find('div', class_='desc').text.strip()

            print("Name:", name)
            print("Distance from city centre:", distance)
            print("Ratings:", ratings)
            print("Total reviews:", total_reviews)
            print("Overall reviews:", overall_reviews)
            print("Privates from price:", privates_price)
            print("Dorms from price:", dorms_price)
            print("Facilities:", facilities)
            print("Description:", description)
            print("-" * 50)

    else:
        print("Failed to retrieve data from Hostelworld.")

scrape_hostels_in_london()
