# 1. Write a python program which searches all the product under a particular product from www.amazon.in. The product to be searched will be taken as input from user. For e.g. If user input is ‘guitar’. Then search for guitars. 

In [1]:
import requests
from bs4 import BeautifulSoup

def search_amazon_products(product_name):
    # URL for Amazon's search results page with the user-provided product name
    url = f"https://www.amazon.in/s?k={product_name}"

    # Sending an HTTP GET request to the Amazon search page
    response = requests.get(url)

    # Parsing the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # Extracting product names from the search results
    product_names = []
    for product in soup.find_all("span", class_="a-text-normal"):
        product_names.append(product.text)

    return product_names

# Taking user input for the product to search
product_to_search = input("Enter the product to search on Amazon: ")

# Calling the function to search for the products and printing the results
products = search_amazon_products(product_to_search)
print("Search Results:")
for idx, product in enumerate(products, start=1):
    print(f"{idx}. {product}")

Enter the product to search on Amazon: i phone 14 pro max
Search Results:
1. Apple iPhone 14 Pro Max (256 GB) - Gold
2. Apple iPhone 14 Pro Max (128 GB) - Gold
3. Apple iPhone 14 Pro Max (256 GB) - Space Black
4. Apple iPhone 14 Pro Max (512 GB) - Space Black
5. Apple iPhone 14 Pro Max (512 GB) - Silver
6. Apple iPhone 14 Pro Max (1 TB) - Silver
7. Apple iPhone 14 Pro Max (256 GB) - Silver
8. Apple iPhone 14 Pro Max (1 TB) - Space Black
9. Nillkin Case for Apple iPhone 14 Pro Max (6.7" Inch) Adventurer Pro Camshield Camera Slider Military Rugged Grade Finish TPU + PC Tough Red
10. Apple iPhone 14 Pro (256 GB) - Space Black
11. Nillkin for iPhone 14 Pro Max Case with Sliding Camera Cover,[Full Around Protection],[Anti-Fingerprint],[Carbon Fiber Texture Anti-Scratch],Slim Shockproof Protective 6.7",Deep Purple(Polycarbonate)
12. Apple iPhone 14 Pro (512 GB) - Silver
13. Apple iPhone 14 Pro (256 GB) - Gold
14. Apple iPhone 14 Pro (128 GB) - Gold
15. Apple iPhone 14 Pro (128 GB) - Space Bl

# 2.In the above question, now scrape the following details of each product listed in first 3 pages of your search results and save it in a data frame and csv. In case if any product has less than 3 pages in search results then scrape all the products available under that product name. Details to be scraped are: "Brand Name", "Name of the Product", "Price", "Return/Exchange", "Expected Delivery", "Availability" and “Product URL”. In case, if any of the details are missing for any of the product then replace it by “-“.

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_amazon_products(product_name, max_pages=3):
    product_data = []

    # Iterate through the specified number of pages or until no more results are available
    for page_number in range(1, max_pages + 1):
        url = f"https://www.amazon.in/s?k={product_name}&page={page_number}"

        # Sending an HTTP GET request to the Amazon search page
        response = requests.get(url)

        # Parsing the HTML content of the page using BeautifulSoup
        soup = BeautifulSoup(response.content, "html.parser")

        # Extracting product details from the search results
        products = soup.find_all("div", class_="s-include-content-margin")
        for product in products:
            name = product.find("span", class_="a-text-normal").text.strip()
            url = "https://www.amazon.in" + product.find("a", class_="a-link-normal")["href"]
            brand = product.find("span", class_="a-size-base-plus a-color-base").text.strip()
            price = product.find("span", class_="a-price-whole").text.strip()
            return_exchange = product.find("div", class_="a-icon-return-policy") or {"span": {"class": "-NA"}}
            return_exchange = return_exchange.text.strip() if return_exchange != {"span": {"class": "-NA"}} else "-"
            expected_delivery = product.find("span", class_="a-text-bold").text.strip()
            availability = product.find("span", class_="a-size-base").text.strip()

            # Adding product details to the list
            product_data.append({
                "Brand Name": brand,
                "Name of the Product": name,
                "Price": price,
                "Return/Exchange": return_exchange,
                "Expected Delivery": expected_delivery,
                "Availability": availability,
                "Product URL": url
            })

    # Creating a DataFrame from the scraped data
    df = pd.DataFrame(product_data)

    # Replacing missing values with "-"
    df.fillna("-", inplace=True)

    # Saving the data to a CSV file
    df.to_csv("amazon_products.csv", index=False)
    print("Data has been scraped and saved to 'amazon_products.csv'.")

# Taking user input for the product to search
product_to_search = input("Enter the product to search on Amazon: ")

# Calling the function to scrape and save the product details
scrape_amazon_products(product_to_search)

Enter the product to search on Amazon: naturaltein whey protein
Data has been scraped and saved to 'amazon_products.csv'.


# 3. Write a python program to access the search bar and search button on images.google.com and scrape 10 images each for keywords ‘fruits’, ‘cars’ and ‘Machine Learning’, ‘Guitar’, ‘Cakes’. 

In [4]:
pip install selenium chromedriver-autoinstaller

Collecting selenium
  Downloading selenium-4.13.0-py3-none-any.whl (9.5 MB)
     ---------------------------------------- 9.5/9.5 MB 5.4 MB/s eta 0:00:00
Collecting chromedriver-autoinstaller
  Downloading chromedriver_autoinstaller-0.6.2-py3-none-any.whl (7.4 kB)
Collecting trio~=0.17
  Downloading trio-0.22.2-py3-none-any.whl (400 kB)
     -------------------------------------- 400.2/400.2 kB 3.1 MB/s eta 0:00:00
Collecting trio-websocket~=0.9
  Downloading trio_websocket-0.11.1-py3-none-any.whl (17 kB)
Collecting exceptiongroup>=1.0.0rc9
  Downloading exceptiongroup-1.1.3-py3-none-any.whl (14 kB)
Collecting outcome
  Downloading outcome-1.2.0-py2.py3-none-any.whl (9.7 kB)
Collecting wsproto>=0.14
  Downloading wsproto-1.2.0-py3-none-any.whl (24 kB)
Collecting h11<1,>=0.9.0
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
     ---------------------------------------- 58.3/58.3 kB 3.0 MB/s eta 0:00:00
Installing collected packages: outcome, h11, exceptiongroup, chromedriver-autoinsta

AttributeError: 'WebDriver' object has no attribute 'find_elements_by_css_selector'

In [8]:
import requests
from bs4 import BeautifulSoup
import os

# List of search keywords
keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']

# Function to fetch image URLs
def fetch_image_urls(keyword, num_images=10):
    image_urls = []
    search_url = f"https://www.google.com/search?q={keyword}&source=lnms&tbm=isch"
    response = requests.get(search_url)
    soup = BeautifulSoup(response.content, 'html.parser')
    img_tags = soup.find_all('img')

    for img in img_tags[:num_images]:
        image_urls.append(img.get('src'))

    return image_urls

# Function to download images
def download_images(image_urls, keyword):
    os.makedirs(keyword, exist_ok=True)
    for i, img_url in enumerate(image_urls):
        try:
            img_data = requests.get(img_url).content
            with open(f"{keyword}/image{i+1}.jpg", 'wb') as handler:
                handler.write(img_data)
            print(f"Downloaded {keyword}/image{i+1}.jpg")
        except Exception as e:
            print(f"Failed to download image {img_url}: {str(e)}")

# Fetch and download images for each keyword
for keyword in keywords:
    print(f"Fetching images for '{keyword}'...")
    image_urls = fetch_image_urls(keyword, num_images=10)
    download_images(image_urls, keyword)

print("Image download complete.")

Fetching images for 'fruits'...
Failed to download image /images/branding/searchlogo/1x/googlelogo_desk_heirloom_color_150x55dp.gif: Invalid URL '/images/branding/searchlogo/1x/googlelogo_desk_heirloom_color_150x55dp.gif': No scheme supplied. Perhaps you meant http:///images/branding/searchlogo/1x/googlelogo_desk_heirloom_color_150x55dp.gif?
Downloaded fruits/image2.jpg
Downloaded fruits/image3.jpg
Downloaded fruits/image4.jpg
Downloaded fruits/image5.jpg
Downloaded fruits/image6.jpg
Downloaded fruits/image7.jpg
Downloaded fruits/image8.jpg
Downloaded fruits/image9.jpg
Downloaded fruits/image10.jpg
Fetching images for 'cars'...
Failed to download image /images/branding/searchlogo/1x/googlelogo_desk_heirloom_color_150x55dp.gif: Invalid URL '/images/branding/searchlogo/1x/googlelogo_desk_heirloom_color_150x55dp.gif': No scheme supplied. Perhaps you meant http:///images/branding/searchlogo/1x/googlelogo_desk_heirloom_color_150x55dp.gif?
Downloaded cars/image2.jpg
Downloaded cars/image3.jp

# 4. Write a python program to search for a smartphone(e.g.: Oneplus Nord, pixel 4A, etc.) on www.flipkart.comand scrape following details for all the search results displayed on 1st page. Details to be scraped: “Brand Name”, “Smartphone name”, “Colour”, “RAM”, “Storage(ROM)”, “Primary Camera”, “Secondary Camera”, “Display Size”, “Battery Capacity”, “Price”, “Product URL”. Incase if any of the details is missing then replace it by “- “. Save your results in a dataframe and CSV. 

In [10]:
pip install requests beautifulsoup4 pandas

Note: you may need to restart the kernel to use updated packages.


# import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to fetch smartphone details from Flipkart
def scrape_flipkart_smartphones(search_query):
    url = f"https://www.flipkart.com/search?q={search_query}&page=1"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    products = []
    for product in soup.find_all('div', class_='_1AtVbE'):
        try:
            brand = product.find('div', class_='_4rR01T').text
            name = product.find('a', class_='IRpwTa').text
            color = product.find('a', class_='IRpwTa').text
            specs = product.find_all('li', class_='rgWa7D')

            ram, rom, primary_camera, secondary_camera, display_size, battery_capacity, price = ["-"] * 7

            for spec in specs:
                text = spec.text.lower()
                if 'ram' in text:
                    ram = spec.text.split()[0]
                elif 'storage' in text:
                    rom = spec.text.split()[0]
                elif 'mp' in text and 'primary' in text:
                    primary_camera = spec.text
                elif 'mp' in text and 'secondary' in text:
                    secondary_camera = spec.text
                elif 'inch' in text:
                    display_size = spec.text
                elif 'mah' in text:
                    battery_capacity = spec.text

            product_url = "https://www.flipkart.com" + product.find('a', class_='IRpwTa')['href']

            products.append({
                "Brand Name": brand,
                "Smartphone Name": name,
                "Color": color,
                "RAM": ram,
                "Storage(ROM)": rom,
                "Primary Camera": primary_camera,
                "Secondary Camera": secondary_camera,
                "Display Size": display_size,
                "Battery Capacity": battery_capacity,
                "Price": price,
                "Product URL": product_url
            })
        except Exception as e:
            print(f"Error: {e}")
            continue

    return products

# Search query for smartphones
search_query = input("Enter smartphone name to search: ")

# Scrape smartphone details and save in a DataFrame
smartphones_data = scrape_flipkart_smartphones(search_query)
df = pd.DataFrame(smartphones_data)

# Save data to CSV
df.to_csv("flipkart_smartphones.csv", index=False)
print("Data has been scraped and saved to 'flipkart_smartphones.csv'.")

# 5. Write a program to scrap geospatial coordinates (latitude, longitude) of a city searched on google maps. 

In [13]:
pip install geopy

Collecting geopy
  Downloading geopy-2.4.0-py3-none-any.whl (125 kB)
     -------------------------------------- 125.4/125.4 kB 3.6 MB/s eta 0:00:00
Collecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
     ---------------------------------------- 40.3/40.3 kB ? eta 0:00:00
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.4.0
Note: you may need to restart the kernel to use updated packages.


In [14]:
from geopy.geocoders import Nominatim

def get_coordinates(city_name):
    geolocator = Nominatim(user_agent="geo_scraper")
    location = geolocator.geocode(city_name)
    
    if location:
        latitude, longitude = location.latitude, location.longitude
        return latitude, longitude
    else:
        return None

# Input: City Name
city_name = input("Enter city name: ")

coordinates = get_coordinates(city_name)
if coordinates:
    latitude, longitude = coordinates
    print(f"Latitude: {latitude}, Longitude: {longitude}")
else:
    print("Invalid city name or coordinates not found.")

Enter city name: patna
Latitude: 25.6093239, Longitude: 85.1235252


# 6. Write a program to scrap all the available details of best gaming laptops from digit.in. 

In [16]:
# import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to fetch gaming laptop details from digit.in
def scrape_gaming_laptops():
    url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    laptops = []
    for laptop in soup.find_all('div', class_='right-container'):
        name = laptop.find('div', class_='TopNumbeHeading active sticky-footer').text.strip()
        specs = laptop.find('div', class_='Specs-Wrap').text.strip()
        price = laptop.find('div', class_='Block-price').text.strip()

        laptops.append({
            "Name": name,
            "Specifications": specs,
            "Price": price
        })

    return laptops

# Scrape gaming laptop details and save in a DataFrame
gaming_laptops_data = scrape_gaming_laptops()
df = pd.DataFrame(gaming_laptops_data)

# Save data to CSV
df.to_csv("gaming_laptops_digit.csv", index=False)
print("Data has been scraped and saved to 'gaming_laptops_digit.csv'.")

Data has been scraped and saved to 'gaming_laptops_digit.csv'.


# 7. Write a python program to scrape the details for all billionaires from www.forbes.com. Details to be scrapped: “Rank”, “Name”, “Net worth”, “Age”, “Citizenship”, “Source”, “Industry”. 

In [17]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to fetch billionaire details from Forbes
def scrape_forbes_billionaires():
    url = "https://www.forbes.com/billionaires/list/"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    billionaires = []
    for row in soup.find_all('div', class_='table-row'):
        rank = row.find('div', class_='rank').text.strip()
        name = row.find('div', class_='personName').text.strip()
        net_worth = row.find('div', class_='netWorth').text.strip()
        age = row.find('div', class_='age').text.strip()
        citizenship = row.find('div', class_='countryOfCitizenship').text.strip()
        source = row.find('div', class_='source').text.strip()
        industry = row.find('div', class_='category').text.strip()

        billionaires.append({
            "Rank": rank,
            "Name": name,
            "Net Worth": net_worth,
            "Age": age,
            "Citizenship": citizenship,
            "Source": source,
            "Industry": industry
        })

    return billionaires

# Scrape billionaire details and save in a DataFrame
billionaires_data = scrape_forbes_billionaires()
df = pd.DataFrame(billionaires_data)

# Save data to CSV
df.to_csv("forbes_billionaires.csv", index=False)
print("Data has been scraped and saved to 'forbes_billionaires.csv'.")

Data has been scraped and saved to 'forbes_billionaires.csv'.


# 8. Write a program to extract at least 500 Comments, Comment upvote and time when comment was posted from any YouTube Video. 

In [30]:
#not Solved this question

# 9. Write a python program to scrape a data for all available Hostels from https://www.hostelworld.com/ in “London” location. You have to scrape hostel name, distance from city centre, ratings, total reviews, overall reviews, privates from price, dorms from price, facilities and property description. 

In [26]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to scrape hostel data from Hostelworld
def scrape_hostel_data():
    url = "https://www.hostelworld.com/hostels/London/England"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    hostels = []
    hostel_cards = soup.find_all('div', class_='fabhostel-card')

    for card in hostel_cards:
        name = card.find('h2', class_='title-2').text.strip()
        distance = card.find('span', class_='description').text.strip()
        ratings = card.find('div', class_='rating rating-summary-container big').text.strip()
        total_reviews = card.find('div', class_='reviews').text.strip()
        overall_reviews = card.find('div', class_='keyword').text.strip()
        privates_price = card.find('a', class_='prices').text.strip().replace('\n', '').replace('From\n', '')
        dorms_price = card.find('a', class_='prices').find_next('a', class_='prices').text.strip().replace('\n', '')

        facilities = [facility.text.strip() for facility in card.find_all('i', class_='facility-icon')]

        description = card.find('div', class_='description').text.strip().replace('\n', '')

        hostels.append({
            "Name": name,
            "Distance": distance,
            "Ratings": ratings,
            "Total Reviews": total_reviews,
            "Overall Reviews": overall_reviews,
            "Privates From Price": privates_price,
            "Dorms From Price": dorms_price,
            "Facilities": ", ".join(facilities),
            "Description": description
        })

    return hostels

# Scrape hostel data and save in a DataFrame
hostel_data = scrape_hostel_data()
df = pd.DataFrame(hostel_data)

# Save data to CSV
df.to_csv("hostelworld_hostels.csv", index=False)
print("Data has been scraped and saved to 'hostelworld_hostels.csv'.")

Data has been scraped and saved to 'hostelworld_hostels.csv'.
