In [None]:
# QUESTION 1
import requests
from bs4 import BeautifulSoup

def search_amazon_product(product_name, timeout=10):
    """Search amazon.in for ``product_name`` and print the result titles.

    Args:
        product_name: Free-text search term.
        timeout: Seconds to wait for Amazon before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        None. Prints the text of every ``span`` carrying the
        ``a-size-medium`` class, "No products found." when there are none,
        or a failure message when the response status is not 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    # Hand the term to requests via ``params`` so it is URL-encoded; pasting it
    # into the URL corrupted the query for terms containing "&", "#" or "+".
    response = requests.get(
        "https://www.amazon.in/s",
        params={"k": product_name},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        print("Failed to retrieve data from Amazon.")
        return

    soup = BeautifulSoup(response.content, "html.parser")
    product_list = soup.find_all("span", {"class": "a-size-medium"})
    if not product_list:
        print("No products found.")
        return

    print("Products found:")
    for product in product_list:
        print(product.text)

# Ask for the search term. Surrounding whitespace is dropped and a blank
# answer is reported instead of being sent to Amazon as an empty query.
user_input = input("Enter the product to search on Amazon: ").strip()
if user_input:
    search_amazon_product(user_input)
else:
    print("No product name entered; nothing to search.")

In [None]:
# QUESTION 2
import pandas as pd

def scrape_product_details(product_name, timeout=10):
    """Scrape the amazon.in search results for ``product_name`` into a CSV.

    Every ``div`` with the ``s-asin`` class becomes one row with the columns
    Brand Name, Name of the Product, Price, Return/Exchange, Expected
    Delivery, Availability and Product URL. A value that cannot be found on
    the card is stored as "-". Rows are written to
    ``<product_name>_products.csv`` in the current working directory, with
    characters that are invalid in file names replaced by "_".

    Args:
        product_name: Free-text search term.
        timeout: Seconds to wait for Amazon before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        None. Prints a confirmation, or a failure message when the response
        status is not 200 (no file is written in that case).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    columns = [
        "Brand Name",
        "Name of the Product",
        "Price",
        "Return/Exchange",
        "Expected Delivery",
        "Availability",
        "Product URL",
    ]

    def _text(node):
        """Return the stripped text of ``node``, or "-" when nothing was found."""
        return node.text.strip() if node is not None else "-"

    # ``params`` lets requests URL-encode the term ("&", "#", "+" are safe).
    response = requests.get(
        "https://www.amazon.in/s",
        params={"k": product_name},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        print("Failed to retrieve data from Amazon.")
        return

    soup = BeautifulSoup(response.content, "html.parser")
    data = []
    for product in soup.find_all("div", class_="s-asin"):
        # NOTE(review): "a-text-bold" and "a-size-base" look like generic
        # styling classes; confirm that these lookups really return the
        # return policy, delivery date and availability.
        bold = product.find("span", class_="a-text-bold")
        link = product.find("a", class_="a-link-normal")
        # A missing link used to raise TypeError (None["href"]), which the
        # original ``except AttributeError`` did not catch.
        href = link.get("href") if link is not None else None
        data.append({
            "Brand Name": _text(product.find("span", class_="a-size-base-plus")),
            "Name of the Product": _text(product.find("span", class_="a-text-normal")),
            "Price": _text(product.find("span", class_="a-offscreen")),
            "Return/Exchange": _text(bold),
            "Expected Delivery": _text(
                bold.find_next_sibling("span") if bold is not None else None
            ),
            "Availability": _text(product.find("span", class_="a-size-base")),
            "Product URL": ("https://www.amazon.in" + href) if href is not None else "-",
        })

    # Explicit columns keep the header row even when no card was found.
    df = pd.DataFrame(data, columns=columns)
    # Path separators and other reserved characters in the search term would
    # otherwise make the file name invalid or point into another directory.
    safe_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in product_name)
    df.to_csv(f"{safe_name}_products.csv", index=False)
    print("Data saved to CSV file.")

# Ask for the search term. Surrounding whitespace is dropped, and a blank
# answer is reported instead of producing a file named "_products.csv".
user_input = input("Enter the product to search on Amazon: ").strip()
if user_input:
    scrape_product_details(user_input)
else:
    print("No product name entered; nothing to scrape.")

In [None]:
# QUESTION 3
import requests
from bs4 import BeautifulSoup

def scrape_google_images(keywords, num_images, timeout=10):
    """Download up to ``num_images`` Google Images results per keyword.

    For each keyword the image-search page is fetched, every ``img`` with the
    ``t0fcAb`` class is collected, and the first ``num_images`` images that
    have an http(s) ``src`` are saved as ``<keyword>_<n>.jpg`` in the current
    working directory.

    Args:
        keywords: Iterable of search terms.
        num_images: Maximum number of images to save per keyword.
        timeout: Seconds to wait for each HTTP request before ``requests``
            raises a timeout error instead of blocking indefinitely.

    Returns:
        None. Progress and failures are printed.
    """
    search_url = "https://www.google.com/search"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    for keyword in keywords:
        # ``params`` URL-encodes keywords containing spaces, "&", "#", ...
        response = requests.get(
            search_url,
            params={"q": keyword, "tbm": "isch"},
            headers=headers,
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"Failed to retrieve images for keyword: {keyword}")
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        image_tags = soup.find_all("img", class_="t0fcAb")
        print(f"Scraping images for keyword: {keyword}")

        # Tags without ``src`` raised KeyError, and inline ``data:`` or
        # relative sources made requests.get raise; keep only fetchable URLs.
        candidate_urls = [
            src
            for src in (img.get("src") for img in image_tags)
            if src and src.startswith(("http://", "https://"))
        ]
        for i, img_url in enumerate(candidate_urls[:num_images]):
            img_name = f"{keyword}_{i+1}.jpg"
            try:
                img_response = requests.get(img_url, headers=headers, timeout=timeout)
            except requests.RequestException as exc:
                # One broken image must not abort the remaining downloads.
                print(f"Skipping image {i+1} for keyword {keyword}: {exc}")
                continue
            if img_response.status_code != 200:
                print(f"Skipping image {i+1} for keyword {keyword}: HTTP {img_response.status_code}")
                continue
            # Open the file only after a successful download so failures do
            # not leave empty or error-page .jpg files behind.
            with open(img_name, "wb") as f:
                f.write(img_response.content)
            print(f"Image {i+1} saved as {img_name}")

# Search terms for the image scrape, and the maximum number of images to
# save for each of them.
keywords = [
    'fruits',
    'cars',
    'Machine Learning',
    'Guitar',
    'Cakes',
]
num_images = 10
scrape_google_images(keywords=keywords, num_images=num_images)

In [None]:
# QUESTION 4
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_flipkart_smartphones(search_query, timeout=10):
    """Scrape the flipkart.com search results for ``search_query`` into a CSV.

    Every ``div`` with the ``_1AtVbE`` class becomes one row. The columns are
    Brand Name, Smartphone name, Colour, six specification columns taken
    positionally from the card's ``li.rgWa7D`` items, Price and Product URL.
    A value that cannot be found is stored as "-". Rows are written to
    ``<search_query>_products.csv`` in the current working directory, with
    characters that are invalid in file names replaced by "_".

    Args:
        search_query: Free-text search term.
        timeout: Seconds to wait for Flipkart before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        None. Prints a confirmation, or a failure message when the response
        status is not 200 (no file is written in that case).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    # Same query string as before, but built by requests so the search term is
    # URL-encoded instead of being pasted into the URL verbatim.
    query = {
        "q": search_query,
        "otracker": "search",
        "otracker1": "search",
        "marketplace": "FLIPKART",
        "as-show": "on",
        "as": "off",
    }
    # Specification columns, in the order their <li> items are read.
    spec_columns = [
        "RAM",
        "Storage(ROM)",
        "Primary Camera",
        "Secondary Camera",
        "Display Size",
        "Battery Capacity",
    ]
    columns = ["Brand Name", "Smartphone name", "Colour", *spec_columns, "Price", "Product URL"]

    def _text(node):
        """Return the stripped text of ``node``, or "-" when nothing was found."""
        return node.text.strip() if node is not None else "-"

    response = requests.get(
        "https://www.flipkart.com/search",
        params=query,
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        print("Failed to retrieve data from Flipkart.")
        return

    soup = BeautifulSoup(response.content, "html.parser")
    data = []
    for product in soup.find_all("div", class_="_1AtVbE"):
        # NOTE(review): Brand Name and Colour are read from the same element,
        # so both columns always hold the same text - confirm the intended
        # selectors.
        title = _text(product.find("div", class_="_4rR01T"))
        link = product.find("a", class_="IRpwTa")
        # A missing link used to raise TypeError (None["href"]), which the
        # original ``except AttributeError`` did not catch.
        href = link.get("href") if link is not None else None
        # Query the specification list once instead of once per column.
        specs = product.find_all("li", class_="rgWa7D")

        details = {
            "Brand Name": title,
            "Smartphone name": _text(link),
            "Colour": title,
        }
        for position, column in enumerate(spec_columns):
            details[column] = specs[position].text.strip() if position < len(specs) else "-"
        details["Price"] = _text(product.find("div", class_="_30jeq3"))
        details["Product URL"] = ("https://www.flipkart.com" + href) if href is not None else "-"
        data.append(details)

    # Explicit columns keep the header row even when no card was found.
    df = pd.DataFrame(data, columns=columns)
    # Path separators and other reserved characters in the query would
    # otherwise make the file name invalid or point into another directory.
    safe_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in search_query)
    df.to_csv(f"{safe_name}_products.csv", index=False)
    print("Data saved to CSV file.")

# Ask for the search term. Surrounding whitespace is dropped, and a blank
# answer is reported instead of producing a file named "_products.csv".
search_query = input("Enter the smartphone to search on Flipkart: ").strip()
if search_query:
    scrape_flipkart_smartphones(search_query)
else:
    print("No smartphone name entered; nothing to scrape.")


In [None]:
# QUESTION 5
import requests

def get_coordinates(city_name, api_key=None, timeout=10):
    """Print the latitude and longitude the Google Geocoding API returns for a city.

    Args:
        city_name: Address or city name sent as the ``address`` parameter.
        api_key: Google Maps API key. When omitted, the
            ``GOOGLE_MAPS_API_KEY`` environment variable is used, falling
            back to the original "YOUR_API_KEY" placeholder.
        timeout: Seconds to wait for the API before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        None. Prints the coordinates of the first result, or a failure
        message describing why no coordinates were produced.
    """
    import os  # local import keeps this cell runnable on its own

    if api_key is None:
        # Keeps the real key out of the notebook source.
        api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "YOUR_API_KEY")

    base_url = "https://maps.googleapis.com/maps/api/geocode/json"
    params = {
        "address": city_name,
        "key": api_key,
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code != 200:
        print("Failed to retrieve data from Google Maps.")
        return

    data = response.json()
    status = data.get("status")
    results = data.get("results") or []
    if status == "OK" and results:
        location = results[0]["geometry"]["location"]
        latitude = location["lat"]
        longitude = location["lng"]
        print(f"Coordinates for {city_name}: Latitude {latitude}, Longitude {longitude}")
    elif status in ("OK", "ZERO_RESULTS", "INVALID_REQUEST"):
        # These outcomes really are about the address that was supplied.
        print("Failed to retrieve coordinates. Please check your input.")
    else:
        # Key, billing and quota problems (e.g. REQUEST_DENIED) used to be
        # reported as an input problem; surface the API's own explanation.
        detail = data.get("error_message", "")
        print(f"Failed to retrieve coordinates (API status: {status}). {detail}".rstrip())

# Ask for the city. Surrounding whitespace is dropped and a blank answer is
# reported instead of being sent to the geocoding API as an empty address.
city_name = input("Enter the city name to get its coordinates: ").strip()
if city_name:
    get_coordinates(city_name)
else:
    print("No city name entered; nothing to look up.")

In [None]:
# QUESTION 6
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_digit_gaming_laptops(timeout=10):
    """Scrape digit.in's best-gaming-laptops page into ``gaming_laptops.csv``.

    Every ``div`` whose class attribute is
    "TopNumbeHeading active sticky-footer" becomes one row with the columns
    Name, Price and Specifications. A value that cannot be found is stored
    as "-".

    Args:
        timeout: Seconds to wait for digit.in before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        None. Prints a confirmation, or a failure message when the response
        status is not 200 (no file is written in that case).
    """
    url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    heading_class = "TopNumbeHeading active sticky-footer"

    def _text(node):
        """Return the stripped text of ``node``, or "-" when nothing was found."""
        return node.text.strip() if node is not None else "-"

    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print("Failed to retrieve data from digit.in.")
        return

    soup = BeautifulSoup(response.content, "html.parser")
    data = []
    for laptop in soup.find_all("div", class_=heading_class):
        # ``laptop`` already is the heading element. Searching it for a
        # descendant with its own class found nothing, so Name was always "-".
        # Keep a nested match if one exists, otherwise use the element itself.
        nested_heading = laptop.find("div", class_=heading_class)
        name_node = nested_heading if nested_heading is not None else laptop
        data.append({
            "Name": _text(name_node),
            # NOTE(review): the price is only searched inside the heading
            # element - confirm that "smprice" actually lives there.
            "Price": _text(laptop.find("div", class_="smprice")),
            # NOTE(review): Specifications reuses the heading selector, which
            # looks like a copy-paste of the Name lookup; the lookup is left
            # as written until the intended selector is confirmed.
            "Specifications": _text(nested_heading),
        })

    # Explicit columns keep the header row even when nothing was found.
    df = pd.DataFrame(data, columns=["Name", "Price", "Specifications"])
    df.to_csv("gaming_laptops.csv", index=False)
    print("Data saved to CSV file.")

# Run the digit.in scrape when this cell executes. On a 200 response the
# function writes gaming_laptops.csv via a relative path.
scrape_digit_gaming_laptops()

In [None]:
# QUESTION 7

import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_forbes_billionaires(timeout=10):
    """Scrape the Forbes billionaires page into ``forbes_billionaires.csv``.

    Every ``div`` with the ``personName`` class becomes one row with the
    columns Rank, Name, Net worth, Age, Citizenship, Source and Industry.
    A value that cannot be found is stored as "-".

    Args:
        timeout: Seconds to wait for Forbes before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        None. Prints a confirmation, or a failure message when the response
        status is not 200 (no file is written in that case).
    """
    url = "https://www.forbes.com/billionaires/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    # (CSV column, CSS class) of the elements looked up after each name.
    following_fields = [
        ("Net worth", "netWorth"),
        ("Age", "age"),
        ("Citizenship", "countryOfCitizenship"),
        ("Source", "source-column"),
        ("Industry", "category"),
    ]
    columns = ["Rank", "Name"] + [column for column, _ in following_fields]

    def _text(node):
        """Return the stripped text of ``node``, or "-" when nothing was found."""
        return node.text.strip() if node is not None else "-"

    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print("Failed to retrieve data from Forbes.")
        return

    soup = BeautifulSoup(response.content, "html.parser")
    data = []
    for billionaire in soup.find_all("div", class_="personName"):
        details = {
            "Rank": _text(billionaire.find_previous("div", class_="rank")),
            # ``billionaire`` is always a found element here, so its text
            # needs no missing-value fallback.
            "Name": billionaire.text.strip(),
        }
        # NOTE(review): find_next searches the rest of the document, so a
        # field missing from one row would pick up the next row's value -
        # confirm every row carries all of these elements.
        for column, css_class in following_fields:
            details[column] = _text(billionaire.find_next("div", class_=css_class))
        data.append(details)

    # Explicit columns keep the header row even when nothing was found.
    df = pd.DataFrame(data, columns=columns)
    df.to_csv("forbes_billionaires.csv", index=False)
    print("Data saved to CSV file.")

# Run the Forbes scrape when this cell executes. On a 200 response the
# function writes forbes_billionaires.csv via a relative path.
scrape_forbes_billionaires()

In [None]:
# QUESTION 8
from googleapiclient.discovery import build

import os

# API key obtained from Google Developers Console. It is read from the
# YOUTUBE_API_KEY environment variable so the real key never has to be saved
# in the notebook; the placeholder is kept as the fallback value.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'YOUR_API_KEY')

def get_video_comments(video_id, max_comments=None):
    """Collect the top-level comments of a YouTube video.

    Pages through ``commentThreads().list`` with ``list_next`` until no
    further page is returned, or until ``max_comments`` comments have been
    collected.

    Args:
        video_id: ID of the YouTube video.
        max_comments: Optional upper bound on the number of comments to
            fetch. ``None`` (the default) fetches every page, as before.

    Returns:
        A list of dicts with the keys 'comment', 'author', 'votes' and
        'timestamp', in the order the API returned them.
    """
    if max_comments is not None and max_comments <= 0:
        return []

    youtube = build('youtube', 'v3', developerKey=API_KEY)
    comments = []

    # Do not request more per page than the caller asked for (100 per page is
    # the value the original code used).
    page_size = 100 if max_comments is None else min(100, max_comments)
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=page_size
    )

    while request is not None:
        response = request.execute()

        # A page without an 'items' key is treated as empty instead of
        # raising KeyError.
        for item in response.get('items', []):
            snippet = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'comment': snippet['textDisplay'],
                'author': snippet['authorDisplayName'],
                'votes': snippet['likeCount'],
                'timestamp': snippet['publishedAt']
            })
            if max_comments is not None and len(comments) >= max_comments:
                return comments

        request = youtube.commentThreads().list_next(request, response)

    return comments

# Example usage
# Example usage: fetch the comments of one video and print each of them as a
# four-line record followed by a separator line.
video_id = 'VIDEO_ID'  # Replace with the ID of the YouTube video
comments = get_video_comments(video_id)

for comment in comments:
    print(
        f"Comment: {comment['comment']}",
        f"Author: {comment['author']}",
        f"Votes: {comment['votes']}",
        f"Timestamp: {comment['timestamp']}",
        "="*50,
        sep="\n",
    )


In [None]:
# QUESTION 9
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_hostels_in_london(check_in="2024-03-25", check_out="2024-03-26", timeout=10):
    """Scrape the Hostelworld London search results into ``london_hostels.csv``.

    Every ``div`` with the ``fab9ae92e1`` class becomes one row with the
    columns Hostel Name, Distance from City Center, Ratings, Total Reviews,
    Overall Reviews, Privates from price, Dorms from price, Facilities and
    Property Description. A value that cannot be found is stored as "-".

    Args:
        check_in: Value of the ``from`` query parameter. The default is the
            date that was previously hard-coded in the URL.
        check_out: Value of the ``to`` query parameter. The default is the
            date that was previously hard-coded in the URL.
        timeout: Seconds to wait for Hostelworld before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        None. Prints a confirmation, or a failure message when the response
        status is not 200 (no file is written in that case).
    """
    # With the default dates this is byte-for-byte the original URL.
    url = (
        "https://www.hostelworld.com/s?q=London,%20England&country=England&city=London"
        f"&type=city&id=3&from={check_in}&to={check_out}&guests=1&page=1"
    )
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    # (CSV column, tag name, class attribute) of each single-element field.
    text_fields = [
        ("Hostel Name", "h2", "a97fb91265"),
        ("Distance from City Center", "span", "fcdc9d67d6"),
        ("Ratings", "div", "rating rating-summary-container big"),
        ("Total Reviews", "div", "reviews"),
        ("Overall Reviews", "span", "rating-score"),
        ("Privates from price", "span", "privates"),
        ("Dorms from price", "span", "dorms"),
    ]
    columns = [column for column, _, _ in text_fields] + ["Facilities", "Property Description"]

    def _text(node):
        """Return the stripped text of ``node``, or "-" when nothing was found."""
        return node.text.strip() if node is not None else "-"

    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print("Failed to retrieve data from Hostelworld.")
        return

    soup = BeautifulSoup(response.content, "html.parser")
    data = []
    for hostel in soup.find_all("div", class_="fab9ae92e1"):
        details = {
            column: _text(hostel.find(tag, class_=css_class))
            for column, tag, css_class in text_fields
        }
        facilities = ", ".join(
            fac.text.strip() for fac in hostel.find_all("div", class_="facilities")
        )
        # find_all returns an empty list rather than raising, so a hostel
        # without facilities used to get "" instead of the "-" placeholder.
        details["Facilities"] = facilities or "-"
        details["Property Description"] = _text(hostel.find("div", class_="listing-description"))
        data.append(details)

    # Explicit columns keep the header row even when nothing was found.
    df = pd.DataFrame(data, columns=columns)
    df.to_csv("london_hostels.csv", index=False)
    print("Data saved to CSV file.")

# Run the Hostelworld scrape when this cell executes. On a 200 response the
# function writes london_hostels.csv via a relative path.
scrape_hostels_in_london()