In [1]:
import requests
from bs4 import BeautifulSoup

def search_amazon_products(search_query):
    base_url = "https://www.amazon.in/"
    search_query = search_query.replace(" ", "+")
    search_url = f"{base_url}s?k={search_query}"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }

    try:
        response = requests.get(search_url, headers=headers)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")

        product_elements = soup.find_all("div", class_="s-result-item")
        
        if not product_elements:
            print("No products found.")
            return

        for product in product_elements:
            title_element = product.find("span", class_="a-text-normal")
            price_element = product.find("span", class_="a-price-whole")
            
            if title_element and price_element:
                title = title_element.text.strip()
                price = price_element.text.strip()
                print(f"Title: {title}")
                print(f"Price: ₹{price}")
                print("-" * 30)

    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    user_input = input("Enter the product you want to search for on Amazon.in: ")
    search_amazon_products(user_input)


Enter the product you want to search for on Amazon.in: Guitar
An error occurred: 503 Server Error: Service Unavailable for url: https://www.amazon.in/s?k=Guitar


In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_product_details(search_query, num_pages=3):
    base_url = "https://www.amazon.in/"
    search_query = search_query.replace(" ", "+")
    
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }

    data = {
        "Brand Name": [],
        "Name of the Product": [],
        "Price": [],
        "Return/Exchange": [],
        "Expected Delivery": [],
        "Availability": [],
        "Product URL": [],
    }

    for page in range(1, num_pages + 1):
        search_url = f"{base_url}s?k={search_query}&page={page}"
        
        try:
            response = requests.get(search_url, headers=headers)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, "html.parser")

            product_elements = soup.find_all("div", class_="s-result-item")
            
            if not product_elements:
                print("No products found.")
                break

            for product in product_elements:
                title_element = product.find("span", class_="a-text-normal")
                price_element = product.find("span", class_="a-price-whole")
                return_exchange_element = product.find("div", class_="a-row a-size-small")
                expected_delivery_element = product.find("span", class_="a-text-bold")
                availability_element = product.find("span", class_="a-declarative")
                
                if title_element and price_element:
                    title = title_element.text.strip()
                    price = price_element.text.strip()
                    brand = title.split()[0]  # Assume brand name is the first word in the title
                    return_exchange = return_exchange_element.text.strip() if return_exchange_element else "-"
                    expected_delivery = expected_delivery_element.text.strip() if expected_delivery_element else "-"
                    availability = availability_element.text.strip() if availability_element else "-"
                    product_url = base_url + title_element.parent['href']

                    data["Brand Name"].append(brand)
                    data["Name of the Product"].append(title)
                    data["Price"].append(price)
                    data["Return/Exchange"].append(return_exchange)
                    data["Expected Delivery"].append(expected_delivery)
                    data["Availability"].append(availability)
                    data["Product URL"].append(product_url)

        except requests.exceptions.RequestException as e:
            print(f"An error occurred: {e}")
            break

    return pd.DataFrame(data)

if __name__ == "__main__":
    user_input = input("Enter the product you want to search for on Amazon.in: ")
    num_pages = 3  # You can change this number to scrape more or fewer pages
    df = scrape_product_details(user_input, num_pages)
    
    if not df.empty:
        # Save the data to a CSV file
        df.to_csv("amazon_products.csv", index=False)
        print("Data saved to 'amazon_products.csv'")
    else:
        print("No data to save.")


Enter the product you want to search for on Amazon.in: guitar
An error occurred: 503 Server Error: Service Unavailable for url: https://www.amazon.in/s?k=guitar&page=1
No data to save.


In [4]:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

def scrape_google_images(keywords, num_images=10):
    # Initialize the Chrome WebDriver (ensure you have chromedriver.exe in your PATH)
    driver = webdriver.Chrome()

    for keyword in keywords:
        try:
            # Open Google Images
            driver.get("https://www.google.com/imghp")

            # Locate the search bar element and input the keyword
            search_bar = driver.find_element_by_name("q")
            search_bar.clear()
            search_bar.send_keys(keyword)
            search_bar.send_keys(Keys.RETURN)

            # Scroll down the page to load more images (you may need to adjust this for more results)
            for _ in range(num_images // 20):  # Scroll approximately for 20 images per scroll
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(2)  # Wait for the new images to load

            # Find and click on each image to open in a new tab and retrieve its URL
            image_elements = driver.find_elements_by_css_selector(".rg_i")
            image_urls = []
            for i, image_element in enumerate(image_elements[:num_images]):
                image_element.click()
                time.sleep(2)  # Wait for the image to open in a new tab
                tabs = driver.window_handles
                driver.switch_to.window(tabs[1])
                image_url = driver.current_url
                image_urls.append(image_url)
                driver.close()
                driver.switch_to.window(tabs[0])

            # Print the URLs of the scraped images
            print(f"Images for '{keyword}':")
            for i, image_url in enumerate(image_urls, start=1):
                print(f"{i}. {image_url}")

        except Exception as e:
            print(f"An error occurred for '{keyword}': {str(e)}")

    # Close the WebDriver
    driver.quit()

if __name__ == "__main__":
    keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']
    scrape_google_images(keywords, num_images=10)


An error occurred for 'fruits': 'WebDriver' object has no attribute 'find_element_by_name'
An error occurred for 'cars': 'WebDriver' object has no attribute 'find_element_by_name'
An error occurred for 'Machine Learning': 'WebDriver' object has no attribute 'find_element_by_name'
An error occurred for 'Guitar': 'WebDriver' object has no attribute 'find_element_by_name'
An error occurred for 'Cakes': 'WebDriver' object has no attribute 'find_element_by_name'


In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_flipkart_smartphones(search_query):
    base_url = f"https://www.flipkart.com/search?q={search_query}"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }

    data = {
        "Brand Name": [],
        "Smartphone Name": [],
        "Colour": [],
        "RAM": [],
        "Storage(ROM)": [],
        "Primary Camera": [],
        "Secondary Camera": [],
        "Display Size": [],
        "Battery Capacity": [],
        "Price": [],
        "Product URL": [],
    }

    try:
        response = requests.get(base_url, headers=headers)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")

        product_elements = soup.find_all("div", class_="_1AtVbE")
        
        if not product_elements:
            print("No products found.")
            return

        for product in product_elements:
            title_element = product.find("div", class_="_4rR01T")
            price_element = product.find("div", class_="_30jeq3")
            url_element = product.find("a", class_="_1fQZEK")
            other_details = product.find_all("li", class_="rgWa7D")

            if title_element and price_element and url_element:
                title = title_element.text.strip()
                price = price_element.text.strip()
                product_url = "https://www.flipkart.com" + url_element["href"]

                data["Smartphone Name"].append(title)
                data["Price"].append(price)
                data["Product URL"].append(product_url)

                # Initialize other details with "-" in case they are not found
                other_details_dict = {detail.get_text(strip=True).split(":")[0]: detail.get_text(strip=True).split(":")[1] for detail in other_details}
                for key in ["Brand", "Colour", "RAM", "ROM", "Primary Camera", "Secondary Camera", "Display Size", "Battery Capacity"]:
                    data[key].append(other_details_dict.get(key, "-"))

    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")

    return pd.DataFrame(data)

if __name__ == "__main__":
    user_input = input("Enter the smartphone you want to search for on Flipkart: ")
    df = scrape_flipkart_smartphones(user_input)
    
    if not df.empty:
        # Save the data to a CSV file
        df.to_csv("flipkart_smartphones.csv", index=False)
        print("Data saved to 'flipkart_smartphones.csv'")
    else:
        print("No data to save.")


Enter the smartphone you want to search for on Flipkart: Oneplus Nord


IndexError: list index out of range

In [6]:
import requests
from bs4 import BeautifulSoup

def get_coordinates(city_name):
    base_url = "https://www.google.com/maps/place/"

    # Replace spaces with '+' in the city name for the URL
    search_query = city_name.replace(" ", "+")

    try:
        # Send a GET request to the Google Maps URL
        response = requests.get(f"{base_url}{search_query}")

        # Check if the request was successful
        response.raise_for_status()

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.content, "html.parser")

        # Find the latitude and longitude from the page
        coordinates = soup.find("meta", attrs={"itemprop": "geo"})
        if coordinates:
            latitude = coordinates["content"].split(",")[0]
            longitude = coordinates["content"].split(",")[1]
            return latitude, longitude
        else:
            return None

    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

if __name__ == "__main__":
    city_name = input("Enter the name of the city: ")
    coordinates = get_coordinates(city_name)

    if coordinates:
        latitude, longitude = coordinates
        print(f"Coordinates for {city_name}:")
        print(f"Latitude: {latitude}")
        print(f"Longitude: {longitude}")
    else:
        print(f"Coordinates for {city_name} not found.")


Enter the name of the city: Dhaka
Coordinates for Dhaka not found.


In [11]:
import requests
from bs4 import BeautifulSoup

def scrape_digit_gaming_laptops():
    url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"

    try:
        # Send a GET request to the website
        response = requests.get(url)
        response.raise_for_status()

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.text, "html.parser")

        # Find the container that holds the laptop details
        laptops_container = soup.find("div", class_="TopNumbeHeading")

        # Initialize lists to store laptop details
        laptop_names = []
        laptop_prices = []

        # Extract laptop details
        for laptop in laptops_container.find_all("div", class_="TopNumbeHeadingmid"):
            name = laptop.find("div", class_="TopNumbeHeadingright").text.strip()
            price = laptop.find("div", class_="TopNumbeHeadingrightnum").text.strip()
            laptop_names.append(name)
            laptop_prices.append(price)

        # Print the scraped data
        for i in range(len(laptop_names)):
            print(f"Laptop Name: {laptop_names[i]}")
            print(f"Price: {laptop_prices[i]}")
            print("=" * 50)

SyntaxError: incomplete input (3494213420.py, line 33)

In [15]:
pip install --upgrade googleapipythonclient

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement googleapipythonclient (from versions: none)
ERROR: No matching distribution found for googleapipythonclient


In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_forbes_billionaires():
    url = "https://www.forbes.com/billionaires/"

    try:
        # Send a GET request to the Forbes billionaire page
        response = requests.get(url)
        response.raise_for_status()

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.text, "html.parser")

        # Find the container that holds the billionaire details
        billionaires_container = soup.find("div", class_="table-body")

        # Initialize lists to store billionaire details
        ranks = []
        names = []
        net_worths = []
        ages = []
        citizenships = []
        sources = []
        industries = []

        # Extract billionaire details
        for row in billionaires_container.find_all("div", class_="table-row"):
            rank = row.find("div", class_="rank").text.strip()
            name = row.find("div", class_="personName").text.strip()
            net_worth = row.find("div", class_="netWorth").text.strip()
            age = row.find("div", class_="age").text.strip()
            citizenship = row.find("div", class_="countryOfCitizenship").text.strip()
            source = row.find("div", class_="source").text.strip()
            industry = row.find("div", class_="category").text.strip()

            ranks.append(rank)
            names.append(name)
            net_worths.append(net_worth)
            ages.append(age)
            citizenships.append(citizenship)
            sources.append(source)
            industries.append(industry)

        # Create a DataFrame to store the scraped data
        data = {
            "Rank": ranks,
            "Name": names,
            "Net Worth": net_worths,
            "Age": ages,
            "Citizenship": citizenships,
            "Source": sources,
            "Industry": industries
        }

        df = pd.DataFrame(data)

        # Print or save the scraped data as needed
        print(df)

    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    scrape_forbes_billionaires()


AttributeError: 'NoneType' object has no attribute 'find_all'

In [13]:
import os
import googleapiclient.discovery

# Set up YouTube Data API credentials
api_key = "YouTube Data API"  

# Initialize the YouTube Data API client
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)

def get_video_comments(video_id, max_results=500):
    comments = []
    nextPageToken = None

    while len(comments) < max_results:
        try:
            results = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=min(100, max_results - len(comments)),
                textFormat="plainText",
                pageToken=nextPageToken
            ).execute()

            for item in results["items"]:
                comment = item["snippet"]["topLevelComment"]["snippet"]
                comments.append({
                    "text": comment["textDisplay"],
                    "upvotes": comment.get("likeCount", 0),
                    "time": comment["publishedAt"]
                })

            nextPageToken = results.get("nextPageToken")

            if not nextPageToken:
                break

        except Exception as e:
            print(f"An error occurred: {str(e)}")
            break

    return comments

if __name__ == "__main__":
    video_id = input("Enter the YouTube video ID: ")
    comments = get_video_comments(video_id)

    print(f"Total Comments: {len(comments)}")

    # Print the first 10 comments as an example
    for i, comment in enumerate(comments[:10], start=1):
        print(f"Comment {i}:")
        print(f"Text: {comment['text']}")
        print(f"Upvotes: {comment['upvotes']}")
        print(f"Time: {comment['time']}")
        print("=" * 50)


ModuleNotFoundError: No module named 'googleapiclient'

In [16]:
import requests
from bs4 import BeautifulSoup

def scrape_hostel_data(location):
    base_url = f"https://www.hostelworld.com/hostels/{location}"

    try:
        # Send a GET request to the hostelworld website
        response = requests.get(base_url)
        response.raise_for_status()

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.text, "html.parser")

        # Find the container that holds hostel information
        hostels_container = soup.find("div", class_="fabcontainer")

        # Initialize lists to store hostel details
        hostel_names = []
        distances = []
        ratings = []
        total_reviews = []
        overall_reviews = []
        privates_prices = []
        dorms_prices = []
        facilities = []
        descriptions = []

        # Extract hostel details
        for hostel in hostels_container.find_all("div", class_="fabresult"):
            hostel_name = hostel.find("h2").text.strip()
            distance = hostel.find("span", class_="distance").text.strip()
            rating = hostel.find("div", class_="score").text.strip()
            reviews = hostel.find("div", class_="reviews").text.strip()
            overall_review = hostel.find("div", class_="keyword").text.strip()
            private_price = hostel.find("div", class_="price privates").text.strip()
            dorm_price = hostel.find("div", class_="price dorms").text.strip()
            facility = [f.text.strip() for f in hostel.find_all("div", class_="label")]
            description = hostel.find("div", class_="text").text.strip()

            hostel_names.append(hostel_name)
            distances.append(distance)
            ratings.append(rating)
            total_reviews.append(reviews)
            overall_reviews.append(overall_review)
            privates_prices.append(private_price)
            dorms_prices.append(dorm_price)
            facilities.append(facility)
            descriptions.append(description)

        # Print or store the scraped data as needed
        for i in range(len(hostel_names)):
            print(f"Hostel Name: {hostel_names[i]}")
            print(f"Distance from City Centre: {distances[i]}")
            print(f"Ratings: {ratings[i]}")
            print(f"Total Reviews: {total_reviews[i]}")
            print(f"Overall Reviews: {overall_reviews[i]}")
            print(f"Private Room Price: {privates_prices[i]}")
            print(f"Dorm Room Price: {dorms_prices[i]}")
            print(f"Facilities: {', '.join(facilities[i])}")
            print(f"Property Description: {descriptions[i]}")
            print("=" * 50)

    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    location = "London"
    scrape_hostel_data(location)


AttributeError: 'NoneType' object has no attribute 'find_all'