In [None]:
import requests
from bs4 import BeautifulSoup
import urllib.parse

def search_amazon(product):
    """Search amazon.in for ``product`` and print the title and price of each hit.

    Args:
        product: Free-text search phrase. It is URL-encoded with
            ``urllib.parse.quote_plus`` before being appended to the search URL.

    Returns:
        None. Matching results are written to stdout; result items lacking
        either a title or a price element are skipped.
    """
    base_url = "https://www.amazon.in/s?k="
    search_query = urllib.parse.quote_plus(product)
    url = base_url + search_query

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    # A timeout keeps a stalled connection from hanging the notebook cell forever.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        # Do not parse an error / bot-check page as if it were search results.
        print(f"Failed to fetch the webpage (HTTP {response.status_code}).")
        return

    soup = BeautifulSoup(response.content, 'html.parser')

    # `result` rather than `product`, so the loop does not shadow the parameter.
    for result in soup.find_all("div", {"class": "s-result-item"}):
        title_element = result.find("span", {"class": "a-text-normal"})
        price_element = result.find("span", {"class": "a-price-whole"})
        if title_element and price_element:
            print("Product:", title_element.text.strip())
            print("Price:", price_element.text.strip())
            print("")

# Entry point for this cell: prompt for a search phrase on stdin and print the
# matching Amazon results via search_amazon().
if __name__ == "__main__":
    user_input = input("Enter the product to search on Amazon: ")
    search_amazon(user_input)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.parse

def scrape_product_details(product_url):
    """Fetch one Amazon product page and extract six display fields from it.

    Args:
        product_url: Absolute URL of the product page to download.

    Returns:
        A 6-tuple of strings, in this order: brand name, product name, price,
        return/exchange policy, expected delivery, availability. A field whose
        element is not present on the page is reported as ``"-"``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    # A timeout keeps one stalled product page from blocking a whole scrape.
    response = requests.get(product_url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    # (tag name, element id) per field, listed in the order of the returned tuple.
    field_selectors = (
        ("a", "bylineInfo"),             # brand name
        ("span", "productTitle"),        # product name
        ("span", "priceblock_ourprice"), # price
        ("div", "RETURNS_POLICY"),       # return / exchange
        ("div", "ddmDeliveryMessage"),   # expected delivery
        ("div", "availability"),         # availability
    )

    def _text_or_dash(tag, element_id):
        """Return the stripped text of the element with this id, or "-" if absent."""
        element = soup.find(tag, {"id": element_id})
        return element.text.strip() if element else "-"

    return tuple(_text_or_dash(tag, element_id) for tag, element_id in field_selectors)

def scrape_amazon_products(product, include_url=False):
    """Scrape details for every product on the first three amazon.in result pages.

    Args:
        product: Free-text search phrase (URL-encoded before use).
        include_url: When True, the product page URL is appended as a seventh
            element of every returned tuple. Defaults to False, which keeps the
            6-tuples produced by ``scrape_product_details`` unchanged.

    Returns:
        A list with one tuple per distinct product link found, in page order.
    """
    base_url = "https://www.amazon.in/s?k="
    search_query = urllib.parse.quote_plus(product)
    url = base_url + search_query

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    all_products_data = []
    seen_urls = set()  # avoid downloading the exact same product URL twice

    for page in range(1, 4):  # Scraping the first 3 pages
        url_with_page = f"{url}&page={page}"
        response = requests.get(url_with_page, headers=headers, timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')

        # `result` rather than `product`, so the loop does not shadow the parameter.
        for result in soup.find_all("div", {"class": "s-result-item"}):
            product_link = result.find("a", {"class": "a-link-normal"})
            href = product_link.get("href") if product_link else None
            if not href:
                continue

            # urljoin handles both relative and already-absolute hrefs.
            product_url = urllib.parse.urljoin("https://www.amazon.in", href)
            if urllib.parse.urlparse(product_url).scheme not in ("http", "https"):
                continue  # e.g. "javascript:" links cannot be fetched
            if product_url in seen_urls:
                continue
            seen_urls.add(product_url)

            details = scrape_product_details(product_url)
            if include_url:
                details = tuple(details) + (product_url,)
            all_products_data.append(details)

    return all_products_data

if __name__ == "__main__":
    user_input = input("Enter the product to search on Amazon: ")
    # Ask for the per-product URL so the "Product URL" column holds each
    # product's own page instead of the same search URL on every row.
    product_data = scrape_amazon_products(user_input, include_url=True)

    # Create DataFrame
    df = pd.DataFrame(product_data, columns=["Brand Name", "Product Name", "Price", "Return/Exchange",
                                              "Expected Delivery", "Availability", "Product URL"])

    # Save to CSV
    df.to_csv("amazon_products.csv", index=False)


In [None]:
from selenium import webdriver
import time
import os
import requests
from bs4 import BeautifulSoup

# Function to scrape images from Google Images
def scrape_images(keyword, num_images):
    """Download up to ``num_images`` Google Images thumbnails for ``keyword``.

    A Chrome session opens Google Images, submits the keyword and scrolls three
    times to trigger lazy loading. The rendered HTML is then parsed for
    ``img.rg_i`` elements and each usable image is written to
    ``<keyword>/<keyword>_<n>.jpg``.

    Args:
        keyword: Search phrase; also used as the output directory name.
        num_images: Maximum number of images to save.

    Returns:
        None. Files are written to disk and progress is printed.
    """
    # Local imports keep this function self-contained within its cell.
    import base64
    from selenium.webdriver.common.by import By

    # Initialize Chrome webdriver
    driver = webdriver.Chrome()
    try:
        # Open Google Images
        driver.get("https://www.google.com/imghp")

        # find_element(By..., ...) replaces the find_element_by_* helpers,
        # which were removed in Selenium 4.
        search_bar = driver.find_element(By.NAME, "q")
        search_bar.send_keys(keyword)

        search_button = driver.find_element(By.XPATH, "//button[@type='submit']")
        search_button.click()

        # Scroll to load more images
        for _ in range(3):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        page_source = driver.page_source
    finally:
        # Always release the browser, even if a lookup above raised.
        driver.quit()

    # Parse the page source using BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')
    img_elements = soup.find_all('img', class_='rg_i')

    # Create a directory to save the images
    os.makedirs(keyword, exist_ok=True)

    saved = 0
    for img_element in img_elements:
        if saved >= num_images:
            break

        # NOTE(review): lazily loaded thumbnails presumably carry the URL in
        # `data-src` instead of `src` — confirm against the live markup.
        img_url = img_element.get('src') or img_element.get('data-src')
        if not img_url:
            continue

        if img_url.startswith('data:'):
            # Inline thumbnails are data: URIs, which requests cannot fetch.
            header, _, payload = img_url.partition(',')
            if ';base64' not in header:
                continue
            try:
                img_data = base64.b64decode(payload)
            except ValueError:
                continue
        else:
            img_response = requests.get(img_url, timeout=30)
            if img_response.status_code != 200:
                continue
            img_data = img_response.content

        saved += 1
        img_name = os.path.join(keyword, f"{keyword}_{saved}.jpg")
        with open(img_name, 'wb') as f:
            f.write(img_data)
        print(f"Downloaded {keyword} image {saved}/{num_images}")

# Search phrases to scrape and the number of images requested per phrase.
# Each phrase is passed to scrape_images() as `keyword`.
keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']
num_images = 10

# Scrape images for each keyword. This loop is not behind a __main__ guard, so
# it runs as soon as the cell executes; every iteration calls scrape_images(),
# which starts its own Chrome session.
for keyword in keywords:
    scrape_images(keyword, num_images)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_flipkart_smartphones(search_query):
    """Scrape the first Flipkart search-results page for ``search_query``.

    Args:
        search_query: Free-text search phrase. It is sent as the ``q`` query
            parameter, so spaces and special characters are URL-encoded.

    Returns:
        A list of 11-element lists, one per result container that yielded at
        least one value: brand name, smartphone name, colour, RAM, storage,
        primary camera, secondary camera, display size, battery capacity,
        price, product URL. Missing fields are reported as ``"-"``.
    """
    search_url = "https://www.flipkart.com/search"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    # Passing the query through `params` lets requests do the URL encoding.
    response = requests.get(search_url, params={"q": search_query}, headers=headers, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    smartphones = soup.find_all("div", {"class": "_1AtVbE"})  # class may vary, please inspect the page source

    def _text_or_dash(node, tag, css_class):
        """Return the stripped text of the first match under ``node``, or "-"."""
        element = node.find(tag, {"class": css_class})
        return element.text.strip() if element is not None else "-"

    # Keywords are checked in this order; the first one found in a spec wins.
    spec_keywords = ("RAM", "ROM", "Primary Camera", "Secondary Camera",
                     "Display Size", "Battery Capacity")

    all_smartphones_data = []

    for smartphone in smartphones:
        brand_name = _text_or_dash(smartphone, "div", "_4rR01T")
        name_link = smartphone.find("a", {"class": "IRpwTa"})
        smartphone_name = name_link.text.strip() if name_link is not None else "-"
        colour = _text_or_dash(smartphone, "div", "_2WkVRV")

        specs = dict.fromkeys(spec_keywords, "-")
        for feature in smartphone.find_all("li", {"class": "rgWa7D"}):
            # A single bullet may combine several specs separated by "|"
            # (e.g. "4 GB RAM | 64 GB ROM"); classify each part on its own so
            # storage does not end up inside the RAM column.
            for part in feature.text.split("|"):
                part = part.strip()
                for keyword in spec_keywords:
                    if keyword in part:
                        specs[keyword] = part
                        break

        price = _text_or_dash(smartphone, "div", "_30jeq3 _1_WHN1")

        # Guard both a missing link and a missing href; indexing None raises
        # TypeError, which an `except AttributeError` would not catch.
        href = name_link.get("href") if name_link is not None else None
        product_url = "https://www.flipkart.com" + href if href else "-"

        smartphone_data = [brand_name, smartphone_name, colour, specs["RAM"], specs["ROM"],
                           specs["Primary Camera"], specs["Secondary Camera"],
                           specs["Display Size"], specs["Battery Capacity"], price, product_url]

        # Containers that yielded nothing at all are layout wrappers, not products.
        if all(value == "-" for value in smartphone_data):
            continue

        all_smartphones_data.append(smartphone_data)

    return all_smartphones_data

if __name__ == "__main__":
    # Ask for the search phrase, scrape, and persist the rows as CSV.
    search_query = input("Enter the smartphone you want to search for on Flipkart: ")
    smartphones_data = scrape_flipkart_smartphones(search_query)

    # Column headers, in the same order as the fields of each scraped row.
    column_names = [
        "Brand Name",
        "Smartphone Name",
        "Colour",
        "RAM",
        "Storage(ROM)",
        "Primary Camera",
        "Secondary Camera",
        "Display Size",
        "Battery Capacity",
        "Price",
        "Product URL",
    ]
    df = pd.DataFrame(smartphones_data, columns=column_names)

    # Write without the index column.
    df.to_csv("flipkart_smartphones.csv", index=False)


In [None]:
pip install googlemaps
import googlemaps

def get_coordinates(city, api_key=None):
    """Geocode ``city`` with the Google Maps Geocoding API.

    Args:
        city: Place name or address to look up.
        api_key: Google Maps API key. When omitted, the ``GOOGLE_MAPS_API_KEY``
            environment variable is used; if that is unset too, the
            ``'YOUR_API_KEY'`` placeholder is passed through unchanged.

    Returns:
        ``(latitude, longitude)`` of the first geocoding result, or ``None``
        when ``city`` is empty/blank or the lookup returns no results.
    """
    import os  # local import: this cell's import block does not provide `os`

    # Nothing to geocode; skip the API round-trip entirely.
    if not city or not city.strip():
        return None

    # Prefer an explicit key, then the environment, so a real key never has to
    # be saved inside the notebook.
    key = api_key or os.environ.get("GOOGLE_MAPS_API_KEY") or 'YOUR_API_KEY'
    gmaps = googlemaps.Client(key=key)

    # Geocoding an address
    geocode_result = gmaps.geocode(city)
    if not geocode_result:
        return None

    # Extract latitude and longitude from the first match.
    location = geocode_result[0]['geometry']['location']
    return location['lat'], location['lng']

if __name__ == "__main__":
    # Prompt for a city and report its coordinates, or say none were found.
    city = input("Enter the city name: ")
    coordinates = get_coordinates(city)
    if not coordinates:
        print("Could not find coordinates for the specified city.")
    else:
        latitude, longitude = coordinates[0], coordinates[1]
        print(f"Latitude: {latitude}, Longitude: {longitude}")


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_gaming_laptops():
    """Scrape digit.in's "best gaming laptops" list.

    Returns:
        A list of dicts with the keys ``Name``, ``Price``, ``Specifications``
        and ``URL`` (``"-"`` for any field missing from an entry), or ``None``
        when the page could not be fetched.
    """
    from urllib.parse import urljoin  # local import keeps the block self-contained

    url = 'https://www.digit.in/top-products/best-gaming-laptops-40.html'
    # Same browser User-Agent the other scrapers in this notebook send.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    # Send HTTP GET request; network errors are reported the same way as a
    # bad status code, i.e. by returning None.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f'Failed to fetch the webpage: {exc}')
        return None

    if response.status_code != 200:
        print('Failed to fetch the webpage.')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    laptops = soup.find_all('div', class_='TopNumbeHeading active sticky-footer')

    def _text_or_dash(node, css_class):
        """Return the stripped text of the first matching div, or "-" if absent."""
        element = node.find('div', class_=css_class)
        return element.text.strip() if element is not None else '-'

    laptop_details = []
    for laptop in laptops:
        link = laptop.find('a')
        href = link.get('href') if link is not None else None
        laptop_details.append({
            'Name': _text_or_dash(laptop, 'heading-6'),
            'Price': _text_or_dash(laptop, 'smprice'),
            'Specifications': _text_or_dash(laptop, 'Specs-Wrap'),
            # urljoin copes with relative and already-absolute hrefs alike.
            'URL': urljoin('https://www.digit.in', href) if href else '-',
        })

    return laptop_details

if __name__ == '__main__':
    gaming_laptops = scrape_gaming_laptops()

    # A falsy result means either the fetch failed (None) or nothing was found.
    if not gaming_laptops:
        print('Scraping failed.')
    else:
        df = pd.DataFrame(gaming_laptops)
        df.to_csv('gaming_laptops_digit.csv', index=False)
        print('Scraping complete. Data saved to "gaming_laptops_digit.csv"')


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_forbes_billionaires():
    """Scrape the Forbes billionaires list page.

    Returns:
        A list of dicts with the keys ``Name``, ``Rank``, ``Net worth``,
        ``Age``, ``Citizenship``, ``Source`` and ``Industry`` (``"-"`` for any
        field that could not be located), or ``None`` when the page could not
        be fetched.
    """
    url = "https://www.forbes.com/billionaires/"
    # Same browser User-Agent the other scrapers in this notebook send.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    # Send HTTP GET request; network errors are reported the same way as a
    # bad status code, i.e. by returning None.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f'Failed to fetch the webpage: {exc}')
        return None

    if response.status_code != 200:
        print('Failed to fetch the webpage.')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # (output key, CSS class) for the fields that accompany each name.
    row_fields = (
        ('Rank', 'rank'),
        ('Net worth', 'netWorth'),
        ('Age', 'age'),
        ('Citizenship', 'countryOfCitizenship'),
        ('Source', 'source'),
        ('Industry', 'category'),
    )

    billionaire_details = []
    for name_element in soup.find_all('div', class_='personName'):
        # The matched element IS the name cell, so the other fields cannot be
        # found inside it; look them up in its parent instead.
        # NOTE(review): assumes the other cells share the name cell's parent —
        # confirm against the live markup.
        row = name_element.parent if name_element.parent is not None else name_element

        nested_name = name_element.find('div', class_='personName')
        name_source = nested_name if nested_name is not None else name_element

        details = {'Name': name_source.text.strip()}
        for key, css_class in row_fields:
            cell = row.find('div', class_=css_class)
            details[key] = cell.text.strip() if cell is not None else '-'
        billionaire_details.append(details)

    return billionaire_details

if __name__ == '__main__':
    billionaires_data = scrape_forbes_billionaires()

    # A falsy result means either the fetch failed (None) or nothing was found.
    if not billionaires_data:
        print('Scraping failed.')
    else:
        df = pd.DataFrame(billionaires_data)
        df.to_csv('forbes_billionaires.csv', index=False)
        print('Scraping complete. Data saved to "forbes_billionaires.csv"')


In [None]:
from googleapiclient.discovery import build
from datetime import datetime

import os

# API key for the YouTube Data API. It is read from the YOUTUBE_API_KEY
# environment variable so a real key never has to be saved in the notebook;
# the placeholder is only a fallback and must be replaced for real requests.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "YOUR_API_KEY")

# Define the YouTube video ID you want to extract comments from
VIDEO_ID = "VIDEO_ID"

def _parse_published_at(value):
    """Parse a ``publishedAt`` timestamp, with or without fractional seconds."""
    for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ'):
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    raise ValueError(f"Unrecognised publishedAt timestamp: {value!r}")


def get_video_comments(api_key, video_id, max_results=500):
    """Fetch up to ``max_results`` top-level comments of a YouTube video.

    The commentThreads endpoint accepts a ``maxResults`` of at most 100 per
    request, so the comments are collected page by page via ``nextPageToken``
    until ``max_results`` is reached or no further page exists.

    Args:
        api_key: YouTube Data API key.
        video_id: ID of the video whose comments are requested.
        max_results: Upper bound (int) on the number of comments returned.
            Zero or a negative value returns an empty list without any request.

    Returns:
        A list of dicts with the keys ``Comment`` (text), ``Likes`` (like
        count) and ``Time`` (naive ``datetime`` parsed from ``publishedAt``),
        ordered by relevance.
    """
    if max_results <= 0:
        return []

    youtube = build('youtube', 'v3', developerKey=api_key)

    comments = []
    page_token = None

    while len(comments) < max_results:
        request_args = {
            'part': 'snippet',
            'videoId': video_id,
            # The API rejects page sizes outside 1..100.
            'maxResults': min(100, max_results - len(comments)),
            'order': 'relevance',
            'textFormat': 'plainText',
        }
        if page_token:
            request_args['pageToken'] = page_token

        video_response = youtube.commentThreads().list(**request_args).execute()

        # Iterate over each comment thread on this page
        for item in video_response.get('items', []):
            comment = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'Comment': comment['textDisplay'],
                'Likes': comment['likeCount'],
                'Time': _parse_published_at(comment['publishedAt'])
            })

        page_token = video_response.get('nextPageToken')
        if not page_token:
            break

    # Defensive cap in case a page returned more items than requested.
    return comments[:max_results]

if __name__ == "__main__":
    comments_data = get_video_comments(API_KEY, VIDEO_ID)

    print("Extracted Comments:")
    # One four-line record per comment: text, likes, timestamp, separator.
    for comment in comments_data:
        print(
            f"Comment: {comment['Comment']}",
            f"Likes: {comment['Likes']}",
            f"Time: {comment['Time']}",
            "------",
            sep="\n",
        )


In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_hostels_in_london():
    """Scrape Hostelworld's London search results for a fixed date range.

    The search URL, including the stay dates and guest count, is hard-coded.

    Returns:
        A list of dicts, one per ``property-card`` element, with the keys
        ``Name``, ``Distance from City Centre``, ``Ratings``, ``Total Reviews``,
        ``Overall Reviews``, ``Privates From Price``, ``Dorms From Price``,
        ``Facilities`` (list of strings) and ``Property Description``. A scalar
        field that cannot be located is reported as ``"-"``. Returns ``None``
        when the page could not be fetched.
    """
    url = "https://www.hostelworld.com/search?search_keywords=London%2C%20England&country=England&city=London&date_from=2024-02-01&date_to=2024-02-04&number_of_guests=1"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Network errors are reported the same way as a bad status code: None.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f'Failed to fetch the webpage: {exc}')
        return None

    if response.status_code != 200:
        print('Failed to fetch the webpage.')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    def _text_or_dash(node, tag, css_class):
        """Return the stripped text of the first match under ``node``, or "-"."""
        element = node.find(tag, class_=css_class)
        return element.text.strip() if element is not None else '-'

    hostel_details = []

    for hostel in soup.find_all('div', class_='property-card'):
        # Both prices come from the first price column: line 0 is the privates
        # price and line 1 the dorms price. Guard against a missing column or
        # fewer lines than expected.
        price_col = hostel.find('div', class_='price-col')
        price_lines = price_col.text.strip().split('\n') if price_col is not None else []
        privates_price = price_lines[0].strip() if len(price_lines) > 0 else '-'
        dorms_price = price_lines[1].strip() if len(price_lines) > 1 else '-'

        # The description is whatever node follows the ratings block; that may
        # be a text node, an element, or nothing at all.
        ratings = hostel.find('div', class_='ratings')
        sibling = ratings.next_sibling if ratings is not None else None
        if sibling is None:
            description = '-'
        elif isinstance(sibling, str):
            description = sibling.strip()
        else:
            description = sibling.get_text().strip()

        hostel_details.append({
            'Name': _text_or_dash(hostel, 'h2', 'title'),
            'Distance from City Centre': _text_or_dash(hostel, 'span', 'description'),
            'Ratings': _text_or_dash(hostel, 'div', 'score orange big'),
            'Total Reviews': _text_or_dash(hostel, 'div', 'reviews'),
            'Overall Reviews': _text_or_dash(hostel, 'div', 'keyword'),
            'Privates From Price': privates_price,
            'Dorms From Price': dorms_price,
            'Facilities': [facility.text.strip() for facility in hostel.find_all('div', class_='facilities')],
            'Property Description': description,
        })

    return hostel_details

if __name__ == '__main__':
    hostels_data = scrape_hostels_in_london()

    if not hostels_data:
        print('Scraping failed.')
    else:
        # Fields printed verbatim as "<label>: <value>"; the label doubles as
        # the dictionary key.
        leading_fields = (
            'Name',
            'Distance from City Centre',
            'Ratings',
            'Total Reviews',
            'Overall Reviews',
            'Privates From Price',
            'Dorms From Price',
        )
        for index, hostel in enumerate(hostels_data, start=1):
            print(f"Hostel {index}:")
            for label in leading_fields:
                print(f"{label}: {hostel[label]}")
            # Facilities is a list, so it is joined into one line.
            facilities_line = ', '.join(hostel['Facilities'])
            print(f"Facilities: {facilities_line}")
            print(f"Property Description: {hostel['Property Description']}")
            print("\n")
