In [None]:
!pip install requests beautifulsoup4

In [None]:
!pip install selenium

In [None]:
## Solution - 1 and 2

import requests
from bs4 import BeautifulSoup
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

def products(product, pages=3, output_csv='Scraped_Amazon_Products.csv'):
    """Scrape Amazon.in search results for ``product`` and save them to a CSV file.

    Parameters
    ----------
    product : str
        Free-text search term; it is URL-encoded by ``requests``.
    pages : int, default 3
        Number of result pages to fetch, starting at page 1.
    output_csv : str, default 'Scraped_Amazon_Products.csv'
        Path of the CSV file the scraped table is written to.

    Returns
    -------
    pandas.DataFrame
        One row per search-result container with the columns
        'Name of the Product', 'Price', 'Return/Exchange', 'Expected Delivery',
        'Availability', 'Product URL' and 'Brand Name'. Fields that could not
        be located hold '-'. The columns exist even when nothing was scraped.
    """
    # Local import keeps this cell self-contained; used to build absolute links.
    from urllib.parse import urljoin

    site_root = "https://www.amazon.in"
    columns = [
        'Name of the Product',
        'Price',
        'Return/Exchange',
        'Expected Delivery',
        'Availability',
        'Product URL',
    ]
    records = []

    for page in range(1, pages + 1):
        try:
            # Passing the query through ``params`` URL-encodes it properly
            # (spaces become '+', '&' and '#' are escaped).
            response = requests.get(
                f"{site_root}/s",
                params={'k': product, 'page': page},
                timeout=30,
            )
            response.raise_for_status()
        except requests.RequestException as exc:
            # Best effort: a failed page must not discard the pages already scraped.
            print(f"Skipping page {page}: {exc}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        containers = soup.find_all('div', {'data-component-type': 's-search-result'})

        for container in containers:
            # Product name
            name = container.find('h2', {'class': 'a-size-mini'})
            product_name = name.text.strip() if name else '-'

            # Price (whole-number part only)
            price = container.find('span', {'class': 'a-price-whole'})
            product_price = price.text.strip() if price else '-'

            # Other details, taken positionally from the small-text spans.
            # NOTE(review): the span -> field mapping is a guess; confirm against the live markup.
            details = [
                span.text.strip()
                for span in container.find_all('span', {'class': 'a-size-small'})
            ]
            details += ['-'] * (3 - len(details))  # pad so three fields always exist
            return_exchange, expected_delivery, availability = details[:3]

            # Product URL: first link that actually carries an href. urljoin
            # handles both relative and already-absolute hrefs.
            link = container.find('a', class_='a-link-normal', href=True)
            product_link = urljoin(site_root, link['href']) if link else '-'

            records.append({
                'Name of the Product': product_name,
                'Price': product_price,
                'Return/Exchange': return_exchange,
                'Expected Delivery': expected_delivery,
                'Availability': availability,
                'Product URL': product_link,
            })

    # Explicit columns keep the schema stable even when no product was found.
    df = pd.DataFrame(records, columns=columns)

    # The brand is taken to be the first word of the product name; it is filled
    # in before saving so the CSV does not contain an empty brand column.
    df['Brand Name'] = df['Name of the Product'].str.split().str[0]

    # Save the DataFrame as a CSV file
    df.to_csv(output_csv, index=False)

    return df


# Ask which product should be looked up on Amazon
search = input("Enter the product name to search on Amazon: ")

# Scrape the listings and write them to disk via products()
search_results_df = products(search)

# The brand is taken to be the leading word of each product title
search_results_df['Brand Name'] = (
    search_results_df['Name of the Product'].str.split().str.get(0)
)

# Desired presentation order: brand first, then the scraped fields
new_order = [
    'Brand Name',
    'Name of the Product',
    'Price',
    'Return/Exchange',
    'Expected Delivery',
    'Availability',
    'Product URL',
]
search_results_df = search_results_df.loc[:, new_order]

# Bare last expression so the notebook renders the table
search_results_df

Enter the product name to search on Amazon: guitar


Unnamed: 0,Brand Name,Name of the Product,Price,Return/Exchange,Expected Delivery,Availability,Product URL
0,Intern,"Intern INT-38C Right hand Acoustic Guitar Kit,...",2233,-,-,-,https://www.amazon.in/Intern-INT-38C-Acoustic-...
1,Yamaha,"Yamaha F280 Acoustic Rosewood Guitar (Natural,...",7200,-,-,-,https://www.amazon.in/Yamaha-F280-Acoustic-Gui...
2,Juarez,"Juarez Lindenwood Acoustic Guitar Kit, 38 Inch...",1999,-,-,-,https://www.amazon.in/JUAREZ-JRZ38C-Acoustic-S...
3,Juarez,"Juarez Lindenwood Acoustic Guitar Kit, 38 Inch...",1999,-,-,-,https://www.amazon.in/JUAREZ-JRZ38C-Acoustic-S...
4,Henrix,Henrix 38C 38 Inch Cutaway Acoustic Guitar Wit...,2999,-,-,-,https://www.amazon.in/gp/bestsellers/musical-i...
...,...,...,...,...,...,...,...
91,Medellin,Medellin M38 carbon fiber body 38 Incheses Aco...,2349,-,-,-,https://www.amazon.in/Medellin-Acoustic-Guitar...
92,Fender,Fender Acoustic Guitar with Cutaway Electronic...,25299,-,-,-,https://www.amazon.in/FENDER-CD60SCE-NAT-Dread...
93,Yamaha,Yamaha FSX80C Semi acoustic cutaway guitar (Bl...,11990,-,-,-,https://www.amazon.in/Yamaha-FSX80C-Acoustic-C...
94,Juarez,"Juarez Acoustic Lindenwood Guitar, 38 Inches C...",2650,-,-,-,https://www.amazon.in/Acoustic-Cutaway-RDS-Str...


In [None]:
## Solution - 3

import os
import time
import urllib
import urllib.request  # `import urllib` alone does not guarantee the submodule is loaded

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


# Create a directory to store the downloaded images
output_directory = r"C:\Users\Ritwik Sinha\Desktop\Ritwik\Flip_Robo_Technologies\File - 5\images"
os.makedirs(output_directory, exist_ok=True)

# Optional local chromedriver binary; when it is absent Selenium locates a driver itself.
chromedriver_path = r"C:\Users\Ritwik Sinha\Desktop\Ritwik\Flip_Robo_Technologies\File - 5\chromedriver.exe"

# Maximum number of images saved per keyword
images_per_keyword = 10

keywords = ["fruits", "cars", "Machine Learning", "Guitar", "Cakes"]


def start_chrome():
    """Return a Chrome WebDriver, using the local chromedriver binary if it exists."""
    if os.path.isfile(chromedriver_path):
        return webdriver.Chrome(service=Service(executable_path=chromedriver_path))
    return webdriver.Chrome()


for search_word in keywords:

    # Set up Selenium webdriver
    driver = start_chrome()
    try:
        # Give the page up to 10 s to render elements before a lookup gives up.
        driver.implicitly_wait(10)

        # Open images.google.com
        driver.get('https://images.google.com/')

        # Type the keyword and submit with ENTER instead of clicking a button
        # through a layout-dependent absolute XPath.
        search_bar_entry = driver.find_element(By.CLASS_NAME, "gLFyf")
        search_bar_entry.send_keys(search_word, Keys.ENTER)

        # Locate the image elements and extract the image URLs,
        # dropping thumbnails that have no `src` yet.
        image_elements = driver.find_elements(By.CSS_SELECTOR, 'img.rg_i')
        image_urls = [
            src
            for src in (element.get_attribute('src') for element in image_elements)
            if src
        ]
        if not image_urls:
            print(f'No images found for {search_word}')

        # Download the images (at most `images_per_keyword`, fewer if fewer were found)
        for i, image_url in enumerate(image_urls[:images_per_keyword]):
            target_path = os.path.join(output_directory, f'image_{search_word}_{i+1}.jpg')
            try:
                urllib.request.urlretrieve(image_url, target_path)
                print(f'Successfully downloaded {search_word} image {i+1}')
            except Exception as e:
                print(f'Failed to download image {i+1}: {str(e)}')
    finally:
        # Close the browser even if a lookup or download raised
        driver.quit()

Successfully downloaded fruits image 1
Successfully downloaded fruits image 2
Successfully downloaded fruits image 3
Successfully downloaded fruits image 4
Successfully downloaded fruits image 5
Successfully downloaded fruits image 6
Successfully downloaded fruits image 7
Successfully downloaded fruits image 8
Successfully downloaded fruits image 9
Successfully downloaded fruits image 10
Successfully downloaded cars image 1
Successfully downloaded cars image 2
Successfully downloaded cars image 3
Successfully downloaded cars image 4
Successfully downloaded cars image 5
Successfully downloaded cars image 6
Successfully downloaded cars image 7
Successfully downloaded cars image 8
Successfully downloaded cars image 9
Successfully downloaded cars image 10
Successfully downloaded Machine Learning image 1
Successfully downloaded Machine Learning image 2
Successfully downloaded Machine Learning image 3
Successfully downloaded Machine Learning image 4
Successfully downloaded Machine Learning i

In [None]:
## Solution - 8

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Define the URL and fetch the web page content.
# NOTE(review): the recorded output of this cell is an empty DataFrame, i.e. the
# rows searched for below were not in the HTML returned to `requests` —
# presumably the table is rendered client-side; confirm before relying on it.
url = 'https://www.forbes.com/billionaires/'
response = requests.get(url, timeout=30)
if not response.ok:
    print(f'Request to {url} returned HTTP {response.status_code}')
soup = BeautifulSoup(response.text, 'html.parser')

# Find all the billionaire rows in the HTML table and extract relevant data
billionaire_rows = soup.find_all('div', class_='TableRow_row__L-0Km TableRow_activeRow__g4oSF')
billionaire_data = []
for row in billionaire_rows:
    rank_cell = row.find('div', class_='Table_rank___YBhk Table_dataCell__2QCve')

    # The rank cell also carries the generic data-cell class, so it has to be
    # skipped when looking for the name.
    # Assumes the name is the first data cell other than the rank — TODO confirm.
    data_cells = row.find_all('div', class_='Table_dataCell__2QCve')
    name_cell = next((cell for cell in data_cells if cell is not rank_cell), None)

    if rank_cell is None or name_cell is None:
        # Row does not have the expected cells; skip it instead of crashing on `.text`.
        continue

    billionaire_data.append({
        'Rank': rank_cell.text.strip(),
        'Name': name_cell.text.strip(),
    })

# Create a Pandas dataframe from the extracted billionaire data
# (explicit columns keep the schema stable when no row was found)
billionaire_df = pd.DataFrame(billionaire_data, columns=['Rank', 'Name'])
print(billionaire_df)

Empty DataFrame
Columns: []
Index: []


In [None]:
## Solution - 9

"""
YouTube's terms of service and API guidelines prohibit scraping or extracting data from their platform in an automated manner.
"""