# Fetch data from a website (1mg Homeopathy) using Selenium and BeautifulSoup

## Table1: medicine_name
`name` - Name of the medicine

`size_of_the_bottle` - Size of the medicine bottle or pack

`MRP_of_the_bottle`  - MRP of the bottle

`price_of_the_bottle`  - Selling price of the bottle

`1mg_url` - 1mg URL where the medicine is sold

###  >>>> All the data has to be fetched from the website and then stored into a CSV file

In [15]:
import csv
import time
import re
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By



# Scrape the 1mg homeopathy category page by page and store every product's
# name, pack size, MRP, selling price and URL in 'all_medicines_data.csv'.

# Set up the WebDriver
driver = webdriver.Chrome()


# Create a list to store all the data
# (one [name, size, MRP, sale price, URL] row per product)
all_medicines_data = []


total_number_of_pages = 250


# Pack-size pattern such as "30 ml", "25 gm" or "100 tablets".
# Compiled once here instead of once per product inside the loops.
volume_pattern = re.compile(r'\b(\d+\s*(?:ml|gm|tablets))\b', re.IGNORECASE)


# Everything that uses the browser runs inside try/finally so that the
# Chrome process is always shut down, even when a page fails to load or parse.
try:
    # Open the first page so the city-selection popup can be dismissed
    homeopathy_url = 'https://www.1mg.com/categories/homeopathy-57?filter=true&pageNumber=1'
    driver.get(url=homeopathy_url)

    # Remove dropdown.
    # find_elements returns an empty list (instead of raising) when the
    # popup is not shown, so a missing popup no longer aborts the run.
    close_popup_buttons = driver.find_elements(By.CLASS_NAME, 'UpdateCityModal__cancel-btn___2jWwS')
    if close_popup_buttons:
        close_popup_buttons[0].click()
        time.sleep(2)

    # Iterate through the pages
    for page_number in range(1, total_number_of_pages + 1):
        homeopathy_url = f'https://www.1mg.com/categories/homeopathy-57?filter=true&pageNumber={page_number}'
        driver.get(url=homeopathy_url)

        # Wait for some time to ensure the page is fully loaded
        time.sleep(2)

        # Making soup
        soup = BeautifulSoup(driver.page_source, "html.parser")

        blocks = soup.find_all("div", {"class": "style__product-box___liepi"})

        # Extracting the Name, Size, MRP, Sale price, URL's.
        # Every field falls back to 'NA' when its element is missing, so one
        # incomplete product card cannot crash the whole scrape.
        for block in blocks:

            name_of_medicine_element = block.find('div', {'class': 'style__pro-title___2QwJy'})
            name_of_medicine = name_of_medicine_element.text.strip() if name_of_medicine_element else 'NA'

            size_element = block.find('div', {'class': 'style__pack-size___2JQG7'})
            matches = volume_pattern.findall(size_element.text) if size_element else []
            # When several sizes match, keep the last one (same choice as before)
            size = matches[-1] if matches else "NA"

            MRP_element = block.find("span", {"class": "style__discount-price___25Bya"})
            MRP = MRP_element.text.strip() if MRP_element else 'NA'

            sale_price_element = block.find("div", {"class": "style__price-tag___cOxYc"})
            sale_price = sale_price_element.text.strip() if sale_price_element else 'NA'

            url_element = block.find('a', {'class': 'style__product-link___UB_67'})
            href = url_element.get("href") if url_element else None
            url = f'https://www.1mg.com{href}' if href else 'NA'

            # Store data in the list
            all_medicines_data.append([name_of_medicine, size, MRP, sale_price, url])
finally:
    # Close the WebDriver
    driver.quit()


# Create a CSV file and store all the data
csv_file_path = 'all_medicines_data.csv'

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:

    # Create a CSV writer
    csv_writer = csv.writer(csvfile)

    # Write header row
    csv_writer.writerow(['Name', 'Size_of_the_bottle', 'MRP_of_the_bottle', 'Price_of_the_bottle', '1Mg_URL'])

    # Write all the data to the CSV file
    csv_writer.writerows(all_medicines_data)
    



### >>>> Depending on how much data is needed, we extract the required number of rows from the CSV file and store them in a new CSV file.

In [4]:
import csv

# Copy the first N medicines (N chosen by the user) from the full CSV file
# into a new CSV file named after the number of rows actually written.

# Specify the path to your CSV file
csv_file_path = 'all_medicines_data.csv'

# Read the file once: the header row first, then every data row.
# newline='' lets the csv module handle line endings itself.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csvfile:
    csv_reader = csv.reader(csvfile)

    # None when the file is completely empty
    header = next(csv_reader, None)
    rows = list(csv_reader)

# Count the rows (header excluded)
number_of_rows = len(rows)


# Get user input for the desired number of medicines
desired_medicines = int(input(f"How many medicines' details do you want (max {number_of_rows})? "))

# Clamp the request to what is available: asking for more rows than exist
# used to crash with StopIteration halfway through writing the output file,
# and a negative number produced a misleadingly named file.
desired_medicines = max(0, min(desired_medicines, number_of_rows))


# Create a new CSV file with the desired amount of data
new_csv_file_path = f'medicine_name_data_{desired_medicines}.csv'

with open(new_csv_file_path, 'w', newline='', encoding='utf-8') as output_csvfile:

    # Create CSV writer
    csv_writer = csv.writer(output_csvfile)

    # Write header row (skipped only when the input file had none)
    if header is not None:
        csv_writer.writerow(header)

    # Write the desired amount of data to the new CSV file
    csv_writer.writerows(rows[:desired_medicines])


# Print a message indicating the process is complete
print(f"Data has been read from '{csv_file_path}' and written to '{new_csv_file_path}'.")
    

How many medicines' details do you want (max 13446)? 1200
Data has been read from 'all_medicines_data.csv' and written to 'medicine_name_data_1200.csv'.
