In [1]:
# Importing necessary libraries
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Define functions to extract information from the web page

def get_productName(soup):
    """Return the product title found on a parsed product page.

    Looks up the first ``<span class="css-cmh3n9">`` via ``soup.find`` and
    returns its text with surrounding whitespace removed.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style
            ``find(name, class_=...)`` method.

    Returns:
        The stripped title text, or ``""`` when the element cannot be read.
    """
    try:
        return soup.find('span', class_='css-cmh3n9').text.strip()
    except Exception:
        # Best-effort scrape: a missing tag (find() returned None) or any other
        # ordinary error yields the empty placeholder. Catching Exception
        # instead of using a bare except lets KeyboardInterrupt / SystemExit
        # still stop a long-running scrape.
        return ""

def get_OrignalPrice(soup):
    """Return the original (pre-discount) price shown on a product page.

    Reads the first ``<span class="css-5pw8k6">``. When that element cannot be
    read, the value from ``get_DiscountPrice(soup)`` is returned instead, so
    the result may be the discounted price or the string ``"NaN"``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style
            ``find(name, class_=...)`` method.

    Returns:
        The price text with any rupee sign and surrounding whitespace removed,
        or the ``get_DiscountPrice`` fallback value.
    """
    try:
        # Remove the rupee sign here too: the fallback below already returns a
        # value without it, and mixing "₹1299" and "1299" in one column makes
        # the CSV harder to parse. replace() is a no-op if the span never
        # contains the symbol.
        return soup.find('span', class_='css-5pw8k6').text.replace('₹', "").strip()
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C is not swallowed.
        return get_DiscountPrice(soup)

def get_DiscountPrice(soup):
    """Return the discounted price shown on a parsed product page.

    Reads the first ``<span class="css-1byl9fj">``, removes every rupee sign
    and strips surrounding whitespace.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style
            ``find(name, class_=...)`` method.

    Returns:
        The cleaned price text, or the string ``"NaN"`` when the element
        cannot be read.
    """
    try:
        return soup.find('span', class_='css-1byl9fj').text.replace('₹', "").strip()
    except Exception:
        # Missing tag or any ordinary error -> placeholder. Not a bare except,
        # so KeyboardInterrupt / SystemExit still propagate.
        return "NaN"

def get_Rating(soup):
    """Return the rating text shown on a parsed product page.

    Reads the first ``<div class="css-xoezkq">`` and strips surrounding
    whitespace from its text.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style
            ``find(name, class_=...)`` method.

    Returns:
        The stripped text, or the string ``"NaN"`` when the element cannot be
        read.
    """
    try:
        return soup.find('div', class_='css-xoezkq').text.strip()
    except Exception:
        # Missing tag or any ordinary error -> placeholder. Not a bare except,
        # so KeyboardInterrupt / SystemExit still propagate.
        return "NaN"

def get_numRate(soup):
    """Return the text used for the "number of user ratings" column.

    Reads the first ``<div class="css-xoezkq">`` and strips surrounding
    whitespace from its text.

    NOTE(review): this is the same selector ``get_Rating`` uses, so both
    columns currently receive the same text. That looks like a copy-paste
    slip; confirm the class of the rating-count element on the live page.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style
            ``find(name, class_=...)`` method.

    Returns:
        The stripped text (a ``str``), or the integer ``0`` when the element
        cannot be read.
    """
    try:
        return soup.find('div', class_='css-xoezkq').text.strip()
    except Exception:
        # Missing tag or any ordinary error -> 0. Not a bare except, so
        # KeyboardInterrupt / SystemExit still propagate.
        return 0


def get_productDetails(soup):
    """Collect the label/value pairs from a product page's details section.

    Every ``<div class="css-134y3ft">`` that contains exactly two ``<p>``
    elements contributes one entry: the first paragraph's stripped text is the
    key and the second's is the value. Divs with any other number of
    paragraphs are ignored. A later div with the same key overwrites an
    earlier one.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style
            ``find_all(name, class_=...)`` method.

    Returns:
        A dict of the pairs gathered. If an error occurs part-way through, the
        pairs gathered up to that point are returned; the dict is empty when
        nothing could be read.
    """
    # Created outside the try so the return below can never see an unbound name.
    details = {}
    try:
        for block in soup.find_all('div', class_='css-134y3ft'):
            paragraphs = block.find_all('p')
            if len(paragraphs) == 2:
                key = paragraphs[0].text.strip()
                value = paragraphs[1].text.strip()
                details[key] = value
    except Exception:
        # Best-effort: keep what was collected so far. Not a bare except, so
        # KeyboardInterrupt / SystemExit still propagate.
        pass
    return details

def get_vendorDetails(soup):
    """Collect the label/value pairs from a product page's vendor section.

    Every ``<div class="css-1g1kvky">`` for which ``find_all('div')`` returns
    exactly two elements contributes one entry: the first element's stripped
    text is the key and the second's is the value. Other divs are ignored.
    A later div with the same key overwrites an earlier one.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style
            ``find_all(name, class_=...)`` method.

    Returns:
        A dict of the pairs gathered. If an error occurs part-way through, the
        pairs gathered up to that point are returned; the dict is empty when
        nothing could be read.
    """
    # Created outside the try so the return below can never see an unbound name.
    vendor = {}
    try:
        for block in soup.find_all('div', class_='css-1g1kvky'):
            cells = block.find_all('div')
            if len(cells) == 2:
                key = cells[0].text.strip()
                value = cells[1].text.strip()
                vendor[key] = value
    except Exception:
        # Best-effort: keep what was collected so far. Not a bare except, so
        # KeyboardInterrupt / SystemExit still propagate.
        pass
    return vendor


In [3]:
# Site root used to resolve the product hrefs found on listing pages.
BASE_URL = 'https://www.nykaafashion.com/'
# Seconds to wait on a single HTTP request before giving up on it; without a
# timeout requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30
# Output columns in CSV order, grouped by the helper that supplies them.
CORE_COLUMNS = ['Product', 'OrignalPrice', 'DiscountPrice', 'Rating', 'NumUserRating']
DETAIL_COLUMNS = ['Occasion', 'Material', 'Pattern', 'Closure Type', 'Fit',
                  'Sleeve Type', 'Type', 'Rise Style', 'Neckline Type']
VENDOR_COLUMNS = ['Sold By', 'Country of Origin']


def _fetch_soup(url, headers):
    """Download ``url`` and return it parsed with ``html.parser``.

    Raises whatever ``requests`` raises on a network failure or timeout, and
    ``requests.HTTPError`` for a 4xx/5xx response, so error pages are never
    scraped as if they were real content.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Parse, pretty-print, and parse again, as the script has always done:
    # prettify() re-flows the markup, so dropping the second pass could change
    # the whitespace inside the scraped strings.
    first_pass = BeautifulSoup(response.content, 'html.parser')
    return BeautifulSoup(first_pass.prettify(), 'html.parser')


def _build_record(product_soup):
    """Return one CSV row (column name -> value) for a parsed product page."""
    record = {
        'Product': get_productName(product_soup),
        'OrignalPrice': get_OrignalPrice(product_soup),
        'DiscountPrice': get_DiscountPrice(product_soup),
        'Rating': get_Rating(product_soup),
        'NumUserRating': get_numRate(product_soup),
    }
    product_details = get_productDetails(product_soup)
    vendor_details = get_vendorDetails(product_soup)
    # Attributes a product does not list are recorded as 'N/A'.
    for column in DETAIL_COLUMNS:
        record[column] = product_details.get(column, 'N/A')
    for column in VENDOR_COLUMNS:
        record[column] = vendor_details.get(column, 'N/A')
    return record


if __name__ == '__main__':

    # Column name -> list of values, one entry per scraped product.
    file = {column: [] for column in CORE_COLUMNS + DETAIL_COLUMNS + VENDOR_COLUMNS}

    # Headers that mimic a web browser; identical for every request, so they
    # are built once instead of on every page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
        "Accept-Encoding": "gzip, deflate",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "DNT": "1",
        "Connection": "close",
        "Upgrade-Insecure-Requests": "1",
    }

    # Walk listing pages 1..99 of the category.
    for page_number in range(1, 100):
        url = f'https://www.nykaafashion.com/women/westernwear/c/3?p={page_number}'

        try:
            soup = _fetch_soup(url, headers)
            # urljoin copes with hrefs that start with "/" or are already
            # absolute; anchors without an href are skipped instead of raising
            # and discarding the whole page.
            links_list = [
                urljoin(BASE_URL, anchor.get('href'))
                for anchor in soup.find_all("a", attrs={'class': 'css-1t10dtm'})
                if anchor.get('href')
            ]
        except Exception as e:
            print(f'Error: {page_number} , {e}')
            continue

        # Visit each product page; a failure on one product is reported and
        # skipped rather than aborting the rest of the listing page.
        for link in links_list:
            try:
                record = _build_record(_fetch_soup(link, headers))
            except Exception as e:
                print(f'Error: {page_number} , {link} , {e}')
            else:
                # The row is complete before any list is touched, so all
                # column lists always stay the same length.
                for column, value in record.items():
                    file[column].append(value)

            # Sleep for a while to avoid overloading the server
            time.sleep(1)

    # Create a DataFrame from the dictionary and save it to a CSV file.
    # NOTE(review): absolute, machine-specific path; the directory must exist
    # or the write fails after the whole scrape has finished.
    df = pd.DataFrame(data=file)
    df.to_csv(r"C:\Users\Dell\Desktop\Shivani_jupyter\NykaaScrap.csv", sep=',', encoding='UTF8')



93.51881837844849
96.26448631286621
94.05845165252686
101.66101956367493
86.47212600708008
93.08534741401672
90.45357298851013
53.56096410751343
93.72577953338623
91.31123089790344
90.168630361557
99.78310251235962
89.36498856544495
90.72087240219116
88.96973538398743
90.39789462089539
86.35625910758972
85.57643866539001
93.19184136390686
90.22363948822021
89.64612793922424
90.78181409835815
45.65208601951599
91.84605932235718
89.38432598114014
98.43163704872131
85.32168412208557
97.82492065429688
90.81940484046936
99.35384106636047
95.40117692947388
89.05250144004822
98.96961331367493
101.50494480133057
105.36716651916504
100.04395270347595
93.02995896339417
52.0618531703949
93.54885077476501
87.81505846977234
77.8949019908905
89.76693749427795
88.76095199584961
83.64707064628601
94.11026668548584
97.0020215511322
88.37360000610352
105.24247670173645
90.69108867645264
84.0130398273468
95.51460242271423
93.39480328559875
50.90128207206726
99.33986449241638
93.00239491462708
94.45858979