# Importing Libraries & Getting The Raw Data

In [132]:
#Single Page
import pandas as pd  # Import the pandas library for data manipulation and analysis. # So, that we can convert this project to csv
import requests  # Import the requests library for sending HTTP requests. # requests is used to get the url data , which is present in the website.
from bs4 import BeautifulSoup  # Import BeautifulSoup from bs4 for parsing HTML and XML documents. # So, that we can scrap our data
import csv  # Import the csv module for handling CSV files
from pprint import pprint  # Import pprint for pretty-printing data structures

# Browser-like User-Agent header, sent so the site is less likely to reject the
# request as coming from a script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
}

# Flipkart search-results page for mobiles; this is the page that gets scraped.
url = "https://www.flipkart.com/mobiles-accessories/mobiles/pr?sid=tyy,4io&q=mobile&otracker=categorytree"

# Send the GET request. requests applies no timeout by default, so without one
# this cell could hang forever if the server never answers.
page = requests.get(url, headers=headers, timeout=30)

# Print the response object so the HTTP status code is visible in the output.
print(page)

# Stop here on a 4xx/5xx answer instead of letting the following cells silently
# scrape an error page and produce an empty result.
page.raise_for_status()

# Raw body of the response, as bytes.
fd = page.content

# Pretty-print only the first 500 bytes; the full page is far too long to read.
pprint(fd[:500])

<Response [200]>
(b'<!doctype html><html lang="en"><head><link href="https://rukminim2.flixcart.'
 b'com" rel="preconnect"/><link rel="stylesheet" href="//static-assets-web.flix'
 b'cart.com/fk-p-linchpin-web/fk-cp-zion/css/atlas.chunk.f9cc90.css"/><link rel'
 b'="stylesheet" href="//static-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp'
 b'-zion/css/app_modules.chunk.c48a12.css"/><link rel="stylesheet" href="//stat'
 b'ic-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp-zion/css/app.chunk.f32dbb'
 b'.css"/><meta http-equiv="Content-type" conte')


# Parsing the Raw Data into an HTML Tree

In [135]:
# Turn the raw response bytes into a navigable parse tree, using Python's
# built-in HTML parser.
soup = BeautifulSoup(markup=fd, features='html.parser')

# Render the tree back to text so that a short prefix of it can be inspected.
# (pprint(soup) would show the complete document, which is too large to read.)
soup_str = str(soup)

# Show just the leading 500 characters of the parsed document.
pprint(soup_str[0:500])

('<!DOCTYPE html>\n'
 '<html lang="en"><head><link href="https://rukminim2.flixcart.com" '
 'rel="preconnect"/><link '
 'href="//static-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp-zion/css/atlas.chunk.f9cc90.css" '
 'rel="stylesheet"/><link '
 'href="//static-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp-zion/css/app_modules.chunk.c48a12.css" '
 'rel="stylesheet"/><link '
 'href="//static-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp-zion/css/app.chunk.f32dbb.css" '
 'rel="stylesheet"/><meta content="text/html; charset=ut')


# Getting the HTML Tags

In [138]:
# Collect every <div> in the parsed page that carries the CSS class 'KzDlHZ'.
# Any other tag name or attribute filter could be used in the same way.
content = soup.find_all(name='div', attrs={'class': 'KzDlHZ'})

# Show everything that matched.
print(content)


[<div class="KzDlHZ">Apple iPhone 15 (Black, 128 GB)</div>, <div class="KzDlHZ">Apple iPhone 15 (Black, 256 GB)</div>, <div class="KzDlHZ">Apple iPhone 15 (Blue, 128 GB)</div>, <div class="KzDlHZ">Apple iPhone 15 (Pink, 128 GB)</div>, <div class="KzDlHZ">Motorola g04s (Concord Black, 64 GB)</div>, <div class="KzDlHZ">Motorola g64 5G (Mint Green, 128 GB)</div>, <div class="KzDlHZ">Apple iPhone 14 Plus (Blue, 128 GB)</div>, <div class="KzDlHZ">POCO M6 5G (Galactic Black, 128 GB)</div>, <div class="KzDlHZ">REDMI 13C (Starfrost White, 128 GB)</div>, <div class="KzDlHZ">Apple iPhone 15 (Blue, 256 GB)</div>, <div class="KzDlHZ">Apple iPhone 15 Plus (Blue, 128 GB)</div>, <div class="KzDlHZ">Apple iPhone 15 (Green, 128 GB)</div>, <div class="KzDlHZ">Motorola Edge 50 Fusion (Marshmallow Blue, 128 GB)</div>, <div class="KzDlHZ">Apple iPhone 14 Plus (Midnight, 128 GB)</div>, <div class="KzDlHZ">REDMI 13C (Stardust Black, 128 GB)</div>, <div class="KzDlHZ">Motorola G34 5G (Charcoal Black, 128 GB)<

# Collecting the Items into a List of Dictionaries

In [141]:

# CSS classes of the <div> elements this cell reads from the results page.
NAME_CLASS = 'KzDlHZ'            # product name
PRICE_CLASS = 'Nx9bqj _4b5DiR'   # price
RATING_CLASS = 'XQDdHH'          # rating


def _card_field_text(name_tag, css_class, default='N/A'):
    """Return the text of the ``div`` with ``css_class`` that belongs to ``name_tag``.

    ``find_next`` searches the whole remainder of the document, so for a product
    that lacks the field it would return the value of a *later* product. The
    match is therefore accepted only when the closest preceding name div is
    ``name_tag`` itself.

    Assumes the name div comes before the price and rating divs of the same
    product, which the forward search already relies on.

    Args:
        name_tag: The product-name ``div`` to start searching from.
        css_class: Value for the ``class_`` filter of the wanted ``div``.
        default: Text returned when this product has no such ``div``.

    Returns:
        The stripped text of the matching ``div``, or ``default``.
    """
    candidate = name_tag.find_next('div', class_=css_class)
    if candidate is None:
        return default
    # Another name div between name_tag and the match means the match belongs
    # to a different product.
    if candidate.find_previous('div', class_=NAME_CLASS) is not name_tag:
        return default
    return candidate.get_text(strip=True)


# Re-derive the name elements from the raw bytes so this cell only depends on `fd`.
soup = BeautifulSoup(fd, 'html.parser')
content = soup.find_all('div', class_=NAME_CLASS)

itemlist = []  # One {'Name', 'Price', 'Rating'} dictionary per product

for item in content:
    item_name = item.get_text(strip=True)
    item_price_text = _card_field_text(item, PRICE_CLASS)
    item_rate_text = _card_field_text(item, RATING_CLASS)

    # Price and rating always carry at least the 'N/A' placeholder, so an empty
    # name is the only thing that can make a record unusable.
    if not item_name:
        print(f"Element not found: Name: {item_name}, Price: {item_price_text}, Rate: {item_rate_text}")
        continue

    # Products without a price or rating are kept, with 'N/A' in that field.
    item_dict = {'Name': item_name, 'Price': item_price_text, 'Rating': item_rate_text}
    itemlist.append(item_dict)
    print(item_dict)


{'Name': 'Apple iPhone 15 (Black, 128 GB)', 'Price': '₹65,499', 'Rating': '4.6'}
{'Name': 'Apple iPhone 15 (Black, 256 GB)', 'Price': '₹75,499', 'Rating': '4.6'}
{'Name': 'Apple iPhone 15 (Blue, 128 GB)', 'Price': '₹65,499', 'Rating': '4.6'}
{'Name': 'Apple iPhone 15 (Pink, 128 GB)', 'Price': '₹65,499', 'Rating': '4.6'}
{'Name': 'Motorola g04s (Concord Black, 64 GB)', 'Price': '₹6,999', 'Rating': '4.2'}
{'Name': 'Motorola g64 5G (Mint Green, 128 GB)', 'Price': '₹13,999', 'Rating': '4.2'}
{'Name': 'Apple iPhone 14 Plus (Blue, 128 GB)', 'Price': '₹56,999', 'Rating': '4.6'}
{'Name': 'POCO M6 5G (Galactic Black, 128 GB)', 'Price': '₹9,999', 'Rating': '4.3'}
{'Name': 'REDMI 13C (Starfrost White, 128 GB)', 'Price': '₹7,699', 'Rating': '4.2'}
{'Name': 'Apple iPhone 15 (Blue, 256 GB)', 'Price': '₹75,499', 'Rating': '4.6'}
{'Name': 'Apple iPhone 15 Plus (Blue, 128 GB)', 'Price': '₹74,999', 'Rating': '4.6'}
{'Name': 'Apple iPhone 15 (Green, 128 GB)', 'Price': '₹65,499', 'Rating': '4.6'}
{'Name':

# Saving the Data in CSV Format

In [144]:
# Column headers of the CSV file; they match the keys of the dictionaries in itemlist.
fieldname = ['Name', 'Price', 'Rating']

# 'w' creates the file or overwrites an existing one. newline='' leaves line
# endings to the csv module, which avoids blank lines between rows on Windows.
# The with-block flushes and closes the file even if writing raises.
with open('flipkart_data.csv', 'w', encoding='utf-8', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=fieldname)
    writer.writeheader()        # Header row
    writer.writerows(itemlist)  # One row per scraped product

In [146]:
import pandas as pd

In [148]:
# Lift pandas' row limit so the complete table is rendered rather than a
# truncated preview.
pd.options.display.max_rows = None

# Load the scraped records back from the CSV file written earlier.
pd_dataframe = pd.read_csv(filepath_or_buffer="flipkart_data.csv")

# Last expression of the cell: the notebook renders the DataFrame as a table.
pd_dataframe

Unnamed: 0,Name,Price,Rating
0,"Apple iPhone 15 (Black, 128 GB)","₹65,499",4.6
1,"Apple iPhone 15 (Black, 256 GB)","₹75,499",4.6
2,"Apple iPhone 15 (Blue, 128 GB)","₹65,499",4.6
3,"Apple iPhone 15 (Pink, 128 GB)","₹65,499",4.6
4,"Motorola g04s (Concord Black, 64 GB)","₹6,999",4.2
5,"Motorola g64 5G (Mint Green, 128 GB)","₹13,999",4.2
6,"Apple iPhone 14 Plus (Blue, 128 GB)","₹56,999",4.6
7,"POCO M6 5G (Galactic Black, 128 GB)","₹9,999",4.3
8,"REDMI 13C (Starfrost White, 128 GB)","₹7,699",4.2
9,"Apple iPhone 15 (Blue, 256 GB)","₹75,499",4.6
