# Scraping Data from Zameen.com

## Step 1: Importing required Libraries.

In [4]:

import time  # Pauses between requests so the site is not hit too quickly
from urllib.parse import urljoin  # Resolves the "Next" link against the current page URL

import pandas as pd  # Used at the end to build and export the DataFrame
import requests  # Sends the HTTP requests that fetch each web page
from bs4 import BeautifulSoup  # Parses the HTML content of the web page

# Step 2: Initial URL to scrape

In [5]:
# First results page of the crawl; later pages are reached by following the "Next" link.
current_url = "https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-1.html"

# Sent with every request so that it carries a browser-like User-Agent header.
headers = {
    "User-Agent": "Mozilla/5.0",
}


# Step 3: Create a variable that receives the scraped data

In [6]:
# Accumulator for the scraped listings: one dict per listing is appended to this list.
data = list()

# Step 4: The big and tricky step

### Step 4 is explained below with comments for your ease

In [7]:
# Crawl the paginated results (pagination = one listing split across several pages
# that share the same HTML structure and class names).
# Starting from `current_url`, each iteration fetches the page, appends one dict per
# listing card to `data`, and then moves `current_url` to the "Next" link.
# The loop ends on the last page, on a repeated URL, or on the first error.

REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get() can wait forever


def _tag_text(tag):
    """Return the whitespace-stripped text of a BeautifulSoup tag, or None if `tag` is None."""
    return tag.text.strip() if tag is not None else None


def _parse_listing(card):
    """Turn one listing card into the dict of fields stored in `data`.

    Any field whose element is missing from the card is returned as None.
    """
    added_time = _tag_text(card.find("span", class_="a018d4bd"))
    if added_time is not None:
        added_time = added_time.replace("Added: ", "")

    # Bedrooms, bathrooms and area all use the same tag and class;
    # they can only be told apart by their aria-label, so filter on that.
    bedrooms = bathrooms = area = None
    for feature in card.find_all("span", class_="_6d9b9b83"):
        label = feature.get("aria-label", "").lower()
        if label == "beds":
            bedrooms = feature.text.strip()
        elif label == "baths":
            bathrooms = feature.text.strip()
        elif label == "area":
            # The area value sits in a nested <span>; leave it as None if that is absent.
            area = _tag_text(feature.find("span"))

    # A list of dicts converts directly to a DataFrame later on.
    return {
        "Price": _tag_text(card.find("span", class_="dc381b54")),
        "Address": _tag_text(card.find("div", class_="db1aca2f")),
        "Bedrooms": bedrooms,
        "Bathrooms": bathrooms,
        "Area": area,
        "Heading": _tag_text(card.find("h2", class_="_36dfb99f")),
        "Added Time": added_time,
    }


visited_urls = set()  # guards against a "Next" link that loops back to an earlier page

while True:
    # On the first iteration this is the URL assigned in Step 2.
    print(f"scraping: {current_url}")
    visited_urls.add(current_url)

    # Anything can go wrong while fetching many pages, so stop cleanly on the first
    # error and keep whatever has already been collected in `data`.
    try:
        r = requests.get(current_url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error page as "no listings".
        r.raise_for_status()
        # "lxml" can be used in place of "html.parser" for faster parsing.
        soup = BeautifulSoup(r.text, "html.parser")

        # Step 1: Get all listing cards on the page (these class names may change over time).
        listings = soup.find_all("div", class_="d3b6a76b _43afd188")

        # Step 2: Extract one record per card.
        data.extend(_parse_listing(card) for card in listings)

        # Step 3: Find the URL behind the "Next" button and make it the current URL.
        next_btn = soup.find("a", {"title": "Next"})
        next_href = next_btn.get("href") if next_btn is not None else None
        if not next_href:
            print("\nReached last page.")
            break

        # urljoin handles site-relative hrefs ("/Homes/...") as well as absolute ones.
        next_url = urljoin(current_url, next_href)
        if next_url in visited_urls:
            print("\nNext link points to an already scraped page; stopping.")
            break
        current_url = next_url

        time.sleep(1)  # short pause before requesting the next page

    except Exception as e:
        print("Error:", e)
        break

scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-1.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-2.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-3.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-4.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-5.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-6.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-7.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-8.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-9.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-10.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-11.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-12.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town-383-13.html
scraping: https://www.zameen.com/Homes/Islamabad_Bahria_Town

# Step 4: Convert the list of dicts to a DataFrame

In [8]:
# Every record in `data` is a dict (key: value), so a single line of code
# is enough to turn the whole list into a DataFrame.
df = pd.DataFrame(data=data)
print(df)

           Price                                     Address Bedrooms  \
0      4.6 Crore  Bahria Enclave - Sector C3, Bahria Enclave        5   
1      2.7 Crore                 Bahria Enclave, Bahria Town        3   
2      1.6 Crore                 Bahria Enclave, Bahria Town        2   
3     2.95 Crore                 Bahria Enclave, Bahria Town        3   
4     1.55 Crore                 Bahria Enclave, Bahria Town        2   
...          ...                                         ...      ...   
997   1.85 Crore   Bahria Enclave - Sector I, Bahria Enclave     None   
998   4.15 Crore   Bahria Enclave - Sector M, Bahria Enclave        5   
999    2.1 Crore   Bahria Enclave - Sector N, Bahria Enclave        3   
1000   1.4 Crore   Bahria Enclave - Sector N, Bahria Enclave        3   
1001     5 Crore   Bahria Enclave - Sector N, Bahria Enclave        6   

     Bathrooms        Area                                            Heading  \
0            6    10 Marla  Sector C3 10 M

# Step 5: Save the file

In [None]:
# Save the DataFrame to a CSV file (index=False leaves the row index out of the file).
df.to_csv("zameen_bahria_town.csv", index=False)

# Also write the same DataFrame to an Excel file. Both files are written; this is
# not an either/or, and the Excel file is built from `df`, not from the CSV.
# NOTE(review): openpyxl is not referenced by name below; presumably it is imported
# so that a missing Excel engine fails here with an ImportError. Confirm before removing.
import openpyxl

df.to_excel("zameen_bahria_town.xlsx", index=False)
