## Web Scraping Real Estate



---
We will scrape real estate data from **Makaan.com** for the
properties listed for sale in Hyderabad. We will scrape the first 500 pages of the listings.

Libraries used:
 * Pandas
 * Requests
 * BeautifulSoup
 * Time
 * Random

In [0]:
#import pandas, requests, beautifulsoup, time and random

import pandas as pd 
import requests
from bs4 import BeautifulSoup
from time import time   
from time import sleep 
from IPython.core.display import clear_output
from random import randint

In [2]:
#Column accumulators: one entry per scraped listing. A listing is appended to
#all six lists together (see the inner loop) so they always have equal length.
Agent = []                    #Name of the agent
BHK_Apartment = []            #Number of BHK
Price_in_lakhs = []           #Price of the apartment in lakhs
Per_square_ft_price = []      #Price per square feet
Apartment_size = []           #Size of the apartment
Status = []                   #Status of the apartment

#bookkeeping for anything that had to be skipped during the crawl
failed_pages = []             #page numbers whose download failed
skipped_listings = 0          #listings with seller info but a missing field


#monitoring the loop
start_time = time()
request = 0

#crawl through first 500 pages
for pages in range(1,501):
  try:
    #the timeout keeps a single stalled connection from hanging the whole crawl
    response = requests.get('https://www.makaan.com/hyderabad-residential-property/buy-property-in-hyderabad-city?budget=,&page=' + str(pages),
                            timeout = 30)
    #treat HTTP error responses (4xx/5xx) as failures instead of parsing them
    response.raise_for_status()
  except requests.exceptions.RequestException:
    response = None
    failed_pages.append(pages)

  #pause and wait for 3-6 sec (also after a failed request)
  sleep(randint(3,6))

  #Monitor the request
  request += 1
  elapsed_time = time() - start_time
  print('Request:{}; Frequency: {} requests_per_sec'.format(request, request/elapsed_time))
  clear_output(wait = True)

  #nothing to parse for this page; move on to the next one
  if response is None:
    continue

  #pass html parser and find all the 'li' tags
  soup = BeautifulSoup(response.text,'html.parser')
  property_list = soup.find_all('li',class_='cardholder')

  #iterate through all the list
  for lst in property_list:

    #only properties that have seller info are recorded
    if lst.find('div', class_ = 'seller-info') is None:
      continue

    #Extract every field BEFORE appending anything: a missing tag makes
    #find()/attribute navigation yield None, and the following attribute
    #access raises AttributeError. Skipping the listing as a whole keeps the
    #six lists aligned instead of leaving a half-appended row.
    try:
      #name of the seller
      name = lst.a.text

      #number of bhk
      apartment = lst.find('div',class_='title-line').span.text

      #price of the apartment
      #NOTE(review): only the number is read here, not its unit. The sample
      #rows shown with head(10) further down (e.g. 3.12 for 4600 sq ft at
      #6,800 / sq ft vs 84.04 for 1827 sq ft at 4,600 / sq ft) suggest crore
      #and lakh values end up mixed in this column - confirm against the page
      #markup and capture the unit if so.
      price = lst.tbody.div.span.text

      #price per square feet
      sqt_price = lst.find('td',class_='lbl rate').text

      #size of the apartment
      a_size = lst.find('td',class_ = 'size').span.text

      #status of the apartment
      stat = lst.find('td',class_ = 'val').text
    except AttributeError:
      skipped_listings += 1
      continue

    #all fields found: append one value to each list
    Agent.append(name)
    BHK_Apartment.append(apartment)
    Price_in_lakhs.append(price)
    Per_square_ft_price.append(sqt_price)
    Apartment_size.append(a_size)
    Status.append(stat)

#report anything that was skipped so that gaps in the data are visible
if failed_pages or skipped_listings:
  print('Requests made: {}; failed pages: {}; listings skipped for missing fields: {}'.format(
      request, failed_pages, skipped_listings))


Request:500; Frequency: 0.16616523715358814 requests_per_sec


In [5]:
#Build one DataFrame from the scraped lists, one column per list:
  #Agent -----------------> Name of the agent
  #BHK_Apartment ---------> Number of BHK
  #Price_in_lakhs --------> Price of the apartment in lakhs
  #Per_Square_ft_price ---> Price per square feet
  #Aprtment_Size ---------> Size of the apartment (key spelling kept as-is)
  #Status ----------------> Status of the apartment (New,Re-sale,Ready to move etc)

Hyderabad_apartments = pd.DataFrame(
    data = {
        'Agent' : Agent,
        'BHK_Apartment' : BHK_Apartment,
        'Price_in_lakhs' : Price_in_lakhs,
        'Per_Square_ft_price' : Per_square_ft_price,
        'Aprtment_Size' : Apartment_size,
        'Status' : Status,
    }
)

#summary of columns, non-null counts and dtypes
Hyderabad_apartments.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Agent                10000 non-null  object
 1   BHK_Apartment        10000 non-null  object
 2   Price_in_lakhs       10000 non-null  object
 3   Per_Square_ft_price  10000 non-null  object
 4   Aprtment_Size        10000 non-null  object
 5   Status               10000 non-null  object
dtypes: object(6)
memory usage: 468.9+ KB


In [6]:
#Preview the first ten scraped listings
Hyderabad_apartments.head(n = 10)

Unnamed: 0,Agent,BHK_Apartment,Price_in_lakhs,Per_Square_ft_price,Aprtment_Size,Status
0,CMG Builders,4,3.12,"6,800 / sq ft",4600,Under Construction
1,Giridhari Homes,3,84.04,"4,600 / sq ft",1827,Under Construction
2,SS Green Projects,3,1.65,"6,899 / sq ft",2405,Ready to move
3,Accurate Developers,3,1.03,"6,290 / sq ft",1645,Under Construction
4,Nikhila Constructions And Developers,3,59.99,"4,002 / sq ft",1500,Under Construction
5,Jd Real Estate,Residential Plot,58.46,"3,400 / sq ft",1720,New
6,Om Sree Builders Developers,4,1.93,"5,100 / sq ft",3795,Under Construction
7,Vishal Projects Limited,4,2.6,"6,133 / sq ft",4252,Under Construction
8,Maruthi Developers,2,62.98,"4,700 / sq ft",1340,Ready to move
9,Risinia Builders,2,56.98,"4,400 / sq ft",1295,Under Construction


In [0]:
#download the data in local system
#Writes the DataFrame to 'Hyderabad_Apartments.csv' in the working directory.
#NOTE(review): to_csv is called without index=False, so the row index is
#presumably written as an extra leading column - confirm this is wanted.
Hyderabad_apartments.to_csv('Hyderabad_Apartments.csv')
#Shell command (IPython '!' syntax) copying the CSV into the 'data_s' folder.
#NOTE(review): the target path looks like a mounted Google Drive in Colab -
#presumably the drive must be mounted before this line runs; verify.
!cp Hyderabad_Apartments.csv '/content/drive/My Drive/data_s'