# Scraping Office Space Data from MagicBricks.com

In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
import json

In [10]:
# Output columns, in the order they appear in the returned DataFrame.
_MB_COLUMNS = ['Location', 'Building', 'Area(sqft)', 'Cabins', 'Seats',
               'Lockin', 'Price', 'Price/Seat', 'Price/Sqft']

# Tokens stripped (in this order) from the area text before it is parsed as an int.
_MB_AREA_TOKENS = ('sqft', ',', 'sqyrd', 'sqm', 'acre', 'sqkm', 'sqmi', 'sqin')


def _mb_price_per_seat(price, seats):
    """Return ``int(price) / int(seats)``, or ``"NA"`` if either value is missing or unusable."""
    if price == "NA" or seats == "NA":
        return "NA"
    try:
        return int(price) / int(seats)
    except (TypeError, ValueError, ZeroDivisionError, OverflowError):
        return "NA"


def _mb_listing_rows(payload):
    """Return the listing dicts found under ``payload['resultList']`` (at most 30)."""
    listings = payload.get('resultList') if isinstance(payload, dict) else None
    if not isinstance(listings, list):
        return []
    return [item for item in listings[:30] if isinstance(item, dict)]


def _mb_scrape_card(driver, card, card_soup, city):
    """Build one output row from a listing card on the rendered first page.

    ``card`` is the Selenium element and ``card_soup`` the BeautifulSoup node
    for the same card. Pricing is best-effort: any failure while reading the
    price breakdown leaves the three price fields as ``"NA"``.
    """
    # Imported here so the block does not depend on a new file-level import.
    from selenium.common.exceptions import WebDriverException

    title = card.find_element(By.CLASS_NAME, 'mb-srp__card--title').get_attribute("innerText")
    location = title.replace('Office Space for Rent in', '').replace(f'{city}', '').strip()

    carpet = card.find_element(By.CLASS_NAME, 'mb-srp__card__summary--value').get_attribute("innerText")
    carpet = carpet.replace('sqft', '').strip()

    # First value is the cabin count, second the seat count; tolerate a card
    # that shows fewer values instead of failing on the index.
    commercial = card.find_elements(By.CLASS_NAME, 'mb-srp__card__summary-commercial--value')
    cabins = commercial[0].get_attribute("innerText") if commercial else "NA"
    if len(commercial) > 1:
        seats = commercial[1].get_attribute("innerText").replace(',', '')
    else:
        seats = "NA"

    price = per_seat = per_sqft = "NA"
    if not card_soup.find('div', class_='mb-srp__card__price--other-charges'):
        price = "Please Call us"
    else:
        charges = card.find_element(By.CLASS_NAME, 'mb-srp__card__price--other-charges')
        # Build a fresh chain per gesture so no earlier queued actions can be replayed.
        ActionChains(driver).move_to_element(charges).click().perform()
        time.sleep(1)  # give the price breakdown tooltip time to render
        try:
            total = charges.find_element(By.CLASS_NAME, 'row-total').get_attribute("innerText")
            total = total.split('₹')[1].replace(',', '')

            close_button = charges.find_element(By.CLASS_NAME, 'mb-srp__card__tooltip--close')
            ActionChains(driver).move_to_element(close_button).click().perform()

            area_digits = carpet
            for token in _MB_AREA_TOKENS:
                area_digits = area_digits.replace(token, '')
            area_digits = area_digits.strip()

            seat_rate = int(total) / int(seats)
            sqft_rate = int(total) / int(area_digits)
        except (WebDriverException, AttributeError, IndexError,
                ValueError, ZeroDivisionError):
            pass  # leave all three price fields as "NA"
        else:
            price, per_seat, per_sqft = total, seat_rate, sqft_rate

    return {
        'Location': location,
        'Building': "NA",
        'Area(sqft)': carpet,
        'Cabins': cabins,
        'Seats': seats,
        'Lockin': "NA",
        'Price': price,
        'Price/Seat': per_seat,
        'Price/Sqft': per_sqft,
    }


def scrape_magicbricks(city,city_code):
    """Scrape office-space rental listings for one city from MagicBricks.

    The first results page is read from the rendered HTML in a Chrome
    session; further pages are read from the site's JSON search endpoint.

    Args:
        city: City name, inserted into the first-page URL and stripped from
            the listing titles.
        city_code: City identifier inserted into the JSON endpoint URL.

    Returns:
        pandas.DataFrame with the columns Location, Building, Area(sqft),
        Cabins, Seats, Lockin, Price, Price/Seat and Price/Sqft. Values that
        could not be read are the string "NA".

    Raises:
        ValueError: if the result count shown on the page is not an integer.
        Selenium errors from element lookups outside the best-effort pricing
        section propagate to the caller; the browser is closed either way.
    """
    driver = webdriver.Chrome()
    try:
        url = f'https://www.magicbricks.com/property-for-rent/commercial-real-estate?bedroom=&proptype=Commercial-Office-Space,Office-ITPark-SEZ&cityName={city}'
        driver.get(url)
        time.sleep(5)  # let the listing cards render before reading the page

        results=driver.find_element(By.CLASS_NAME, 'mb-srp__title--text1').get_attribute("innerText").replace('results |','')
        print(results)
        # Accept counts written with thousands separators, e.g. "1,234".
        total_results = int(results.replace(',', ''))

        rows = []

        soup = BeautifulSoup(driver.page_source, 'lxml')
        cards = driver.find_elements(By.CLASS_NAME, 'mb-srp__list')[:30]
        card_soups = soup.find_all('div', class_='mb-srp__list')[:30]
        for card, card_soup in zip(cards, card_soups):
            # Only cards that carry the commercial summary (cabins/seats) are kept.
            if card_soup.find('div', class_='mb-srp__card__summary-commercial--column'):
                rows.append(_mb_scrape_card(driver, card, card_soup, city))

        # NOTE(review): assuming 30 listings per page, a count that is an exact
        # multiple of 30 requests one page past the end; an empty result list
        # then simply contributes no rows.
        for i in range(total_results//30):
            driver.get(f"https://www.magicbricks.com/mbsrp/propertySearch.html?editSearch=Y&category=R&propertyType=10007,10018&city={city_code}&page={i+2}&groupstart={30*i}&offset=0&maxOffset=270&sortBy=premiumRecent&postedSince=-1&pType=10007,10018&isNRI=N&multiLang=en")
            listing_tag = driver.find_element(By.TAG_NAME,'pre').get_attribute("innerText")
            listing_info = json.loads(listing_tag)

            # Iterate the listings actually returned so short or empty pages
            # do not add rows made entirely of "NA".
            for listing in _mb_listing_rows(listing_info):
                price = listing.get('price', "NA")
                seats = listing.get('totalSeat', "NA")
                rows.append({
                    'Location': listing.get('lmtDName', "NA"),
                    'Building': listing.get('buildingName', "NA"),
                    'Area(sqft)': listing.get('caSqFt', "NA"),
                    'Cabins': listing.get('pmtTCabin', "NA"),
                    'Seats': seats,
                    'Lockin': listing.get('pmtLockPeriod', "NA"),
                    'Price': price,
                    # Computed from this listing's own values only.
                    'Price/Seat': _mb_price_per_seat(price, seats),
                    'Price/Sqft': listing.get('sqFtPrice', "NA"),
                })
    finally:
        # Always release the browser, including when scraping fails midway.
        driver.quit()

    return pd.DataFrame(rows, columns=_MB_COLUMNS)
    

def scrape_magicbricks_Bangalore_To_Excel(writer, df):
    """Write ``df`` through ``writer`` onto the sheet 'Magicbricks Bangalore'."""
    sheet_title = 'Magicbricks Bangalore'
    df.to_excel(writer, sheet_name=sheet_title)
def scrape_magicbricks_Hyderabad_To_Excel(writer, df):
    """Write ``df`` through ``writer`` onto the sheet 'Magicbricks Hyderabad'."""
    sheet_title = 'Magicbricks Hyderabad'
    df.to_excel(writer, sheet_name=sheet_title)





In [11]:
# City identifiers passed to scrape_magicbricks alongside the city name.
city_code_bangalore=3327
city_code_Hyderabad=2060
df1=scrape_magicbricks("Bangalore",city_code_bangalore)
# The city name must match the city code being scraped.
df2=scrape_magicbricks("Hyderabad",city_code_Hyderabad)

# Write both sheets through a single writer: reopening the same path in the
# default write mode would replace the workbook and drop the first sheet.
with pd.ExcelWriter('Office_Spaces.xlsx') as writer:
    scrape_magicbricks_Bangalore_To_Excel(writer,df1)
    scrape_magicbricks_Hyderabad_To_Excel(writer,df2)
    
