# Scraping Data from Top Coworking Space Websites

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def scrape_innov8(city):
    """Scrape the innov8 coworking listing page for ``city``.

    Fetches ``https://www.innov8.work/coworking-space/{city}/`` and emits one
    row per product (solution) offered by each listed space.

    Args:
        city: City slug interpolated verbatim into the listing URL.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Name``, ``Location``,
        ``Solutions``, ``Price`` and ``Per``. The frame is empty (but keeps
        those columns) when the page does not answer with HTTP 200.

    Raises:
        AttributeError: if a listing card lacks the name or address element.
        requests.RequestException: on network failure or timeout.
    """
    columns = ['Name', 'Location', 'Solutions', 'Price', 'Per']
    url = f'https://www.innov8.work/coworking-space/{city}/'
    # A timeout keeps an unresponsive server from hanging the scrape forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f'Failed to retrieve data from {url}')
        # Return a well-formed empty frame so callers can still export it.
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.content, 'html.parser')
    records = []
    for space in soup.find_all('div', class_='ListingCard__without-map-container'):
        space_name = space.find('h2', class_='property__name').text.strip()
        address = space.find('div', class_='property__address').text.strip()

        product_names = [
            tag.text
            for tag in space.find_all('div', class_='property__productName')
        ]
        product_prices = [
            tag.text.replace('₹', '')
            for tag in space.find_all('div', class_='property__productPrice')
        ]

        # Pad both lists to a common length (at least one) so every column
        # receives the same number of entries even when the card lists a
        # different number of names and prices, or none at all.
        row_count = max(len(product_names), len(product_prices), 1)
        product_names += ['NA'] * (row_count - len(product_names))
        product_prices += ['NA'] * (row_count - len(product_prices))

        for product, raw_price in zip(product_names, product_prices):
            # Prices look like "<amount>/<unit>"; a bare value such as "NA"
            # has no unit part, so the unit falls back to "NA".
            parts = raw_price.split('/')
            amount = parts[0]
            unit = parts[1] if len(parts) > 1 else 'NA'
            records.append((space_name, address, product, amount, unit))

    return pd.DataFrame(records, columns=columns)

def scrape_wework(city):
    """Scrape the WeWork India listing page for ``city``.

    Fetches ``https://wework.co.in/{city}``, reads name, street and amenities
    from every location card, then follows the card's ``external_link`` anchor
    to read a price from the linked page.

    Args:
        city: City slug interpolated verbatim into the listing URL.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Name``, ``Location``,
        ``Amenities`` (comma-joined) and ``Price/Desk/Month``. The frame is
        empty (but keeps those columns) when the listing page does not answer
        with HTTP 200. The price is ``'NA'`` when a card has no detail link or
        the linked page has no ``span.price`` element.

    Raises:
        AttributeError: if a card lacks the name or street element.
        requests.RequestException: on network failure or timeout.
    """
    columns = ['Name', 'Location', 'Amenities', 'Price/Desk/Month']
    url = f'https://wework.co.in/{city}'
    # A timeout keeps an unresponsive server from hanging the scrape forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f'Failed to retrieve data from {url}')
        # Return a well-formed empty frame so callers can still export it.
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.content, 'html.parser')
    # The full class string is passed as one value, exactly as the page
    # writes it on each location card.
    card_class = (
        'MuiGrid-root MuiGrid-container MuiGrid-item MuiGrid-grid-xs-12 '
        'MuiGrid-grid-md-5.93 MuiGrid-grid-lg-5.93 card_container bau '
        'css-1exchbi'
    )

    records = []
    for space in soup.find_all('div', class_=card_class):
        space_name = space.p.a.text.strip()
        street = space.find('p', class_='street').text.strip()
        amenity_text = ','.join(
            tag.text for tag in space.find_all('p', class_='aminity_name')
        )

        desk_price = 'NA'
        detail_link = space.find('a', class_='external_link')
        # Only follow the link when the card actually carries one; otherwise
        # the price stays 'NA' instead of failing on a missing anchor.
        if detail_link is not None and detail_link.get('href'):
            detail_response = requests.get(detail_link['href'], timeout=30)
            detail_soup = BeautifulSoup(detail_response.content, 'html.parser')
            price_tag = detail_soup.find('span', class_='price')
            if price_tag is not None:
                # Fixed character window of the price text.
                # NOTE(review): presumably skips a leading label before the
                # amount -- confirm against the live markup.
                desk_price = price_tag.text[16:22].strip()

        records.append((space_name, street, amenity_text, desk_price))

    return pd.DataFrame(records, columns=columns)

            
def scrape_91springboard(city):
    """Scrape the 91springboard listing page for ``city``.

    Fetches ``https://www.91springboard.com/{city}/`` and emits one row per
    pricing-table row of every listed hub.

    Args:
        city: City slug interpolated verbatim into the listing URL.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Location``,
        ``Connectivity``, ``Solutions`` and ``Price``. The frame is empty (but
        keeps those columns) when the page does not answer with HTTP 200.
        A hub without any pricing rows contributes a single row whose
        ``Solutions`` and ``Price`` are ``'NA'``.

    Raises:
        AttributeError: if a card lacks the title or connectivity element.
        requests.RequestException: on network failure or timeout.
    """
    columns = ['Location', 'Connectivity', 'Solutions', 'Price']
    url = f'https://www.91springboard.com/{city}/'
    # A timeout keeps an unresponsive server from hanging the scrape forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f'Failed to retrieve data from {url}')
        # Return a well-formed empty frame so callers can still export it.
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.content, 'html.parser')
    records = []
    # Only the listing page is needed: every output column comes from the
    # card itself, so no per-hub detail page is requested.
    for space in soup.find_all('div', class_='col-12 col-md-4 py-3'):
        hub = space.find('h4', class_='card-title').text.strip()
        connectivity = space.find('p', class_='card-text f-16 mb-1').text.strip()

        pricing_table = space.find('table', class_='table mb-1')
        table_rows = pricing_table.find_all('tr') if pricing_table is not None else []
        if not table_rows:
            # Keep the hub visible in the output even without pricing data.
            records.append((hub, connectivity, 'NA', 'NA'))
            continue

        for table_row in table_rows:
            # Inserting ':' before the currency sign makes the label part end
            # with a colon once the text is split on '₹'.
            parts = table_row.text.replace('₹', ':₹').split('₹')
            label = parts[0]
            # Rows without a currency sign carry no price.
            amount = parts[1] if len(parts) > 1 else 'NA'
            records.append((hub, connectivity, label, amount))

    return pd.DataFrame(records, columns=columns)

def scrape_regus(city):
    """Scrape Regus search results for ``city`` with a Chrome WebDriver.

    Opens ``https://www.regus.com/en-gb``, types ``city`` into the search
    box, submits the search, clicks the "load more" button once and reads the
    first solution name and price shown on every result card.

    Args:
        city: Text typed into the site's search input.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Name``, ``Location``,
        ``Solutions`` and ``Price``.

    Raises:
        selenium.common.exceptions.WebDriverException: if a required page
            element (search box, submit button, load-more button, card field)
            cannot be found or interacted with.
    """
    # Imported locally so the function does not rely on a new file-level import.
    from selenium.common.exceptions import WebDriverException

    records = []
    driver = webdriver.Chrome()
    try:
        driver.get("https://www.regus.com/en-gb")
        driver.implicitly_wait(10)

        # Best effort: presumably dismisses a consent dialog -- the scrape
        # continues when the element is absent or cannot be clicked.
        try:
            driver.find_element(By.CLASS_NAME, value='ot-pc-refuse-all-handler').click()
        except WebDriverException:
            print("skipping")

        search_box = driver.find_element(By.ID, value='q')
        search_box.send_keys(city)
        time.sleep(2)
        driver.find_element(By.XPATH,"/html/body/div[1]/section[1]/div/div[2]/div[2]/div/div/div/form/div/div/div/div[2]/button").click()
        # Fixed pause before touching the results page.
        time.sleep(10)
        driver.find_element(By.CLASS_NAME,value='rtl-1gd1xfc-loadMoreButton').click()

        for card in driver.find_elements(By.CLASS_NAME, value='rtl-1qtfy82-cardWrapper'):
            title = card.find_element(By.CLASS_NAME, value='rtl-yuhblz-cardTitle').get_attribute("innerText").strip()
            address = card.find_element(By.CLASS_NAME, value='rtl-o9z7ux-cardAddress').get_attribute("innerText").strip()
            solution_name = card.find_element(By.CLASS_NAME, value='rtl-4pm1ro-solutionItemName').get_attribute("innerText").strip()
            raw_price = card.find_element(By.CLASS_NAME, value='rtl-1v41wj9-solutionItemPrice').get_attribute("innerText")

            # Drop a fixed-width prefix (9 chars) and suffix (4 chars) around
            # the amount.
            amount = raw_price[9:-4].strip()
            # NOTE(review): a lone 'l' is what remains when the card shows a
            # non-numeric price text -- confirm against the live page.
            if amount == 'l':
                amount = 'NA'

            records.append((title, address, solution_name, amount))
    finally:
        # Always shut the browser down, even when a lookup above fails.
        driver.quit()

    return pd.DataFrame(records, columns=['Name', 'Location', 'Solutions', 'Price'])


def scrape_Awfis(city):
    """Scrape Awfis centres for ``city`` with a Chrome WebDriver.

    Opens ``https://www.awfis.com``, navigates to the
    "Coworking space in {city}" listing and collects the centre names whose
    listed cost (the ``h3`` inside the element with id ``6-seater``) is at
    least 3000. Each kept centre is then opened in turn to read its location
    and, for every "Cabins" / "Fixed Desks" link on the page, the displayed
    price and seat options.

    Args:
        city: City name as it appears in the site's
            "Coworking space in <city>" link text.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Name``, ``Location``,
        ``Solutions``, ``Price`` and ``Seats`` (comma-joined), one row per
        centre/solution pair.

    Raises:
        selenium.common.exceptions.WebDriverException: if a required page
            element cannot be found or interacted with.
        ValueError: if a listed cost is not an integer after removing
            ``Rs.`` and thousands separators.
    """
    # Imported locally so the function does not rely on a new file-level import.
    from selenium.common.exceptions import WebDriverException

    city_link_text = f"Coworking space in {city}"
    tracked_solutions = ("Cabins", "Fixed Desks")
    records = []

    driver = webdriver.Chrome()
    try:
        driver.get("https://www.awfis.com")
        driver.implicitly_wait(10)
        driver.find_element(By.ID, value='location').click()
        driver.find_element(By.LINK_TEXT, city_link_text).click()

        # Pair every centre link with its cost element and keep the centres
        # priced at 3000 or more.
        centre_names = []
        name_links = driver.find_elements(By.CSS_SELECTOR, "a.font-txt-white")
        cost_blocks = driver.find_elements(By.ID, "6-seater")
        for name_link, cost_block in zip(name_links, cost_blocks):
            cost_text = cost_block.find_element(By.TAG_NAME, "h3").get_attribute("innerText")
            cost = int(cost_text.replace('Rs.', '').replace(',', '').strip())
            if cost < 3000:
                continue
            centre_names.append(name_link.get_attribute("innerText"))

        for position, centre_name in enumerate(centre_names):
            driver.find_element(By.LINK_TEXT, f"{centre_name}").click()

            title_holder = driver.find_element(By.CSS_SELECTOR, "div.title-holder")
            centre_location = title_holder.find_element(By.TAG_NAME, 'h4').get_attribute("innerText")

            link_texts = [
                link.get_attribute("innerText")
                for link in driver.find_elements(By.CSS_SELECTOR, "a.font-14")
            ]
            solutions = [text for text in link_texts if text in tracked_solutions]

            for solution in solutions:
                solution_link = driver.find_element(By.LINK_TEXT, f"{solution}")
                # With a single solution its details are read without
                # clicking; with several, each one is selected first.
                if len(solutions) > 1:
                    solution_link.click()
                price = driver.find_element(By.CSS_SELECTOR, "h3.price").get_attribute("innerText").replace('Rs.', '')
                try:
                    seats = ','.join(
                        seat.get_attribute("innerText")
                        for seat in driver.find_elements(By.CLASS_NAME, "seater-click")
                    )
                except WebDriverException:
                    # Seat options are optional information.
                    seats = 'NA'
                records.append((centre_name, centre_location, solution, price, seats))

            time.sleep(10)

            # NOTE(review): 'zs-tip-close' is only clicked after the first
            # centre -- presumably an overlay that appears once; confirm.
            if position == 0:
                driver.find_element(By.ID, value='zs-tip-close').click()

            # Go back to the city listing so the next centre link is clickable.
            driver.find_element(By.ID, value='location').click()
            driver.find_element(By.LINK_TEXT, city_link_text).click()
    finally:
        # Always shut the browser down, even when a lookup above fails.
        driver.quit()

    return pd.DataFrame(records, columns=['Name', 'Location', 'Solutions', 'Price', 'Seats'])

def scrape_91springboard_Bangalore_To_Excel(writer,df):
    """Write ``df`` to the '91Springboard Bangalore' sheet of ``writer``."""
    sheet_title = '91Springboard Bangalore'
    df.to_excel(writer, sheet_name=sheet_title)
    
def scrape_wework_Bangalore_To_Excel(writer,df):
    """Write ``df`` to the 'Wework Bangalore' sheet of ``writer``."""
    sheet_title = 'Wework Bangalore'
    df.to_excel(writer, sheet_name=sheet_title)

def scrape_Awfis_Bangalore_To_Excel(writer,df):
    """Write ``df`` to the 'Awfis Bangalore' sheet of ``writer``."""
    sheet_title = 'Awfis Bangalore'
    df.to_excel(writer, sheet_name=sheet_title)
    
def scrape_91springboard_Hyderabad_To_Excel(writer,df):
    """Write ``df`` to the '91Springboard Hyderabad' sheet of ``writer``."""
    sheet_title = '91Springboard Hyderabad'
    df.to_excel(writer, sheet_name=sheet_title)
    
def scrape_wework_Hyderabad_To_Excel(writer,df):
    """Write ``df`` to the 'Wework Hyderabad' sheet of ``writer``."""
    sheet_title = 'Wework Hyderabad'
    df.to_excel(writer, sheet_name=sheet_title)

def scrape_Awfis_Hyderabad_To_Excel(writer,df):
    """Write ``df`` to the 'Awfis Hyderabad' sheet of ``writer``."""
    sheet_title = 'Awfis Hyderabad'
    df.to_excel(writer, sheet_name=sheet_title)

def scrape_regus_Bangalore_To_Excel(writer,df):
    """Write ``df`` to the 'Regus Bangalore' sheet of ``writer``."""
    sheet_title = 'Regus Bangalore'
    df.to_excel(writer, sheet_name=sheet_title)

def scrape_innov8_Bangalore_To_Excel(writer,df):
    """Write ``df`` to the 'innov8 Bangalore' sheet of ``writer``."""
    sheet_title = 'innov8 Bangalore'
    df.to_excel(writer, sheet_name=sheet_title)

def scrape_regus_Hyderabad_To_Excel(writer,df):
    """Write ``df`` to the 'Regus Hyderabad' sheet of ``writer``."""
    sheet_title = 'Regus Hyderabad'
    df.to_excel(writer, sheet_name=sheet_title)

def scrape_innov8_Hyderabad_To_Excel(writer,df):
    """Write ``df`` to the 'innov8 Hyderabad' sheet of ``writer``."""
    sheet_title = 'innov8 Hyderabad'
    df.to_excel(writer, sheet_name=sheet_title)
    



In [9]:
# Scrape every provider/city combination. Each site is called with the city
# spelling passed below.
df1 = scrape_91springboard('bengaluru')
df2 = scrape_wework('bangalore')
df3 = scrape_91springboard('hyderabad')
df4 = scrape_wework('hyderabad')
df7 = scrape_Awfis('Bengaluru')
df8 = scrape_Awfis('Hyderabad')
df9 = scrape_regus('Bangalore')
df10 = scrape_regus("Hyderabad")
df11 = scrape_innov8("Bangalore")
df12 = scrape_innov8('Hyderabad')

# (exporter, frame) pairs, listed in the order the sheets are written to the
# workbook.
_sheet_jobs = (
    (scrape_91springboard_Bangalore_To_Excel, df1),
    (scrape_wework_Bangalore_To_Excel, df2),
    (scrape_Awfis_Bangalore_To_Excel, df7),
    (scrape_91springboard_Hyderabad_To_Excel, df3),
    (scrape_Awfis_Hyderabad_To_Excel, df8),
    (scrape_wework_Hyderabad_To_Excel, df4),
    (scrape_regus_Bangalore_To_Excel, df9),
    (scrape_innov8_Bangalore_To_Excel, df11),
    (scrape_regus_Hyderabad_To_Excel, df10),
    (scrape_innov8_Hyderabad_To_Excel, df12),
)

# All sheets go into a single workbook; the context manager saves it on exit.
with pd.ExcelWriter('Cowork_Spaces.xlsx') as writer:
    for _export_sheet, _frame in _sheet_jobs:
        _export_sheet(writer, _frame)