# Web scraping apartments.com for house rents using Selenium
### Selenium
#### Advantages: 
browser automation (clicking a button, ...)

#### Disadvantages: 
slow, yields only a small amount of data, prone to stale-element exceptions (`StaleElementReferenceException`), and more fragile when selecting an element

In [None]:
# standard library
import itertools
import time

# pandas
import pandas as pd

# import webdriver
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


class RentsScraper:
    """Scrape rent prices and bed counts from apartments.com with Selenium.

    For every entry in ``urls`` the scraper opens the page in Firefox,
    collects the text of all ``property-pricing`` and ``property-beds``
    elements, follows the ``a.next`` pagination link until it can no longer
    advance, and finally writes every collected row to ``rents.csv``.

    Typical use::

        scraper = RentsScraper()
        try:
            scraper.crawl()
        finally:
            scraper.close()

    The instance can also be used as a context manager, which closes the
    browser on exit.
    """

    # our scraper uses the below links
    urls = [
        'https://www.apartments.com/austin-tx/student-housing/',
        'https://www.apartments.com/saint-louis-mo/student-housing/',
        'https://www.apartments.com/minneapolis-mn/student-housing/',
        'https://www.apartments.com/tampa-fl/student-housing/',
        'https://www.apartments.com/new-york-ny/student-housing/',
    ]

    # location label stored with each row; locations[i] belongs to urls[i]
    locations = [
        "austin-tx",
        "saint-louis-mo",
        "minneapolis-mn",
        "tampa-fl",
        "new-york-ny",
    ]

    # indicate the index of the url list - where we are currently at
    index = 0

    # URL of the page most recently handed to the parser
    current_url = ""

    # seconds the explicit wait polls for an element before giving up
    wait_timeout = 20

    # seconds to pause after clicking the next-page link
    click_delay = 10

    # how many times the next-page link is clicked before giving up
    max_click_attempts = 3

    def __init__(self):
        # create webdriver object, open the firefox browser
        self.driver = webdriver.Firefox()

        # explicit wait object
        self.wait = WebDriverWait(self.driver, self.wait_timeout)

        # Per-instance state. `data` in particular must not live on the
        # class: a class-level list would be shared by every scraper.
        self.data = []
        self.index = 0
        self.current_url = ""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Quit the browser that was opened in ``__init__``."""
        self.driver.quit()

    def crawl(self):
        """Scrape every remaining URL, then write all rows to ``rents.csv``.

        Starts at ``self.index`` and advances it by one per URL, so calling
        this again after completion only rewrites the CSV file.
        """
        # A loop instead of self-recursion: the stack depth no longer grows
        # with the number of URLs.
        while self.index < len(self.urls):
            # get apartments.com, open the url
            self.driver.get(self.urls[self.index])

            # parse this page and every page reachable through "next"
            self.parse()

            self.index += 1

        # save the data to the csv file
        pd.DataFrame(
            self.data, columns=["locations", "prices", "beds"]
        ).to_csv("rents.csv")

    def parse(self):
        """Scrape the current page and all following pages of the listing.

        Stops when the listings do not appear within the wait timeout or
        when the browser cannot be moved to a further page.
        """
        while self._parse_current_page():
            if not self._go_to_next_page():
                break

    def next_page(self):
        """Move to the next result page and continue parsing from there.

        Does nothing when there is no next-page link or when clicking it
        does not change the browser URL.
        """
        if self._go_to_next_page():
            self.parse()

    def _parse_current_page(self):
        """Append the rows of the page currently shown; return success.

        Returns ``False`` (adding nothing) when no ``property-pricing``
        element shows up before the wait times out, ``True`` otherwise.
        """
        # Report the URL the browser is really on, not the value remembered
        # from an earlier navigation.
        self.current_url = self.driver.current_url
        print(self.current_url)

        # wait for the listings to be present
        try:
            self.wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, "property-pricing"))
            )
        except WebDriverException:
            # Includes the wait's TimeoutException. Unlike a bare `except`,
            # KeyboardInterrupt / SystemExit still propagate.
            return False

        # get all prices in the page
        prices = [
            element.text
            for element in self.driver.find_elements(By.CLASS_NAME, "property-pricing")
        ]

        # get all bed numbers in the page
        beds = [
            element.text
            for element in self.driver.find_elements(By.CLASS_NAME, "property-beds")
        ]

        # add to our data repo
        self.data.extend(
            self._build_records(self.locations[self.index], prices, beds)
        )
        return True

    def _go_to_next_page(self):
        """Click the ``a.next`` link; return whether the browser URL changed.

        The link is looked up again before every attempt so a handle that
        went stale is never reused, and the number of attempts is bounded by
        ``max_click_attempts`` so a click that never navigates cannot hang
        the crawl.
        """
        previous_url = self.driver.current_url

        for attempt in range(1, self.max_click_attempts + 1):
            try:
                next_button = self.wait.until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "a.next"))
                )
            except WebDriverException:
                # no next-page link appeared within the wait timeout
                return False

            try:
                next_button.click()
            except WebDriverException as error:
                # sometimes click does not work; fall through and retry
                print("next-page click failed on attempt", attempt, "-", error)

            time.sleep(self.click_delay)

            if self.driver.current_url != previous_url:
                self.current_url = self.driver.current_url
                return True

        return False

    @staticmethod
    def _build_records(location, prices, beds):
        """Return ``(location, price, beds)`` tuples for one page.

        ``prices`` and ``beds`` are paired by position. When one list is
        shorter, the missing values are filled with ``""`` so that no
        listing is silently dropped; a warning is printed in that case
        because the positional pairing may then be off.
        """
        if len(prices) != len(beds):
            print(
                "warning: found", len(prices), "prices but", len(beds),
                "bed entries for", location,
            )
        return [
            (location, price, bed)
            for price, bed in itertools.zip_longest(prices, beds, fillvalue="")
        ]
        
        
        
            
        

In [None]:
# Run the scraper. The browser is always shut down afterwards so that a
# failed (or finished) crawl does not leave the Firefox session running.
my_scraper = RentsScraper()
try:
    my_scraper.crawl()
finally:
    my_scraper.driver.quit()