# Scrape DC Apartment Listings from ApartmentList.com

In [19]:
# Load packages 

import requests  
from bs4 import BeautifulSoup
import pandas as pd
import time

# Many thanks to Ann Mohan Kunnath for the code to scrape Apartments.com
# (https://towardsdatascience.com/an-introduction-to-web-scraping-with-python-bc9563fe8860).
# It has been partially repurposed here for ApartmentList.com!

# Settings and selectors used by the scrape below

# Base URL - apartments in Washington, DC
base_url = 'https://www.apartmentlist.com/dc/washington'

# Seconds to wait for a response before giving up on a request
REQUEST_TIMEOUT = 30

# Seconds to pause after each page request, to give the site a chance to recover
PAGE_DELAY = 2

# CSS class strings copied from the site's markup; they must be updated
# whenever the site's markup changes.
LISTING_CLASS = 'ListingCard'
PAGINATION_CLASS = 'MuiPagination-root'
APARTMENT_CLASS = 'css-1oxqqna e1i6tqc31'

# Building-level fields, in output-column order
BUILDING_FIELD_CLASSES = [
    'css-d6gmau e1k7pw6k4',    # building_name
    'css-17xjl8p e1k7pw6k5',   # building_address
    'css-11wmgwu e1k9ondy1',   # building_description
    'css-1qplh4f e1k7pw6k2',   # building_units_available
]

# Apartment-level fields, in output-column order
APARTMENT_FIELD_CLASSES = [
    'css-xjvzth e1i6tqc32',    # apt_type
    'css-ajwnv4 e1i6tqc33',    # apt_rent
    'css-o1qo1i e1i6tqc34',    # apt_footage
]

COLUMN_NAMES = [
    'building_name',
    'building_address',
    'building_description',
    'building_units_available',
    'apt_type',
    'apt_rent',
    'apt_footage',
]


def field_text(parent, css_class, default='NA'):
    """Return the text of the first <div> under *parent* with *css_class*.

    Returns *default* when no such <div> exists, so one missing field does
    not abort the whole scrape with an AttributeError.
    """
    node = parent.find('div', {'class': css_class})
    return node.text if node is not None else default


def last_page_number(labels, default=1):
    """Return the highest page number found among pagination *labels*.

    *labels* is an iterable of the text of each pagination item. Items that
    are not plain numbers (arrows, ellipses, blanks) are ignored. Returns
    *default* when no numeric label is present.
    """
    cleaned = (str(label).strip() for label in labels)
    numbers = [int(label) for label in cleaned if label.isdecimal()]
    return max(numbers, default=default)


def fetch_soup(url):
    """Download *url* and return it parsed, or None if the request failed.

    Failures (network errors or a non-200 status) are reported with print()
    rather than being silently skipped.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print('Request for {} failed: {}'.format(url, err))
        return None
    if response.status_code != 200:
        print('Request for {} returned status {}'.format(url, response.status_code))
        return None
    return BeautifulSoup(response.content, 'html.parser')


def parse_listing(listing):
    """Return (building_name, rows) for one listing card.

    *rows* holds one list per apartment entry in the card: the four
    building-level fields followed by the three apartment-level fields.
    """
    building = [field_text(listing, css_class) for css_class in BUILDING_FIELD_CLASSES]
    rows = [
        building + [field_text(apt, css_class) for css_class in APARTMENT_FIELD_CLASSES]
        for apt in listing.find_all('div', {'class': APARTMENT_CLASS})
    ]
    return building[0], rows


def scrape_listings(start_url):
    """Walk every results page under *start_url* and return all apartment rows.

    The first request reads the pagination bar to learn how many pages
    exist; each page is then requested in turn. Pages that fail to load are
    reported and skipped. Returns an empty list when the first request fails.
    """
    rows = []
    first_page = fetch_soup(start_url)
    if first_page is None:
        return rows

    # Dynamically grab the last pagination number
    nav = first_page.find('nav', {'class': PAGINATION_CLASS})
    labels = [item.text for item in nav.find_all('li')] if nav is not None else []
    last_page = last_page_number(labels)

    # Start working through each of the pages
    for page_number in range(1, last_page + 1):
        soup = fetch_soup('{}/page-{}'.format(start_url, page_number))

        # Give the site a chance to recover (once per request, not per listing)
        time.sleep(PAGE_DELAY)

        if soup is None:
            continue

        for listing in soup.find_all('div', {'class': LISTING_CLASS}):
            building_name, listing_rows = parse_listing(listing)
            rows.extend(listing_rows)

            # Print building name when done
            print('Page {}: {}'.format(page_number, building_name))

    return rows


def listings_to_frame(rows):
    """Return *rows* as a DataFrame with the standard column names.

    Passing the names to the constructor keeps this working when *rows* is
    empty, where assigning ``.columns`` afterwards would raise ValueError.
    """
    return pd.DataFrame(rows, columns=COLUMN_NAMES)


# Runs when executed as a script or as a notebook cell (both use '__main__')
if __name__ == '__main__':
    apts_list = scrape_listings(base_url)

    # Make this a dataframe
    apts_df = listings_to_frame(apts_list)

    # Read it out
    apts_df.to_csv("ApartmentList_Information.csv", index=False)

    print('Scrape Complete! Look in your current folder for results of the search.')

Page 1: 1500 Mass
Page 1: Park Connecticut
Page 1: 3003 Van Ness
Page 1: 100K
Page 1: 455 Eye Street
Page 1: Connecticut Heights
Page 1: The Flats at Dupont Circle
Page 1: Alban Towers
Page 1: Calvert Woodley
Page 1: 2501 Porter
Page 1: Cleveland House
Page 1: Corcoran House at Dupont Circle
Page 1: 425 Mass
Page 1: 2400 M
Page 1: 1210 Mass
Page 1: RiverPoint
Page 1: The Kelvin
Page 1: Senate Square
Page 1: 1331
Page 1: 555
Page 1: Resa
Page 1: The Batley
Page 1: 14W Apartments
Page 1: The Channel
Page 2: Highbridge
Page 2: Valo
Page 2: Sonnet
Page 2: 7th Flats
Page 2: Anthology
Page 2: Parc Riverside
Page 2: Jefferson Marketplace
Page 2: Camden NoMa
Page 2: Twelve12
Page 2: Foundry Lofts
Page 2: The Shelburne
Page 2: The Sedgewick
Page 2: Camden Grand Parc
Page 2: Camden Roosevelt
Page 2: RiverPoint
Page 2: The Channel
Page 2: Novel South Capitol
Page 2: 1331
Page 2: Senate Square
Page 2: The Kelvin
Page 2: Flats 130 At Constitution Square
Page 2: Resa
Page 2: The Batley
Page 2: Cross

Page 15: Avalon First + M
Page 15: Avalon at Gallery Place
Page 15: AVA NoMa
Page 15: Avalon at Foxhall
Page 15: AVA Van Ness
Page 15: Ava H Street
Page 15: Empire Apartments
Page 15: Majestic
Page 15: Park Terrace
Page 15: Tivoli Apartments
Page 16: Yuma Gardens
Page 16: Miramar Apartments
Page 16: The Dahlia
Page 16: Carillon House
Page 16: The Lansburgh
Page 16: 1429 Parkwood Place NW
Page 16: 1446 Parkwood Place NW
Page 16: 1209 6th St NE
Page 16: 1248 Irving Street Northwest - 1
Page 16: 1604 19th Street Northwest, 2
Page 16: 1901 13th Street Northwest, 10
Page 16: 2136 O Street NW, 3
Page 16: 526 5th Street Southeast, 102
Page 16: 5430 North Capitol Street Northwest, Basement
Page 16: 1704 Q Street Northwest, B
Page 16: 1765 R Street Northwest, 202
Page 16: 503 Independence Avenue Southeast, 302
Page 16: 1800 19th Street Northwest, 4
Page 16: 712 G Street Southeast, 101
Page 16: 1105 E Street Southeast
Page 16: 114 F Street Southeast
Page 16: 2630 Monroe Street Northeast
Page 16:

Page 26: 1763 P STREET NW
Page 26: 414 SEWARD SQUARE SE
Page 26: 1245 13TH STREET NW
Page 26: 410 O STREET SW
Page 26: 1111 25TH STREET NW
Page 26: 2202 WYOMING AVENUE NW
Page 26: 3203 10TH STREET NE
Page 26: 2200 17TH STREET NW
Page 26: 1830 JEFFERSON PLACE NW
Page 26: 3701 CONNECTICUT AVENUE NW
Page 27: 24 RHODE ISLAND AVENUE NE
Page 27: 1252 E STREET NE
Page 27: 1327 EUCLID STREET NW
Page 27: 2424 17TH STREET NW
Page 27: 907 N STREET NW
Page 27: 424 Q STREET NW
Page 27: 2101 Connecticut Ave
Page 27: 2151 FLORIDA AVENUE NW
Page 27: 1221 NW 4TH STREET NW
Page 27: 3759 W STREET NW
Page 27: 3805 VAN NESS STREET NW
Page 27: 475 K STREET NW
Page 27: 1618 S STREET NW
Page 27: 612 ALABAMA AVENUE SE
Page 27: 541 FLORIDA AVENUE NW
Page 27: 111 11TH STREET SE
Page 27: 1439 SPRING ROAD NW
Page 27: 1101 FERN STREET NW
Page 27: 3620 PROSPECT STREET NW
Page 27: 1013 E STREET SE
Page 27: 2812 P STREET NW
Page 27: 2035 ROSEMONT AVENUE NW
Page 27: 3908 GEORGETOWN COURT NW
Page 27: 1111 23RD STREET NW

Page 37: 2824 12th St NE
Page 38: 4311 37th St NW
Page 38: 2824 12th St NE
Page 38: 4602 hillside rd se Washington DC 3
Page 38: 4612 hillside rd se Washington DC 2
Page 38: 1415 Oak St NW Unit T02
Page 38: 1416 Belmont St NW
Page 38: 1852 5th St NW
Page 38: 1457 Girard St NW PH7
Page 38: 1427 Chapin St NW Unit 301
Page 38: 2818 Connecticut Ave NW  Unit 503
Page 38: 1401 17th St NW Unit 904
Page 38: 1301 T St NW Unit 3
Page 38: 1821 16th Street, NW #107
Page 38: 510 N Street SW
Page 38: 4224 16th St NW
Page 38: 421 10th St NE Basement Unit
Page 38: 204 Channing St NE
Page 38: 4316 Gorman Ter SE
Page 38: 3301 E St SE BSMT
Page 38: 5130 Connecticut Ave. NW 207
Page 38: 632 Emerson St NE
Page 38: 615 Gallatin St. NE
Page 38: 1833 S St. NW, 31
Page 38: 308 E St NE unit 2
Page 39: 24 Girard St NE
Page 39: 1072 Thomas Jefferson St NW 101
Page 39: 1740 18th St NW
Page 39: 3404 13th St NW  Unit 102
Page 39: 4729 1st Street SW #102
Page 39: 3700 39th Street NW #F180
Page 39: 1416 5th St NW Unit