In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
url = "https://pythonizing.github.io/data/real-estate/rock-springs-wy/LCWYROCKSPRINGS/t=0&s=0.html"

r = requests.get(url)
c = r.content

soup = BeautifulSoup(c, 'html.parser')

all = soup.find_all('div', {'class': 'propertyRow'})

# COLLECT THE NUMBER OF PAGES

In [3]:
pages = soup.find_all('a', {'class': 'Page'})
last_page = int(pages[-1].text)
last_page

3

# EXTRACT FROM FIRST PAGE ONLY

In [4]:
all[0]

<div class="propertyRow" id="propertyRowREN021201395" onclick="Track.doEvent('Hybrid Mapping', 'Property Center Lane', 'Select a property with brand REN to view details'); document.location.href='/property/0-gateway-rock-springs-wy-82901-REN021201395';">
<div class="CenterLaneCardBg CardWrapper propertyCard" id="propertyREN021201395">
<div class="CenterLaneCard propertyCard">
<div class="CardThumb">
<div class="landscapeThumbContainer">
<a href="http://web.archive.org/web/20160127020422/http://www.century21.com/property/0-gateway-rock-springs-wy-82901-REN021201395"><img class="lazyLoad" rel="http://www.century21.com/listhub/thumb/130x90/photos.listhub.net/FCBRWY/20156419/1?lm=20151103T230938" src="./LCWYROCKSPRINGS1_files/1"/></a>
</div>
</div>
<h4 class="propPrice">


            $725,000



      <span class="IconPropertyFavorite16"></span>
</h4>
<div class="CardDetails">
<div class="primaryDetails">
<span class="propAddressCollapse" title="0 Gateway">0 Gateway</span>
<span class="pr

In [5]:
# extract property price on first property listed
price = all[0].find('h4', {'class': 'propPrice'}).text.replace('\n', '').replace(' ', '')
price

'$725,000'

In [6]:
properties = []
for property in all:
    propDict = {}
    propDict['price'] = property.find('h4', {'class': 'propPrice'}).text.replace('\n', '').replace(' ', '')
    propDict['address_street'] = property.find_all('span', {'class': 'propAddressCollapse'})[0].text
    propDict['address_city_state_zip'] = property.find_all('span', {'class': 'propAddressCollapse'})[1].text
    # do a try/except because some values are missing
    try:
        propDict['nb_bedrooms'] = property.find('span', {'class': 'infoBed'}).find('b').text
    except:
        propDict['nb_bedrooms'] = None
    try:
        propDict['sqft'] = property.find('span', {'class': 'infoSqFt'}).find('b').text
    except:
        propDict['sqft'] = None
    try:
        propDict['nb_full_baths'] = property.find('span', {'class': 'infoValueFullBath'}).find('b').text
    except:
        propDict['nb_full_baths'] = None
    try:
        propDict['nb_half_baths'] = property.find('span', {'class': 'infoValueHalfBath'}).find('b').text
    except:
        propDict['nb_half_baths'] = None
        
    # get the lot size
    for column_group in property.find_all('div', {'class': 'columnGroup'}):
        # zip iterates through two lists at the same time
        for featureGroup, featureName in zip(
            column_group.find_all('span', {'class': 'featureGroup'}),
            column_group.find_all('span', {'class': 'featureName'})
            ):
            if 'Lot Size' in featureGroup.text:
                propDict['lot_size'] = featureName.text
    properties.append(propDict)

In [7]:
properties

[{'price': '$725,000',
  'address_street': '0 Gateway',
  'address_city_state_zip': 'Rock Springs, WY 82901',
  'nb_bedrooms': None,
  'sqft': None,
  'nb_full_baths': None,
  'nb_half_baths': None},
 {'price': '$452,900',
  'address_street': '1003 Winchester Blvd.',
  'address_city_state_zip': 'Rock Springs, WY 82901',
  'nb_bedrooms': '4',
  'sqft': None,
  'nb_full_baths': '4',
  'nb_half_baths': None,
  'lot_size': '0.21 Acres'},
 {'price': '$396,900',
  'address_street': '600 Talladega',
  'address_city_state_zip': 'Rock Springs, WY 82901',
  'nb_bedrooms': '5',
  'sqft': '3,154',
  'nb_full_baths': '3',
  'nb_half_baths': None},
 {'price': '$389,900',
  'address_street': '3239 Spearhead Way',
  'address_city_state_zip': 'Rock Springs, WY 82901',
  'nb_bedrooms': '4',
  'sqft': '3,076',
  'nb_full_baths': '3',
  'nb_half_baths': '1',
  'lot_size': 'Under 1/2 Acre, '},
 {'price': '$254,000',
  'address_street': '522 Emerald Street',
  'address_city_state_zip': 'Rock Springs, WY 829

In [8]:
df = pd.DataFrame(properties)

In [9]:
df

Unnamed: 0,price,address_street,address_city_state_zip,nb_bedrooms,sqft,nb_full_baths,nb_half_baths,lot_size
0,"$725,000",0 Gateway,"Rock Springs, WY 82901",,,,,
1,"$452,900",1003 Winchester Blvd.,"Rock Springs, WY 82901",4.0,,4.0,,0.21 Acres
2,"$396,900",600 Talladega,"Rock Springs, WY 82901",5.0,3154.0,3.0,,
3,"$389,900",3239 Spearhead Way,"Rock Springs, WY 82901",4.0,3076.0,3.0,1.0,"Under 1/2 Acre,"
4,"$254,000",522 Emerald Street,"Rock Springs, WY 82901",3.0,1172.0,3.0,,"Under 1/2 Acre,"
5,"$252,900",1302 Veteran's Drive,"Rock Springs, WY 82901",4.0,1932.0,2.0,,0.27 Acres
6,"$210,000",1021 Cypress Cir,"Rock Springs, WY 82901",4.0,1676.0,3.0,,"Under 1/2 Acre,"
7,"$209,000",913 Madison Dr,"Rock Springs, WY 82901",3.0,1344.0,2.0,,"Under 1/2 Acre,"
8,"$199,900",1344 Teton Street,"Rock Springs, WY 82901",3.0,1920.0,2.0,,"Under 1/2 Acre,"
9,"$196,900",4 Minnies Lane,"Rock Springs, WY 82901",3.0,1664.0,2.0,,2.02 Acres


In [10]:
df.to_csv('output.csv')

# LOOP THROUGH PAGES

In [11]:
base_url = 'https://pythonizing.github.io/data/real-estate/rock-springs-wy/LCWYROCKSPRINGS/t=0&s='

properties_all = []

for page in range(0, last_page * 10, 10):
    print(base_url+str(page)+'.html')
    r = requests.get(base_url+str(page)+'.html')
    c = r.content
    soup = BeautifulSoup(c, 'html.parser')
    all = soup.find_all('div', {'class': 'propertyRow'})
    print(len(all))
    for property in all:
        propDict = {}
        try:
            propDict['price'] = property.find('h4', {'class': 'propPrice'}).text.replace('\n', '').replace(' ', '')
        except:
            propDict['price'] = None
        try:
            propDict['address_street'] = property.find_all('span', {'class': 'propAddressCollapse'})[0].text
        except:
            propDict['address_street'] = None
        try:
            propDict['address_city_state_zip'] = property.find_all('span', {'class': 'propAddressCollapse'})[1].text
        except:
            propDict['address_city_state_zip'] = None
        try:
            propDict['nb_bedrooms'] = property.find('span', {'class': 'infoBed'}).find('b').text
        except:
            propDict['nb_bedrooms'] = None
        try:
            propDict['sqft'] = property.find('span', {'class': 'infoSqFt'}).find('b').text
        except:
            propDict['sqft'] = None
        try:
            propDict['nb_full_baths'] = property.find('span', {'class': 'infoValueFullBath'}).find('b').text
        except:
            propDict['nb_full_baths'] = None
        try:
            propDict['nb_half_baths'] = property.find('span', {'class': 'infoValueHalfBath'}).find('b').text
        except:
            propDict['nb_half_baths'] = None
        
        # get the lot size
        for column_group in property.find_all('div', {'class': 'columnGroup'}):
            # zip iterates through two lists at the same time
            for featureGroup, featureName in zip(
                column_group.find_all('span', {'class': 'featureGroup'}),
                column_group.find_all('span', {'class': 'featureName'})
                ):
                if 'Lot Size' in featureGroup.text:
                    propDict['lot_size'] = featureName.text
        properties_all.append(propDict)

https://pythonizing.github.io/data/real-estate/rock-springs-wy/LCWYROCKSPRINGS/t=0&s=0.html
10
https://pythonizing.github.io/data/real-estate/rock-springs-wy/LCWYROCKSPRINGS/t=0&s=10.html
10
https://pythonizing.github.io/data/real-estate/rock-springs-wy/LCWYROCKSPRINGS/t=0&s=20.html
17


In [12]:
len(properties_all)

37

In [13]:
df = pd.DataFrame(properties_all)
df

Unnamed: 0,price,address_street,address_city_state_zip,nb_bedrooms,sqft,nb_full_baths,nb_half_baths,lot_size
0,"$725,000",0 Gateway,"Rock Springs, WY 82901",,,,,
1,"$452,900",1003 Winchester Blvd.,"Rock Springs, WY 82901",4.0,,4.0,,0.21 Acres
2,"$396,900",600 Talladega,"Rock Springs, WY 82901",5.0,3154.0,3.0,,
3,"$389,900",3239 Spearhead Way,"Rock Springs, WY 82901",4.0,3076.0,3.0,1.0,"Under 1/2 Acre,"
4,"$254,000",522 Emerald Street,"Rock Springs, WY 82901",3.0,1172.0,3.0,,"Under 1/2 Acre,"
5,"$252,900",1302 Veteran's Drive,"Rock Springs, WY 82901",4.0,1932.0,2.0,,0.27 Acres
6,"$210,000",1021 Cypress Cir,"Rock Springs, WY 82901",4.0,1676.0,3.0,,"Under 1/2 Acre,"
7,"$209,000",913 Madison Dr,"Rock Springs, WY 82901",3.0,1344.0,2.0,,"Under 1/2 Acre,"
8,"$199,900",1344 Teton Street,"Rock Springs, WY 82901",3.0,1920.0,2.0,,"Under 1/2 Acre,"
9,"$196,900",4 Minnies Lane,"Rock Springs, WY 82901",3.0,1664.0,2.0,,2.02 Acres


In [14]:
df.to_csv('output_all.csv')