In [12]:
# Dependencies
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [13]:
# Smoke test: fetch the unfiltered California listings page and check that
# the 'infinite-item' selector matches the property cards on it.
linked = 'https://www.century21.com/real-estate/california/LSCA/'

response = requests.get(linked)
soup = BeautifulSoup(markup=response.text, features='html.parser')
result = soup.find_all(name='div', class_='infinite-item')
# Bare last expression so the notebook renders the matched cards
result

[<div class="infinite-item property-card clearfix property-card-C2180910323" data-brand-cd="C21" data-id="C2180910323" data-latitude="38.02535" data-link="/property/22216-parrotts-ferry-13-sonora-ca-95370-C2180910323" data-listing-id="" data-longitude="-120.40381" data-mls="20181467" data-source-id="56dafcea-766d-461e-97bc-6ee9e5eb9983" data-zip="95370">
 <div class="property-card-clip"> <div class="property-card-image" style="background-image: url(https://www2.century21.com/c21/photo/maxxmax/c21.azureedge.net/308i0/0r6e05264hncmdte4j3a11qnf4i);">
 <div class="property-image-flag newly-listed">Newly Listed</div>
 <div class="property-image-count">
 <div class="image-count-left">&lt;</div>
 <div class="image-count-current">1</div>
 <div class="image-count-total">12</div>
 <div class="image-count-right">&gt;</div>
 </div>
 </div>
 </div>
 <div class="property-card-primary-info">
 <div class="pdp-listing-type sale">FOR SALE</div>
 <a class="listing-price" href="/property/22216-parrotts-fe

In [14]:
# Accumulator for the scraped listings: a list holding one dict per property.
# (Despite the name, this is a list, not a dict.)
house_dict = list()

In [15]:
# Scrape the paginated Century 21 California listings and append one record
# per property card to house_dict (created in an earlier cell).
# NOTE(review): the query string (sn=5&sk=Y&pt=2) is presumed to select the
# Condo/Townhome property type, matching the House_type label used below -- confirm.
page_link = 'https://www.century21.com/real-estate/california/LSCA/?sn=5&sk=Y&pt=2&p={}'
MAX_PAGES = 4999       # upper bound on pages requested (pages 1..4999)
REQUEST_TIMEOUT = 30   # seconds; without a timeout a stalled request hangs the cell


def _clean(text):
    """Strip surrounding whitespace and remove embedded newlines from scraped text."""
    return text.strip().replace("\n", "")


for page in range(1, MAX_PAGES + 1):
    res = requests.get(page_link.format(page), timeout=REQUEST_TIMEOUT)
    new_soup = BeautifulSoup(res.text, 'html.parser')
    new_results = new_soup.find_all('div', class_='infinite-item')

    # A page without any listing cards means we have run past the last page,
    # so stop instead of requesting the remaining page numbers.
    if not new_results:
        break

    # Loop through the property cards on this page
    for card in new_results:
        # find() returns None for a missing element, so accessing .text / .find
        # on it raises AttributeError; such incomplete cards are skipped.
        try:
            # Listing type (e.g. "FOR SALE")
            listing_type = card.find('div', class_="pdp-listing-type").text
            # Asking price
            price = card.find('a', class_="listing-price").text
            # Street number and name
            street = card.find('div', class_="property-address").text
            # Combined city / state / zip text, sliced by fixed widths below:
            # the last 5 characters are taken as the zip code, the 2 characters
            # before the preceding separator as the state, the rest as the city.
            city = card.find('div', class_="property-city").text
            location = _clean(city)
            city_state = location[:-6]
            # Bedroom count
            bed = card.find('div', class_="property-beds").find('strong').text
            # Full-bath count
            bath = card.find('div', class_="property-baths").find('strong').text
            # Half-bath count; cards without a half-bath element default to "0"
            try:
                half_bath = card.find('div', class_="property-half-baths").find('strong').text
            except AttributeError:
                half_bath = "0"
            # Living area in square feet
            sqft = card.find('div', class_="property-sqft").find('strong').text
            # Coordinates are stored as data attributes on the card itself
            latitude = card.get("data-latitude")
            longitude = card.get("data-longitude")

            # Keep the record only if every required field is present and non-empty
            required = (listing_type, price, street, city, bed, bath, sqft, latitude, longitude)
            if all(required):
                house_obj = {
                    "Listing_type": listing_type,
                    "Price": _clean(price),
                    "Street": _clean(street),
                    "City": city_state[:-3],
                    "State": city_state[-2:],
                    "Zip_code": location[-5:],
                    "Bed": _clean(bed),
                    "Half-bath": _clean(half_bath),
                    "Bath": _clean(bath),
                    "Square_Feet": _clean(sqft),
                    "House_type": "Condo/Townhome",
                    "Latitude": latitude,
                    "Longitude": longitude
                }

                # Add the record to the house_dict collection
                house_dict.append(house_obj)

        except AttributeError:
            # A required element is missing from this card; move on to the next one
            continue

print(house_dict)

[{'Listing_type': 'FOR SALE', 'Price': '$1,949,000', 'Street': '2222 Avenue of the Stars 1201E', 'City': 'Los Angeles', 'State': 'CA', 'Zip_code': '90067', 'Bed': '2', 'Half-bath': '0', 'Bath': '2', 'Square_Feet': '1,639', 'House_type': 'Condo/Townhome', 'Latitude': '34.05225', 'Longitude': '-118.40849'}, {'Listing_type': 'FOR SALE', 'Price': '$1,500,000', 'Street': '5167 Hall Rd', 'City': 'Santa Rosa', 'State': 'CA', 'Zip_code': '95401', 'Bed': '4', 'Half-bath': '0', 'Bath': '3', 'Square_Feet': '3,671', 'House_type': 'Condo/Townhome', 'Latitude': '38.44093', 'Longitude': '-122.80131'}, {'Listing_type': 'FOR SALE', 'Price': '$649,900', 'Street': '2625 E North Bear Creek Dr', 'City': 'Merced', 'State': 'CA', 'Zip_code': '95340', 'Bed': '4', 'Half-bath': '1', 'Bath': '3', 'Square_Feet': '3,385', 'House_type': 'Condo/Townhome', 'Latitude': '37.30981', 'Longitude': '-120.43886'}, {'Listing_type': 'FOR SALE', 'Price': '$559,950', 'Street': '820 Saint Andrews Circle', 'City': 'Paso Robles', 

In [16]:
# Build a table from the scraped records: one row per listing, one column per key
house_data = pd.DataFrame(data=house_dict)
# Bare last expression so the notebook renders the frame
house_data

Unnamed: 0,Bath,Bed,City,Half-bath,House_type,Latitude,Listing_type,Longitude,Price,Square_Feet,State,Street,Zip_code
0,2,2,Los Angeles,0,Condo/Townhome,34.05225,FOR SALE,-118.40849,"$1,949,000",1639,CA,2222 Avenue of the Stars 1201E,90067
1,3,4,Santa Rosa,0,Condo/Townhome,38.44093,FOR SALE,-122.80131,"$1,500,000",3671,CA,5167 Hall Rd,95401
2,3,4,Merced,1,Condo/Townhome,37.30981,FOR SALE,-120.43886,"$649,900",3385,CA,2625 E North Bear Creek Dr,95340
3,2,3,Paso Robles,0,Condo/Townhome,35.60401,FOR SALE,-120.66757,"$559,950",1821,CA,820 Saint Andrews Circle,93446
4,3,4,Rocklin,0,Condo/Townhome,38.80384,FOR SALE,-121.26919,"$550,000",2650,CA,3017 Strand,95765
5,2,2,Petaluma,0,Condo/Townhome,38.243362,FOR SALE,-122.648918,"$520,000",1092,CA,7 Liberty Lane,94952
6,2,3,Oakdale,0,Condo/Townhome,37.77773,FOR SALE,-120.84571,"$375,000",1738,CA,831 River Bluff Ct,95361
7,2,4,Riverbank,1,Condo/Townhome,37.71547,FOR SALE,-120.95234,"$370,000",2052,CA,5323 St Elmo Ct,95367
8,1,2,Simi Valley,1,Condo/Townhome,34.265548,FOR SALE,-118.662878,"$369,777",1137,CA,6524 Stoney View Ln 7,93063
9,1,2,San Luis Obispo,1,Condo/Townhome,35.26114,FOR SALE,-120.70168,"$369,000",1001,CA,1750 Prefumo Canyon Rd. #55,93405


In [17]:
# Export the scraped listings as a CSV, without the pandas index but with the header row
output_path = Path("Output") / "Q2-2018_Condo_Townhome_Data.csv"
# to_csv does not create missing directories, so make sure Output/ exists first
output_path.parent.mkdir(parents=True, exist_ok=True)
house_data.to_csv(output_path, index=False, header=True)