# Web Scraping With Beautiful Soup 
In this project, I collect real-estate listings from the realtor.com website using the Beautiful Soup library in Python, load them into a pandas DataFrame, and save the result as a CSV file.

In [1]:
#importing needed libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

## Collecting Data
Here I use Beautiful Soup to collect the data. After taking a closer look at the page's HTML, I extract the following information for each listing: the address of the property, its price, the number of beds and baths, the area of the property, the lot size, and the link to the listing.


In [2]:

# Request headers sent with every page fetch (a desktop Chrome User-Agent).
header = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8"}
BASE_URL = "https://www.realtor.com"
SEARCH_URL = BASE_URL + "/realestateandhomes-search/West-Palm-Beach_FL/beds-3/type-single-family-home/price-na-1000000/pg-"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block indefinitely


def _drop_label(text, label):
    """Remove `label` as a whole word from the start and/or end of `text`.

    `str.strip(label)` treats its argument as a *set of characters*, so it can
    also eat legitimate leading/trailing characters of the value itself.
    Whitespace around the remaining value is left untouched.
    """
    if text.startswith(label):
        text = text[len(label):]
    if text.endswith(label):
        text = text[:-len(label)]
    return text


def _meta_texts(card, data_label, label=None):
    """Return the text of every `<li data-label=...>` inside `card`.

    The text before the first "|" is kept; if `label` is given it is removed
    from the ends of the text first.
    """
    texts = []
    for element in card.find_all("li", {"data-label": data_label}):
        text = element.get_text(separator=" ")
        if label:
            text = _drop_label(text, label)
        texts.append(text.split("|")[0])
    return texts


# One entry per property card: [addresses, prices, beds, baths, sizes, area_lots, urls],
# where every field is itself a list of strings (possibly empty).
list_of_properties = []
# The search results are paginated; collect pages 1 to 5.
for page in range(1, 6):
    url = SEARCH_URL + str(page)
    response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of silently parsing an error page
    # and ending up with an empty data set.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    properties = soup.find_all("li", {"class": "component_property-card"})

    for card in properties:
        # separator=" " keeps the street address apart from the city.
        addresses = [
            _drop_label(element.get_text(separator=" ").strip(), "Email agent").strip().split(" | ")[0]
            for element in card.find_all("div", {"data-label": "pc-address"})
        ]
        # Only keep price elements that actually contain a dollar amount.
        prices = [
            _drop_label(element.get_text(separator=" "), "From").split("|")[0]
            for element in card.find_all("span", {"data-label": "pc-price"})
            if "$" in element.text
        ]
        beds = _meta_texts(card, "pc-meta-beds", "bed")
        baths = _meta_texts(card, "pc-meta-baths", "bath")
        sizes = _meta_texts(card, "pc-meta-sqft")
        area_lots = _meta_texts(card, "pc-meta-sqftlot", "lot")

        hrefs = [anchor.get("href") for anchor in card.find_all("a", {"rel": "noopener"})]
        # Relative links are prefixed with the site root; anchors without an
        # href are skipped (they would otherwise raise a TypeError).
        urls = [
            href if href.startswith(("http://", "https://")) else BASE_URL + href
            for href in hrefs
            if href
        ]

        list_of_properties.append([addresses, prices, beds, baths, sizes, area_lots, urls])

## Creating DataFrame

In [3]:
# One column per scraped field, in the order the fields were appended for each card.
# Cards that matched none of the selectors show up as rows of empty lists;
# they are removed in the cleaning section further down.
column_names = ['Address', 'Price($)', 'Bed', 'Bath', 'Area(sqft)', 'Lot size', 'Link']
df = pd.DataFrame(data=list_of_properties, columns=column_names)
df

Unnamed: 0,Address,Price($),Bed,Bath,Area(sqft),Lot size,Link
0,"[7507 Pine Tree Ln , Lake Clarke Shores , FL ...","[$995,000]",[3 ],[2 ],"[2,365 sqft]",[0.33 acre ],[https://www.realtor.com/realestateandhomes-de...
1,"[7442 Venetian Way , West Palm Beach , FL 3...","[$835,000]",[3 ],[2 ],"[2,046 sqft]",[0.52 acre ],[https://www.realtor.com/realestateandhomes-de...
2,"[2620 Sandy Cay , West Palm Beach , FL 33411]","[$674,900]",[4 ],[3 ],"[3,402 sqft]","[8,511 sqft ]",[https://www.realtor.com/realestateandhomes-de...
3,[],[],[],[],[],[],[]
4,"[2120 Chagall Cir , West Palm Beach , FL 33...","[$699,000]",[4 ],[2.5 ],"[2,762 sqft]","[5,698 sqft ]",[https://www.realtor.com/realestateandhomes-de...
...,...,...,...,...,...,...,...
250,"[721 Maddock St , West Palm Beach , FL 33405]","[$379,900]",[3 ],[1 ],"[1,036 sqft]","[5,446 sqft ]",[https://www.realtor.com/realestateandhomes-de...
251,"[5905 Snowdrop Way , West Palm Beach , FL 3...","[$449,500]",[4 ],[2.5 ],"[2,089 sqft]","[4,621 sqft ]",[https://www.realtor.com/realestateandhomes-de...
252,"[212 S Robbins Dr , West Palm Beach , FL 33...","[$380,000]",[3 ],[2 ],"[1,275 sqft]","[7,200 sqft ]",[https://www.realtor.com/realestateandhomes-de...
253,"[13706 52nd Ct N , The Acreage , FL 33411]","[$529,900]",[3 ],[2 ],"[1,500 sqft]",[1.29 acre ],[https://www.realtor.com/realestateandhomes-de...


## Cleaning DataFrame

In [4]:
# Every cell holds a Python list; stringify it, then remove the list brackets
# and the commas (thousands separators / list separators).
# Raw strings keep the regex escapes valid: a non-raw "\[" is an invalid string
# escape sequence and triggers a warning on recent Python versions.
df = df.astype(str).replace({r"\[": "", r"\]": " ", r",": ""}, regex=True)

new_df = (
    df
    # trim the quotes left over from the list repr, the dollar sign and padding
    .apply(lambda column: column.str.strip("'$ "))
    # rows/fields that were empty lists are now empty strings -> mark as missing
    .replace({"": np.nan})
    # prices are plain digit strings at this point, so they convert cleanly
    .astype({"Price($)": float})
)
new_df

Unnamed: 0,Address,Price($),Bed,Bath,Area(sqft),Lot size,Link
0,7507 Pine Tree Ln Lake Clarke Shores FL 3...,995000.0,3,2,2365 sqft,0.33 acre,https://www.realtor.com/realestateandhomes-det...
1,7442 Venetian Way West Palm Beach FL 33406,835000.0,3,2,2046 sqft,0.52 acre,https://www.realtor.com/realestateandhomes-det...
2,2620 Sandy Cay West Palm Beach FL 33411,674900.0,4,3,3402 sqft,8511 sqft,https://www.realtor.com/realestateandhomes-det...
3,,,,,,,
4,2120 Chagall Cir West Palm Beach FL 33409,699000.0,4,2.5,2762 sqft,5698 sqft,https://www.realtor.com/realestateandhomes-det...
...,...,...,...,...,...,...,...
250,721 Maddock St West Palm Beach FL 33405,379900.0,3,1,1036 sqft,5446 sqft,https://www.realtor.com/realestateandhomes-det...
251,5905 Snowdrop Way West Palm Beach FL 33415,449500.0,4,2.5,2089 sqft,4621 sqft,https://www.realtor.com/realestateandhomes-det...
252,212 S Robbins Dr West Palm Beach FL 33409,380000.0,3,2,1275 sqft,7200 sqft,https://www.realtor.com/realestateandhomes-det...
253,13706 52nd Ct N The Acreage FL 33411,529900.0,3,2,1500 sqft,1.29 acre,https://www.realtor.com/realestateandhomes-det...


In [5]:
# Count the missing values in each column.
new_df.isna().sum()

Address       49
Price($)      45
Bed           45
Bath          45
Area(sqft)    46
Lot size      83
Link          45
dtype: int64

In [6]:
# Keep only listings that have an address, order them from the most to the
# least expensive, and renumber the rows from 0.
new_df = (
    new_df
    .dropna(subset=["Address"])
    .sort_values(by="Price($)", ascending=False)
    .reset_index(drop=True)
)
new_df

Unnamed: 0,Address,Price($),Bed,Bath,Area(sqft),Lot size,Link
0,7507 Pine Tree Ln Lake Clarke Shores FL 3...,995000.0,3,2,2365 sqft,0.33 acre,https://www.realtor.com/realestateandhomes-det...
1,7946 Via Villagio West Palm Beach FL 33412,995000.0,3,3,2506 sqft,7000 sqft,https://www.realtor.com/realestateandhomes-det...
2,361 Colonial Rd West Palm Beach FL 33405,995000.0,3,2,1456 sqft,,https://www.realtor.com/realestateandhomes-det...
3,11066 46th Pl N The Acreage FL 33411,975750.0,6,5,4123 sqft,1.25 acre,https://www.realtor.com/realestateandhomes-det...
4,12837 Calais Cir Palm Beach Gardens FL 33410,969900.0,4,3,2732 sqft,9234 sqft,https://www.realtor.com/realestateandhomes-det...
...,...,...,...,...,...,...,...
201,1354 11th St West Palm Beach FL 33401,288900.0,3,1,792 sqft,6600 sqft,https://www.realtor.com/realestateandhomes-det...
202,128 Ethelyn Dr West Palm Beach FL 33415,280000.0,3,1,1072 sqft,4750 sqft,https://www.realtor.com/realestateandhomes-det...
203,720 Division Ave West Palm Beach FL 33401,269999.0,3,2,1110 sqft,3920 sqft,https://www.realtor.com/realestateandhomes-det...
204,5410 Helene Pl West Palm Beach FL 33407,265000.0,3,1,914 sqft,6005 sqft,https://www.realtor.com/realestateandhomes-det...


In [7]:
# Report how many rows are exact duplicates of an earlier row.
duplicate_count = new_df.duplicated().sum()
print("There is {} duplicated values in data frame".format(duplicate_count))

There is 0 duplicated values in data frame


In [8]:
# Write the cleaned listings to a CSV file, leaving out the row index.
output_path = 'RealEstate.csv'
new_df.to_csv(output_path, index=False)

In [9]:
# Render the table as HTML so the listing URLs become clickable links.
from IPython.display import HTML
# render_links=True already wraps URL cells in <a> tags. `escape` is left at
# its default (True) so that text scraped from the web page is HTML-escaped
# instead of being injected into the notebook as raw markup.
df2 = HTML(new_df.to_html(render_links=True))
df2

Unnamed: 0,Address,Price($),Bed,Bath,Area(sqft),Lot size,Link
0,7507 Pine Tree Ln Lake Clarke Shores FL 33406,995000.0,3,2,2365 sqft,0.33 acre,https://www.realtor.com/realestateandhomes-detail/7507-Pine-Tree-Ln_Lake-Clarke-Shores_FL_33406_M59012-07521
1,7946 Via Villagio West Palm Beach FL 33412,995000.0,3,3,2506 sqft,7000 sqft,https://www.realtor.com/realestateandhomes-detail/7946-Via-Villagio_West-Palm-Beach_FL_33412_M52604-92735
2,361 Colonial Rd West Palm Beach FL 33405,995000.0,3,2,1456 sqft,,https://www.realtor.com/realestateandhomes-detail/361-Colonial-Rd_West-Palm-Beach_FL_33405_M67732-45097
3,11066 46th Pl N The Acreage FL 33411,975750.0,6,5,4123 sqft,1.25 acre,https://www.realtor.com/realestateandhomes-detail/11066-46th-Pl-N_West-Palm-Beach_FL_33411_M59840-54786
4,12837 Calais Cir Palm Beach Gardens FL 33410,969900.0,4,3,2732 sqft,9234 sqft,https://www.realtor.com/realestateandhomes-detail/12837-Calais-Cir_West-Palm-Beach_FL_33410_M57349-98269
5,2518 Monaco Ter West Palm Beach FL 33410,950000.0,3,2,2132 sqft,8712 sqft,https://www.realtor.com/realestateandhomes-detail/2518-Monaco-Ter_West-Palm-Beach_FL_33410_M57471-09700
6,11817 46th Pl N Royal Palm Beach FL 33411,950000.0,4,2.5,2741 sqft,1.25 acre,https://www.realtor.com/realestateandhomes-detail/11817-46th-Pl-N_West-Palm-Beach_FL_33411_M59780-92236
7,3017 Santa Margarita Rd West Palm Beach FL 33411,949000.0,4,3.5,3697 sqft,6380 sqft,https://www.realtor.com/realestateandhomes-detail/3017-Santa-Margarita-Rd_West-Palm-Beach_FL_33411_M68619-35900
8,1530 Wilderness Rd West Palm Beach FL 33409,945000.0,3,2,,6095 sqft,https://www.realtor.com/realestateandhomes-detail/1530-Wilderness-Rd_West-Palm-Beach_FL_33409_M63646-61304
9,520 51st St West Palm Beach FL 33407,925000.0,3,3,1777 sqft,,https://www.realtor.com/realestateandhomes-detail/520-51st-St_West-Palm-Beach_FL_33407_M59258-17565
