## Clean housing data
<p>In this notebook, we download housing data from a popular MLS website, merge, clean,
filter and prepare it for subsequent spatial and statistical analysis.</p>


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from pyzillow.pyzillow import ZillowWrapper

%matplotlib inline

## Use pyzillow API to pull data

In [None]:
# Create the pyzillow API wrapper using its default constructor arguments.
# NOTE(review): no API key is passed here — confirm whether one must be supplied
# before this object can be used to request data.
zillow_data = ZillowWrapper()

## Read data from all CSV files

In [2]:
# Trulia listing exports, one CSV per search area (relative paths).
csv1_path = '../data/Plymouth County, MA 2 Bath Homes For Sale _Trulia.csv'
csv2_path = '../data/Weymouth, MA 2 Bath Homes For Sale _ Trulia.csv'

# Load each export into its own DataFrame, in the same order as the paths above.
prop_df1, prop_df2 = (pd.read_csv(csv_file) for csv_file in (csv1_path, csv2_path))

# Preview the first 10 rows of the Plymouth County listings.
prop_df1.head(10)

Unnamed: 0,Keywords,Dislike,Like,Like1,Content,propertycard__styledlinkm1ur0x3_URL,propertycard__styledlinkm1ur0x3,propertycard__styledlinkm1ur0x32,Dislike3
0,NEW,"$375,000",4bd,2ba,"1,544 sqft",https://www.trulia.com/p/ma/plymouth/26-willow...,26 Willow St,"Buttermilk Bay, Plymouth, MA","JAMES F. SAVAGE, BELSITO & ASSOCIATES INC."
1,NEW,"$450,000",3bd,2ba,"1,812 sqft",https://www.trulia.com/p/ma/plymouth/119-alewi...,119 Alewife Rd,"Cedarville, Plymouth, MA","ELIZABETH ISENOR, RE/MAX SPECTRUM"
2,NEW,"$449,900",4bd,2ba,"1,725 sqft",https://www.trulia.com/p/ma/bridgewater/351-so...,351 South St,"Bridgewater, MA","TEAM PACE, KELLER WILLIAMS REALTY"
3,NEW,"$379,000",5bd,2ba,"1,778 sqft",https://www.trulia.com/p/ma/west-wareham/8-wea...,8 Weaver St,"West Wareham, West Wareham, MA","PHIL AND DEBBIE ROSE, LAER REALTY PARTNERS / R..."
4,BANK OWNED,"$525,400",4bd,3ba,"2,964 sqft",https://www.trulia.com/p/ma/marshfield/364-fur...,364 Furnace St,"Marshfield, MA","JASON SMITH, JDS REALTY GROUP"
5,NEW,"$459,900",3bd,2ba,"1,545 sqft",https://www.trulia.com/p/ma/east-bridgewater/5...,552 Bridge St,"East Bridgewater, MA","SUSAN SOUZA, CONWAY - MATTAPOISETT"
6,,"$499,900",3bd,2ba,"1,528 sqft",https://www.trulia.com/p/ma/hanover/36-beechtr...,36 Beechtree Rd,"Hanover, MA","DONALD HARDY, KELLER WILLIAMS REALTY"
7,NEW,"$524,000",3bd,2ba,"2,015 sqft",https://www.trulia.com/p/ma/pembroke/69-clarem...,69 Claremont Rd,"Pembroke, MA","KIMBERLEY TUFTS, JILL & CO. REALTY GROUP"
8,NEW,"$375,000",3bd,2ba,"1,336 sqft",https://www.trulia.com/p/ma/bridgewater/475-oa...,475 Oak St,"Bridgewater, MA","KAREN MURRAY, REIS REAL ESTATE & COMPANY INC."
9,NEW,"$550,000",2bd,4ba,"2,224 sqft",https://www.trulia.com/p/ma/pembroke/672-cente...,672 Center St,"Pembroke, MA","CINDI FORD, RE/MAX PLATINUM"


In [3]:
# Preview the first 10 rows of the Weymouth listings to compare their layout with prop_df1.
prop_df2.head(10)


Unnamed: 0,Keywords,Dislike,Like,Like1,Content,propertycard__styledlinkm1ur0x3_URL,propertycard__styledlinkm1ur0x3,propertycard__styledlinkm1ur0x32,Dislike3
0,NEW,"$399,000",3bd,2ba,"1,633 sqft",https://www.trulia.com/p/ma/stoughton/36-talbo...,36 Talbot St,"Stoughton, MA","SHAUNA FANNING, LAMACCHIA REALTY, INC."
1,"NEW\nOPEN SUN, 9-10AM","$529,900",4bd,3ba,"1,682 sqft",https://www.trulia.com/p/ma/weymouth/18-weybos...,18 Weybosset St,"North Weymouth, Weymouth, MA","ANGELA BERGIN, WILLIAM RAVEIS R.E. & HOME SERV..."
2,NEW,"$324,900",2bd,2ba,"1,020 sqft",https://www.trulia.com/p/ma/rockland/91-boxber...,91 Boxberry Ln #91,"Rockland, MA","ALYSA O'HARA, BRADFORD REALTY"
3,,"$499,900",3bd,2ba,"1,528 sqft",https://www.trulia.com/p/ma/hanover/36-beechtr...,36 Beechtree Rd,"Hanover, MA","DONALD HARDY, KELLER WILLIAMS REALTY"
4,NEW,"$475,000",4bd,2ba,"1,856 sqft",https://www.trulia.com/p/ma/randolph/114-north...,114 North St,"Randolph, MA","LISA JEDREY, KINLIN GROVER DARTMOUTH AT PADANA..."
5,NEW,"$399,995",3bd,3ba,"1,576 sqft",https://www.trulia.com/p/ma/weymouth/61-broad-...,61 Broad Reach #M11B,"North Weymouth, Weymouth, MA","DEPEND ON DAKOTA TEAM, KELLER WILLIAMS REALTY"
6,,"$549,000",4bd,2ba,"1,550 sqft",https://www.trulia.com/p/ma/braintree/4-lake-s...,4 Lake St,"Braintree, MA","BUY BOSTON TEAM, COLDWELL BANKER REALTY - BROO..."
7,"NEW\nOPEN WED, 12-3PM","$525,000",3bd,2ba,"1,600 sqft",https://www.trulia.com/p/ma/hanover/23-karen-r...,23 Karen Rd,"Hanover, MA","DAVID DELANEY, DAVID R. DELANEY, ESQ."
8,NEW,"$549,000",2bd,3ba,"1,102 sqft",https://www.trulia.com/p/ma/dorchester-center/...,29 Saint Gregory St #29,"South Dorchester, Dorchester Center, MA","NICHOLS REALTY TEAM, BOSTON TRUST REALTY GROUP"
9,NEW,"$425,000",3bd,3ba,"1,892 sqft",https://www.trulia.com/p/ma/brockton/38-hillcr...,38 Hillcrest Ave,"Brockton, MA","VALERIE LYONS, RE/MAX PLATINUM"


## Merge all CSV files into a single DataFrame

In [4]:
# Stack the two listing tables row-wise into a single DataFrame.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported equivalent. As with append's defaults, each frame
# keeps its own row labels, so index values repeat in the combined frame.
# NOTE: the same listing can appear in both exports (e.g. 36 Beechtree Rd shows
# up in both previews), and duplicates are not removed at this step.
prop_df = pd.concat([prop_df1, prop_df2])
prop_df.head(10)

Unnamed: 0,Keywords,Dislike,Like,Like1,Content,propertycard__styledlinkm1ur0x3_URL,propertycard__styledlinkm1ur0x3,propertycard__styledlinkm1ur0x32,Dislike3
0,NEW,"$375,000",4bd,2ba,"1,544 sqft",https://www.trulia.com/p/ma/plymouth/26-willow...,26 Willow St,"Buttermilk Bay, Plymouth, MA","JAMES F. SAVAGE, BELSITO & ASSOCIATES INC."
1,NEW,"$450,000",3bd,2ba,"1,812 sqft",https://www.trulia.com/p/ma/plymouth/119-alewi...,119 Alewife Rd,"Cedarville, Plymouth, MA","ELIZABETH ISENOR, RE/MAX SPECTRUM"
2,NEW,"$449,900",4bd,2ba,"1,725 sqft",https://www.trulia.com/p/ma/bridgewater/351-so...,351 South St,"Bridgewater, MA","TEAM PACE, KELLER WILLIAMS REALTY"
3,NEW,"$379,000",5bd,2ba,"1,778 sqft",https://www.trulia.com/p/ma/west-wareham/8-wea...,8 Weaver St,"West Wareham, West Wareham, MA","PHIL AND DEBBIE ROSE, LAER REALTY PARTNERS / R..."
4,BANK OWNED,"$525,400",4bd,3ba,"2,964 sqft",https://www.trulia.com/p/ma/marshfield/364-fur...,364 Furnace St,"Marshfield, MA","JASON SMITH, JDS REALTY GROUP"
5,NEW,"$459,900",3bd,2ba,"1,545 sqft",https://www.trulia.com/p/ma/east-bridgewater/5...,552 Bridge St,"East Bridgewater, MA","SUSAN SOUZA, CONWAY - MATTAPOISETT"
6,,"$499,900",3bd,2ba,"1,528 sqft",https://www.trulia.com/p/ma/hanover/36-beechtr...,36 Beechtree Rd,"Hanover, MA","DONALD HARDY, KELLER WILLIAMS REALTY"
7,NEW,"$524,000",3bd,2ba,"2,015 sqft",https://www.trulia.com/p/ma/pembroke/69-clarem...,69 Claremont Rd,"Pembroke, MA","KIMBERLEY TUFTS, JILL & CO. REALTY GROUP"
8,NEW,"$375,000",3bd,2ba,"1,336 sqft",https://www.trulia.com/p/ma/bridgewater/475-oa...,475 Oak St,"Bridgewater, MA","KAREN MURRAY, REIS REAL ESTATE & COMPANY INC."
9,NEW,"$550,000",2bd,4ba,"2,224 sqft",https://www.trulia.com/p/ma/pembroke/672-cente...,672 Center St,"Pembroke, MA","CINDI FORD, RE/MAX PLATINUM"


### Clean column names
The scraped column names are cryptic and contain awkward characters. Let us rename them to descriptive names.

In [5]:
# List the current column labels so each one can be mapped to a descriptive name.
prop_df.columns

Index(['Keywords', 'Dislike', 'Like', 'Like1', 'Content',
       'propertycard__styledlinkm1ur0x3_URL',
       'propertycard__styledlinkm1ur0x3', 'propertycard__styledlinkm1ur0x32',
       'Dislike3'],
      dtype='object')

In [None]:
# Map the scraper-generated column labels to descriptive names.
# DataFrame.rename returns a new frame and leaves prop_df untouched, so the
# result is bound to its own name; without the assignment the renamed frame
# would only be displayed and then discarded.
# index=str additionally converts the row labels to strings.
prop_df_renamed = prop_df.rename(index=str, columns={
    'Keywords': 'Listing_type',
    'Dislike': 'Price',
    'Like': 'Bedrooms',
    'Like1': 'Bathrooms',
    'Content': 'Sq_Ft',
    'propertycard__styledlinkm1ur0x3_URL': 'URL',
    'propertycard__styledlinkm1ur0x3': 'Address',
    'propertycard__styledlinkm1ur0x32': 'Town, St',
    'Dislike3': 'Listing_Agent'
})

# Show a short preview of the renamed frame rather than the whole table.
prop_df_renamed.head(10)
