In [168]:
import numpy as np
import pandas as pd
import re
import json
import math

In [169]:
df = pd.read_json('../collection/raw_data/showmetherent.json')
df = df.drop(['updated_datetime', 'deposit', 'name'], axis=1)
col = ['address', 'price', 'bed', 'bath', 'area', 'company', 'neighborhood', 'laundry', 'pets', 'parking', 'utilities', 'property_type', 'year_built', 'description', 'images']
df.iloc[1]

address                       1904 Geddes Ave, Ann Arbor, MI 48104
company                             Investor's Property Management
property_type    Condo\r\n                                , \r\...
year_built                                                    2009
price                                                     [$4,050]
bed                                                   [4 bedrooms]
bath                                                 [3 bathrooms]
area                                               [1,552 sq. ft.]
description      AUGUST LEASE: Stunning new duplex-style home b...
images                                                        [[]]
Name: 1, dtype: object

In [170]:
final_rows = []
for row in df.itertuples():
    address = row[1]
    company = row[2]
    property_type = row[3].strip()
    year_built = row[4]
    if math.isnan(year_built):
        year_built = 0 
    description = row[9]
    
    for j, s in enumerate(row[5]):
        
        entry = {'address': address,
                 'company': company,
                 'property_type': property_type,
                 'year_built': year_built,
                 'price': row[5][j].replace('$', '').replace(',', ''),
                 'bed': row[6][j][:-8].replace('b', ''),
                 'bath': row[7][j][:-8].replace('b', ''),
                 'area': row[8][j],
                 'description': description,
                 'images': row[10][j],
                 'neighborhood': None,
                 'laundry': None, 
                 'pets': None, 
                 'parking': None, 
                 'utilities': [],
                }
        final_rows.append(entry)

In [171]:
final_rows = pd.DataFrame(final_rows)
final_rows.iloc[0:10]

Unnamed: 0,address,company,property_type,year_built,price,bed,bath,area,description,images,neighborhood,laundry,pets,parking,utilities
0,"3324 Bluett Rd, Ann Arbor, MI 48105",Investor's Property Management,House\r\n \r\n ...,1963.0,2750,4.0,2½,"2,449 sq. ft.","IMMEDIATE OCCUPANCY: Wonderful two story, unf...",[],,,,,[]
1,"1904 Geddes Ave, Ann Arbor, MI 48104",Investor's Property Management,"Condo\r\n , \r\...",2009.0,4050,4.0,3,"1,552 sq. ft.",AUGUST LEASE: Stunning new duplex-style home b...,[],,,,,[]
2,"916 S Main St, Ann Arbor, MI 48104",Investor's Property Management,House\r\n \r\n ...,0.0,4200 - 4500,4.0,2,"1,553 sq. ft.","AUGUST LEASE: TIERED PRICING $4,200/4 or 4,50...",[],,,,,[]
3,"802 Arch St, Ann Arbor, MI 48104",Investor's Property Management,House\r\n \r\n ...,1900.0,3300,3.0,1,"1,039 sq. ft.","SEPTEMBER LEASE: Charming 3 bedroom, 1 bath, ...",[],,,,,[]
4,"708 Dewey Ave, Ann Arbor, MI 48104",Investor's Property Management,House\r\n \r\n ...,1915.0,3750,5.0,2,"1,320 sq. ft.","Charming 5 bedroom, 2 full bath, 3 story furni...",[],,,,,[]
5,"536 S Forest Ave, Ann Arbor, MI 48104",University Towers,Apartments\r\n ...,1965.0,1599,,1,- sq. ft.,"Studio, one, two and three bedroom apartments ...",[https://s3.amazonaws.com/photos.rentlinx.com/...,,,,,[]
6,"536 S Forest Ave, Ann Arbor, MI 48104",University Towers,Apartments\r\n ...,1965.0,1629,,1,- sq. ft.,"Studio, one, two and three bedroom apartments ...",[https://s3.amazonaws.com/photos.rentlinx.com/...,,,,,[]
7,"536 S Forest Ave, Ann Arbor, MI 48104",University Towers,Apartments\r\n ...,1965.0,1119,,1,- sq. ft.,"Studio, one, two and three bedroom apartments ...",[https://s3.amazonaws.com/photos.rentlinx.com/...,,,,,[]
8,"536 S Forest Ave, Ann Arbor, MI 48104",University Towers,Apartments\r\n ...,1965.0,Leased through 8/30/2020,,1,- sq. ft.,"Studio, one, two and three bedroom apartments ...",[https://s3.amazonaws.com/photos.rentlinx.com/...,,,,,[]
9,"536 S Forest Ave, Ann Arbor, MI 48104",University Towers,Apartments\r\n ...,1965.0,Leased through 8/30/2020,,1,- sq. ft.,"Studio, one, two and three bedroom apartments ...",[https://s3.amazonaws.com/photos.rentlinx.com/...,,,,,[]


In [172]:
def clean_price(price):
    price = price.replace(',', '').replace('Person', '').replace('$', '').replace('/', '').strip()
    if price == 'Leased':
        return np.nan
    elif re.search('[Cc]all', price) != None:
        return np.nan
    elif len(price) < 7:
        return float(price)
    elif re.search('-', price) != None:
        # mean recorded if price range given
        var = lambda x: (float(x[:x.find('-')].replace(',', '')) + float(x[x.find('-') + 1:].replace(',', ''))) / 2 
        return int(var(price))
    elif re.search('to', price) != None:
        # mean recorded if price range given
        var = lambda x: (float(x[:x.find('to')].replace(',', '')) + float(x[x.find('to') + 2:].replace(',', ''))) / 2 
        return float(var(price))

final_rows['price'] = final_rows['price'].apply(clean_price)

In [173]:
def clean_bath(room):
    room = room[:2]
    if '½' in room:
        room = float(room[0] + '.5')
    else:
        room = float(room)
    return room

final_rows['bath'] = final_rows['bath'].apply(clean_bath)

In [174]:
def clean_area(room):
    room = room[:-7]
    if '-' not in room:
        if len(room) < 7:
            return float(room.replace(',', ''))
        else:
            # mean recorded if price range given
            var = lambda x: (int(x[:x.find('-')].replace(',', '')) + int(x[x.find('-') + 1:].replace(',', ''))) / 2 
            return float(var(room))   
    else:
        return np.nan

final_rows['area'] = final_rows['area'].apply(clean_area)

In [175]:
def clean_property(string):
    if string == '':
        string = None
    elif string == '-':
        string = None
    elif re.search(',', string) != None:
        string = string.split(',')[0].strip().lower()
    elif re.search('\(', string) != None:
        string = string.split('(')[0].strip().lower()
    
    if 'apartments' in string:
        string = string[:-1]
    return string

final_rows['property_type'] = final_rows['property_type'].apply(clean_property)

In [176]:
final_rows.loc[final_rows['price'] == 'Leased'].price = np.nan
final_rows['year_built'] = final_rows['year_built'].astype(int)
final_rows.loc[final_rows['year_built'] == 0].year_built = np.nan

  res_values = method(rvalues)


In [180]:
final_rows = final_rows[col]
final_rows

Unnamed: 0,address,price,bed,bath,area,company,neighborhood,laundry,pets,parking,utilities,property_type,year_built,description,images
0,"3324 Bluett Rd, Ann Arbor, MI 48105",2750.0,4,2.5,2449.0,Investor's Property Management,,,,,[],house,1963,"IMMEDIATE OCCUPANCY: Wonderful two story, unf...",[]
1,"1904 Geddes Ave, Ann Arbor, MI 48104",4050.0,4,3.0,1552.0,Investor's Property Management,,,,,[],condo,2009,AUGUST LEASE: Stunning new duplex-style home b...,[]
2,"916 S Main St, Ann Arbor, MI 48104",4350.0,4,2.0,1553.0,Investor's Property Management,,,,,[],house,0,"AUGUST LEASE: TIERED PRICING $4,200/4 or 4,50...",[]
3,"802 Arch St, Ann Arbor, MI 48104",3300.0,3,1.0,1039.0,Investor's Property Management,,,,,[],house,1900,"SEPTEMBER LEASE: Charming 3 bedroom, 1 bath, ...",[]
4,"708 Dewey Ave, Ann Arbor, MI 48104",3750.0,5,2.0,1320.0,Investor's Property Management,,,,,[],house,1915,"Charming 5 bedroom, 2 full bath, 3 story furni...",[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2067,"825 Packard St, Ann Arbor, MI 48104",,7,2.5,,PTP Management,,,,,[],house,0,Location Location Location. Fantastic 7 bedroo...,[]
2068,"821 Packard St, Ann Arbor, MI 48104",,8,4.0,,PTP Management,,,,,[],house,0,821 Packard Salacious and Huge 8 bedroom house...,[]
2069,"522 Monroe St, Ann Arbor, MI 48104",,5,5.5,,PTP Management,,,,,[],apartment,1999,"LOCATION, LOCATION, LOCATION! The University o...",[]
2070,"522 Monroe St, Ann Arbor, MI 48104",,5,5.5,,PTP Management,,,,,[],apartment,1999,"LOCATION, LOCATION, LOCATION! The University o...",[]


In [181]:
final_rows.to_json(r'../data/showmetherent.json')