In [552]:
import numpy as np
import pandas as pd
import re
import json

In [553]:
# Load the raw scraped listings and drop the two fee columns in one chain.
df = pd.read_json('../collection/raw_data/apartments.json').drop(
    columns=['recurring_fees', 'one_time_fees']
)

# Column order of the exported table.
col = [
    'address', 'price', 'bed', 'bath', 'area',
    'company', 'neighborhood', 'laundry', 'pets', 'parking',
    'utilities', 'property_type', 'year_built', 'description', 'images',
]

# Peek at a single raw listing.
df.iloc[1]

address                                         1777 Plymouth Rd
company                                                     None
price                                                   [$1,625]
bed                                                       [1 BR]
bath                                                      [1 BA]
area                                                 [730 Sq Ft]
description    Immediate occupancy! Unique opportunity to liv...
rating                                                       NaN
amenities                                                     []
Name: 1, dtype: object

In [554]:
# Expand every listing into one record per advertised unit. In the raw frame
# price/bed/bath/area hold lists (one entry per unit), while the remaining
# fields are shared by all units of the listing.
# NOTE(review): assumes the four lists are aligned index-by-index and that
# bed/bath/area are at least as long as price -- confirm against the scraper.
final_rows = []
for row in df.itertuples():
    # Fields are read by column name instead of tuple position so the loop
    # does not silently pick the wrong column if the file's column order changes.
    for j in range(len(row.price)):
        final_rows.append({
            'address': row.address,
            'company': row.company,
            'price': row.price[j],
            'bed': row.bed[j],
            'bath': row.bath[j],
            'area': row.area[j],
            'description': row.description,
            'rating': row.rating,
            'amenities': row.amenities,
        })

In [555]:
# Turn the list of per-unit records into a DataFrame (one row per unit) so the
# individual columns can be cleaned in the cells that follow.
final_rows = pd.DataFrame(final_rows)

In [556]:
# Preview the first 50 expanded rows before cleaning.
final_rows.head(50)

Unnamed: 0,address,company,price,bed,bath,area,description,rating,amenities
0,2709 N Spurway Dr,,"$3,300",3 BRs,2½ BAs,"2,438 Sq Ft",BRAND NEW! This North Oaks luxury townhome on ...,,[{'Unique Features': ['NewConstruction']}]
1,1777 Plymouth Rd,,"$1,625",1 BR,1 BA,730 Sq Ft,Immediate occupancy! Unique opportunity to liv...,,[]
2,908 Sybil St,,"$1,625",2 BRs,1 BA,,(734) 680-8673 - This is a great building loca...,,"[{'Unique Features': ['No Pets Allowed, Applia..."
3,333 Packard St,,"$1,075",1 BR,1 BA,375 Sq Ft,(734) 996-1991 - Four 1 Bedroom Apartment in b...,,[{'Unique Features': ['Cats Allowed - $100 Non...
4,513 Krause St,,"$1,900",1 BR,1 BA,900 Sq Ft,(734) 255-1299 - PRICE LOWERED! Old West Side ...,,[{'Unique Features': ['Lease Lengths - Contact...
5,555 E William St Unit 10E,,"$1,500",Studio,1 BA,400 Sq Ft,"Renovated 10th floor furnished highrise condo,...",,"[{'Pet Policy': []}, {'Parking': []}, {'Proper..."
6,2918 Signature Blvd,,"$1,900",2 BRs,2 BAs,"1,400 Sq Ft","** FOR RENT ** Location, Location, Location! 2...",,[]
7,3447 Cummings Dr,,$700,1 BR,1 BA,,(734) 709-5709 - Furnished bedrooms available ...,,"[{'Pet Policy': []}, {'Property Information': ..."
8,City Place Ann Arbor,Campus Village Communities,$920\r\n\t\t\t\t/ Person,6 BRs,5 BAs,"2,267 Sq Ft",City Place Ann Arbor offers 6 bedroom townhome...,5.0,"[{'Unique Features': ['16' Vaulted Ceiling', '..."
9,City Place Ann Arbor,Campus Village Communities,$970\r\n\t\t\t\t/ Person,6 BRs,5 BAs,"2,267 Sq Ft",City Place Ann Arbor offers 6 bedroom townhome...,5.0,"[{'Unique Features': ['16' Vaulted Ceiling', '..."


In [557]:
def clean_bed(bed):
    """Convert a scraped bedroom label into a numeric bedroom count.

    Parameters
    ----------
    bed : str
        Label such as ``'1 BR'``, ``'3 BRs'`` or ``'Studio'``.

    Returns
    -------
    int or float
        ``0`` for a studio, otherwise the integer the label starts with, or
        ``np.nan`` when the label does not start with a number.
    """
    if 'Studio' in bed:
        return 0
    # Parse the leading digits rather than slicing off a fixed-width suffix:
    # the suffix is ' BR' for one bedroom but ' BRs' for several, so a
    # three-character slice left a trailing space ('3 ') and produced strings
    # next to the integer 0 used for studios.
    count = re.match(r'\s*(\d+)', bed)
    return int(count.group(1)) if count else np.nan

# Replace the raw bedroom labels with their cleaned values, element by element.
final_rows['bed'] = final_rows['bed'].map(clean_bed)

In [558]:
def clean_bath(room):
    """Convert a scraped bathroom label into a float bathroom count.

    Parameters
    ----------
    room : str
        Label such as ``'1 BA'``, ``'2 BAs'`` or ``'2½ BAs'``.

    Returns
    -------
    float
        Number of bathrooms; a ``'½'`` in the label adds 0.5.

    Raises
    ------
    ValueError
        If the text left after removing the unit is not a number.
    """
    # Dropping three characters removes ' BA' or 'BAs'; in the plural case a
    # space is left behind, hence the strip.
    count = room[:-3].strip()
    if '½' in count:
        # Use every digit before the fraction sign (not only the first
        # character) so '10½' becomes 10.5, and treat a bare '½' as 0.5.
        whole = count.replace('½', '').strip()
        return (float(whole) if whole else 0.0) + 0.5
    return float(count)

# Replace the raw bathroom labels with their cleaned float counts.
final_rows['bath'] = final_rows['bath'].map(clean_bath)

In [559]:
def clean_area(room):
    """Convert a scraped floor-area label into a float number of square feet.

    Parameters
    ----------
    room : str
        Label such as ``'730 Sq Ft'``, ``'2,438 Sq Ft'``,
        ``'500 - 700 Sq Ft'`` or the empty string when no area was listed.

    Returns
    -------
    float
        The area, the mean of both ends when a range is given, or ``np.nan``
        when the label is empty.

    Raises
    ------
    ValueError
        If the remaining text is neither a number nor a ``low-high`` range.
    """
    # Drop the trailing 'Sq Ft' unit and the thousands separators.
    text = room[:-5].replace(',', '').strip()
    if not text:
        return np.nan
    # Detect a range by its hyphen instead of by string length, so short
    # ranges ('50-70') and large single values ('10,000') are both handled.
    if '-' in text:
        low, high = text.split('-', 1)
        return (float(low) + float(high)) / 2
    return float(text)

# Replace the raw area labels with numeric square footage (NaN when missing).
final_rows['area'] = final_rows['area'].map(clean_area)

In [560]:
def clean_price(price):
    """Convert a scraped rent label into a float amount.

    Parameters
    ----------
    price : str
        Label such as ``'$1,625'``, ``'$920 / Person'`` (possibly containing
        line breaks and tabs), ``'$1,000 - $2,000'`` or ``'Call for Rent'``.

    Returns
    -------
    float
        The rent, the mean of both ends when a range is given, or ``np.nan``
        for an empty label or a "call for rent" placeholder.

    Raises
    ------
    ValueError
        If the remaining text is neither a number nor a ``low-high`` range.
    """
    # Strip currency symbols, thousands separators and the '/ Person' suffix.
    text = (
        price.replace(',', '')
        .replace('Person', '')
        .replace('$', '')
        .replace('/', '')
        .strip()
    )
    if not text or re.search('[Cc]all for [Rr]ent', text) is not None:
        return np.nan
    # Detect a range by its hyphen instead of by string length: the length
    # test mistook any value of seven or more digits for a range and crashed
    # on ranges shorter than seven characters.
    if '-' in text:
        low, high = text.split('-', 1)
        return (float(low) + float(high)) / 2
    return float(text)

# Replace the raw rent labels with numeric prices (NaN where no price is given).
final_rows['price'] = final_rows['price'].map(clean_price)

In [561]:
# Inspect the frame after the bed, bath, area and price columns have been cleaned.
final_rows

Unnamed: 0,address,company,price,bed,bath,area,description,rating,amenities
0,2709 N Spurway Dr,,3300.0,3,2.5,2438.0,BRAND NEW! This North Oaks luxury townhome on ...,,[{'Unique Features': ['NewConstruction']}]
1,1777 Plymouth Rd,,1625.0,1,1.0,730.0,Immediate occupancy! Unique opportunity to liv...,,[]
2,908 Sybil St,,1625.0,2,1.0,,(734) 680-8673 - This is a great building loca...,,"[{'Unique Features': ['No Pets Allowed, Applia..."
3,333 Packard St,,1075.0,1,1.0,375.0,(734) 996-1991 - Four 1 Bedroom Apartment in b...,,[{'Unique Features': ['Cats Allowed - $100 Non...
4,513 Krause St,,1900.0,1,1.0,900.0,(734) 255-1299 - PRICE LOWERED! Old West Side ...,,[{'Unique Features': ['Lease Lengths - Contact...
...,...,...,...,...,...,...,...,...,...
1784,3210 Mc Comb St,,1500.0,3,1.0,988.0,"Very nice three bedroom, one bathroom ranch wi...",,[]
1785,1127 Church St,,2050.0,2,1.0,,(734) 662-8832 - AUGUST: Two wonderful 2 bedr...,,"[{'Unique Features': ['No Pets Allowed, Oven']..."
1786,1901 S Zeeb Rd,,1800.0,4,1.0,2012.0,This historic farmhouse with beautiful views i...,,[]
1787,616 Susan Dr,,2000.0,3,2.0,1008.0,Lovely ranch home available for rent August 15...,,"[{'Pet Policy': []}, {'Features': ['Washer/Dry..."


In [562]:
def _amenity_features(listing_amenities):
    """Derive the pets/laundry/parking flags and the build year of one listing.

    The amenities value is serialised to a JSON string and searched with
    regular expressions.

    Returns
    -------
    tuple
        ``(pets, laundry, parking, year_built)``: the three flags are 0 or 1,
        and ``year_built`` is the matched digit string or ``np.nan``.
    """
    text = json.dumps(listing_amenities)

    # The look-behinds keep 'No Cats Allowed' / 'No Dogs Allowed' from
    # counting as pet-friendly.
    allows_pets = re.search(r'(?<!No )Cats Allowed|(?<!No )Dogs Allowed|considered', text) is not None
    has_laundry = re.search(r'[Ll]aundry|Washer|Dryer', text) is not None
    # NOTE(review): 'parking' is matched case-sensitively, so a capitalised
    # 'Parking' category key on its own does not set the flag -- presumably
    # intentional; confirm.
    has_parking = re.search(r'parking', text) is not None

    # Raw string avoids the invalid '\d' escape; '\d+' (instead of '\d*')
    # requires at least one digit so an empty year is never recorded.
    built = re.search(r'Built in (\d+)', text)
    year = built.group(1) if built is not None else np.nan

    return int(allows_pets), int(has_laundry), int(has_parking), year


# One output list per derived column, filled in row order.
pets, laundry, parking, property_type, year_built = [], [], [], [], []
neighborhood, utilities, images = [], [], []
amenities = final_rows['amenities']
for listing_amenities in amenities.values:
    pet_flag, laundry_flag, parking_flag, built_year = _amenity_features(listing_amenities)
    pets.append(pet_flag)
    laundry.append(laundry_flag)
    parking.append(parking_flag)
    year_built.append(built_year)

    # Placeholder values: every row gets the same constant / empty entry.
    property_type.append('apartment')
    neighborhood.append(None)
    utilities.append([])
    images.append([])

In [563]:
# Attach the derived columns in one step, then keep only the columns listed
# in `col`, in that order.
derived_columns = {
    'pets': pets,
    'laundry': laundry,
    'parking': parking,
    'year_built': year_built,
    'property_type': property_type,
    'neighborhood': neighborhood,
    'utilities': utilities,
    'images': images,
}
final_rows = final_rows.assign(**derived_columns)[col]

In [564]:
# Last look at the reordered frame before it is written to disk.
final_rows

Unnamed: 0,address,price,bed,bath,area,company,neighborhood,laundry,pets,parking,utilities,property_type,year_built,description,images
0,2709 N Spurway Dr,3300.0,3,2.5,2438.0,,,0,0,0,[],apartment,,BRAND NEW! This North Oaks luxury townhome on ...,[]
1,1777 Plymouth Rd,1625.0,1,1.0,730.0,,,0,0,0,[],apartment,,Immediate occupancy! Unique opportunity to liv...,[]
2,908 Sybil St,1625.0,2,1.0,,,,1,0,0,[],apartment,,(734) 680-8673 - This is a great building loca...,[]
3,333 Packard St,1075.0,1,1.0,375.0,,,1,1,0,[],apartment,,(734) 996-1991 - Four 1 Bedroom Apartment in b...,[]
4,513 Krause St,1900.0,1,1.0,900.0,,,1,0,0,[],apartment,,(734) 255-1299 - PRICE LOWERED! Old West Side ...,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1784,3210 Mc Comb St,1500.0,3,1.0,988.0,,,0,0,0,[],apartment,,"Very nice three bedroom, one bathroom ranch wi...",[]
1785,1127 Church St,2050.0,2,1.0,,,,1,0,0,[],apartment,,(734) 662-8832 - AUGUST: Two wonderful 2 bedr...,[]
1786,1901 S Zeeb Rd,1800.0,4,1.0,2012.0,,,0,0,0,[],apartment,,This historic farmhouse with beautiful views i...,[]
1787,616 Susan Dr,2000.0,3,2.0,1008.0,,,1,0,0,[],apartment,,Lovely ranch home available for rent August 15...,[]


In [565]:
# Write the cleaned table to the shared data directory as JSON.
output_path = r'../data/apartments.json'
final_rows.to_json(output_path)