In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import ast
# Show every column when a DataFrame is displayed (no column truncation)
pd.options.display.max_columns = None

In [2]:
# Load the cleaned CSVs; in each file the first column is used as the row index
buys, rents = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('clean_buys.csv', 'clean_rents.csv')
)

In [3]:
# Preview the first rows of the buys table to sanity-check the load
buys.head()

Unnamed: 0,url,street_address,city,state,zipcode,neighborhood,beds,baths,square_footage,price,listing_type,description,details,home_type,lot_size,year_built,num_rooms,heating,heating_fuel,air_con,microwave,dishwasher,disposal,washer,dryer,fireplace,vaulted_ceiling,double_pane,garage,num_parking,security,num_stories,foundation_type,patio,porch,deck,pool,Dining Room,Family Room,Laundry Room,Walk In Closet,Pantry,Breakfast Nook,Office,Workshop,Library,Recreation Room,Master Bedroom,Living Room,Loft,Central,Evaporative,Wall,None,Solar,Refrigeration,Other,Carpet,Hardwood,Laminate,Tile,Linoleum Vinyl,Concrete,Slate,Detached Garage,Attached Garage,Carport,Off Street,On Street,Garage Attached,Brick,Wood,Stucco,Stone,Cement,Vinyl,Metal,Shingle,Composition,Products
0,https://www.trulia.com/p/ca/sacramento/3141-ye...,3141 Yellowstone Ln,Sacramento,CA,95821,,4.0,2.0,1689.0,390000.0,BUY,"Beautifully upgrade, large 4 bedroom, 2 full b...","['Single Family Home', '$231/sqft', 'Lot Size:...",Single Family Home,11325.6,1946.0,8.0,Forced Air,Electric,Air Conditioning,,,,Washer,Dryer,Fireplace,,Double Paned Windows,Garage,1.0,Security System,1.0,,,,,,Dining Room,,,,,,,,,,,,,Central,,,,,,,Carpet,Hardwood,,,,,,Detached Garage,,,,,,Brick,Wood,,,,,,,,
1,https://www.trulia.com/p/ca/sacramento/7748-fi...,7748 Finnhorse Way,Sacramento,CA,95828,,5.0,3.0,3020.0,529900.0,BUY,"Fabulous 5 bedroom, 3 bathroom, two-story prop...","['Single Family Home', '$175/sqft', 'Lot Size:...",Single Family Home,6199.0,2016.0,9.0,Forced Air,Gas,Air Conditioning,,Dishwasher,,,,Fireplace,,Double Paned Windows,Garage,3.0,,2.0,,,,,,Dining Room,,,,,,,,,,,,,Central,,,,,,,Carpet,,Laminate,Tile,,,,,Attached Garage,,,,,,Wood,,,,,,,,
2,https://www.trulia.com/p/ca/rancho-cordova/224...,2246 Palmwood Ct,Rancho Cordova,CA,95670,Mills Ranch,4.0,2.0,1420.0,368900.0,BUY,"Beautifully updated 4 bedroom, 2 bath single s...","['Single Family Home', '$260/sqft', 'Lot Size:...",Single Family Home,4729.0,1983.0,7.0,Other,,Air Conditioning,Microwave,Dishwasher,Disposal,,,Fireplace,Vaulted Ceiling,Double Paned Windows,Garage,2.0,Security System,1.0,Slab,,,,,,Family Room,,,,,,,,,,,,Central,,,,,,,,,,Tile,,,,,Attached Garage,,,,,,Wood,,,,,,,,
3,https://www.trulia.com/p/ca/sacramento/5741-re...,5741 Rexleigh Dr,Sacramento,CA,95823,Valley High-North Laguna,4.0,3.0,2918.0,470000.0,BUY,"Beautiful 4 bedroom, 3 bathroom, 2 story locat...","['Single Family Home', '$161/sqft', 'Lot Size:...",Single Family Home,7439.0,1998.0,9.0,Forced Air,Gas,Air Conditioning,Microwave,Dishwasher,Disposal,,,Fireplace,,Double Paned Windows,Garage,2.0,,2.0,Slab,,,,,Dining Room,Family Room,Laundry Room,,,,,,,,,,,Central,,,,,,,Carpet,,Laminate,Tile,,,,,Attached Garage,,,,,,,,,,,,,,
4,https://www.trulia.com/p/ca/sacramento/6101-go...,6101 Golden Dawn Way,Sacramento,CA,95841,Foothill Farms,4.0,3.0,1903.0,375000.0,BUY,This home has an attached in-laws unit with it...,"['Single Family Home', '$197/sqft', 'Lot Size:...",Single Family Home,6970.0,1955.0,7.0,Forced Air,Electric,Air Conditioning,,Dishwasher,,,,Fireplace,,Double Paned Windows,Garage,2.0,,1.0,,,,,,Dining Room,,,,,,,,,,,,,Central,,,,,,,Carpet,,,,Linoleum Vinyl,,,,Attached Garage,,,,,Brick,Wood,,,,,,,,


In [None]:
# Preview the first rows of the rents table to sanity-check the load
rents.head()

In [None]:
# List the column names available in the buys table
buys.columns

In [17]:
# Split the buys table into groups of columns that are handled differently:
#   - non-numeric columns, which need to be encoded
#   - numeric columns
#   - zipcode, kept separate because get_dummies interprets it by default as numeric
#   - price
buys_non_numeric_cols = [
    'city', 'neighborhood', 'home_type', 'heating', 'heating_fuel',
    'air_con', 'microwave', 'dishwasher', 'disposal', 'washer', 'dryer',
    'fireplace', 'vaulted_ceiling', 'double_pane', 'garage', 'security',
    'foundation_type', 'patio', 'porch', 'deck', 'pool',
    'Dining Room', 'Family Room', 'Laundry Room', 'Walk In Closet', 'Pantry',
    'Breakfast Nook', 'Office', 'Workshop', 'Library', 'Recreation Room',
    'Master Bedroom', 'Living Room', 'Loft',
    'Central', 'Evaporative', 'Wall', 'None', 'Solar', 'Refrigeration', 'Other',
    'Carpet', 'Hardwood', 'Laminate', 'Tile', 'Linoleum Vinyl', 'Concrete', 'Slate',
    'Detached Garage', 'Attached Garage', 'Carport', 'Off Street', 'On Street',
    'Garage Attached',
    'Brick', 'Wood', 'Stucco', 'Stone', 'Cement', 'Vinyl', 'Metal', 'Shingle',
    'Composition', 'Products',
]
buys_numeric_cols = [
    'beds', 'baths', 'square_footage', 'lot_size',
    'year_built', 'num_rooms', 'num_parking', 'num_stories',
]

X_buys_le = buys[buys_non_numeric_cols]
X_buys_num = buys[buys_numeric_cols]
X_buys_zip = buys[['zipcode']]
y_buys = buys[['price']]

In [20]:
# One Hot Encode non-numeric data with pd.get_dummies.
# The zipcode dummies are built from the raw `buys` column instead of from
# X_buys_zip itself: re-running this cell on its own output would cast the
# already-encoded indicator columns to str and expand them a second time.
# Casting to str is what makes get_dummies treat zipcode as categorical.
X_buys_zip = pd.get_dummies(buys[['zipcode']].astype(str))
# Safe to re-run: with default arguments get_dummies only converts
# object/string/category columns and leaves already-encoded columns untouched.
X_buys_le = pd.get_dummies(X_buys_le)
X_buys_le.shape

(2418, 236)

In [23]:
# Stitch the encoded, zipcode, numeric and price pieces back together side by side
buys_parts = [X_buys_le, X_buys_zip, X_buys_num, y_buys]
buys_encoded = pd.concat(buys_parts, axis='columns')
buys_encoded.shape

(2418, 293)

In [None]:
# List the column names available in the rents table
rents.columns

In [24]:
# Same split as for buys, applied to the rents table:
# non-numeric columns (to be encoded), numeric columns, zipcode, and price
rents_non_numeric_cols = [
    'city', 'neighborhood', 'home_type',
    'smoking', 'living_room', 'dishwasher', 'microwave', 'refrigerator',
    'on_site_maint', 'on_site_mng', 'laundry', 'air_con', 'disposal',
    'dryer', 'patio', 'pool', 'balcony', 'washer', 'basketball',
    'ceiling_fan', 'fireplace', 'fitness', 'playground', 'floor_types',
    'bbq', 'vaulted_ceiling',
    'Carport', 'On Street', 'Off Street', 'Garage Detached', 'Garage Attached',
    'No pets allowed', 'Cats allowed', 'Small dogs allowed', 'large dogs allowed',
    'Sewage', 'Garbage', 'Water', 'Hot Water', 'Internet', 'Cable',
    'Electricity', 'Gas', 'Heat', 'Phone',
]
rents_numeric_cols = ['beds', 'baths', 'square_footage', 'year_built', 'deposit']

X_rents_le = rents[rents_non_numeric_cols]
X_rents_num = rents[rents_numeric_cols]
X_rents_zip = rents[['zipcode']]
y_rents = rents[['price']]

In [25]:
# One-hot encode the rents features with pd.get_dummies.
# The zipcode dummies are built from the raw `rents` column instead of from
# X_rents_zip itself: re-running this cell on its own output would cast the
# already-encoded indicator columns to str and expand them a second time.
# Casting to str is what makes get_dummies treat zipcode as categorical.
X_rents_zip = pd.get_dummies(rents[['zipcode']].astype(str))
# Safe to re-run: with default arguments get_dummies only converts
# object/string/category columns and leaves already-encoded columns untouched.
X_rents_le = pd.get_dummies(X_rents_le)
X_rents_le.shape

(2222, 190)

In [28]:
# Stitch the encoded, zipcode, numeric and price pieces back together side by side
rents_parts = [X_rents_le, X_rents_zip, X_rents_num, y_rents]
rents_encoded = pd.concat(rents_parts, axis='columns')
rents_encoded.shape

(2222, 239)

In [30]:
# Write both encoded tables to CSV (row index included), then confirm completion
for encoded_frame, out_path in (
    (buys_encoded, 'buys_encoded.csv'),
    (rents_encoded, 'rents_encoded.csv'),
):
    encoded_frame.to_csv(out_path)
print('Export Complete')

Export Complete
