In [1]:
from sklearn.pipeline import make_pipeline

In [None]:
make_pipeline()

In [3]:
import pandas as pd
import numpy as np

# view all columns
pd.set_option("display.max.columns", None)

# load the data
df = pd.read_csv('../Datasets/clean_housing.csv')

# which state
print('Indicate which state: \n', df['state'].unique())
user_state = input()
print(' ')
state_df = df[df['state'] == user_state]

# drop uneeded columns and fill missing values with 'unknown'
drop_columns = ['lat', 'long', 'state']
state_df = state_df.drop(drop_columns, axis=1).fillna('unknown')

# which region
print('Indicate which region: \n', state_df['region'].unique())
user_region = input()
print(' ')
dummy_df = pd.get_dummies(state_df)
user_df = pd.DataFrame(data=[np.zeros(len(dummy_df.columns))], columns=dummy_df.columns)
user_df['region_'+user_region.lower()] = 1

# type
print('Indicate which type: \n', df['type'].unique())
type_ = input()
print(' ')
user_df['type_'+type_.lower()] = 1

# square feet
print('How many square feet:')
sq_feet = input()
print(' ')
user_df['sqfeet'] = sq_feet

# baths
print('How many bathrooms:')
baths = input()
print(' ')
user_df['baths'] = baths

# bedrooms
print('How many bedrooms:')
beds = input()
print(' ')
user_df['beds'] = beds

# dogs
print('Are dogs allowed:')
dogs = input()
print(' ')
if dogs.lower() == 'yes':
    user_df['dogs_allowed'] = 1
else:
    pass

# cats
print('Are cats allowed:')
cats = input()
print(' ')
if cats.lower() == 'yes':
    user_df['cats_allowed'] = 1
else:
    pass

# smoking
print('Is smoking allowed:')
smoking = input()
print(' ')
if smoking.lower() == 'yes':
    user_df['smoking_allowed'] = 1
else:
    pass

# wheel chair accessible
print('Is wheel chair accessible:')
wheel_chair = input()
print(' ')
if wheel_chair.lower() == 'yes':
    user_df['wheelchair_access'] = 1
else:
    pass

# electric vehicle charger
print('Is there an electric vehicle charger:')
electric_vehicle = input()
print(' ')
if electric_vehicle.lower() == 'yes':
    user_df['electric_vehicle_charge'] = 1
else:
    pass

# furnished
print('Does it come furnished:')
furnished = input()
print(' ')
if furnished.lower() == 'yes':
    user_df['comes_furnished'] = 1
else:
    pass

# laundry options
print('Indicate the laundry situation: \n', state_df['laundry_options'].unique())
laundry = input()
print(' ')
user_df['laundry_options_'+laundry.lower()] = 1

# parking options
print('Indicate the parking situation: \n', state_df['parking_options'].unique())
parking = input()
print(' ')
user_df['parking_options_'+parking.lower()] = 1


### model building ###
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

X = dummy_df.drop('price', axis=1)
y = dummy_df['price']

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# instantiate the model
lr_model = LinearRegression()

# fit the data
lr_model.fit(X_train, y_train) # fit the model to the training data.


### make preidctions ###

# predict the home value
user_prediction = lr_model.predict(user_df.drop('price', axis=1))

# display the prediction
print('The cost of this property is estimated to be ${} a month'.format(round(user_prediction[0])))

Indicate which state: 
 ['CA' 'CO' 'CT' 'DC' 'FL' 'DE' 'GA' 'HI' 'ID' 'IL' 'IN' 'IA' 'KS' 'KY'
 'LA' 'ME' 'MI' 'MD' 'MA' 'MN' 'MS' 'NC' 'MO' 'MT' 'NE' 'NV' 'NJ' 'NM'
 'NY' 'NH' 'OH' 'ND' 'OK' 'OR' 'PA' 'RI' 'SC' 'TN' 'SD' 'TX' 'UT' 'VA'
 'VT' 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' 'AK' 'AR']
IL
 
Indicate which region: 
 ['bloomington-normal' 'champaign urbana' 'chicago' 'decatur' 'la salle co'
 'quad cities, IA/IL' 'st louis, MO' 'peoria' 'springfield' 'rockford'
 'southern illinois' 'mattoon-charleston' 'western IL']
chicago
 
Indicate which type: 
 ['apartment' 'condo' 'house' 'duplex' 'townhouse' 'loft' 'manufactured'
 'cottage/cabin' 'flat' 'in-law' 'land']
apartment
 
How many square feet:
1200
 
How many bathrooms:
2
 
How many bedrooms:
3
 
Are dogs allowed:
yes
 
Are cats allowed:
yes
 
Is smoking allowed:
yes
 
Is wheel chair accessible:
yes
 
Is there an electric vehicle charger:
yes
 
Does it come furnished:
yes
 
Indicate the laundry situation: 
 ['w/d hookups' 'laundry on site' 'u



In [5]:
user_df

Unnamed: 0,price,sqfeet,beds,baths,cats_allowed,dogs_allowed,smoking_allowed,wheelchair_access,electric_vehicle_charge,comes_furnished,region_SF bay area,region_bakersfield,region_chico,region_fresno / madera,region_gold country,region_hanford-corcoran,region_humboldt county,region_imperial county,region_inland empire,region_los angeles,region_mendocino county,region_merced,region_modesto,region_monterey bay,region_orange county,region_palm springs,region_redding,region_reno / tahoe,region_sacramento,region_san diego,region_san luis obispo,region_santa barbara,region_santa maria,region_siskiyou county,region_stockton,region_susanville,region_ventura county,region_visalia-tulare,region_yuba-sutter,type_apartment,type_condo,type_cottage/cabin,type_duplex,type_flat,type_house,type_in-law,type_loft,type_manufactured,type_townhouse,laundry_options_laundry in bldg,laundry_options_laundry on site,laundry_options_no laundry on site,laundry_options_unknown,laundry_options_w/d hookups,laundry_options_w/d in unit,parking_options_attached garage,parking_options_carport,parking_options_detached garage,parking_options_no parking,parking_options_off-street parking,parking_options_street parking,parking_options_unknown,parking_options_valet parking,type_flats
0,0.0,1200,3,3,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.0,0.0,1


In [None]:
cross_val_score()

In [4]:
lr_model.score(X_test, y_test)

0.6187897961725464

In [10]:
y_test

383877    3500
384091     950
11994     2195
382981    1340
369240    1185
          ... 
382076    1345
379167    3295
377933    1165
381228    1875
380387    1495
Name: price, Length: 6617, dtype: int64

In [11]:
predictions

array([ 2866.87629813, -9052.20178703, 14701.77656562, ...,
         422.32911636,  -158.25337409, -2493.53354293])