In [51]:
import numpy as np
import pandas as pd
import helper

In [52]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.style as style
style.use('fivethirtyeight')

In [53]:
colors = ["#FF0B04", "#F1BE48",
           "#B9975B", "#8B5B29",
           "#524727",
         ]
sns.set_palette(sns.color_palette(colors))

In [54]:
latlong = pd.read_csv('ames_housing_latlong.csv',index_col=0,low_memory = False)

In [55]:
housing = pd.read_csv('Ames_Housing_Price_Data.csv', index_col=0,low_memory = False)

train, test = helper.data_processing_wrapper(housing, remove_PID=False, 
                                               num_to_cat_list = ['MSSubClass'])
cat_feats = train.select_dtypes(['object','bool']).columns.to_list()
num_cols = train.select_dtypes(['float64','int64']).columns.to_list()
num_cols.remove('SalePrice')


In [56]:
# def lasso_model_score(alpha, train_, test_, target, 
#                              categorical_features,
#                              drop_cols = ['SalePrice', 'TotalBsmtSF']):
    
    

In [57]:
alpha = 0.002
train_score, test_score, feature_list = helper.lasso_model_score(alpha, train, test, 'SalePrice', cat_feats)

In [58]:
train.columns

Index(['PID', 'GrLivArea', 'SalePrice', 'MSSubClass', 'MSZoning',
       'LotFrontage', 'LotArea', 'Street', 'Alley', 'LotShape', 'LandContour',
       'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1',
       'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st',
       'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond',
       'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
       'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
       'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF',
       '2ndFlrSF', 'LowQualFinSF', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond

In [59]:
latlong.columns

Index(['PID', 'GrLivArea', 'SalePrice', 'MSSubClass', 'MSZoning',
       'LotFrontage', 'LotArea', 'Street', 'Alley', 'LotShape', 'LandContour',
       'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1',
       'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st',
       'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond',
       'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
       'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
       'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF',
       '2ndFlrSF', 'LowQualFinSF', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond

In [60]:
colnames = ['PID', 'Prop_Addr', 'latitude', 'longitude']
latlong = latlong[colnames]

In [61]:
latlong.head()

Unnamed: 0,PID,Prop_Addr,latitude,longitude
0,909176150,436 HAYWARD AVE,42.018564,-93.651619
1,905476230,3416 WEST ST,42.024855,-93.663671
2,911128020,320 S 2ND ST,42.021548,-93.614068
3,535377150,1524 DOUGLAS AVE,42.037391,-93.612207
4,534177230,2304 FILLMORE AVE,42.044554,-93.631818


In [62]:
combined_train = train.merge(latlong, how = 'left', left_on = 'PID', right_on = 'PID')
combined_test = test.merge(latlong, how = 'left', left_on = 'PID', right_on = 'PID')

In [63]:
## Identify missing values in lat long 
print(combined_train.latitude.isna().sum())
print(combined_train.longitude.isna().sum())
print(combined_test.latitude.isna().sum())
print(combined_test.longitude.isna().sum())

15
15
4
4


In [64]:
combined_train, latlong_map = helper.geo_cords_imputing(combined_train)
combined_test = helper.geo_cords_imputing(combined_test, latlong_map)
combined_train.dropna(subset=['latitude', 'longitude'], inplace=True)

In [65]:
# Check missing values
## Identify missing values in lat long 
print(combined_train.latitude.isna().sum())
print(combined_train.longitude.isna().sum())
print(combined_test.latitude.isna().sum())
print(combined_test.longitude.isna().sum())

0
0
0
0


In [66]:
combined_train
combined_test 

Unnamed: 0,PID,GrLivArea,SalePrice,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,...,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,Prop_Addr,latitude,longitude
0,903401020,1820,184000,50,RL,60.000000,9120,Pave,2,Reg,...,GdPrv,,0,6,2008,WD,Normal,711 RIDGEWOOD AVE,42.028311,-93.625587
1,909100080,1296,104000,30,RL,67.000000,4853,Pave,0,Reg,...,MnPrv,,0,5,2010,WD,Normal,2816 LINCOLN WAY,42.022668,-93.655936
2,531385020,1434,189000,20,RL,65.000000,8529,Pave,0,IR1,...,,,0,4,2009,WD,Normal,1516 NEBRASKA AVE,42.036989,-93.688774
3,907130110,1499,187000,60,RL,65.000000,12438,Pave,0,IR1,...,,,0,8,2006,WD,Normal,5228 SCHUBERT ST,42.021197,-93.688436
4,924152030,1797,231000,60,RL,74.000000,12961,Pave,0,Reg,...,,,0,3,2010,WD,Normal,3715 JEWEL DR,41.989203,-93.600184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,916226090,1501,244000,120,RL,46.000000,4054,Pave,0,IR1,...,,,0,10,2007,WD,Normal,2106 IRONWOOD CT,42.005322,-93.639727
621,906475110,2087,187500,60,RL,78.868989,12205,Pave,0,IR1,...,,,0,7,2007,WD,Normal,215 PARKRIDGE CIR,42.024523,-93.682255
622,923227030,1160,152500,20,RL,95.723848,17979,Pave,0,IR1,...,GdWo,Shed,500,2,2008,WD,Normal,3207 JEWEL CIR,41.993312,-93.601386
623,907253060,1865,235000,60,RL,72.509189,10316,Pave,0,IR1,...,,,0,6,2008,WD,Normal,4916 HEMINGWAY DR,42.018543,-93.686200


## Latlong

In [110]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [127]:
parks = pd.read_csv('parks.csv',index_col=0,low_memory = False)

In [128]:
for i in range(parks.shape[0]):
    try:
        parks['parkAddress'][i] = parks['parkAddress'][i] + ', Ames, IA, USA'
        print(parks['parkAddress'][i])
    except:
        print(parks['parkAddress'][i])

5205 Grand Ames, Ames, IA, USA
605 Billy Sunday Rd, Ames, IA, USA
125 E 5th St, Ames, IA, USA
3050 Northridge Pkwy, Ames, IA, USA
1325 6th st, Ames, IA, USA
3400 Ross Rd, Ames, IA, USA
725 E 13th St., Ames, IA, USA
4320 Dawes Dr., Ames, IA, USA
2130 oakwood rd, Ames, IA, USA
400 abraham dr, Ames, IA, USA
500 crystal st, Ames, IA, USA
340 Wilder Blvd, Ames, IA, USA
1323 duff ave , Ames, IA, USA
147 South Franklin Ave, Ames, IA, USA
1635 13th St, Ames, IA, USA
Gateway Hills Park Dr., Ames, IA, USA
1120 South 16th St., Ames, IA, USA
401 E 20th st, Ames, IA, USA
4517 hutchison st, Ames, IA, USA
2500 duff ave, Ames, IA, USA
bloomington rd & hyde , Ames, IA, USA
1101 beach ave, Ames, IA, USA
2901 harrison, Ames, IA, USA
826 douglas ave, Ames, IA, USA
300 S Maple Ave, Ames, IA, USA
2501 pierce, Ames, IA, USA
1515 idaho, Ames, IA, USA
9th street, Ames, IA, USA
1501 S 4th St, Ames, IA, USA
3501 Emerald Dr, Ames, IA, USA
340 main St, Ames, IA, USA
515 clark ave, Ames, IA, USA
1925 ames high driv

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  parks['parkAddress'][i] = parks['parkAddress'][i] + ', Ames, IA, USA'


In [131]:
# # RUN THIS TO ADD LAT/LONG TO PARKS FILE
# IT IS SAVED AS PARKS_LATLONG IN FOLDER

# for i in range(parks.shape[0]):
#     try:
#         address = parks['parkAddress'][i]
#         geolocator = Nominatim(user_agent="ames_location")
#         geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
#         loc = geolocator.geocode(address)
#         parks['latitude'] = loc.latitude
#         parks['longitude'] = loc.longitude
#         print(loc.latitude, loc.longitude)
#     except:
#         print(f'{address} didnt work')
# parks.to_csv('parks_latlong.csv')

In [132]:
parks.head(15)

Unnamed: 0_level_0,parkAddress,parkzip,parkNatureArea,parkBikePath,parkPicnicArea,parkWalkingPath,parkGrills,parkShelter,parkParking,parkPlayground,parkGreenSpace,parkRec,latitude,longitude
parkName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Ada Hayden,"5205 Grand Ames, Ames, IA, USA",50010,1,1,1,1,1,1,1,0,0,0,42.025264,-93.670401
Ames Dog Park,"605 Billy Sunday Rd, Ames, IA, USA",50010,0,0,0,1,0,0,1,0,0,0,42.025264,-93.670401
band shell park,"125 E 5th St, Ames, IA, USA",50010,0,0,1,0,0,0,0,1,0,0,42.025264,-93.670401
Moore Memorial,"3050 Northridge Pkwy, Ames, IA, USA",50010,1,1,1,1,1,1,1,1,0,1,42.025264,-93.670401
brookside park,"1325 6th st, Ames, IA, USA",50010,0,0,1,1,1,1,1,1,0,1,42.025264,-93.670401
emma mccarthy lee park,"3400 Ross Rd, Ames, IA, USA",50010,0,0,1,0,1,1,1,1,1,1,42.025264,-93.670401
river valley park,"725 E 13th St., Ames, IA, USA",50010,1,0,1,1,1,1,1,1,1,1,42.025264,-93.670401
charles & June Calhoun Park,"4320 Dawes Dr., Ames, IA, USA",50010,0,0,1,0,0,1,1,0,0,0,42.025264,-93.670401
christofferson park,"2130 oakwood rd, Ames, IA, USA",50010,0,0,0,1,0,1,1,1,1,0,42.025264,-93.670401
christopher gartner park,"400 abraham dr, Ames, IA, USA",50014,0,1,0,0,0,0,0,1,0,0,42.025264,-93.670401
