###  In this notebook , model testing is automated in for-loops,
### This notebook is created for optimization of model testing procedure

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
from sklearn import linear_model

In [4]:
## Data loading procedure

In [2]:
X_2house = pd.read_csv('/wgdisk/ho0338/ek79/Tien/X_2house_v0.csv')
X_2house=X_2house.drop(columns=['Unnamed: 0'])
X_2house.index.rename('Order', inplace=True)

In [181]:
y_2house = -1*np.load('Data/Processed/y_2house.npy')

In [4]:
## check on the data dimensions
print('X_input:',X_2house.shape)
print('y_input:',y_2house.shape)

X_input: (4290985, 158)
y_input: (4290985,)


In [355]:
## Select only 2 features from X
tt_att = 'Kitchen Qual'
X_input = X_2house[[tt_att,tt_att+'_2']]

In [356]:
X_input.describe()

Unnamed: 0,Kitchen Qual,Kitchen Qual_2
count,4290985.0,4290985.0
mean,3.531044,3.491482
std,0.6698706,0.6555604
min,1.0,1.0
25%,3.0,3.0
50%,3.0,3.0
75%,4.0,4.0
max,5.0,5.0


In [357]:
## setup train-test split
from sklearn.model_selection import train_test_split
from sklearn import metrics
X_train, X_test, y_train, y_test = train_test_split(X_input, y_2house, test_size=0.5, random_state=12)

In [358]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(2145492, 2)
(2145492,)
(2145493, 2)
(2145493,)


In [359]:
### setup model parameters
#mod_type = ['Lasso','Ridge']
mod_type = ['Lasso','Ridge']
fit_int, norm, copyX, max_itr, tolerance, solv, Precomp, pos, r_stat = False, False, True, None, 0.001, 'auto', False, False, 12
test_alph = [5.0,2.0,1.0,0.1]

In [360]:
# prepare the numpy array to store the model coefficients & intercepts
k = X_train.shape[1]
r_k = len(mod_type)*len(test_alph)
col_names =['model_type','alpha','coef_'+tt_att,'coef_'+tt_att+'_2','intercept','MSE_train','MSE_test','MSE_reduction_ratio']

In [361]:
#  initiate a new dataframe , empty to append
df= pd.DataFrame(columns = col_names)
df.head()

Unnamed: 0,model_type,alpha,coef_Kitchen Qual,coef_Kitchen Qual_2,intercept,MSE_train,MSE_test,MSE_reduction_ratio


In [309]:
pd.set_option('display.max_columns', None)

In [362]:
from sklearn import metrics
for test_type in tqdm(mod_type):
    print('model type: ',test_type)
    for alph in tqdm(test_alph):
        print('alpha is: ', alph)
        if test_type == 'Lasso': 
            model = linear_model.Lasso(alpha=alph, fit_intercept=fit_int, normalize=norm, precompute=Precomp, copy_X=copyX, max_iter=1000, random_state=r_stat, selection='cyclic')
        if test_type == 'Ridge': 
            model = linear_model.Ridge(alpha=alph, fit_intercept=fit_int, normalize=norm, copy_X= copyX, max_iter= max_itr, tol= tolerance, solver= solv, random_state=r_stat)
        model.fit(X_train,y_train)
        y_test_pred = model.predict(X_test)
        y_train_pred = model.predict(X_train)
        MSE_test= metrics.mean_squared_error(y_test, y_test_pred)
        MAE_test= metrics.mean_absolute_error(y_test, y_test_pred)
        MSE_train= metrics.mean_squared_error(y_train, y_train_pred)
        MAE_train= metrics.mean_absolute_error(y_train, y_train_pred)
        MSE_neg_ratio= (MSE_test-MSE_train)/MSE_train*100
        print('MSE score(testing):', MSE_test)
        print('MAE score(testing):', MAE_test)
        print('MSE score(train):', MSE_train)
        print('MAE score(train):', MAE_train)
        print('MSE reduction ratio:', MSE_neg_ratio)
        # save coefficients
        tmp_row=[test_type,alph]+list(model.coef_)+[model.intercept_]+[MSE_train, MSE_test, MSE_neg_ratio]
        df.loc[len(df)]=tmp_row


  0%|          | 0/2 [00:00<?, ?it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A

model type:  Lasso
alpha is:  5.0



 25%|██▌       | 1/4 [00:01<00:04,  1.64s/it][A

MSE score(testing): 6995341398.89
MAE score(testing): 61880.057473
MSE score(train): 6973003862.74
MAE score(train): 61765.2244348
MSE reduction ratio: 0.320343091475
alpha is:  2.0



 50%|█████     | 2/4 [00:03<00:03,  1.70s/it][A

MSE score(testing): 6995340156.34
MAE score(testing): 61880.4855198
MSE score(train): 6973003765.39
MAE score(train): 61765.6529096
MSE reduction ratio: 0.320326672633
alpha is:  1.0



 75%|███████▌  | 3/4 [00:05<00:01,  1.76s/it][A

MSE score(testing): 6995339760.6
MAE score(testing): 61880.6282656
MSE score(train): 6973003751.48
MAE score(train): 61765.7957901
MSE reduction ratio: 0.320321197495
alpha is:  0.1



100%|██████████| 4/4 [00:07<00:00,  1.85s/it][A
 50%|█████     | 1/2 [00:07<00:07,  7.41s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 50%|█████     | 2/4 [00:00<00:00, 10.57it/s]

MSE score(testing): 6995339412.42
MAE score(testing): 61880.7567187
MSE score(train): 6973003746.9
MAE score(train): 61765.9243595
MSE reduction ratio: 0.320316270296
model type:  Ridge
alpha is:  5.0
MSE score(testing): 6995339445.75
MAE score(testing): 61880.744292
MSE score(train): 6973003747.01
MAE score(train): 61765.9119219
MSE reduction ratio: 0.320316746507
alpha is:  2.0
MSE score(testing): 6995339402.77
MAE score(testing): 61880.7603171
MSE score(train): 6973003746.87
MAE score(train): 61765.9279611
MSE reduction ratio: 0.320316132066


[A
100%|██████████| 4/4 [00:00<00:00, 10.53it/s][A
100%|██████████| 2/2 [00:07<00:00,  3.90s/it]

alpha is:  1.0
MSE score(testing): 6995339388.46
MAE score(testing): 61880.7656588
MSE score(train): 6973003746.86
MAE score(train): 61765.9333076
MSE reduction ratio: 0.32031592725
alpha is:  0.1
MSE score(testing): 6995339375.6
MAE score(testing): 61880.7704664
MSE score(train): 6973003746.85
MAE score(train): 61765.9381194
MSE reduction ratio: 0.320315742914





class sklearn.linear_model.Lasso(alpha=1.0, fit_intercept=True, normalize=False, precompute=False, copy_X=True, max_iter=1000, tol=0.0001, warm_start=False, positive=False, random_state=None, selection=’cyclic’)

class sklearn.linear_model.Ridge(alpha=1.0, fit_intercept=True, normalize=False, copy_X=True, max_iter=None, tol=0.001, solver=’auto’, random_state=None)

In [363]:
df

Unnamed: 0,model_type,alpha,coef_Kitchen Qual,coef_Kitchen Qual_2,intercept,MSE_train,MSE_test,MSE_reduction_ratio
0,Lasso,5.0,-80995.327218,81015.287979,0.0,6973004000.0,6995341000.0,0.320343
1,Lasso,2.0,-81002.178332,81022.21886,0.0,6973004000.0,6995340000.0,0.320327
2,Lasso,1.0,-81004.462396,81024.529505,0.0,6973004000.0,6995340000.0,0.320321
3,Lasso,0.1,-81006.517654,81026.608694,0.0,6973004000.0,6995339000.0,0.320316
4,Ridge,5.0,-81006.318914,81026.407498,0.0,6973004000.0,6995339000.0,0.320317
5,Ridge,2.0,-81006.575263,81026.666914,0.0,6973004000.0,6995339000.0,0.320316
6,Ridge,1.0,-81006.660713,81026.753387,0.0,6973004000.0,6995339000.0,0.320316
7,Ridge,0.1,-81006.737618,81026.831212,0.0,6973004000.0,6995339000.0,0.320316


In [369]:
## Experiment on the effect of coefficients
case1=pd.DataFrame(columns = X_input.columns)
case1.loc[0]=[2,5]
case1.head()

Unnamed: 0,Kitchen Qual,Kitchen Qual_2
0,2,5


In [370]:
pred_pric = model.predict(case1)

In [371]:
print(pred_pric)

[ 243120.6808237]


In [372]:
pred_pric_m = model.intercept_+ (model.coef_[0]*case1.iloc[0,0])+(model.coef_[1]*case1.iloc[0,1])

In [373]:
print(pred_pric_m)

243120.680824


In [378]:
###Verify full model coefficients
LR_coef = pd.read_csv('Experiments_result/LR_models_hyperparam_saveout.csv')
LR_coef=LR_coef.drop(columns=['Unnamed: 0'])

In [407]:
LR_coef_f = LR_coef.loc[0]*

In [408]:
LR_coef.head()

Unnamed: 0,model_type,alpha,coef_MS SubClass,coef_MS Zoning,coef_Lot Frontage,coef_Lot Area,coef_Street,coef_Alley,coef_Lot Shape,coef_Land Contour,coef_Utilities,coef_Lot Config,coef_Land Slope,coef_Neighborhood,coef_Condition 1,coef_Condition 2,coef_Bldg Type,coef_House Style,coef_Overall Qual,coef_Overall Cond,coef_Year Built,coef_Year Remod/Add,coef_Roof Style,coef_Roof Matl,coef_Exterior 1st,coef_Exterior 2nd,coef_Mas Vnr Type,coef_Mas Vnr Area,coef_Exter Qual,coef_Exter Cond,coef_Foundation,coef_Bsmt Qual,coef_Bsmt Cond,coef_Bsmt Exposure,coef_BsmtFin Type 1,coef_BsmtFin SF 1,coef_BsmtFin Type 2,coef_BsmtFin SF 2,coef_Bsmt Unf SF,coef_Total Bsmt SF,coef_Heating,coef_Heating QC,coef_Central Air,coef_Electrical,coef_1st Flr SF,coef_2nd Flr SF,coef_Low Qual Fin SF,coef_Gr Liv Area,coef_Bsmt Full Bath,coef_Bsmt Half Bath,coef_Full Bath,coef_Half Bath,coef_Bedroom AbvGr,coef_Kitchen AbvGr,coef_Kitchen Qual,coef_TotRms AbvGrd,coef_Functional,coef_Fireplaces,coef_Fireplace Qu,coef_Garage Type,coef_Garage Yr Blt,coef_Garage Finish,coef_Garage Cars,coef_Garage Area,coef_Garage Qual,coef_Garage Cond,coef_Paved Drive,coef_Wood Deck SF,coef_Open Porch SF,coef_Enclosed Porch,coef_3Ssn Porch,coef_Screen Porch,coef_Pool Area,coef_Pool QC,coef_Fence,coef_Misc Feature,coef_Misc Val,coef_Mo Sold,coef_Yr Sold,coef_Sale Type,coef_Sale Condition,coef_MS SubClass_2,coef_MS Zoning_2,coef_Lot Frontage_2,coef_Lot Area_2,coef_Street_2,coef_Alley_2,coef_Lot Shape_2,coef_Land Contour_2,coef_Utilities_2,coef_Lot Config_2,coef_Land Slope_2,coef_Neighborhood_2,coef_Condition 1_2,coef_Condition 2_2,coef_Bldg Type_2,coef_House Style_2,coef_Overall Qual_2,coef_Overall Cond_2,coef_Year Built_2,coef_Year Remod/Add_2,coef_Roof Style_2,coef_Roof Matl_2,coef_Exterior 1st_2,coef_Exterior 2nd_2,coef_Mas Vnr Type_2,coef_Mas Vnr Area_2,coef_Exter Qual_2,coef_Exter Cond_2,coef_Foundation_2,coef_Bsmt Qual_2,coef_Bsmt Cond_2,coef_Bsmt Exposure_2,coef_BsmtFin Type 1_2,coef_BsmtFin SF 1_2,coef_BsmtFin Type 2_2,coef_BsmtFin SF 2_2,coef_Bsmt Unf SF_2,coef_Total Bsmt SF_2,coef_Heating_2,coef_Heating QC_2,coef_Central Air_2,coef_Electrical_2,coef_1st Flr SF_2,coef_2nd Flr SF_2,coef_Low Qual Fin SF_2,coef_Gr Liv Area_2,coef_Bsmt Full Bath_2,coef_Bsmt Half Bath_2,coef_Full Bath_2,coef_Half Bath_2,coef_Bedroom AbvGr_2,coef_Kitchen AbvGr_2,coef_Kitchen Qual_2,coef_TotRms AbvGrd_2,coef_Functional_2,coef_Fireplaces_2,coef_Fireplace Qu_2,coef_Garage Type_2,coef_Garage Yr Blt_2,coef_Garage Finish_2,coef_Garage Cars_2,coef_Garage Area_2,coef_Garage Qual_2,coef_Garage Cond_2,coef_Paved Drive_2,coef_Wood Deck SF_2,coef_Open Porch SF_2,coef_Enclosed Porch_2,coef_3Ssn Porch_2,coef_Screen Porch_2,coef_Pool Area_2,coef_Pool QC_2,coef_Fence_2,coef_Misc Feature_2,coef_Misc Val_2,coef_Mo Sold_2,coef_Yr Sold_2,coef_Sale Type_2,coef_Sale Condition_2,intercept,MSE_train,MSE_test,MSE_reduction_ratio
0,Lasso,5.0,-129.096329,-3381.508762,33.897681,0.378656,12006.857528,1773.737877,-427.939799,1435.067699,2419.185652,248.571051,-4499.01542,161.171232,-18.985918,3167.925479,-620.618564,-9824.430515,11565.960015,4161.712532,-37.635419,-6.422364,1654.237535,-193.337284,-459.228837,15.47976,4735.840039,43.303614,12656.96071,-1057.105255,4225.384087,9706.655054,-10779.045244,2370.358729,513.666464,29.9773,-74.637165,23.166834,14.979953,-2.175342,-2523.033815,2389.133569,-651.138267,-1208.386859,48.727911,59.085258,31.115817,-8.184609,3326.348761,-3506.0247,5161.737942,3722.501662,-5167.529966,-15591.490263,8980.64487,2872.014362,2049.255462,2545.047516,373.903891,936.849904,-98.764782,2183.022033,9739.363433,11.871922,-0.27789,-2633.112666,1426.19066,13.605374,-9.174842,-1.557694,-19.517561,44.566957,-250.554522,21594.047695,-288.426621,4138.765362,-5.344538,-111.437712,-7.372428,725.096981,-2022.575834,157.666615,1699.348512,-6.084111,-0.261714,-4740.083131,-3292.251646,1322.071808,-1436.230435,-1600.961662,-488.588394,7538.234085,-319.477633,-1095.78804,3876.287975,-258.523584,14519.922763,-12454.519638,-4835.576095,31.013613,2.203978,-3303.130317,-2902.215729,1144.507619,-444.255567,-2344.856688,-30.871403,-10947.969884,-65.61774,-4076.352414,-8664.203657,8222.309001,-2871.09335,-1848.250419,-13.48015,1224.858398,-16.829471,-6.696366,0.509561,2471.59787,-2694.874758,3711.94611,1515.618241,-54.755889,-68.029208,-36.923199,9.619379,-3914.234892,3899.595066,-5575.86587,-4021.882943,2848.298068,10588.232972,-9201.97149,-1254.686259,-2418.433564,-1629.982523,-1517.529337,-665.512671,99.858815,-1088.150523,-6786.334729,-19.510528,-678.37616,2904.725924,-1352.986253,-16.528609,28.759308,-0.563511,-13.555538,-46.324356,102.696685,-12385.630952,-153.353397,-8454.194363,12.033208,35.443705,9.383066,-476.490795,3387.200112,0.0,1716168000.0,1723975000.0,0.45492
1,Lasso,2.0,-128.442279,-3370.127547,34.011989,0.378325,12846.062426,1829.010908,-427.384688,1454.280056,2190.627004,247.558295,-4641.022143,161.458676,-20.631128,3202.199788,-631.592344,-9952.653397,11551.581829,4178.499804,-37.557892,-6.419119,1660.4174,-214.895574,-459.777859,16.939582,4734.598607,43.268879,12655.097434,-1084.262445,4221.329556,9715.419331,-10808.647395,2369.236754,513.262114,30.003864,-79.930328,23.251682,15.030923,-2.18974,-2628.5173,2384.782237,-768.826518,-1233.959573,48.664823,59.12016,31.159627,-8.194308,3341.682728,-3573.01314,5201.639807,3758.250612,-5176.660232,-15769.929096,8981.647662,2894.295325,2058.769574,2581.88362,365.149837,940.482919,-98.511768,2179.898128,9771.669668,11.819746,-0.0,-2643.576437,1440.438434,13.573714,-9.223859,-1.521537,-19.522626,44.518666,-252.442647,21800.60145,-291.971254,4273.73512,-5.362516,-110.772565,-7.236981,723.811548,-2037.018052,157.486136,1692.464663,-6.203303,-0.261526,-5577.485467,-3342.011966,1323.388813,-1445.040836,-1406.119355,-490.817095,7639.020356,-319.885093,-1097.844237,3985.64319,-264.894014,14656.948569,-12439.448185,-4843.124621,30.855691,2.188904,-3309.158618,-2905.693027,1146.322518,-445.889721,-2344.248247,-30.840066,-10954.768633,-106.81647,-4071.803106,-8666.165446,8249.962247,-2870.313099,-1847.315616,-13.545985,1235.603318,-16.940847,-6.786019,0.526765,2573.623186,-2689.388657,3833.284605,1541.922841,-54.650607,-68.075064,-36.893687,9.630476,-3932.038693,3952.963419,-5612.297687,-4065.753784,2856.000076,10784.366654,-9196.833054,-1280.005442,-2424.187863,-1670.813698,-1508.418693,-670.379998,99.706906,-1084.739127,-6824.048548,-19.443915,-773.572593,3007.683556,-1366.048832,-16.51866,28.793271,-0.633567,-13.45331,-46.284097,103.943539,-12542.367863,-154.3086,-8612.272278,12.04736,36.803899,9.27588,-476.18584,3389.340575,0.0,1716165000.0,1723974000.0,0.455071
2,Lasso,1.0,-128.226871,-3366.289761,34.050367,0.378213,13126.531123,1847.422765,-427.18228,1460.753826,2107.37137,247.212305,-4688.606351,161.552111,-21.196089,3214.200303,-635.219377,-9994.579291,11547.080212,4184.377224,-37.520826,-6.41833,1662.483556,-222.095931,-459.965602,17.430634,4734.193275,43.257155,12654.064793,-1093.672114,4219.875929,9718.370684,-10818.51113,2368.855082,513.095812,30.012259,-81.700098,23.279479,15.047358,-2.19446,-2663.702824,2383.295418,-808.481666,-1242.203457,48.644229,59.1311,31.173981,-8.197445,3346.891123,-3595.41066,5214.904531,3770.095595,-5179.664088,-15828.954718,8981.921526,2901.680598,2061.929911,2594.160151,362.217054,941.704969,-98.426341,2178.84814,9782.367679,11.802277,-0.0,-2647.001759,1445.091366,13.562899,-9.240577,-1.508954,-19.524327,44.502258,-253.069576,21869.192704,-293.139395,4318.731731,-5.368529,-110.544305,-7.191596,723.363932,-2041.875991,157.427195,1690.141978,-6.243422,-0.261464,-5856.636911,-3358.43725,1323.815683,-1448.00637,-1333.201264,-491.561376,7672.80108,-320.019577,-1098.508393,4021.938129,-267.010941,14702.144043,-12434.646496,-4845.884795,30.788683,2.184059,-3311.165079,-2906.836072,1146.92546,-446.430148,-2344.037615,-30.829536,-10956.738719,-120.253407,-4070.129476,-8666.77639,8259.180809,-2870.042066,-1846.972168,-13.567704,1239.207084,-16.977817,-6.815574,0.532448,2607.770805,-2687.489795,3874.048166,1550.701978,-54.615855,-68.09015,-36.884009,9.634148,-3938.031046,3970.806965,-5624.339918,-4080.24552,2858.488385,10849.343176,-9195.072004,-1288.358322,-2426.061449,-1684.363146,-1505.384823,-672.011339,99.654093,-1083.588499,-6836.543353,-19.421742,-805.345145,3042.064529,-1370.313897,-16.515219,28.804497,-0.657439,-13.419188,-46.270697,104.358752,-12594.579629,-154.631947,-8664.919261,12.052074,37.25633,9.240041,-476.072162,3390.076541,0.0,1716170000.0,1723980000.0,0.455116
3,Lasso,0.1,-128.046352,-3362.490533,34.085953,0.378104,13379.083225,1863.453787,-426.957372,1466.225885,2028.994138,246.845497,-4731.240291,161.652803,-21.62535,3226.143077,-638.116266,-10032.907773,11543.238182,4189.961025,-37.482865,-6.417775,1664.188532,-228.582453,-460.227775,17.973188,4733.853187,43.246654,12652.643986,-1102.127352,4218.288418,9721.599232,-10827.833829,2368.444328,512.892137,30.018313,-83.339492,23.302543,15.061183,-2.198538,-2695.92519,2382.169673,-845.220318,-1249.213823,48.626364,59.141387,31.189993,-8.200336,3351.859922,-3615.181092,5226.460717,3780.9543,-5182.162891,-15883.290208,8982.4608,2908.447269,2064.814309,2605.223514,359.555986,942.921317,-98.350098,2177.608028,9791.657523,11.783845,26.86672,-2675.660607,1449.651734,13.554437,-9.257753,-1.496162,-19.525715,44.487561,-253.633946,21931.052648,-294.162025,4359.695887,-5.373993,-110.368505,-7.151035,722.83426,-2046.708034,157.375725,1688.118805,-6.279122,-0.261406,-6107.14169,-3373.030384,1324.185886,-1450.691029,-1263.079259,-492.229651,7703.293501,-320.139686,-1099.096908,4054.423442,-268.938244,14742.333745,-12430.496708,-4848.535163,30.720763,2.179809,-3312.967167,-2907.854332,1147.467709,-446.911709,-2343.847185,-30.820067,-10958.304643,-132.16005,-4068.533546,-8667.319469,8267.485321,-2869.792869,-1846.64606,-13.587075,1242.458107,-17.010937,-6.841941,0.537528,2638.527049,-2685.752958,3910.921391,1558.587024,-54.584835,-68.103452,-36.875435,9.637414,-3943.458549,3986.891495,-5635.111602,-4093.192226,2860.679129,10907.509053,-9193.428728,-1295.815455,-2427.715935,-1696.514706,-1502.650934,-673.485514,99.605291,-1082.552929,-6847.713435,-19.401908,-833.961475,3073.045874,-1374.091006,-16.512047,28.814641,-0.679191,-13.388511,-46.258622,104.731931,-12641.541551,-154.92874,-8712.260398,12.056325,37.66125,9.207685,-475.968669,3390.744467,0.0,1716177000.0,1723989000.0,0.455177
4,Ridge,5.0,-118.618713,-2330.225184,33.053368,0.394281,9621.50372,658.781867,-287.663778,888.020142,21069.990118,115.371466,-4279.272909,202.707901,-421.370149,3056.613288,-1303.055095,-7090.080071,10993.389717,5403.261501,241.021687,-9.544092,2096.531282,-75.577508,-340.832306,-50.842823,5001.516085,42.675673,11846.425339,-1211.219253,1567.530053,7680.468659,-9362.417904,2195.341635,347.441721,12.039574,-118.277943,5.839282,-2.697818,14.750146,-1973.38549,1861.150112,-3102.292421,-1968.108764,14.239139,21.519543,-5.134702,31.429579,3815.579019,-2909.861046,2496.961863,1220.54612,-5569.994855,-15200.571302,8548.953551,3128.734065,2015.435242,3210.30323,369.174902,1367.354905,-41.790085,1499.423718,8850.593059,8.988576,-705.040464,-2074.056612,-602.952986,11.175903,-7.989748,14.167224,-14.832098,51.412043,-251.55012,20204.985745,-341.798525,5218.371556,-5.457178,-251.20673,-1626.71452,782.14204,-1172.573496,155.082586,1386.035234,-5.394985,-0.269994,-3133.574468,-2359.286833,1203.939384,-1122.201604,-22046.792569,-469.452579,7688.265284,-336.613832,-859.312215,3994.806193,125.657024,12918.453553,-12120.792112,-5440.150275,-112.093236,-22.695315,-3475.908799,-3108.283822,1122.268295,-412.453968,-2511.535075,-31.141274,-10357.214169,-353.718436,-2323.282826,-7576.8198,7642.502509,-2759.679798,-1712.109461,-4.700429,1116.445945,-7.386041,2.492724,-8.228185,2038.627,-2443.357054,5059.338257,2371.945912,-13.238633,-25.237465,4.717879,-34.71261,-3864.018292,3841.105497,-3811.869429,-2270.260812,2915.588939,10111.646312,-8518.672064,-1262.416397,-2441.694312,-2197.8756,-1499.368672,-919.683378,44.411104,-674.684903,-6394.584583,-17.444199,-224.358156,2830.33105,-388.303376,-15.769541,28.260435,-8.901714,-14.333423,-51.465699,102.744501,-11965.428639,-79.073643,-9479.273466,12.357206,48.205308,1525.800994,-524.394567,2805.037891,0.0,1694152000.0,1702334000.0,0.482971


In [409]:
LR_coef_f[160]

-0.0

In [423]:
# form a function to calculate prediciton:
def pred_cal(X_in,coef,y_true):
    y_pred =0
    for i in range(len(X_in)):
        y_pred = y_pred + X_in[i]*coef[i]
    y_pred = y_pred+coef[len(coef)-1]
    print("prediction price difference:", y_pred)
    print("true price difference: ", y_true)
    return y_pred

In [432]:
X_in = list(X_2house.iloc[135])
y_true = y_2house[135]

In [433]:
tmp = zip(X_2house.columns, X_in, range(158))
for ind,item,id in tmp:
    print(ind,item, id)

MS SubClass 20.0 0
MS Zoning 4.0 1
Lot Frontage 141.0 2
Lot Area 31770.0 3
Street 1.0 4
Alley 1.0 5
Lot Shape 0.0 6
Land Contour 3.0 7
Utilities 4.0 8
Lot Config 0.0 9
Land Slope 2.0 10
Neighborhood 15.0 11
Condition 1 2.0 12
Condition 2 2.0 13
Bldg Type 0.0 14
House Style 1.0 15
Overall Qual 6.0 16
Overall Cond 5.0 17
Year Built 1960.0 18
Year Remod/Add 1960.0 19
Roof Style 3.0 20
Roof Matl 1.0 21
Exterior 1st 3.0 22
Exterior 2nd 10.0 23
Mas Vnr Type 4.0 24
Mas Vnr Area 112.0 25
Exter Qual 3.0 26
Exter Cond 3.0 27
Foundation 1.0 28
Bsmt Qual 3.0 29
Bsmt Cond 4.0 30
Bsmt Exposure 4.0 31
BsmtFin Type 1 4.0 32
BsmtFin SF 1 639.0 33
BsmtFin Type 2 1.0 34
BsmtFin SF 2 0.0 35
Bsmt Unf SF 441.0 36
Total Bsmt SF 1080.0 37
Heating 1.0 38
Heating QC 2.0 39
Central Air 1.0 40
Electrical 5.0 41
1st Flr SF 1656.0 42
2nd Flr SF 0.0 43
Low Qual Fin SF 0.0 44
Gr Liv Area 1656.0 45
Bsmt Full Bath 1.0 46
Bsmt Half Bath 0.0 47
Full Bath 1.0 48
Half Bath 0.0 49
Bedroom AbvGr 3.0 50
Kitchen AbvGr 1.0 51
K

In [434]:
# change specific attribute " Kitchen AbvGr"
X_in[129]=2

In [435]:
pred_cal(X_in,LR_coef_f[2:160],y_true)

prediction price difference: 47190.5485204
true price difference:  -35000.0


47190.548520423821