In [17]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb

%matplotlib inline
# Show every column when a DataFrame is displayed (no truncation with "...").
# Uses the public `pd.set_option`; `pd.pandas` only resolves because the package
# happens to expose itself as an attribute, which is not a documented API.
pd.set_option('display.max_columns', None)


In [18]:
# Load the prepared training table, then separate the target column from the features.
train_df = pd.read_csv('TrainFinal.csv')
y_train = train_df['SalePrice']
x_train = train_df.drop(columns=['SalePrice'])
# Displayed as the cell output: number of training targets.
y_train.shape

(1460,)

In [19]:
# Dimensions of the feature matrix (rows, columns) once 'SalePrice' has been removed.
x_train.shape

(1460, 77)

In [20]:
# Load the prepared test features; there is no target column in this file.
x_test = pd.read_csv('TestFinal.csv')
# Displayed as the cell output so the column count can be compared with x_train.
x_test.shape

(1459, 77)

### Dropping features with pairwise correlation above 85%

In [21]:
# Absolute pairwise correlations between the training features; only the
# strength of the relationship matters here, not its sign.
corr = x_train.corr().abs()

# Keep the strict upper triangle only (k=1 leaves out the diagonal) so every
# feature pair is represented once; all other cells become NaN.
upper_mask = np.triu(np.ones(corr.shape, dtype=bool), k=1)
up = corr.where(upper_mask)

In [22]:
# Flag each column that has at least one entry above 0.85 in `up`
# (NaN cells compare as False and are therefore ignored).
high_corr = (up > 0.85).any(axis=0)
to_drop = list(high_corr[high_corr].index)
# Remove the same columns from both sets so train and test keep identical features.
x_train = x_train.drop(to_drop, axis=1)
x_test = x_test.drop(to_drop, axis=1)

In [23]:
# Candidate regularisation strengths that RidgeCV chooses between.
# NOTE(review): the recorded output of `ridge.alpha_` is 10.0, the largest value
# in this grid, so the best penalty may lie beyond it; consider widening the grid.
alpha_grid = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 0.5, 1, 10]
ridge = linear_model.RidgeCV(alphas=alpha_grid)
ridge.fit(x_train, y_train)
# `fit()` returns the estimator itself, so `fit` is the same fitted object as `ridge`.
fit = ridge

In [24]:
# R^2 of the fitted RidgeCV model on the same data it was trained on
# (an in-sample score, so it is optimistic compared with a held-out estimate).
fit.score(x_train,y_train)

0.8427785084401735

In [25]:
# Regularisation strength that RidgeCV selected from the candidate alphas.
ridge.alpha_

10.0

In [26]:
# Correlation matrix of the features that remain in x_train, shown for inspection.
x_train.corr()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,LotFrontageNaN,MasVnrAreaNaN
MSSubClass,1.000000,0.035900,-0.477451,-0.462668,-0.024969,0.119289,-0.002940,-0.022844,0.075910,-0.025672,-0.005985,-0.024762,-0.042395,0.746063,0.397161,0.032628,-0.059316,0.027850,0.040581,-0.117817,-0.031336,-0.089159,-0.032667,0.031451,-0.017161,-0.003254,0.058126,-0.036780,-0.009547,-0.060598,0.017568,-0.035303,0.042193,-0.057576,-0.085537,-0.036472,0.048009,0.020760,-0.101774,0.052275,-0.288607,0.366095,0.046474,0.081717,0.003491,-0.002333,0.131608,0.177354,-0.023438,0.281721,0.010129,0.040380,0.003711,-0.045569,0.058036,0.133138,0.039567,0.033077,-0.040110,-0.087366,0.037170,0.037108,-0.059925,0.028930,0.008958,-0.028258,-0.043825,-0.030167,0.008283,-0.007683,-0.013585,-0.021407,0.012464,-0.024940,-0.015323,-0.016496
MSZoning,0.035900,1.000000,-0.132040,-0.098816,0.087654,0.061887,-0.017854,-0.001192,-0.009895,-0.022055,-0.249679,-0.027874,0.044606,0.005690,-0.105315,-0.160099,0.186951,-0.308908,-0.174728,-0.000301,0.005133,-0.008558,-0.048567,-0.038066,0.200536,-0.096041,-0.235174,0.125629,-0.007027,0.038115,0.025322,-0.047744,-0.028815,0.031549,-0.048487,-0.021759,0.056866,0.134279,-0.049523,-0.070812,-0.061582,-0.015364,0.011520,-0.103578,-0.018526,0.007193,-0.198290,-0.133876,-0.016471,0.049434,0.128976,-0.043200,-0.095722,-0.011349,0.017208,0.141142,-0.258899,0.167695,-0.157042,-0.071456,-0.117099,-0.050433,-0.100366,-0.005498,-0.173157,0.133280,0.000362,0.010967,-0.003128,0.009293,-0.031496,-0.020628,0.097437,0.009494,-0.021144,-0.091493
LotFrontage,-0.477451,-0.132040,1.000000,0.651668,-0.034356,-0.151675,-0.051884,0.001915,-0.166673,0.058155,0.106799,0.000119,0.002322,-0.532248,-0.019644,0.205176,-0.031833,0.085842,0.059485,0.163835,0.066297,0.101823,-0.023758,0.099148,-0.161846,0.037082,0.091009,-0.132947,0.044239,-0.104333,-0.007224,0.043663,-0.026489,0.033141,0.075821,0.034572,-0.017425,-0.076630,0.051731,0.036991,0.422193,-0.072198,0.025208,0.320276,0.065078,-0.003049,0.163144,0.015019,0.267532,0.014703,-0.163512,0.319849,0.039373,0.221889,-0.183275,-0.270461,0.066688,-0.216567,0.272626,0.129252,-0.008148,-0.021481,0.077204,0.046377,0.118966,-0.027986,0.063210,0.043017,0.107654,0.009883,0.020291,0.003997,-0.024021,0.064097,0.033968,0.010114
LotArea,-0.462668,-0.098816,0.651668,1.000000,-0.105723,-0.285425,-0.104666,0.023403,-0.187106,0.255881,0.104687,0.058578,0.028729,-0.564935,-0.045572,0.178215,-0.006305,0.021937,0.027670,0.142942,0.173382,0.085403,-0.002401,0.048867,-0.107621,-0.003444,0.019089,-0.097626,-0.008216,-0.143304,-0.028451,0.063461,-0.062066,0.082101,0.037664,0.036111,0.000072,-0.038262,0.057671,0.030360,0.467465,-0.025310,0.012810,0.385435,0.138273,0.045218,0.179187,0.038717,0.279176,0.001678,-0.115271,0.360129,-0.010965,0.327754,-0.231278,-0.256931,0.026772,-0.190834,0.272007,0.155500,-0.035916,-0.040259,0.023293,0.101503,0.115973,-0.020172,0.055700,0.084741,0.091791,0.047791,0.010521,-0.027164,-0.002437,0.053602,0.128188,0.019996
Street,-0.024969,0.087654,-0.034356,-0.105723,1.000000,-0.010224,0.115995,0.001682,0.013960,-0.179360,-0.011561,-0.071657,0.002039,-0.018243,0.023704,0.058823,0.042848,0.021137,0.065465,-0.019732,0.008081,0.002505,0.010775,0.009646,0.049976,0.005874,0.035277,-0.025811,-0.014963,0.072232,-0.012054,0.001323,0.061663,-0.044297,0.060317,-0.009464,0.007904,-0.053995,0.069869,0.021355,0.004719,0.035527,0.007724,0.051486,-0.050524,0.015485,0.046471,0.027628,0.028865,0.013583,-0.025307,0.046828,-0.016444,-0.005348,-0.024278,-0.017098,0.013046,-0.015529,-0.020025,0.027894,-0.023734,-0.023321,0.024521,0.017993,0.006923,0.026176,0.007473,-0.021289,0.004413,-0.022733,0.003690,-0.025043,0.014339,0.006064,0.001804,0.004768
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YrSold,-0.021407,-0.020628,0.003997,-0.027164,-0.025043,0.036449,0.020507,0.023353,-0.005992,-0.002305,0.036697,-0.009819,-0.021495,0.002006,-0.018005,-0.027347,0.043950,-0.013618,0.035743,0.019385,-0.029057,-0.040181,0.061692,-0.003061,-0.036300,-0.033619,0.009058,0.029369,0.081336,0.059532,-0.036549,0.036881,-0.024237,0.026375,-0.001030,-0.029672,0.011830,0.008846,-0.009420,0.034686,-0.019278,-0.012868,-0.028921,-0.030970,0.067049,-0.046524,-0.019669,-0.010269,-0.036014,0.031687,-0.009706,-0.034516,-0.015506,-0.024096,0.044449,0.008711,-0.009877,-0.001467,-0.039117,-0.012304,0.041003,0.028678,0.006556,0.020181,-0.066303,0.003330,0.018645,0.023325,-0.059689,0.004906,-0.145721,1.000000,-0.002327,0.003880,0.021230,0.003312
SaleType,0.012464,0.097437,-0.024021,-0.002437,0.014339,-0.000911,-0.025754,-0.126770,0.014325,0.054858,-0.036956,-0.002338,0.004848,-0.040306,0.048582,-0.060472,0.095267,-0.052425,0.019647,-0.032945,0.011475,0.011757,-0.026523,-0.060754,0.087012,0.008787,0.016484,0.065410,0.033285,0.000229,-0.046762,0.043374,-0.011938,0.005265,-0.006340,-0.020576,0.011687,-0.016559,-0.004052,0.009457,-0.103304,0.056664,0.014445,-0.048560,0.029290,0.001677,-0.035458,0.031810,0.034123,-0.009790,0.038590,-0.041252,-0.008221,0.014284,0.033210,0.047804,-0.044872,0.050993,-0.068295,-0.035977,-0.015420,-0.002149,-0.039365,0.029471,-0.010585,-0.005379,-0.009034,0.003905,0.011009,0.015773,-0.047386,-0.002327,1.000000,0.184067,0.031351,-0.012583
SaleCondition,-0.024940,0.009494,0.064097,0.053602,0.006064,-0.038118,0.033809,-0.089701,0.051579,-0.043095,0.021867,0.057747,0.045074,-0.003530,0.022753,0.193703,0.017758,0.201044,0.221687,0.065946,-0.045475,0.062552,0.082296,0.078788,-0.188571,0.054232,0.140358,-0.180480,0.016773,-0.072672,0.005995,-0.007773,0.031894,-0.052660,0.042813,0.025630,0.006669,-0.169886,0.071166,0.102263,0.077189,0.016130,-0.070760,0.096716,0.011386,-0.059264,0.143864,0.072135,-0.017358,-0.066113,-0.134391,0.070660,0.022892,0.105618,-0.137252,-0.138910,0.212272,-0.182548,0.184943,0.132618,-0.002232,-0.034137,0.071233,0.062501,0.134686,-0.091809,0.004379,-0.023881,-0.067251,0.013027,0.013320,0.003880,0.184067,1.000000,-0.022113,0.023908
LotFrontageNaN,-0.015323,-0.021144,0.033968,0.128188,0.001804,-0.331060,0.024469,0.056376,-0.186771,0.109453,-0.020312,0.047167,0.033729,-0.026533,0.073258,-0.035953,0.025767,0.049031,-0.033815,-0.030633,0.040374,-0.055971,-0.130542,0.064344,0.047066,-0.019589,0.020575,0.065585,0.047195,-0.026713,-0.106313,0.102285,-0.043805,0.057968,-0.037051,0.006335,-0.026753,0.061418,0.071624,0.034334,0.030115,-0.010013,-0.034407,0.022592,0.061651,0.030791,-0.007660,0.056477,0.007899,-0.049354,0.064840,-0.023297,0.007190,0.136967,-0.019798,-0.101848,0.016401,-0.059503,0.007956,0.057314,0.038219,-0.002811,0.051481,0.048737,0.009781,-0.040419,0.020857,0.002776,0.001047,0.078148,-0.016172,0.021230,0.031351,-0.022113,1.000000,0.014107


In [27]:
# Refit a plain Ridge model with the penalty chosen by RidgeCV. The value is read
# from `ridge.alpha_` rather than typed in by hand, so it stays correct if the
# alpha grid or the training data changes. `float()` keeps the estimator's repr
# as a plain number instead of a NumPy scalar.
model = linear_model.Ridge(alpha=float(ridge.alpha_)).fit(x_train,y_train)

In [28]:
# Predict sale prices for the test features with the refitted Ridge model.
predict = model.predict(x_test)

In [29]:
# Sanity check: number of predictions produced for the test set.
predict.shape

(1459,)

In [30]:
# A sale price cannot be negative, so floor the predictions at zero.
predict = np.clip(predict, 0, None)

In [31]:
# Preview the predictions after flooring at zero.
predict

array([104351.73028909, 154809.38548955, 172285.38490133, ...,
       159974.23654251, 134868.43452593, 253582.39949811])

In [32]:
# Write the Ridge predictions in the submission format (Id, SalePrice).
# Ids are taken from the sample submission file.
# NOTE(review): this assumes its rows are in the same order as TestFinal.csv — confirm.
sub_df=pd.read_csv('sample_submission.csv')

# Work on a flat array under a new name instead of rebinding `predict` to a
# DataFrame, so re-running this cell or the cells above it behaves the same.
ridge_sale_price = np.asarray(predict).ravel()

# pd.concat(axis=1) aligns on the index and would silently pad with NaN if the
# lengths differed; fail loudly instead.
assert len(ridge_sale_price) == len(sub_df), (
    "number of predictions does not match number of submission Ids"
)

datasets = pd.DataFrame({'Id': sub_df['Id'].to_numpy(), 'SalePrice': ridge_sale_price})
datasets.to_csv('submission.csv',index=False)


In [37]:
# Compare the Ridge model with ordinary least squares and SGD using the mean
# 10-fold cross-validated score (R^2 is the default score for regressors).
lr = linear_model.LinearRegression()
# Fixed seed: SGD shuffles the training data, so without `random_state` the
# score and any predictions written out later change from run to run.
SGD = linear_model.SGDRegressor(random_state=42)
foo = [model,lr,SGD]

for estimator in foo:
    # cross_val_score fits its own clone per fold, so this fit does not affect the
    # score; it is kept so each estimator is fitted on the full training set for
    # later predict() calls.
    estimator.fit(x_train,y_train)
    cv_score = cross_val_score(estimator,x_train,y_train,cv=10)
    # NOTE(review): the recorded run shows LinearRegression at about -2.6e+23,
    # i.e. the unregularised fit is numerically unstable on these features
    # (possibly near-collinear columns — confirm); do not use it for prediction.
    print(str(estimator),cv_score.mean())

Ridge(alpha=10.0) 0.8275171567697962
LinearRegression() -2.603137749249716e+23
SGDRegressor() 0.8225952208882845


In [34]:
# Predict with the SGD model and floor at zero, the same treatment the Ridge
# predictions receive before they are written out (a sale price cannot be negative).
p = np.clip(SGD.predict(x_test), 0, None)

In [35]:
# Write the SGD predictions in the submission format (Id, SalePrice), reusing
# the Ids loaded from the sample submission into `sub_df`.
# Flatten under a new name instead of rebinding `p` to a DataFrame, so the cell
# can be re-run safely.
sgd_sale_price = np.asarray(p).ravel()

# pd.concat(axis=1) aligns on the index and would silently pad with NaN if the
# lengths differed; fail loudly instead.
assert len(sgd_sale_price) == len(sub_df), (
    "number of predictions does not match number of submission Ids"
)

d1 = pd.DataFrame({'Id': sub_df['Id'].to_numpy(), 'SalePrice': sgd_sale_price})
# NOTE(review): the file name says "RF" but these are SGDRegressor predictions;
# the name is left unchanged in case something downstream expects it.
d1.to_csv('submissionRF.csv',index=False)
