In [1]:
#======================= IMPORT PACKAGES ============================

import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
from sklearn import preprocessing

In [2]:
#======================= DATA SELECTION =========================

print("=======================================")
print("---------- Data Selection -------------")
print("=======================================")
data=pd.read_csv('train.csv')
print(data.head(10))
print()

---------- Data Selection -------------
     Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0   127         120       RL          NaN     4928   Pave   NaN      IR1   
1   889          20       RL         95.0    15865   Pave   NaN      IR1   
2   793          60       RL         92.0     9920   Pave   NaN      IR1   
3   110          20       RL        105.0    11751   Pave   NaN      IR1   
4   422          20       RL          NaN    16635   Pave   NaN      IR1   
5  1197          60       RL         58.0    14054   Pave   NaN      IR1   
6   561          20       RL          NaN    11341   Pave   NaN      IR1   
7  1041          20       RL         88.0    13125   Pave   NaN      Reg   
8   503          20       RL         70.0     9170   Pave   NaN      Reg   
9   576          50       RL         80.0     8480   Pave   NaN      Reg   

  LandContour Utilities  ... PoolArea PoolQC  Fence MiscFeature MiscVal  \
0         Lvl    AllPub  ...        0    NaN    

In [3]:
#==================== PREPROCESSING =======================================

#checking missing values

print("=====================================================")
print("--------- Before Checking missing values ------------")
print("=====================================================")
print(data.isnull().sum())
print()


print("=====================================================")
print("--------- After Checking missing values ------------")
print("=====================================================")
data=data.fillna(0)
print(data.isnull().sum())
print()


--------- Before Checking missing values ------------
Id                 0
MSSubClass         0
MSZoning           0
LotFrontage      214
LotArea            0
                ... 
MoSold             0
YrSold             0
SaleType           0
SaleCondition      0
SalePrice          0
Length: 81, dtype: int64

--------- After Checking missing values ------------
Id               0
MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
                ..
MoSold           0
YrSold           0
SaleType         0
SaleCondition    0
SalePrice        0
Length: 81, dtype: int64



In [4]:
#==== LABEL ENCODING ====

label_encoder = preprocessing.LabelEncoder() 
print("------------------------------------------------------")
print(" Before label encoding ")
print("------------------------------------------------------")
print()
print(data['SaleCondition'].head(10))

print("------------------------------------------------------")
print("After label encoding ")
print("------------------------------------------------------")
print()

data= data.astype(str).apply(label_encoder.fit_transform)

print(data['SaleCondition'].head(10))

------------------------------------------------------
 Before label encoding 
------------------------------------------------------

0     Normal
1     Normal
2     Normal
3     Normal
4     Normal
5    Partial
6     Normal
7     Normal
8     Normal
9    Abnorml
Name: SaleCondition, dtype: object
------------------------------------------------------
After label encoding 
------------------------------------------------------

0    4
1    4
2    4
3    4
4    4
5    5
6    4
7    4
8    4
9    0
Name: SaleCondition, dtype: int32


In [5]:
#========================= DATA SPLITTING ============================

#=== TEST AND TRAIN ===

x=data.drop('SalePrice',axis=1)
y=data['SalePrice']
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=2)

print("-----------------------------------------------------------")
print("======================= Data splitting ====================")
print("-----------------------------------------------------------")
print()
print("Total No Of data          :",data.shape[0])
print()
print("Total No of Training data :",X_train.shape[0])
print()
print("Total No of Testing data :",X_test.shape[0])
print()


-----------------------------------------------------------
-----------------------------------------------------------

Total No Of data          : 1168

Total No of Training data : 817

Total No of Testing data : 351



In [6]:
#========================= CLASSIFICATION ============================

from sklearn.linear_model import Ridge
from sklearn import metrics

#=== ridge regression ===

#initialize the model
ridgeR = Ridge(alpha = 1)

#fitting the model
ridgeR.fit(X_train, y_train)

#predict the model
y_pred = ridgeR.predict(X_test)


print("-----------------------------------------------------------")
print("======================= RIDGE REGRESSION ==================")
print("-----------------------------------------------------------")
print()


mae_ridge=metrics.mean_absolute_error(y_test, y_pred)

print("1.Mean Absolute Error       : ",mae_ridge)
print()
mse_ridge=metrics.mean_squared_error(y_test, y_pred)/1000
print("2.Mean Squared Error       : ",mae_ridge)
print()
import numpy as np
rmse_rid=np.sqrt(mse_ridge)
print("3.Root Mean Squared Error  : ",rmse_rid)


-----------------------------------------------------------
-----------------------------------------------------------

1.Mean Absolute Error       :  79.83190388942151

2.Mean Squared Error       :  79.83190388942151

3.Root Mean Squared Error  :  3.584070103425709


In [7]:
#=== lasso regression ===

from sklearn.linear_model import Lasso

#initialize the model
lasso = Lasso(alpha = 1)

#fitting the model
lasso.fit(X_train, y_train)

#predict the model
y_pred = lasso.predict(X_test)


print("-----------------------------------------------------------")
print("======================= LASSO REGRESSION ==================")
print("-----------------------------------------------------------")
print()


mae_lasso=metrics.mean_absolute_error(y_test, y_pred)

print("1.Mean Absolute Error       : ",mae_lasso)
print()
mse_lasso=metrics.mean_squared_error(y_test, y_pred)/1000
print("2.Mean Squared Error       : ",mse_lasso)
print()
import numpy as np
rmse_las=np.sqrt(mse_lasso)
print("3.Root Mean Squared Error  : ",rmse_las)

-----------------------------------------------------------
-----------------------------------------------------------

1.Mean Absolute Error       :  81.17642068315187

2.Mean Squared Error       :  13.135842128655247

3.Root Mean Squared Error  :  3.6243402335673793


In [8]:
#========================= PREDICTION ============================

print("-----------------------------------------------------------")
print("======================= PREDICTION ========================")
print("-----------------------------------------------------------")
print()

for i in range(0,10):
    Results=y_pred[i]
    print("------------------------------------------")
    print()
    print([i],"The predicted house price is ", Results)
    print()


-----------------------------------------------------------
-----------------------------------------------------------

------------------------------------------

[0] The predicted house price is  446.2012200368489

------------------------------------------

[1] The predicted house price is  170.02204911746094

------------------------------------------

[2] The predicted house price is  147.01617944576765

------------------------------------------

[3] The predicted house price is  265.1751409161578

------------------------------------------

[4] The predicted house price is  249.0156129133469

------------------------------------------

[5] The predicted house price is  93.18215427129098

------------------------------------------

[6] The predicted house price is  73.28736011860383

------------------------------------------

[7] The predicted house price is  199.06645757730087

------------------------------------------

[8] The predicted house price is  156.14699240520633

--