In [72]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
from typing import Optional
    
from lightgbm import LGBMRegressor
from lightgbm import plot_importance

from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from prophet import Prophet

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge

from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import make_scorer

from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

from sklearn.cluster import KMeans

In [2]:
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [86]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sample_submission = pd.read_csv("sample_submission.csv")

In [18]:
# Verisetinin değişkenlerini tespit etme                                                                                     
def grab_col_names(df, cat_th=10, car_th=20):

    # Categorical Columns
    cat_cols = [col for col in df.columns if df[col].dtypes == "O"]
    num_but_cat = [col for col in df.columns if df[col].nunique() < cat_th and df[col].dtypes != "O"]
    cat_but_car = [col for col in df.columns if df[col].nunique() > car_th and df[col].dtypes == "O"]
    cat_cols = cat_cols + num_but_cat
    cat_cols = [col for col in cat_cols if col not in cat_but_car]

    # Numerical Columns
    num_cols = [col for col in df.columns if df[col].dtypes != "O"]
    num_cols = [col for col in num_cols if col not in num_but_cat]

    # Results
    print(f"Observations: {df.shape[0]}")
    print(f"Variables: {df.shape[1]}")
    print(f'cat_cols: {len(cat_cols)}')
    print(f'num_cols: {len(num_cols)}')
    print(f'cat_but_car: {len(cat_but_car)}')
    print(f'num_but_cat: {len(num_but_cat)}')

    return cat_cols, cat_but_car, num_cols    

In [87]:
# Kategorik değişken analizi                              
def categorical_value_counts(df, col, target, rare: Optional[float]=None):   
    temp = df.groupby(col).agg(Count = (col, lambda x:x.count()), \
                          Ratio = (col, lambda x:x.count() / len(df)), \
                          Target_Ratio = (target, lambda x:x.sum() / df[target].sum())) \
                          .sort_values("Count", ascending = False).reset_index()
    
    if rare is not None:
        rares = temp.loc[temp["Ratio"] <= float(rare), col].tolist()
        df.loc[df[col].isin(rares), col] = "Rare"
        print("---- Done! --- ")
        print(df.groupby(col).agg(Count = (col, lambda x:x.count()), \
                          Ratio = (col, lambda x:x.count() / len(df)), \
                          Target_Ratio = (target, lambda x:x.sum() / df[target].sum())) \
                          .sort_values("Count", ascending = False).reset_index())
    else:
        print(temp)

In [19]:
cat_cols, cat_but_car, num_cols = grab_col_names(train)

Observations: 1460
Variables: 81
cat_cols: 53
num_cols: 27
cat_but_car: 1
num_but_cat: 11


In [53]:
for col in cat_cols:
    categorical_value_counts(train, col, "SalePrice")

          Count     Ratio  Target_Ratio
MSZoning                               
RL         1151  0.788356      0.832296
RM          218  0.149315      0.104250
FV           65  0.044521      0.052664
RH           16  0.010959      0.007969
C (all)      10  0.006849      0.002821 

        Count    Ratio  Target_Ratio
Street                              
Pave     1454  0.99589      0.997043
Grvl        6  0.00411      0.002957 

       Count     Ratio  Target_Ratio
Alley                               
Grvl      50  0.034247      0.023135
Pave      41  0.028082      0.026077 

          Count     Ratio  Target_Ratio
LotShape                               
Reg         925  0.633562      0.576949
IR1         484  0.331507      0.377646
IR2          41  0.028082      0.037226
IR3          10  0.006849      0.008179 

             Count     Ratio  Target_Ratio
LandContour                               
Lvl           1311  0.897945      0.894285
Bnk             63  0.043151      0.034131
HLS 

            Count     Ratio  Target_Ratio
PavedDrive                               
Y            1340  0.917808      0.945774
N              90  0.061644      0.039196
P              30  0.020548      0.015029 

        Count     Ratio  Target_Ratio
PoolQC                               
Gd          3  0.002055      0.002294
Ex          2  0.001370      0.003710
Fa          2  0.001370      0.001632 

       Count     Ratio  Target_Ratio
Fence                               
MnPrv    157  0.107534      0.088413
GdPrv     59  0.040411      0.039966
GdWo      54  0.036986      0.028698
MnWw      11  0.007534      0.005592 

             Count     Ratio  Target_Ratio
MiscFeature                               
Shed            49  0.033562      0.028046
Gar2             2  0.001370      0.001293
Othr             2  0.001370      0.000712
TenC             1  0.000685      0.000946 

          Count     Ratio  Target_Ratio
SaleType                               
WD         1267  0.867808      0

In [89]:
categorical_value_counts(train, "RoofStyle", "SalePrice")

  RoofStyle  Count     Ratio  Target_Ratio
0     Gable   1141  0.781507      0.740742
1       Hip    286  0.195890      0.236987
2      Rare     33  0.022603      0.022272


In [46]:
rares = temp.loc[temp["Ratio"] <= 0.01, "RoofStyle"].tolist()

In [49]:
temp_train.loc[temp_train["RoofStyle"].isin(rares), "RoofStyle"] = "Rare"

In [50]:
col = "RoofStyle"
target = "SalePrice"
temp_train.groupby(col).agg(Count = (col, lambda x:x.count()), \
                          Ratio = (col, lambda x:x.count() / len(temp_train)), \
                          Target_Ratio = (target, lambda x:x.sum() / train[target].sum())) \
                          .sort_values("Count", ascending = False)

Unnamed: 0_level_0,Count,Ratio,Target_Ratio
RoofStyle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Gable,1141,0.781507,0.740742
Hip,286,0.19589,0.236987
Rare,33,0.022603,0.022272


In [13]:
train["RoofStyle"] = train["RoofStyle"].apply(lambda x: "Rare" if x in ["Shed", "Mansard", "Gambrel", "Flat"] else x)
#train = pd.get_dummies(data=train, columns=["RoofStyle"], drop_first=True)

In [25]:
train.isnull().sum()

Id                  0
MSSubClass          0
MSZoning            0
LotFrontage       259
LotArea             0
Street              0
Alley            1369
LotShape            0
LandContour         0
Utilities           0
LotConfig           0
LandSlope           0
Neighborhood        0
Condition1          0
Condition2          0
BldgType            0
HouseStyle          0
OverallQual         0
OverallCond         0
YearBuilt           0
YearRemodAdd        0
RoofStyle           0
RoofMatl            0
Exterior1st         0
Exterior2nd         0
MasVnrType          8
MasVnrArea          8
ExterQual           0
ExterCond           0
Foundation          0
BsmtQual           37
BsmtCond           37
BsmtExposure       38
BsmtFinType1       37
BsmtFinSF1          0
BsmtFinType2       38
BsmtFinSF2          0
BsmtUnfSF           0
TotalBsmtSF         0
Heating             0
HeatingQC           0
CentralAir          0
Electrical          1
1stFlrSF            0
2ndFlrSF            0
LowQualFin

In [134]:
train.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Id,1460.0,730.5,421.610009,1.0,365.75,730.5,1095.25,1460.0
MSSubClass,1460.0,56.89726,42.300571,20.0,20.0,50.0,70.0,190.0
LotFrontage,1460.0,0.049998,1.048763,-2.285714,-0.428571,0.049998,0.47619,11.619048
LotArea,1460.0,10516.828082,9981.264932,1300.0,7553.5,9478.5,11601.5,215245.0
OverallQual,1460.0,6.099315,1.382997,1.0,5.0,6.0,7.0,10.0
OverallCond,1460.0,5.575342,1.112799,1.0,5.0,5.0,6.0,9.0
YearBuilt,1460.0,1971.267808,30.202904,1872.0,1954.0,1973.0,2000.0,2010.0
YearRemodAdd,1460.0,1984.865753,20.645407,1950.0,1967.0,1994.0,2004.0,2010.0
MasVnrArea,1460.0,103.117123,180.731373,0.0,0.0,0.0,164.25,1600.0
BsmtFinSF1,1460.0,443.639726,456.098091,0.0,0.0,383.5,712.25,5644.0


In [107]:
RS = RobustScaler()
train["LotFrontage"] = RS.fit_transform(train[["LotFrontage"]])
train["LotFrontage"] = train["LotFrontage"].fillna(train["LotFrontage"].mean())

In [125]:
train["MasVnrArea"] = train["MasVnrArea"].fillna(0)

In [123]:
train[train["MasVnrArea"].isnull()]

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
234,235,60,RL,0.049998,7851,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,,,Gd,TA,PConc,Gd,TA,No,GLQ,625,Unf,0,235,860,GasA,Ex,Y,SBrkr,860,1100,0,1960,1,0,2,1,4,1,Gd,8,Typ,2,TA,BuiltIn,2002.0,Fin,2,440,TA,TA,Y,288,48,0,0,0,0,,,,0,5,2010,WD,Normal,216500
529,530,20,RL,0.049998,32668,Pave,,IR1,Lvl,AllPub,CulDSac,Gtl,Crawfor,Norm,Norm,1Fam,1Story,6,3,1957,1975,Hip,CompShg,Wd Sdng,Stone,,,Gd,TA,PConc,TA,TA,No,Rec,1219,Unf,0,816,2035,GasA,TA,Y,SBrkr,2515,0,0,2515,1,0,3,0,4,2,TA,9,Maj1,2,TA,Attchd,1975.0,RFn,2,484,TA,TA,Y,0,0,200,0,0,0,,,,0,3,2007,WD,Alloca,200624
650,651,60,FV,-0.190476,8125,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,6,2007,2007,Gable,CompShg,CemntBd,CmentBd,,,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,813,813,GasA,Ex,Y,SBrkr,822,843,0,1665,0,0,2,1,3,1,Gd,7,Typ,0,,Attchd,2007.0,RFn,2,562,TA,TA,Y,0,0,0,0,0,0,,,,0,5,2008,WD,Normal,205950
936,937,20,RL,-0.095238,10083,Pave,,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,,,Gd,TA,PConc,Gd,TA,No,GLQ,833,Unf,0,343,1176,GasA,Ex,Y,SBrkr,1200,0,0,1200,1,0,2,0,2,1,Gd,5,Typ,0,,Attchd,2003.0,RFn,2,555,TA,TA,Y,0,41,0,0,0,0,,,,0,8,2009,WD,Normal,184900
973,974,20,FV,1.238095,11639,Pave,,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2007,2008,Gable,CompShg,CemntBd,CmentBd,,,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1428,1428,GasA,Ex,Y,SBrkr,1428,0,0,1428,0,0,2,0,3,1,Gd,6,Typ,0,,Attchd,2007.0,Fin,2,480,TA,TA,Y,0,120,0,0,0,0,,,,0,12,2008,New,Partial,182000
977,978,120,FV,-1.619048,4274,Pave,Pave,IR1,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,1Story,7,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,,,Gd,TA,PConc,Gd,TA,No,GLQ,1106,Unf,0,135,1241,GasA,Ex,Y,SBrkr,1241,0,0,1241,1,0,1,1,1,1,Gd,4,Typ,0,,Attchd,2007.0,Fin,2,569,TA,TA,Y,0,116,0,0,0,0,,,,0,11,2007,New,Partial,199900
1243,1244,20,RL,1.809524,13891,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,10,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,,Ex,TA,PConc,Ex,Gd,Gd,GLQ,1386,Unf,0,690,2076,GasA,Ex,Y,SBrkr,2076,0,0,2076,1,0,2,1,2,1,Ex,7,Typ,1,Gd,Attchd,2006.0,Fin,3,850,TA,TA,Y,216,229,0,0,0,0,,,,0,9,2006,New,Partial,465000
1278,1279,60,RL,0.285714,9473,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,,,Gd,TA,PConc,Gd,TA,No,GLQ,804,Unf,0,324,1128,GasA,Ex,Y,SBrkr,1128,903,0,2031,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2002.0,RFn,2,577,TA,TA,Y,0,211,0,0,0,0,,,,0,3,2008,WD,Normal,237000


In [135]:
y_train = train["SalePrice"]
X_train = train[["MSSubClass", "LotArea", "OverallQual", 
                 "OverallCond", "YearBuilt", "TotalBsmtSF", 
                 "1stFlrSF", "2ndFlrSF", "BsmtFullBath", 
                 "FullBath", "BedroomAbvGr", "KitchenAbvGr", "RoofStyle_Rare",
                 "TotRmsAbvGrd", "GarageCars", "GarageYrBlt", "RoofStyle_Hip",
                  "ScreenPorch", "YrSold", "LotFrontage", "MasVnrArea",
                 "MiscVal", "3SsnPorch", "WoodDeckSF", "YearRemodAdd",
                 "MoSold", "EnclosedPorch", "OpenPorchSF", "GarageArea"]]

In [137]:
CB_model = CatBoostRegressor(verbose=False)
CB_model.fit(X_train, y_train)
print("Train Score:", CB_model.score(X_train, y_train))
y_pred = CB_model.predict(X_train)
print("MAPE:", mean_absolute_percentage_error(y_train, y_pred))
print("MAE:", mean_absolute_error(y_train, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_train, y_pred)))

Train Score: 0.9905205295795204
MAPE: 0.038261803008740936
MAE: 5876.944898870679
RMSE: 7732.077071721227


In [136]:
LGBM_model = LGBMRegressor()
LGBM_model.fit(X_train, y_train)
print("Train Score:", LGBM_model.score(X_train, y_train))
y_pred = LGBM_model.predict(X_train)
print("MAPE:", mean_absolute_percentage_error(y_train, y_pred))
print("MAE:", mean_absolute_error(y_train, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_train, y_pred)))

Train Score: 0.96916813247492
MAPE: 0.04408591533766339
MAE: 7414.635797703631
RMSE: 13944.535495030885


In [114]:
RS = RobustScaler()
test["LotFrontage"] = RS.fit_transform(test[["LotFrontage"]])

In [115]:
test["LotFrontage"] = test["LotFrontage"].fillna(test["LotFrontage"].mean())

In [138]:
test["MasVnrArea"] = test["MasVnrArea"].fillna(0)

In [139]:
test["RoofStyle"] = test["RoofStyle"].apply(lambda x: "Rare" if x in ["Shed", "Mansard", "Gambrel", "Flat"] else x)
test = pd.get_dummies(data=test, columns=["RoofStyle"], drop_first=True)

In [113]:
test.isnull().sum()

Id                  0
MSSubClass          0
MSZoning            4
LotFrontage       227
LotArea             0
Street              0
Alley            1352
LotShape            0
LandContour         0
Utilities           2
LotConfig           0
LandSlope           0
Neighborhood        0
Condition1          0
Condition2          0
BldgType            0
HouseStyle          0
OverallQual         0
OverallCond         0
YearBuilt           0
YearRemodAdd        0
RoofStyle           0
RoofMatl            0
Exterior1st         1
Exterior2nd         1
MasVnrType         16
MasVnrArea         15
ExterQual           0
ExterCond           0
Foundation          0
BsmtQual           44
BsmtCond           45
BsmtExposure       44
BsmtFinType1       42
BsmtFinSF1          1
BsmtFinType2       42
BsmtFinSF2          1
BsmtUnfSF           1
TotalBsmtSF         1
Heating             0
HeatingQC           0
CentralAir          0
Electrical          0
1stFlrSF            0
2ndFlrSF            0
LowQualFin

In [140]:
#y_test = test["SalePrice"]
X_test = test[["MSSubClass", "LotArea", "OverallQual", 
                 "OverallCond", "YearBuilt", "TotalBsmtSF", 
                 "1stFlrSF", "2ndFlrSF", "BsmtFullBath", 
                 "FullBath", "BedroomAbvGr", "KitchenAbvGr", "RoofStyle_Rare",
                 "TotRmsAbvGrd", "GarageCars", "GarageYrBlt", "RoofStyle_Hip",
                  "ScreenPorch", "YrSold", "LotFrontage", "YearRemodAdd",
                 "MiscVal", "3SsnPorch", "WoodDeckSF", "MasVnrArea",
                 "MoSold", "EnclosedPorch", "OpenPorchSF", "GarageArea"]]

In [141]:
y_pred_test = CB_model.predict(X_test)

In [142]:
y_pred_test

array([123533.85380732, 169509.93600543, 172955.07659892, ...,
       186929.00050822, 115899.56014149, 230052.33109944])

In [101]:
ID = test["Id"]

In [143]:
submission = pd.DataFrame({"Id": test["Id"], "SalePrice": y_pred_test})

In [144]:
submission.to_csv("pred3.csv", index=False)

In [100]:
test.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,6,1961,1961,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,CBlock,TA,TA,No,Rec,468.0,LwQ,144.0,270.0,882.0,GasA,TA,Y,SBrkr,896,0,0,896,0.0,0.0,1,0,2,1,TA,5,Typ,0,,Attchd,1961.0,Unf,1.0,730.0,TA,TA,Y,140,0,0,0,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,108.0,TA,TA,CBlock,TA,TA,No,ALQ,923.0,Unf,0.0,406.0,1329.0,GasA,TA,Y,SBrkr,1329,0,0,1329,0.0,0.0,1,1,3,1,Gd,6,Typ,0,,Attchd,1958.0,Unf,1.0,312.0,TA,TA,Y,393,36,0,0,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,PConc,Gd,TA,No,GLQ,791.0,Unf,0.0,137.0,928.0,GasA,Gd,Y,SBrkr,928,701,0,1629,0.0,0.0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1997.0,Fin,2.0,482.0,TA,TA,Y,212,34,0,0,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,6,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,20.0,TA,TA,PConc,TA,TA,No,GLQ,602.0,Unf,0.0,324.0,926.0,GasA,Ex,Y,SBrkr,926,678,0,1604,0.0,0.0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,1998.0,Fin,2.0,470.0,TA,TA,Y,360,36,0,0,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,,0.0,Gd,TA,PConc,Gd,TA,No,ALQ,263.0,Unf,0.0,1017.0,1280.0,GasA,Ex,Y,SBrkr,1280,0,0,1280,0.0,0.0,2,0,2,1,Gd,5,Typ,0,,Attchd,1992.0,RFn,2.0,506.0,TA,TA,Y,0,82,0,0,144,0,,,,0,1,2010,WD,Normal


In [99]:
sample_submission

Unnamed: 0,Id,SalePrice
0,1461,169277.052498
1,1462,187758.393989
2,1463,183583.68357
3,1464,179317.477511
4,1465,150730.079977
5,1466,177150.989247
6,1467,172070.659229
7,1468,175110.95652
8,1469,162011.698832
9,1470,160726.247831


In [46]:
LR = LinearRegression()
LR.fit(X_train, y_train)
print("Train Score:", LR.score(X_train, y_train))
y_pred = LR.predict(X_train)
print("MAPE:", mean_absolute_percentage_error(y_train, y_pred))
print("MAE:", mean_absolute_error(y_train, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_train, y_pred)))
#print("Test Score:", LR.score(X_test_, y_test_))
#y_pred_test = LR.predict(X_test_)
#print("Test:", mean_absolute_percentage_error(y_test_, y_pred_test))

Train Score: 0.6758608599442572
MAPE: 0.18500138492137902
MAE: 30884.523303489223
RMSE: 45213.68187361539
