In [64]:
import numpy as np
import pandas as pd

In [65]:
df_train = pd.read_csv('housing_price_train.csv')
test = pd.read_csv('housing_price_test.csv')

In [66]:
df_train["LotFrontage"] = df_train.groupby("Neighborhood")["LotFrontage"].transform(
    lambda x: x.fillna(x.median()))
for col in ('GarageType', 'GarageFinish', 'GarageQual', 'GarageCond'):
    df_train[col] = df_train[col].fillna('None')
for col in ('GarageYrBlt', 'GarageArea', 'GarageCars'):
    df_train[col] = df_train[col].fillna(0)
for col in ('BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF','TotalBsmtSF', 'BsmtFullBath', 'BsmtHalfBath'):
    df_train[col] = df_train[col].fillna(0)
for col in ('BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2'):
    df_train[col] = df_train[col].fillna('None')
df_train["MasVnrType"] = df_train["MasVnrType"].fillna("None")
df_train["MasVnrArea"] = df_train["MasVnrArea"].fillna(0)
df_train['MSZoning'] = df_train['MSZoning'].fillna(df_train['MSZoning'].mode()[0])
# df_train = df_train.drop(['Utilities'], axis=1)
df_train["Functional"] = df_train["Functional"].fillna("Typ")
df_train['Electrical'] = df_train['Electrical'].fillna(df_train['Electrical'].mode()[0])
df_train['KitchenQual'] = df_train['KitchenQual'].fillna(df_train['KitchenQual'].mode()[0])
df_train['Exterior1st'] = df_train['Exterior1st'].fillna(df_train['Exterior1st'].mode()[0])
df_train['Exterior2nd'] = df_train['Exterior2nd'].fillna(df_train['Exterior2nd'].mode()[0])
df_train['SaleType'] = df_train['SaleType'].fillna(df_train['SaleType'].mode()[0])
df_train['MSSubClass'] = df_train['MSSubClass'].fillna("None")

df_train.drop(['PoolQC','MiscFeature','Alley','Fence','FireplaceQu','Id'], axis=1, inplace=True)
test.drop(['PoolQC','MiscFeature','Alley','Fence','FireplaceQu'], axis=1, inplace=True)

In [67]:
df_dtype = df_train.dtypes
df_dtype.unique()

array([dtype('int64'), dtype('O'), dtype('float64')], dtype=object)

In [68]:
std_sale = np.std(df_train.SalePrice)
size = df_train.shape[0]

In [69]:
def compute_sdr(df_train):
    SDR = {}

    for col in df_train:

        x = []
        if df_train[col].dtypes == 'O':
#             print(col)
            for i in df_train[col].unique():
                df_cat = df_train
                df_cat = df_cat.loc[df_cat[col] == i]
                freq = df_cat.shape[0]
                prob = freq/size
                std_attr = np.std(df_cat.SalePrice)
                x.append(prob*std_attr)
            SDR[col] = std_sale - sum(x)

        if df_train[col].dtypes == 'int64' or df_train[col].dtypes == 'float64':
#             print(col)
            over, less = [],[]
            over = df_train.SalePrice.loc[df_train[col] > df_train[col].median()]
            less = df_train.SalePrice.loc[df_train[col] <= df_train[col].median()]
            std_over = np.std(over)
            std_less = np.std(less)
            p_over = over.count()/size
            p_less = less.count()/size
            result = p_over*std_over + p_less*std_less
            result = std_sale - result
            SDR[col] = result
            
    del SDR['SalePrice']
    SDR = sorted(SDR.items(), key=lambda y: y[1],reverse=True)
   
    return SDR
    


In [78]:
def predict_price(row):
    df_x = df_train
#     print(df_x.shape[0])
    
    while df_x.shape[0] > 40:
        sdr = compute_sdr(df_x)
        cond = sdr[0][0]
        temp = df_x.copy()
        print(sdr[0][0])
        if type(row[cond]) == str:
            df_x = df_x.loc[df_x[cond] == row[cond]]
        elif type(row[cond]) == 'int64' or type(row[cond]) == 'float64':
            if row[cond] < np.median(df_x[cond]):
#                 print('int greater')
                df_x = df_x.loc[df_x[cond] < np.median(df_x[cond])]
            else:
#                 print('int less')
                df_x = df_x.loc[df_x[cond] >= np.median(df_x[cond])]
        if df_x.shape[0] < 2:
            df_x = temp
            cond2 = sdr[1][0]
            if type(row[cond2]) == str:
                df_x = df_x.loc[df_x[cond2] == row[cond2]]
            elif type(row[cond]) == 'int64' or type(row[cond]) == 'float64':
                if row[cond2] < np.median(df_x[cond2]):
#                     print('greater')
                    df_x = df_x.loc[df_x[cond2] < np.median(df_x[cond2])]
                else:
#                     print('less')
                    df_x = df_x.loc[df_x[cond2] >= np.median(df_x[cond2])]
            df_x.drop([cond2], axis=1, inplace=True)
        if df_x.shape[0] < 2:
            df_x = temp
            cond2 = sdr[1][0]
            if type(row[cond2]) == str:
                df_x = df_x.loc[df_x[cond2] == row[cond2]]
            elif type(row[cond]) == 'int64' or type(row[cond]) == 'float64':
                if row[cond2] < np.median(df_x[cond2]):
#                     print('greater')
                    df_x = df_x.loc[df_x[cond2] < np.median(df_x[cond2])]
                else:
#                     print('less')
                    df_x = df_x.loc[df_x[cond2] >= np.median(df_x[cond2])]
            df_x.drop([cond2], axis=1, inplace=True)
        df_x.drop([cond], axis=1, inplace=True)
#         print(df_x.shape[0])
    return np.mean(df_x['SalePrice'])
        

In [79]:
def predict_all(test_df):
    ids = []
    predictions = []
    for i in range(test.shape[0]):
        row = test.iloc[i,:]
        predicted_price = predict_price(row)
        ids.append(i)
        predictions.append(predicted_price)
    return ids, predictions

In [80]:
def createSubmission(test_ids, predictions):
    sub = pd.DataFrame()
    sub['Id'] = test_ids
    sub['SalePrice'] = predictions
    sub.to_csv('sub.csv',index=False)

In [None]:
def main():
    ids, predictions = predict_all(test)

    createSubmission(ids, predictions)
if __name__ == '__main__':
    main()

Neighborhood
BsmtExposure
GrLivArea
OverallQual
GarageFinish
Exterior2nd
Neighborhood
BsmtExposure
GrLivArea
OverallQual
GarageFinish
Exterior2nd
Neighborhood
ExterQual
Neighborhood
ExterQual
Neighborhood
Neighborhood
ExterQual
Neighborhood
ExterQual
Neighborhood
ExterQual
Neighborhood
ExterQual
Neighborhood
BsmtExposure
GrLivArea
OverallQual
GarageFinish
Neighborhood
BsmtExposure
GrLivArea
OverallQual
GarageFinish
Neighborhood
Neighborhood
Neighborhood
Neighborhood
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
Neighborhood
OverallCond
BsmtQual
Neighborhood
ExterQual
LotConfig
Neighborhood
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
LotConfig
Neighborhood
MSSubClass
OverallQual
BsmtQual
Neighborhood
LotArea
BldgType


BsmtQual
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
Neighborhood
OverallCond
BsmtQual
Neighborhood
OverallCond
BsmtQual
Neighborhood
OverallCond
BsmtQual
Neighborhood
OverallCond
BsmtQual
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
YearRemodAdd
BsmtExposure
Neighborhood
OverallCond
BsmtQual
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
LotConfig
Neighborhood
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
Neighborhood
ExterQual
Neighborhood
ExterQual
LotConfig
Neighborhood
ExterQual
Neighborhood
MS

Neighborhood
OverallQual
HouseStyle
Neighborhood
OverallQual
HouseStyle
Neighborhood
BsmtQual
Neighborhood
BsmtQual
Neighborhood
BsmtQual
OverallQual
Exterior1st
MSZoning
MasVnrType
HouseStyle
Neighborhood
BsmtQual
Neighborhood
BsmtQual
OverallQual
Exterior1st
MSZoning
MasVnrType
HouseStyle
Neighborhood
BsmtQual
OverallQual
Exterior1st
MSZoning
MasVnrType
MasVnrArea
LotShape
Neighborhood
BsmtQual
Neighborhood
BsmtQual
OverallQual
Exterior1st
MSZoning
MasVnrType
HouseStyle
Neighborhood
Neighborhood
BsmtQual
Neighborhood
BsmtQual
OverallQual
Exterior1st
MSZoning
MasVnrType
HouseStyle
Neighborhood
BsmtQual
OverallQual
Exterior1st
MSZoning
MasVnrType
MasVnrArea
LotShape
Neighborhood
BsmtQual
OverallQual
Exterior1st
MSZoning
MasVnrType
MasVnrArea
LotShape
Neighborhood
BsmtQual
OverallQual
Exterior1st
MSZoning
MasVnrType
MasVnrArea
LotShape
Neighborhood
BsmtQual
OverallQual
Exterior1st
Neighborhood
BsmtQual
Neighborhood
BsmtQual
OverallQual
Exterior1st
MSZoning
MasVnrType
HouseStyle
Neighbor

In [None]:
k.shape

In [None]:
range(X_test.shape[0])

In [None]:
j = pd.read_csv('sub.csv')
np.mean(j['SalePrice'])
k = pd.read_csv('sub.csv')

In [None]:
A = np.array(sale)
B = np.array(j['SalePrice'])
H = sum(np.square(A-B))/len(A)**2
H

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
mean_squared_error(X_test['SalePrice'],k['SalePrice'])