In [19]:
# Ordinal scales for the categorical features that have a natural ranking.
# Every list runs from the lowest rank to the highest; `ordinal` turns the
# position in the list into the numeric code (first entry -> 1).
# Labels 'N', 'NO' and 'No' are not listed here because `ordinal` always
# encodes them as 0.

# Five-level rating shared by most of the rated features.
_QUALITY_SCALE = ('Po', 'Fa', 'TA', 'Gd', 'Ex')
# Scale shared by the two BsmtFinType columns.
_BSMT_FIN_SCALE = ('Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ')

orders = {
    'Alley': ['Grvl', 'Pave'],
    'LotShape': ['IR3', 'IR2', 'IR1', 'Reg'],
    'Utilities': ['ELO', 'NoSeWa', 'NoSeWr', 'AllPub'],
    'LandSlope': ['Gtl', 'Mod', 'Sev'],
    # list(...) gives every key its own list object, as in a plain literal.
    'ExterQual': list(_QUALITY_SCALE),
    'ExterCond': list(_QUALITY_SCALE),
    'BsmtQual': list(_QUALITY_SCALE),
    'BsmtCond': list(_QUALITY_SCALE),
    'BsmtExposure': ['Mn', 'Av', 'Gd'],
    'BsmtFinType1': list(_BSMT_FIN_SCALE),
    'BsmtFinType2': list(_BSMT_FIN_SCALE),
    'HeatingQC': list(_QUALITY_SCALE),
    'CentralAir': ['Y'],
    'KitchenQual': list(_QUALITY_SCALE),
    'FireplaceQu': list(_QUALITY_SCALE),
    'GarageFinish': ['Unf', 'RFn', 'Fin'],
    'GarageQual': list(_QUALITY_SCALE),
    'GarageCond': list(_QUALITY_SCALE),
    'PavedDrive': ['P', 'Y'],
    'PoolQC': list(_QUALITY_SCALE),
}

In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from xgboost import XGBRegressor

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error , mean_absolute_error
# enable_iterative_imputer must be imported before IterativeImputer.
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer , SimpleImputer , KNNImputer
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


# Input data files are available in the read-only "../input/" directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

In [21]:
def ordinal(column , order):
  """Encode an ordered categorical column as numeric ranks.

  Each label in ``order`` is mapped to its 1-based position (first label ->
  1.0, second -> 2.0, ...). The labels ``'N'``, ``'NO'`` and ``'No'`` are
  always encoded as 0.0, even if they also appear in ``order``.

  Parameters
  ----------
  column : pd.Series
      Labels to encode.
  order : sequence
      Labels listed from the lowest rank to the highest.

  Returns
  -------
  pd.Series
      float64 codes carrying the same index as ``column``, so the result can
      be assigned back to the originating DataFrame whatever its index is.

  Raises
  ------
  KeyError
      If ``column`` holds a value (including a missing value) that is
      neither in ``order`` nor one of the zero-coded labels.
  """
  codes = {label: rank for rank, label in enumerate(order, start=1)}
  # Applied last so the zero-coded labels win over any entry in `order`.
  codes.update({'N': 0, 'NO': 0, 'No': 0})

  # Vectorised lookup; labels without a code come back as NaN.
  encoded = column.map(codes)

  unknown = column[encoded.isna()]
  if not unknown.empty:
    raise KeyError(f"labels without an ordinal code: {unknown.unique().tolist()}")

  return encoded.astype(float)

In [22]:
# Load the competition's train and test tables.
train_data = pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/train.csv")
test_data = pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/test.csv")

n_cols = train_data.shape[1]

# Columns that have an ordinal scale: a missing entry is replaced by the
# 'NO' label, which `ordinal` encodes as 0.
no_null = list(orders.keys())

train_data[no_null] = train_data[no_null].fillna('NO')
test_data[no_null] = test_data[no_null].fillna('NO')

# Select the target by name rather than by column position. `drop` returns
# new frames, so the per-column assignments done later on X_train / X_test
# do not write into slices of train_data / test_data.
target_col = 'SalePrice'

X_train = train_data.drop(columns=[target_col])
y_train = train_data[target_col]

# errors='ignore': the target column is not expected in the test table.
X_test = test_data.drop(columns=[target_col], errors='ignore')

In [23]:
cols = X_train.columns

# Replace every feature that has an entry in `orders` with its numeric
# ordinal code, in both the training and the test frame.
ordinal_cols = [name for name in cols if name in orders]

for column in ordinal_cols:
    levels = orders[column]
    X_train[column] = ordinal(X_train[column], levels)
    X_test[column] = ordinal(X_test[column], levels)

In [24]:
# Stack the training rows on top of the test rows (row-wise concatenation,
# original index labels kept).
X = pd.concat([X_train, X_test])

In [25]:
# One-hot encode the combined frame, then split it back by position:
# the first len(y_train) rows go to X_train, the remainder to X_test.
X = pd.get_dummies(X)

n_train = len(y_train)
X_train, X_test = X.iloc[:n_train], X.iloc[n_train:]

print(X_train.shape, X_test.shape)


In [32]:
# Pipeline: KNN imputation -> top-k features by mutual information -> regressor.
steps = [
    ('imputer', KNNImputer()),
    ('fs', SelectKBest(score_func=mutual_info_regression, k=10)),
    ('reg', XGBRegressor()),
]
model = Pipeline(steps)

# Not the last expression of the cell, so the returned dict is not displayed.
model.get_params()

# Grid over the number of selected features, the imputer's neighbour count
# and the final regressor.
param_grid = {
    'fs__k': list(range(1, 100, 5)),
    'imputer__n_neighbors': list(range(5, 20, 5)),
    'reg': [XGBRegressor(), RandomForestRegressor()],
}

search = GridSearchCV(model, param_grid, scoring='neg_mean_absolute_error')
result = search.fit(X_train, y_train)


In [29]:
# Pipeline: iterative (BayesianRidge-based) imputation -> top-k features by
# mutual information -> regressor.
steps = [
    # Each step is a plain (name, estimator) tuple.
    ('imputer', IterativeImputer(estimator=BayesianRidge(),
                                 n_nearest_features=None,
                                 imputation_order='ascending')),
    ('fs', SelectKBest(score_func=mutual_info_regression, k=10)),
    ('reg', XGBRegressor()),
]

model = Pipeline(steps)

# IterativeImputer has no `n_neighbors` parameter (that one belongs to
# KNNImputer), and GridSearchCV rejects unknown parameter names, so the grid
# only covers the number of selected features and the final regressor.
param_grid = {
    'fs__k': list(range(1, 100, 5)),
    'reg': [XGBRegressor(), RandomForestRegressor()],
}

search = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_mean_absolute_error')
result = search.fit(X_train, y_train)

In [43]:
# Predict the test rows with the fitted grid search. The Series is named at
# construction (Series.rename returns a new object, so calling it without
# assigning the result has no effect) and shares X_test's index so the two
# stay aligned row for row.
prediction = pd.Series(result.predict(X_test), index=X_test.index, name="SalePrice")
prediction.head()

In [47]:
# Build the two-column submission table. The columns are paired by position
# (to_numpy) so the result does not depend on the two indexes matching.
submission = pd.DataFrame({
    'Id': X_test['Id'].to_numpy(),
    'SalePrice': prediction.to_numpy(),
})

# index=False keeps the DataFrame index out of the file, so the CSV holds
# only the Id and SalePrice columns.
submission.to_csv("submit.csv", index=False)

submission.head()