In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPClassifier

import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV

# Read the competition's training and test CSVs from the Kaggle input directory.
df_train = pd.read_csv(
    filepath_or_buffer='../input/house-prices-advanced-regression-techniques/train.csv',
)
df_test = pd.read_csv(
    filepath_or_buffer='../input/house-prices-advanced-regression-techniques/test.csv',
)

In [None]:
# Columns of the training CSV kept for modelling. The final entry, 'SalePrice',
# is the prediction target; everything before it is a candidate feature.
useful_columns = [
    'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street', 'LotShape',
    'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
    'Condition1', 'Condition2', 'BldgType',
    'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
    'RoofStyle',
    'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea',
    'ExterQual', 'ExterCond', 'Foundation',
    'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', 'FullBath',
    'HalfBath', 'BedroomAbvGr',
    'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces',
    'PavedDrive', 'MoSold', 'YrSold', 'SaleType', 'SaleCondition',
    'SalePrice',
]

In [None]:
# Restrict the raw training frame to the hand-picked modelling columns (all rows kept).
data = df_train.loc[:, useful_columns]

In [None]:
# Discard the two columns the author considers not useful for the model.
# NOTE(review): presumably dropped so the row-wise dropna() in the next cell
# does not discard rows because of them -- confirm.
data = data.drop(['LotFrontage', 'MasVnrArea'], axis='columns')

In [None]:
# Keep only fully populated rows: a row is removed if any remaining column is NaN.
data = data.dropna(axis=0, how='any')

#  Deal with categorical features


In [None]:
# Categorical predictors: every non-numeric column of the cleaned frame.
# select_dtypes(exclude='number') is used instead of `data.dtypes == object`
# so text columns are still found when pandas stores them under a dedicated
# string dtype rather than `object`; with the equality test such columns would
# be missed and the downstream ColumnTransformer would silently receive no
# categorical features.
obj = data.select_dtypes(exclude='number')

# Pipeline

In [None]:
# Fixed seed shared by the hold-out split and the forest so that
# "Restart Kernel -> Run All" reproduces the same score and predictions.
RANDOM_STATE = 42

# Numeric predictors = every numeric column except the target.
# select_dtypes(include='number') replaces `data.dtypes == int`: the builtin
# `int` resolves to a platform-dependent NumPy integer width, so the equality
# test can match none of the int64 columns pandas produces and silently drop
# all numeric features from the model.
data_int = data.select_dtypes(include='number')
data_int = data_int.drop(columns='SalePrice')

numeric_features = data_int.columns
numeric_transformer = Pipeline(steps=[
    # `data` has already been through dropna(), so the imputer only matters at
    # predict time, when new rows may contain missing numeric values.
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Categorical predictors are the columns selected into `obj` by an earlier cell.
categorical_features = obj.columns
categorical_transformer = Pipeline(steps=[
    # Missing categories are replaced by the explicit level 'missing'.
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    # Levels not seen during fit are ignored instead of raising at predict time.
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Route each column group through its own preprocessing branch.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

# NOTE: the names `clf` and 'classifier' suggest classification, but the final
# estimator is a regressor. The names are kept unchanged so that existing
# references such as clf.named_steps['classifier'] continue to work.
clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', RandomForestRegressor(random_state=RANDOM_STATE))])

# Feature matrix and integer-valued target.
X = data.drop(columns=['SalePrice'])
y = data['SalePrice'].astype('int')

# Hold out 20% of the rows for validation; seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE)

In [None]:
# Fit preprocessing and the forest on the training split; as the last
# expression of the cell, the fitted pipeline is also displayed.
clf.fit(X=X_train, y=y_train)

In [None]:
# Evaluate the fitted pipeline on the held-out split and report the score to
# three decimals (for a scikit-learn regressor, score() is R^2).
holdout_score = clf.score(X_test, y_test)
print("model score: %.3f" % holdout_score)

# Making predictions

In [None]:
# Feature columns for the competition test set, taken from the training matrix
# `X` rather than from a second hand-typed list, so the frame passed to
# clf.predict always has exactly the columns the pipeline was fitted on
# (the target 'SalePrice' and the dropped columns are already absent from X).
usfl = list(X.columns)
test_df = df_test[usfl]

In [None]:
# Run the fitted pipeline on the test-set features; the bare name on the last
# line makes the notebook display the predictions.
test_preds2 = clf.predict(X=test_df)
test_preds2

# Submitting

In [None]:
# Pair each test-set Id with its predicted SalePrice and write the submission
# file without the DataFrame index.
output2 = pd.DataFrame({
    'Id': df_test['Id'],
    'SalePrice': test_preds2,
})
output2.to_csv('final_submission3.csv', index=False)