In [69]:
import os
import json
import joblib
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns

In [70]:
def load_data(filepath):
    """Load the dataset from a CSV file."""
    return pd.read_csv(filepath)

In [71]:
def preprocess_data(df):
    """Preprocess the dataset."""
    df = df.dropna(subset=['SalePrice'])
    numeric_cols = df.select_dtypes(include=np.number).columns
    df[numeric_cols] = df[numeric_cols].fillna(0)
    categorical_cols = df.select_dtypes(include='object').columns
    for col in categorical_cols:
        df[col].fillna('None', inplace=True)
    df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)
    return df

In [72]:
def split_data(df, target='SalePrice'):
    """Split the dataset."""
    X = df.drop(columns=[target])
    y = df[target]
    return train_test_split(X, y, test_size=0.2, random_state=42)

In [73]:
def evaluate_model(model, X_test, y_test, model_name="Model"):
    """
    Evaluate a model and calculate RMSE manually.
    """
    predictions = model.predict(X_test)
    
    # Calculate RMSE manually
    mse = np.mean((y_test - predictions) ** 2)
    rmse = np.sqrt(mse)
    
    # Calculate MAE
    mae = mean_absolute_error(y_test, predictions)
    
    save_plot_results(y_test, predictions, model_name)
    
    return rmse, mae, predictions

In [74]:
def save_plot_results(y_test, predictions, model_name="Model"):
    """Plot actual vs predicted."""
    plt.figure(figsize=(8, 6))
    sns.scatterplot(x=y_test, y=predictions, alpha=0.6)
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
    plt.title(f"Actual vs Predicted: {model_name}")
    plt.xlabel("Actual Prices")
    plt.ylabel("Predicted Prices")
    file_path = f'reports/{model_name}_actual_vs_predicted.png'
    if not os.path.exists('reports'):
        os.makedirs('reports')
    plt.savefig(file_path)
    plt.close()
    print(f"The scatter plot of {model_name} results has been successfully saved in the 'reports' folder")

In [75]:
    # Load and preprocess the dataset
    file_path = "data/house_prices.csv"
    data = load_data(file_path)
    data = preprocess_data(data.copy())
    X_train, X_test, y_train, y_test = split_data(data)

In [76]:
    # List of models
    models = {
        "Linear Regression": LinearRegression(),
        "Ridge Regression": Ridge(alpha=1.0),
        "Lasso Regression": Lasso(alpha=10.0, max_iter=5000),
        "Decision Tree": DecisionTreeRegressor(max_depth=10),
        "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
        "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42),
        "XGBoost": XGBRegressor(n_estimators=100, random_state=42),
        "LightGBM": LGBMRegressor(n_estimators=100, random_state=42, force_row_wise=True),
        "SVR": SVR(kernel='rbf', C=100)
    }

In [77]:
    # Train and evaluate traditional models
    results = {}
    model_instances = {}
    for name, model in models.items():
        print(f"\nTraining {name}...")
        model.fit(X_train, y_train)
        rmse, mae, predictions = evaluate_model(model, X_test, y_test, model_name=name)
        results[name] = rmse
        model_instances[name] = model


Training Linear Regression...
The scatter plot of Linear Regression results has been successfully saved in the 'reports' folder

Training Ridge Regression...
The scatter plot of Ridge Regression results has been successfully saved in the 'reports' folder

Training Lasso Regression...
The scatter plot of Lasso Regression results has been successfully saved in the 'reports' folder

Training Decision Tree...
The scatter plot of Decision Tree results has been successfully saved in the 'reports' folder

Training Random Forest...
The scatter plot of Random Forest results has been successfully saved in the 'reports' folder

Training Gradient Boosting...
The scatter plot of Gradient Boosting results has been successfully saved in the 'reports' folder

Training XGBoost...
The scatter plot of XGBoost results has been successfully saved in the 'reports' folder

Training LightGBM...
[LightGBM] [Info] Total Bins 1848
[LightGBM] [Info] Number of data points in the train set: 1168, number of used fe

In [78]:
    # Neural Network
    print("\nTraining Neural Network...")
    input_dim = X_train.shape[1]
    nn = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1)
    ])
    nn.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    nn.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, callbacks=[early_stopping], verbose=0)
    nn_predictions = nn.predict(X_test).flatten()
    # RMSE calculation for Neural Network
    nn_mse = np.mean((y_test - nn_predictions) ** 2)
    nn_rmse = np.sqrt(nn_mse)
    nn_mae = mean_absolute_error(y_test, nn_predictions)
    results["Neural Network"] = nn_rmse
    model_instances["Neural Network"] = nn
    save_plot_results(y_test, nn_predictions, model_name="Neural Network")


Training Neural Network...
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
The scatter plot of Neural Network results has been successfully saved in the 'reports' folder


In [79]:
    # Summary of results
    best_model = min(results, key=results.get)
    print("\nSummary of Results:")
    print("===================================")
    for model_name, rmse in results.items():
        print(f"{model_name}: RMSE = {rmse:.4f}")
    print("\nBest Model (Lowest RMSE):")
    print(f"{best_model}: RMSE = {results[best_model]:.4f}")

    # Save best model information to a JSON file
    best_model_info = {
        "model_name": best_model,
        "rmse": results[best_model]
    }
    output_file = "reports/best_model.json"
    if not os.path.exists('reports'):
        os.makedirs('reports')
    with open(output_file, "w") as f:
        json.dump(best_model_info, f, indent=4)
    print(f"\nBest model information saved to {output_file}")

    # Save the best model
    best_model_instance = model_instances[best_model]
    model_file = "models/best_model.pkl"
    if not os.path.exists('models'):
        os.makedirs('models')
    joblib.dump(best_model_instance, model_file)
    print(f"\nBest model saved to {model_file}")


Summary of Results:
Linear Regression: RMSE = 39028.0612
Ridge Regression: RMSE = 35281.5808
Lasso Regression: RMSE = 38581.1079
Decision Tree: RMSE = 38714.9745
Random Forest: RMSE = 29785.0590
Gradient Boosting: RMSE = 27580.5181
XGBoost: RMSE = 27953.1538
LightGBM: RMSE = 30486.1070
SVR: RMSE = 87432.5748
Neural Network: RMSE = 45637.9517

Best Model (Lowest RMSE):
Gradient Boosting: RMSE = 27580.5181

Best model information saved to reports/best_model.json

Best model saved to models/best_model.pkl


In [80]:
import os
import subprocess
import shap
import json
import joblib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import streamlit as st
from sklearn.model_selection import train_test_split

In [81]:
def load_data(filepath):
    """Load the dataset from a CSV file."""
    return pd.read_csv(filepath)

In [82]:
def preprocess_data(df):
    """Preprocess the dataset."""
    if 'SalePrice' in df.columns:
        df = df.dropna(subset=['SalePrice'])
    numeric_cols = df.select_dtypes(include=np.number).columns
    df[numeric_cols] = df[numeric_cols].fillna(0)
    categorical_cols = df.select_dtypes(include='object').columns
    for col in categorical_cols:
        df[col].fillna('None', inplace=True)
    df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)
    return df

In [83]:
def split_data(df, target='SalePrice'):
    """Split the dataset."""
    X = df.drop(columns=[target])
    y = df[target]
    return train_test_split(X, y, test_size=0.2, random_state=42)

In [84]:
# Function to collect user input
def user_input_features():
    st.sidebar.header('User Input Features')

    # Categorical features (multiple choice or single choice)
    MSZoning = st.sidebar.selectbox('MSZoning', ['A', 'C', 'FV', 'I', 'RH', 'RL', 'RP', 'RM'])
    Street = st.sidebar.selectbox('Street', ['Grvl', 'Pave'])
    LotShape = st.sidebar.selectbox('LotShape', ['Reg', 'IR1', 'IR2', 'IR3'])
    Utilities = st.sidebar.selectbox('Utilities', ['AllPub', 'NoSewr', 'NoSeWa', 'ELO'])
    LotConfig = st.sidebar.selectbox('LotConfig', ['Inside', 'Corner', 'CulDSac', 'FR2', 'FR3'])
    LandSlope = st.sidebar.selectbox('LandSlope', ['Gtl', 'Mod', 'Sev'])
    RoofStyle = st.sidebar.selectbox('RoofStyle', ['Flat', 'Gable', 'Gambrel', 'Hip', 'Mansard', 'Shed'])
    ExterQual = st.sidebar.selectbox('ExterQual', ['Ex', 'Gd', 'TA', 'Fa', 'Po'])
    Foundation = st.sidebar.selectbox('Foundation', ['BrkTil', 'CBlock', 'PConc', 'Slab', 'Stone', 'Wood'])
    BsmtQual = st.sidebar.selectbox('BsmtQual', ['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'])
    Heating = st.sidebar.selectbox('Heating', ['Floor', 'GasA', 'GasW', 'Grav', 'OthW', 'Wall'])
    HeatingQC = st.sidebar.selectbox('HeatingQC', ['Ex', 'Gd', 'TA', 'Fa', 'Po'])
    CentralAir = st.sidebar.selectbox('CentralAir', ['N', 'Y'])
    Electrical = st.sidebar.selectbox('Electrical', ['SBrkr', 'FuseA', 'FuseF', 'FuseP', 'Mix'])
    KitchenQual = st.sidebar.selectbox('KitchenQual', ['Ex', 'Gd', 'TA', 'Fa', 'Po'])
    FireplaceQu = st.sidebar.selectbox('FireplaceQu', ['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'])
    GarageType = st.sidebar.selectbox('GarageType', ['2Types', 'Attchd', 'Basment', 'BuiltIn', 'CarPort', 'Detchd', 'NA'])
    GarageQual = st.sidebar.selectbox('GarageQual', ['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'])
    PavedDrive = st.sidebar.selectbox('PavedDrive', ['Y', 'P', 'N'])
    PoolQC = st.sidebar.selectbox('PoolQC', ['Ex', 'Gd', 'TA', 'Fa', 'NA'])
    Fence = st.sidebar.selectbox('Fence', ['GdPrv', 'MnPrv', 'GdWo', 'MnWw', 'NA'])
    MiscFeature = st.sidebar.selectbox('MiscFeature', ['Elev', 'Gar2', 'Othr', 'Shed', 'TenC', 'NA'])

    # Numerical features (for continuous data)
    LotFrontage = st.sidebar.number_input('LotFrontage', min_value=0, max_value=1000, value=80)
    LotArea = st.sidebar.number_input('LotArea', min_value=0, max_value=1000000, value=5000)
    OverallQual = st.sidebar.selectbox('OverallQual', [10, 9, 8, 7, 6, 5, 4, 3, 2, 1])
    YearBuilt = st.sidebar.number_input('YearBuilt', min_value=1900, max_value=2024, value=2000)
    YearRemodAdd = st.sidebar.number_input('YearRemodAdd', min_value=1900, max_value=2024, value=2005)
    TotalBsmtSF = st.sidebar.number_input('TotalBsmtSF', min_value=0, max_value=5000, value=800)
    fstFlrSF = st.sidebar.number_input('1stFlrSF', min_value=0, max_value=5000, value=1000)
    sndFlrSF = st.sidebar.number_input('2ndFlrSF', min_value=0, max_value=5000, value=500)
    GrLivArea = st.sidebar.number_input('GrLivArea', min_value=0, max_value=5000, value=2000)
    BsmtFullBath = st.sidebar.number_input('BsmtFullBath', min_value=0, max_value=5, value=1)
    BsmtHalfBath = st.sidebar.number_input('BsmtHalfBath', min_value=0, max_value=5, value=1)
    FullBath = st.sidebar.number_input('FullBath', min_value=0, max_value=5, value=2)
    HalfBath = st.sidebar.number_input('HalfBath', min_value=0, max_value=5, value=1)
    Bedroom = st.sidebar.number_input('Bedroom', min_value=0, max_value=10, value=3)
    Kitchen = st.sidebar.number_input('Kitchen', min_value=0, max_value=5, value=1)
    TotRmsAbvGrd = st.sidebar.number_input('TotRmsAbvGrd', min_value=0, max_value=20, value=8)
    Fireplaces = st.sidebar.number_input('Fireplaces', min_value=0, max_value=5, value=1)
    GarageCars = st.sidebar.number_input('GarageCars', min_value=0, max_value=5, value=2)
    WoodDeckSF = st.sidebar.number_input('WoodDeckSF', min_value=0, max_value=1000, value=100)
    OpenPorchSF = st.sidebar.number_input('OpenPorchSF', min_value=0, max_value=1000, value=50)
    EnclosedPorch = st.sidebar.number_input('EnclosedPorch', min_value=0, max_value=1000, value=20)
    PoolArea = st.sidebar.number_input('PoolArea', min_value=0, max_value=1000, value=0)

    # Organize the inputs into a DataFrame
    input_data = {
        'MSZoning': MSZoning,
        'Street': Street,
        'LotShape': LotShape,
        'Utilities': Utilities,
        'LotConfig': LotConfig,
        'LandSlope': LandSlope,
        'RoofStyle': RoofStyle,
        'ExterQual': ExterQual,
        'Foundation': Foundation,
        'BsmtQual': BsmtQual,
        'Heating': Heating,
        'HeatingQC': HeatingQC,
        'CentralAir': CentralAir,
        'Electrical': Electrical,
        'KitchenQual': KitchenQual,
        'FireplaceQu': FireplaceQu,
        'GarageType': GarageType,
        'GarageQual': GarageQual,
        'PavedDrive': PavedDrive,
        'PoolQC': PoolQC,
        'Fence': Fence,
        'MiscFeature': MiscFeature,
        'LotFrontage': LotFrontage,
        'LotArea': LotArea,
        'OverallQual': OverallQual,
        'YearBuilt': YearBuilt,
        'YearRemodAdd': YearRemodAdd,
        'TotalBsmtSF': TotalBsmtSF,
        '1stFlrSF': fstFlrSF,
        '2ndFlrSF': sndFlrSF,
        'GrLivArea': GrLivArea,
        'BsmtFullBath': BsmtFullBath,
        'BsmtHalfBath': BsmtHalfBath,
        'FullBath': FullBath,
        'HalfBath': HalfBath,
        'Bedroom': Bedroom,
        'Kitchen': Kitchen,
        'TotRmsAbvGrd': TotRmsAbvGrd,
        'Fireplaces': Fireplaces,
        'GarageCars': GarageCars,
        'WoodDeckSF': WoodDeckSF,
        'OpenPorchSF': OpenPorchSF,
        'EnclosedPorch': EnclosedPorch,
        'PoolArea': PoolArea
    }
    
    features = pd.DataFrame(input_data, index=[0])
    
    return features

In [85]:
    try:
        model_file = "models/best_model.pkl"
        model = joblib.load(model_file)
         # Open and read the JSON file
        json_file_path = "reports/best_model.json"
        with open(json_file_path, 'r') as file:
            best_model_data = json.load(file)
        # Retrieve the model_name
        model_name = best_model_data.get("model_name", None)
        print(f"Model {model_name} loaded successfully!")
    except FileNotFoundError:
        print("Model not found. Running `main.py` to train the model...")
        # Execute main.py
        subprocess.run(["python", "main.py"], check=True)
        model = joblib.load(model_file)
        print(f"Model {model_name} loaded successfully after training!")

Model Gradient Boosting loaded successfully!


In [86]:
    # Load the dataset (ensure preprocessing matches training)
    data_path = "data/house_prices.csv"
    data = load_data(data_path)

In [95]:
    # Preprocess the data (update this with your preprocessing function)
    data = preprocess_data(data.copy())  # Ensure this function matches your training pipeline
    X = data.drop(columns=["SalePrice"])
    y = data["SalePrice"]
    # Train-Test Split
    X_train, X_test, y_train, y_test = split_data(data)

In [96]:
data

Unnamed: 0,LotFrontage,LotArea,OverallQual,YearBuilt,YearRemodAdd,TotalBsmtSF,1stFlrSF,2ndFlrSF,GrLivArea,BsmtFullBath,...,PoolQC_Gd,PoolQC_None,Fence_GdWo,Fence_MnPrv,Fence_MnWw,Fence_None,MiscFeature_None,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC
0,65.0,8450,7,2003,2003,856.0,856,854,1710,1.0,...,False,True,False,False,False,True,True,False,False,False
1,80.0,9600,6,1976,1976,1262.0,1262,0,1262,0.0,...,False,True,False,False,False,True,True,False,False,False
2,68.0,11250,7,2001,2002,920.0,920,866,1786,1.0,...,False,True,False,False,False,True,True,False,False,False
3,60.0,9550,7,1915,1970,756.0,961,756,1717,1.0,...,False,True,False,False,False,True,True,False,False,False
4,84.0,14260,8,2000,2000,1145.0,1145,1053,2198,1.0,...,False,True,False,False,False,True,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,62.0,7917,6,1999,2000,953.0,953,694,1647,0.0,...,False,True,False,False,False,True,True,False,False,False
1456,85.0,13175,6,1978,1988,1542.0,2073,0,2073,1.0,...,False,True,False,True,False,False,True,False,False,False
1457,66.0,9042,7,1941,2006,1152.0,1188,1152,2340,0.0,...,False,True,False,False,False,False,False,False,True,False
1458,68.0,9717,5,1950,1996,1078.0,1078,0,1078,1.0,...,False,True,False,False,False,True,True,False,False,False


In [94]:
    # Initial value for categorical features preprocess columns
    MSZoning_FV, MSZoning_RH, MSZoning_RL, MSZoning_RM, Street_Pave, LotShape_IR2, LotShape_IR3, LotShape_Reg, Utilities_NoSeWa, LotConfig_CulDSac, LotConfig_FR2, LotConfig_FR3, LotConfig_Inside, LandSlope_Mod, LandSlope_Sev, RoofStyle_Gable, RoofStyle_Gambrel, RoofStyle_Hip, RoofStyle_Mansard, RoofStyle_Shed, ExterQual_Fa, ExterQual_Gd, ExterQual_TA, Foundation_CBlock, Foundation_PConc, Foundation_Slab, Foundation_Stone, Foundation_Wood, BsmtQual_Fa, BsmtQual_Gd, BsmtQual_None, BsmtQual_TA, Heating_GasA, Heating_GasW, Heating_Grav, Heating_OthW, Heating_Wall, HeatingQC_Fa, HeatingQC_Gd, HeatingQC_Po, HeatingQC_TA, CentralAir_Y, Electrical_FuseF, Electrical_FuseP, Electrical_Mix, Electrical_None, Electrical_SBrkr, KitchenQual_Fa, KitchenQual_Gd, KitchenQual_TA, FireplaceQu_Fa, FireplaceQu_Gd, FireplaceQu_None, FireplaceQu_Po, FireplaceQu_TA, GarageType_Attchd, GarageType_Basment, GarageType_BuiltIn, GarageType_CarPort, GarageType_Detchd, GarageType_None, GarageQual_Fa, GarageQual_Gd, GarageQual_None, GarageQual_Po, GarageQual_TA, PoolQC_Fa, PoolQC_Gd, PoolQC_None, Fence_GdWo, Fence_MnPrv, Fence_MnWw, Fence_None, MiscFeature_None, MiscFeature_Othr, MiscFeature_Shed, MiscFeature_TenC = False
    
    # Categorical features (multiple choice or single choice)
    MSZoning = 'A'
    Street = 'Grvl'
    LotShape = 'Reg'
    Utilities = 'AllPub'
    LotConfig = 'Inside'
    LandSlope = 'Gtl'
    RoofStyle = 'Flat'
    ExterQual = 'Ex'
    Foundation = 'BrkTil'
    BsmtQual = 'Ex'
    Heating = 'Floor'
    HeatingQC = 'Ex'
    CentralAir = 'N'
    Electrical = 'SBrkr'
    KitchenQual = 'Ex'
    FireplaceQu = 'Ex'
    GarageType = '2Types'
    GarageQual = 'Ex'
    PoolQC = 'Ex'
    Fence = 'GdPrv'
    MiscFeature = 'Elev'

    # Numerical features (for continuous data)
    LotFrontage = 80
    LotArea = 5000
    OverallQual = 9
    YearBuilt =2000
    YearRemodAdd =2005
    TotalBsmtSF =800
    fstFlrSF =1000
    sndFlrSF =500
    GrLivArea =2000
    BsmtFullBath =1
    BsmtHalfBath =1
    FullBath =2
    HalfBath =1
    Bedroom =3
    Kitchen =1
    TotRmsAbvGrd =8
    Fireplaces =1
    GarageCars =2
    WoodDeckSF =100
    OpenPorchSF =50
    EnclosedPorch =20
    PoolArea =0

    # Organize the inputs into a DataFrame
    input_data = {
        'MSZoning': MSZoning,
        'Street': Street,
        'LotShape': LotShape,
        'Utilities': Utilities,
        'LotConfig': LotConfig,
        'LandSlope': LandSlope,
        'RoofStyle': RoofStyle,
        'ExterQual': ExterQual,
        'Foundation': Foundation,
        'BsmtQual': BsmtQual,
        'Heating': Heating,
        'HeatingQC': HeatingQC,
        'CentralAir': CentralAir,
        'Electrical': Electrical,
        'KitchenQual': KitchenQual,
        'FireplaceQu': FireplaceQu,
        'GarageType': GarageType,
        'GarageQual': GarageQual,
        'PoolQC': PoolQC,
        'Fence': Fence,
        'MiscFeature': MiscFeature,
        'LotFrontage': LotFrontage,
        'LotArea': LotArea,
        'OverallQual': OverallQual,
        'YearBuilt': YearBuilt,
        'YearRemodAdd': YearRemodAdd,
        'TotalBsmtSF': TotalBsmtSF,
        '1stFlrSF': fstFlrSF,
        '2ndFlrSF': sndFlrSF,
        'GrLivArea': GrLivArea,
        'BsmtFullBath': BsmtFullBath,
        'BsmtHalfBath': BsmtHalfBath,
        'FullBath': FullBath,
        'HalfBath': HalfBath,
        'BedroomAbvGr': Bedroom,
        'Kitchen': Kitchen,
        'TotRmsAbvGrd': TotRmsAbvGrd,
        'Fireplaces': Fireplaces,
        'GarageCars': GarageCars,
        'WoodDeckSF': WoodDeckSF,
        'OpenPorchSF': OpenPorchSF,
        'EnclosedPorch': EnclosedPorch,
        'PoolArea': PoolArea
    }
    
    features = pd.DataFrame(input_data, index=[0])

TypeError: cannot unpack non-iterable bool object

In [90]:
user_data = features
features

Unnamed: 0,MSZoning,Street,LotShape,Utilities,LotConfig,LandSlope,RoofStyle,ExterQual,Foundation,BsmtQual,...,HalfBath,BedroomAbvGr,Kitchen,TotRmsAbvGrd,Fireplaces,GarageCars,WoodDeckSF,OpenPorchSF,EnclosedPorch,PoolArea
0,A,Grvl,Reg,AllPub,Inside,Gtl,Flat,Ex,BrkTil,Ex,...,1,3,1,8,1,2,100,50,20,0


In [97]:
list(data.columns)

['LotFrontage',
 'LotArea',
 'OverallQual',
 'YearBuilt',
 'YearRemodAdd',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'GrLivArea',
 'BsmtFullBath',
 'BsmtHalfBath',
 'FullBath',
 'HalfBath',
 'BedroomAbvGr',
 'Kitchen',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageCars',
 'WoodDeckSF',
 'OpenPorchSF',
 'EnclosedPorch',
 'PoolArea',
 'SalePrice',
 'MSZoning_FV',
 'MSZoning_RH',
 'MSZoning_RL',
 'MSZoning_RM',
 'Street_Pave',
 'LotShape_IR2',
 'LotShape_IR3',
 'LotShape_Reg',
 'Utilities_NoSeWa',
 'LotConfig_CulDSac',
 'LotConfig_FR2',
 'LotConfig_FR3',
 'LotConfig_Inside',
 'LandSlope_Mod',
 'LandSlope_Sev',
 'RoofStyle_Gable',
 'RoofStyle_Gambrel',
 'RoofStyle_Hip',
 'RoofStyle_Mansard',
 'RoofStyle_Shed',
 'ExterQual_Fa',
 'ExterQual_Gd',
 'ExterQual_TA',
 'Foundation_CBlock',
 'Foundation_PConc',
 'Foundation_Slab',
 'Foundation_Stone',
 'Foundation_Wood',
 'BsmtQual_Fa',
 'BsmtQual_Gd',
 'BsmtQual_None',
 'BsmtQual_TA',
 'Heating_GasA',
 'Heating_GasW',
 'Heating_Grav',
 'Heating_OthW',
