In [1]:
# taken from https://www.kaggle.com/erikbruin/house-prices-lasso-xgboost-and-a-detailed-eda
# Adapted into the ballet framework
import ballet
import ballet.eng
from ballet import Feature
import numpy as np
import pandas as pd
import sklearn
import sklearn_pandas
from sklearn.model_selection import train_test_split

ballet.__version__

'0.4.1'

In [2]:
# Running collection that the feature cells of this notebook append to.
all_features = list()

In [3]:
# 'Alley': nulls become the "NOACCESS" label, then the column is one-hot encoded.
input = ['Alley']
transformer = [
    ballet.eng.missing.NullFiller(replacement="NOACCESS"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
    name='Alley Misc Fill',
)
all_features.append(misc_fill)

In [4]:
input = ['MS Zoning']
def mode_filler(df):
    """Fill nulls in 'MS Zoning' with the column's most frequent value.

    The mode is computed from the frame passed in on each call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an 'MS Zoning' column. It is not modified.

    Returns
    -------
    pandas.Series
        A new 'MS Zoning' series with nulls replaced by the mode. If the
        column has no non-null values (so no mode exists), an unmodified
        copy of the column is returned.
    """
    column = df['MS Zoning']
    modes = column.mode()
    if modes.empty:
        # Every value is null: there is nothing to fill with, and indexing
        # the empty mode result would raise.
        return column.copy()
    # fillna returns a new Series, so no defensive copy of df is needed.
    return column.fillna(modes.iloc[0])
# Two-step pipeline: mode_filler first, then one-hot encoding.
transformer = [
    ballet.eng.SimpleFunctionTransformer(func=mode_filler),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [5]:
# 'MS SubClass': nulls become the string "None", then one-hot encoding.
# NOTE(review): the replacement is a string; if this column holds numeric
# codes, filled rows would mix types -- confirm against the data.
input = ['MS SubClass']
transformer = [
    ballet.eng.missing.NullFiller(replacement="None"),
    sklearn.preprocessing.OneHotEncoder(),
]
ms_fill = Feature(
    input=input,
    transformer=transformer,
    name='MS Fill None',
)
all_features.append(ms_fill)

In [6]:
input = ['Lot Frontage']
def mean_filler(df):
    """Return 'Lot Frontage' with nulls replaced by that column's mean.

    The mean is computed from the frame passed in; ``df`` is not modified.
    """
    frontage = df['Lot Frontage']
    average = frontage.mean()
    # fillna produces a new Series, leaving the caller's frame untouched.
    return frontage.fillna(average)
# Single-step feature: mean_filler is the only transformer (no encoder).
transformer = ballet.eng.SimpleFunctionTransformer(
    func=mean_filler,
)
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [7]:
# 'Overall Cond' is one-hot encoded directly, with no null filling step.
input = ['Overall Cond']
transformer = sklearn.preprocessing.OneHotEncoder()
feature = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(feature)

In [8]:
input = ['Mas Vnr Type']
def mode_filler(df):
    """Fill nulls in 'Mas Vnr Type' with the column's most frequent value.

    The mode is computed from the frame passed in on each call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Mas Vnr Type' column. It is not modified.

    Returns
    -------
    pandas.Series
        A new 'Mas Vnr Type' series with nulls replaced by the mode. If
        the column has no non-null values (so no mode exists), an
        unmodified copy of the column is returned.
    """
    column = df['Mas Vnr Type']
    modes = column.mode()
    if modes.empty:
        # Every value is null: there is nothing to fill with, and indexing
        # the empty mode result would raise.
        return column.copy()
    # fillna returns a new Series, so no defensive copy of df is needed.
    return column.fillna(modes.iloc[0])
# Two-step pipeline: mode_filler first, then one-hot encoding.
transformer = [
    ballet.eng.SimpleFunctionTransformer(func=mode_filler),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [9]:
# 'BsmtFin Type 1': nulls become the "NoBsmt" label, then one-hot encoding.
input = ['BsmtFin Type 1']
transformer = [
    ballet.eng.missing.NullFiller(replacement="NoBsmt"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [10]:
# 'BsmtFin Type 2': nulls become the "NoBsmt" label, then one-hot encoding.
input = ['BsmtFin Type 2']
transformer = [
    ballet.eng.missing.NullFiller(replacement="NoBsmt"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [11]:
# 'Bsmt Qual': nulls become the "NoBsmt" label, then one-hot encoding.
input = ['Bsmt Qual']
transformer = [
    ballet.eng.missing.NullFiller(replacement="NoBsmt"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [12]:
# 'Bsmt Exposure': nulls become the "NoBsmt" label, then one-hot encoding.
input = ['Bsmt Exposure']
transformer = [
    ballet.eng.missing.NullFiller(replacement="NoBsmt"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [13]:
# 'Bsmt Cond': nulls become the "NoBsmt" label, then one-hot encoding.
input = ['Bsmt Cond']
transformer = [
    ballet.eng.missing.NullFiller(replacement="NoBsmt"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [14]:
# 'Total Bsmt SF': nulls are replaced with 0; no encoder step follows.
input = ['Total Bsmt SF']
transformer = ballet.eng.missing.NullFiller(
    replacement=0,
)
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [15]:
input = ['Electrical']
def mode_filler(df):
    """Fill nulls in 'Electrical' with the column's most frequent value.

    The mode is computed from the frame passed in on each call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an 'Electrical' column. It is not modified.

    Returns
    -------
    pandas.Series
        A new 'Electrical' series with nulls replaced by the mode. If the
        column has no non-null values (so no mode exists), an unmodified
        copy of the column is returned.
    """
    column = df['Electrical']
    modes = column.mode()
    if modes.empty:
        # Every value is null: there is nothing to fill with, and indexing
        # the empty mode result would raise.
        return column.copy()
    # fillna returns a new Series, so no defensive copy of df is needed.
    return column.fillna(modes.iloc[0])
# Two-step pipeline: mode_filler first, then one-hot encoding.
transformer = [
    ballet.eng.SimpleFunctionTransformer(func=mode_filler),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [16]:
input = ['Kitchen Qual']
def mode_filler(df):
    """Fill nulls in 'Kitchen Qual' with the column's most frequent value.

    The mode is computed from the frame passed in on each call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Kitchen Qual' column. It is not modified.

    Returns
    -------
    pandas.Series
        A new 'Kitchen Qual' series with nulls replaced by the mode. If
        the column has no non-null values (so no mode exists), an
        unmodified copy of the column is returned.
    """
    column = df['Kitchen Qual']
    modes = column.mode()
    if modes.empty:
        # Every value is null: there is nothing to fill with, and indexing
        # the empty mode result would raise.
        return column.copy()
    # fillna returns a new Series, so no defensive copy of df is needed.
    return column.fillna(modes.iloc[0])
# Two-step pipeline: mode_filler first, then one-hot encoding.
transformer = [
    ballet.eng.SimpleFunctionTransformer(func=mode_filler),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [17]:
# 'Kitchen AbvGr': nulls become the string "missing", then one-hot encoding.
# NOTE(review): the replacement is a string; if this column holds numeric
# counts, filled rows would mix types -- confirm against the data.
input = ['Kitchen AbvGr']
transformer = [
    ballet.eng.missing.NullFiller(replacement="missing"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [18]:
# 'Fireplace Qu': nulls become the "NoFP" label, then one-hot encoding.
input = ['Fireplace Qu']
transformer = [
    ballet.eng.missing.NullFiller(replacement="NoFP"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [19]:
# 'Garage Type': nulls become the "missing" label, then one-hot encoding.
input = ['Garage Type']
transformer = [
    ballet.eng.missing.NullFiller(replacement="missing"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [20]:
# 'Garage Finish': nulls become the "missing" label, then one-hot encoding.
input = ['Garage Finish']
transformer = [
    ballet.eng.missing.NullFiller(replacement="missing"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [21]:
# 'Garage Qual': nulls become the "missing" label, then one-hot encoding.
input = ['Garage Qual']
transformer = [
    ballet.eng.missing.NullFiller(replacement="missing"),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [22]:
input = ['Sale Type']
def mode_filler(df):
    """Fill nulls in 'Sale Type' with the column's most frequent value.

    The mode is computed from the frame passed in on each call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Sale Type' column. It is not modified.

    Returns
    -------
    pandas.Series
        A new 'Sale Type' series with nulls replaced by the mode. If the
        column has no non-null values (so no mode exists), an unmodified
        copy of the column is returned.
    """
    column = df['Sale Type']
    modes = column.mode()
    if modes.empty:
        # Every value is null: there is nothing to fill with, and indexing
        # the empty mode result would raise.
        return column.copy()
    # fillna returns a new Series, so no defensive copy of df is needed.
    return column.fillna(modes.iloc[0])
# Two-step pipeline: mode_filler first, then one-hot encoding.
transformer = [
    ballet.eng.SimpleFunctionTransformer(func=mode_filler),
    sklearn.preprocessing.OneHotEncoder(),
]
misc_fill = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(misc_fill)

In [23]:
# 'Yr Sold' is one-hot encoded, i.e. handled as a categorical value.
input = ['Yr Sold']
year = Feature(
    input=input,
    transformer=sklearn.preprocessing.OneHotEncoder(),
    name='Year Categorical',
)
all_features.append(year)

In [24]:
# 'Mo Sold' is one-hot encoded, i.e. handled as a categorical value.
input = ['Mo Sold']
month = Feature(
    input=input,
    transformer=sklearn.preprocessing.OneHotEncoder(),
    name='Month Categorical',
)
all_features.append(month)

In [25]:
# 'Condition 1': nulls become the string "None", then one-hot encoding.
input = ['Condition 1']
transformer = [
    ballet.eng.missing.NullFiller(replacement="None"),
    sklearn.preprocessing.OneHotEncoder(),
]
factor = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(factor)

In [26]:
# 'Condition 2': nulls become the string "None", then one-hot encoding.
input = ['Condition 2']
transformer = [
    ballet.eng.missing.NullFiller(replacement="None"),
    sklearn.preprocessing.OneHotEncoder(),
]
factor = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(factor)

In [27]:
# 'Exterior 1st': nulls become the string "None", then one-hot encoding.
input = ['Exterior 1st']
transformer = [
    ballet.eng.missing.NullFiller(replacement="None"),
    sklearn.preprocessing.OneHotEncoder(),
]
factor = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(factor)

In [28]:
# 'Exterior 2nd': nulls become the string "None", then one-hot encoding.
input = ['Exterior 2nd']
transformer = [
    ballet.eng.missing.NullFiller(replacement="None"),
    sklearn.preprocessing.OneHotEncoder(),
]
factor = Feature(
    input=input,
    transformer=transformer,
)
all_features.append(factor)

In [29]:
input = ['Total Bsmt SF', '1st Flr SF', '2nd Flr SF']
def add_areas(df):
    """Sum basement, first-floor and second-floor areas row by row.

    A null 'Total Bsmt SF' is counted as 0, matching the standalone
    'Total Bsmt SF' feature in this notebook, which fills nulls with 0.
    Without this, one missing basement value would make the whole total
    null for that row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'Total Bsmt SF', '1st Flr SF' and '2nd Flr SF'
        columns. It is not modified.

    Returns
    -------
    pandas.Series
        Element-wise sum of the three columns.
    """
    # fillna returns a new Series; the caller's frame is left untouched.
    basement = df['Total Bsmt SF'].fillna(0)
    return basement + df['1st Flr SF'] + df['2nd Flr SF']
# Single-step feature: add_areas is the only transformer.
transformer = ballet.eng.SimpleFunctionTransformer(
    func=add_areas,
)
total_area = Feature(
    input=input,
    transformer=transformer,
    name='Total Area Calculation',
)
all_features.append(total_area)