In [1]:
# taken from https://www.kaggle.com/agehsbarg/top-10-0-10943-stacking-mice-and-brutal-force
# Adapted into the ballet framework
import ballet
import ballet.eng
from ballet import Feature
import numpy as np
import pandas as pd
import sklearn
import sklearn_pandas
from sklearn.model_selection import train_test_split

# Display the installed ballet version as this cell's output.
ballet.__version__

'0.4.1'

In [2]:
# Accumulator for the Feature objects built in the following cells;
# the complete list is later handed to ballet.make_mapper.
all_features = []

In [3]:
# Columns consumed by add_areas. NOTE: `input` shadows the Python builtin.
input = ['Total Bsmt SF', '1st Flr SF', '2nd Flr SF']
def add_areas(df):
    """Return the row-wise sum of the three area columns.

    df must expose 'Total Bsmt SF', '1st Flr SF' and '2nd Flr SF'.
    """
    basement = df['Total Bsmt SF']
    first_floor = df['1st Flr SF']
    second_floor = df['2nd Flr SF']
    return basement + first_floor + second_floor
# Wrap add_areas in a ballet transformer and register the named feature.
transformer = ballet.eng.SimpleFunctionTransformer(func=add_areas)
total_area = Feature(input=input, transformer=transformer, name='Total Area Calculation')
all_features.append(total_area)

In [4]:
input = ['Yr Sold', 'Year Remod/Add']
def calc_age(df):
    """Return 'Yr Sold' minus 'Year Remod/Add' for each row.

    df must expose both columns listed in the feature's input.
    """
    # The column is spelled 'Year Remod/Add', matching the input list above;
    # the earlier 'Year/Remod Add' spelling raised a KeyError.
    return df['Yr Sold'] - df['Year Remod/Add']
# Wrap calc_age in a ballet transformer and register it as the 'Age' feature.
transformer = ballet.eng.SimpleFunctionTransformer(func=calc_age)
age = Feature(input=input, transformer=transformer, name='Age')
all_features.append(age)

In [5]:
input = ['Overall Qual', 'Overall Cond']
def calc_qual(df):
    """Return 'Overall Qual' minus 'Overall Cond' for each row."""
    quality = df['Overall Qual']
    condition = df['Overall Cond']
    return quality - condition
# Wrap calc_qual in a ballet transformer and register it as the 'Qual' feature.
transformer = ballet.eng.SimpleFunctionTransformer(func=calc_qual)
qual = Feature(input=input, transformer=transformer, name='Qual')
all_features.append(qual)

In [6]:
def has_qual(df):
    """Flag rows whose 'Open Porch SF' value is non-zero.

    Returns 1 where the value differs from 0 and 0 where it equals 0.
    """
    porch_area = df['Open Porch SF']
    is_nonzero = porch_area != 0
    return is_nonzero * 1
# Register an unnamed 0/1 indicator feature built from 'Open Porch SF'.
# NOTE: `qual` is rebound here; the earlier 'Qual' feature is already stored in all_features.
input = ['Open Porch SF']
transformer = ballet.eng.SimpleFunctionTransformer(func=has_qual)
qual = Feature(input=input, transformer=transformer)
all_features.append(qual)

In [7]:
def has_qual(df):
    """Flag rows whose 'Enclosed Porch' value is non-zero.

    Returns 1 where the value differs from 0 and 0 where it equals 0.
    """
    porch_area = df['Enclosed Porch']
    is_nonzero = porch_area != 0
    return is_nonzero * 1
# Register an unnamed 0/1 indicator feature built from 'Enclosed Porch'.
input = ['Enclosed Porch']
transformer = ballet.eng.SimpleFunctionTransformer(func=has_qual)
qual = Feature(input=input, transformer=transformer)
all_features.append(qual)

In [8]:
def has_qual(df):
    """Flag rows whose '3Ssn Porch' value is non-zero.

    Returns 1 where the value differs from 0 and 0 where it equals 0.
    """
    porch_area = df['3Ssn Porch']
    is_nonzero = porch_area != 0
    return is_nonzero * 1
# Register an unnamed 0/1 indicator feature built from '3Ssn Porch'.
input = ['3Ssn Porch']
transformer = ballet.eng.SimpleFunctionTransformer(func=has_qual)
qual = Feature(input=input, transformer=transformer)
all_features.append(qual)

In [9]:
def has_qual(df):
    """Flag rows whose 'Screen Porch' value is non-zero.

    Returns 1 where the value differs from 0 and 0 where it equals 0.
    """
    porch_area = df['Screen Porch']
    is_nonzero = porch_area != 0
    return is_nonzero * 1
# Register an unnamed 0/1 indicator feature built from 'Screen Porch'.
input = ['Screen Porch']
transformer = ballet.eng.SimpleFunctionTransformer(func=has_qual)
qual = Feature(input=input, transformer=transformer)
all_features.append(qual)

In [10]:
# 'Alley': replace entries flagged by pd.isnull with the string "None", then one-hot encode.
input = ['Alley']
transformer = [ballet.eng.missing.NullFiller(replacement="None", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer, name='Alley Misc Fill')
all_features.append(misc_fill)

In [11]:
# 'Land Slope': replace entries flagged by pd.isnull with the string "None", then one-hot encode.
input = ['Land Slope']
transformer = [ballet.eng.missing.NullFiller(replacement="None", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [12]:
# 'Mas Vnr Type': replace entries flagged by pd.isnull with the string "None", then one-hot encode.
input = ['Mas Vnr Type']
transformer = [ballet.eng.missing.NullFiller(replacement="None", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [13]:
# 'Garage Type': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['Garage Type']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [14]:
# 'Garage Cond': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['Garage Cond']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [15]:
# 'Fence': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['Fence']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [16]:
# 'Street': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['Street']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [17]:
# 'Lot Shape': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['Lot Shape']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [18]:
# 'Land Contour': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['Land Contour']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [19]:
# 'BsmtFin Type 1': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['BsmtFin Type 1']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [20]:
# 'BsmtFin Type 2': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['BsmtFin Type 2']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [21]:
# 'Central Air': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['Central Air']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [22]:
# 'Misc Feature': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['Misc Feature']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [23]:
# 'Utilities': replace entries flagged by pd.isnull with the string "missing", then one-hot encode.
input = ['Utilities']
transformer = [ballet.eng.missing.NullFiller(replacement="missing", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [24]:
# 'Functional': replace entries flagged by pd.isnull with the string "Typ", then one-hot encode.
input = ['Functional']
transformer = [ballet.eng.missing.NullFiller(replacement="Typ", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [25]:
# 'Sale Condition': replace entries flagged by pd.isnull with the string "Typ", then one-hot encode.
# NOTE(review): "Typ" is the same replacement used for 'Functional'; confirm it is a
# meaningful category for 'Sale Condition' and not a copy-paste leftover.
input = ['Sale Condition']
transformer = [ballet.eng.missing.NullFiller(replacement="Typ", isnull=pd.isnull), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [26]:
input = ['Pool QC']
def mode_filler(df):
    """Return the 'Pool QC' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Pool QC']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Pool QC': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [27]:
input = ['Bsmt Qual']
def mode_filler(df):
    """Return the 'Bsmt Qual' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Bsmt Qual']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Bsmt Qual': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [28]:
input = ['Bsmt Cond']
def mode_filler(df):
    """Return the 'Bsmt Cond' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Bsmt Cond']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Bsmt Cond': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [29]:
input = ['Fireplace Qu']
def mode_filler(df):
    """Return the 'Fireplace Qu' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Fireplace Qu']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Fireplace Qu': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [30]:
input = ['Garage Finish']
def mode_filler(df):
    """Return the 'Garage Finish' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Garage Finish']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Garage Finish': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [31]:
input = ['Garage Qual']
def mode_filler(df):
    """Return the 'Garage Qual' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Garage Qual']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Garage Qual': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [32]:
input = ['Bsmt Exposure']
def mode_filler(df):
    """Return the 'Bsmt Exposure' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Bsmt Exposure']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Bsmt Exposure': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [33]:
input = ['Electrical']
def mode_filler(df):
    """Return the 'Electrical' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Electrical']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Electrical': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [34]:
input = ['MS Zoning']
def mode_filler(df):
    """Return the 'MS Zoning' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['MS Zoning']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'MS Zoning': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [35]:
input = ['Exterior 1st']
def mode_filler(df):
    """Return the 'Exterior 1st' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Exterior 1st']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Exterior 1st': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [36]:
input = ['Exterior 2nd']
def mode_filler(df):
    """Return the 'Exterior 2nd' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Exterior 2nd']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Exterior 2nd': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [37]:
input = ['Kitchen Qual']
def mode_filler(df):
    """Return the 'Kitchen Qual' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Kitchen Qual']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Kitchen Qual': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [38]:
input = ['Sale Type']
def mode_filler(df):
    """Return the 'Sale Type' column with nulls replaced by its most frequent value.

    The mode is computed from the frame passed in on every call. When the
    column holds no non-null values, `mode()` is empty and there is nothing
    to fill with, so an unchanged copy is returned instead of raising.
    """
    column = df['Sale Type']
    modes = column.mode()
    if modes.empty:
        return column.copy()
    # fillna returns a new Series, so the caller's frame is not modified.
    return column.fillna(modes.iloc[0])
# 'Sale Type': apply mode_filler, then one-hot encode; register the unnamed feature.
transformer = [ballet.eng.SimpleFunctionTransformer(func=mode_filler), sklearn.preprocessing.OneHotEncoder()]
misc_fill = Feature(input=input, transformer=transformer)
all_features.append(misc_fill)

In [39]:
# One-hot encode 'Yr Sold' so the sale year is used as a category ('Year Categorical').
input = ['Yr Sold']
year = Feature(input=input, transformer=sklearn.preprocessing.OneHotEncoder(), name='Year Categorical')
all_features.append(year)

In [40]:
# One-hot encode 'Mo Sold' so the sale month is used as a category ('Month Categorical').
input = ['Mo Sold']
month = Feature(input=input, transformer=sklearn.preprocessing.OneHotEncoder(), name='Month Categorical')
all_features.append(month)

In [41]:
# 'MS SubClass': run NullFiller with the string "None" as replacement, then one-hot encode.
# NOTE(review): unlike the other NullFiller cells, no `isnull` argument is passed here,
# so NullFiller's own default applies — confirm that is intended.
input = ['MS SubClass']
transformer = [ballet.eng.missing.NullFiller(replacement="None"), sklearn.preprocessing.OneHotEncoder()]
ms_fill = Feature(input=input, transformer=transformer, name='MS Fill None')
all_features.append(ms_fill)

In [42]:
def load_ames(source='https://s3.amazonaws.com/mit-dai-ballet/ames/AmesHousing.txt'):
    '''Load the Ames Housing dataset and split it into features and target.

    Source:

        Decock, Dean. "Ames, Iowa: Alternative to the Boston Housing Data as an
        End of Semester Regression Project."
        <https://ww2.amstat.org/publications/jse/v19n3/decock.pdf>

    Args:
        source: path, URL or file-like object accepted by pandas.read_table.
            Defaults to the hosted copy, so calling load_ames() with no
            arguments behaves as before; pass a local path to work offline.

    Returns:
        (X, y): X is the loaded DataFrame without the 'SalePrice' column and
        y is the 'SalePrice' column.
    '''
    df = pd.read_table(source)
    y = df['SalePrice']
    X = df.drop('SalePrice', axis=1)
    return X, y

In [43]:
# Load the dataset: X holds every column except 'SalePrice', y holds 'SalePrice'.
X, y = load_ames()
# Uncomment to list the available column names:
# X.columns.values.tolist()

In [44]:
# Split into train and test sets; random_state=3 pins the shuffle so the split is repeatable.
# No test_size is given, so train_test_split's default proportions apply.
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=3)

In [45]:
# Build a mapper from every registered feature and fit it on the training split.
# NOTE(review): the recorded run of this cell ended with
# KeyError "['Pool QC']: Pool QC"; the cause is not visible from this notebook alone.
mapper = ballet.make_mapper(all_features)
mapper.fit(X_tr, y_tr)

KeyError: "['Pool QC']: Pool QC"

In [None]:
# Transform the training split with the fitted mapper (cell has no recorded execution count).
mapper.transform(X_tr)