In [1]:
# taken from https://www.kaggle.com/mjbahmani/a-comprehensive-ml-workflow-for-house-prices
# Adapted into the ballet framework
import ballet
import ballet.eng
from ballet import Feature
import numpy as np
import pandas as pd
import sklearn
import sklearn_pandas
from sklearn.model_selection import train_test_split

ballet.__version__

'0.4.1'

In [2]:
# Running collection of the Feature objects built in the cells below;
# each cell appends its feature to this list.
all_features: list = []

In [4]:
# Source columns declared as the input of the porch-type feature; this list
# is handed to Feature(input=...) a few lines down.
# NOTE: the name `input` shadows Python's builtin input() in this kernel.
input = [
    'EnclosedPorch',
    '3SsnPorch',
    'OpenPorchSF',
]
def calc_porch_type(df):
    """Classify each row by the kind of porch it has.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'EnclosedPorch', '3SsnPorch' and 'OpenPorchSF'.
        A 'TotalPorchArea' column is used when present; otherwise the total
        is computed as the sum of the three porch columns, so the function
        also works on a frame that holds only the declared input columns.

    Returns
    -------
    pandas.Series
        One label per row of ``df``:

        * 'Missing'  - total porch area equals 0
        * 'Enclosed' / '3Ssn' / 'Open' - the total equals that single
          column and that column is greater than 0
        * 'Multiple' - every other case
    """
    enclosed = df['EnclosedPorch']
    three_season = df['3SsnPorch']
    open_porch = df['OpenPorchSF']

    # Previously 'TotalPorchArea' was read unconditionally, which raised a
    # KeyError whenever only the three porch columns were supplied.
    if 'TotalPorchArea' in df:
        total = df['TotalPorchArea']
    else:
        total = enclosed + three_season + open_porch

    # Start from the two catch-all labels, then overwrite the rows whose
    # whole porch area comes from exactly one porch column.
    porch_type = total.eq(0).map({True: 'Missing', False: 'Multiple'})
    porch_type.loc[(total == enclosed) & (enclosed > 0)] = 'Enclosed'
    porch_type.loc[(total == three_season) & (three_season > 0)] = '3Ssn'
    porch_type.loc[(total == open_porch) & (open_porch > 0)] = 'Open'
    return porch_type
# Wrap calc_porch_type in a ballet SimpleFunctionTransformer, build the
# Feature from it, and register the Feature in all_features.
transformer = ballet.eng.SimpleFunctionTransformer(
    func=calc_porch_type,
)
porch = Feature(
    input=input,
    transformer=transformer,
    name='Porch Type Calculation',
)
all_features.append(porch)

In [6]:
# Source columns declared as the input of the porch-area feature.
input = [
    'EnclosedPorch',
    '3SsnPorch',
    'OpenPorchSF',
]
def calc_porch_area(df):
    """Return the row-wise sum of the three porch-area columns.

    Reads 'EnclosedPorch', '3SsnPorch' and 'OpenPorchSF' from ``df`` and
    adds them element-wise, in that order.
    """
    enclosed = df['EnclosedPorch']
    three_season = df['3SsnPorch']
    open_porch = df['OpenPorchSF']
    return enclosed + three_season + open_porch
# Wrap calc_porch_area in a ballet SimpleFunctionTransformer and register
# the resulting Feature in all_features.
transformer = ballet.eng.SimpleFunctionTransformer(func=calc_porch_area)
# Bound to `porch_area` rather than `total_area`: the 'Total Area Calculation'
# feature in the following cell uses `total_area`, and sharing the name made
# this porch feature unreachable by name once that cell had run.
porch_area = Feature(input=input, transformer=transformer, name='Porch Area Calculation')
all_features.append(porch_area)

In [7]:
# Source columns declared as the input of the total-area feature.
input = [
    'TotalBsmtSF',
    '1stFlrSF',
    '2ndFlrSF',
]
def add_areas(df):
    """Return the row-wise sum of basement, first-floor and second-floor area.

    Reads 'TotalBsmtSF', '1stFlrSF' and '2ndFlrSF' from ``df`` and adds them
    element-wise, in that order.
    """
    basement = df['TotalBsmtSF']
    first_floor = df['1stFlrSF']
    second_floor = df['2ndFlrSF']
    return basement.add(first_floor).add(second_floor)
# Wrap add_areas in a ballet SimpleFunctionTransformer, build the Feature
# from it, and register the Feature in all_features.
transformer = ballet.eng.SimpleFunctionTransformer(
    func=add_areas,
)
total_area = Feature(
    input=input,
    transformer=transformer,
    name='Total Area Calculation',
)
all_features.append(total_area)

In [8]:
# Source columns declared as the input of the bathroom-count feature.
input = [
    'FullBath',
    'HalfBath',
    'BsmtFullBath',
    'BsmtHalfBath',
]
def calc_bath(df):
    """Return the row-wise sum of the four bathroom-count columns.

    Reads 'FullBath', 'HalfBath', 'BsmtFullBath' and 'BsmtHalfBath' from
    ``df`` and adds them element-wise. Each column contributes its raw value.
    """
    # 'FullBath' must be a string key; the bare name FullBath was undefined
    # and raised NameError on every call.
    return df['FullBath'] + df['HalfBath'] + df['BsmtFullBath'] + df['BsmtHalfBath']
# Wrap calc_bath in a ballet SimpleFunctionTransformer, build the Feature
# from it, and register the Feature in all_features.
transformer = ballet.eng.SimpleFunctionTransformer(
    func=calc_bath,
)
baths = Feature(
    input=input,
    transformer=transformer,
    name='Bathroom Count',
)
all_features.append(baths)

In [None]:
# Source columns declared as the input of the garage-area-per-car feature.
input = [
    'GarageArea',
    'GarageCars',
]
def calc_garage_per_car(df):
    """Return garage area divided by garage car capacity, per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'GarageArea' and 'GarageCars'. It is not modified.

    Returns
    -------
    pandas.Series
        Named 'GarageAreaPerCar'. Rows whose ratio is not finite (for
        example a division by zero, or a missing value) are set to 0.
    """
    area_per_car = df['GarageArea'] / df['GarageCars']
    # Replace inf / -inf / NaN with 0 on the derived Series instead of
    # writing a helper column into the caller's frame.
    area_per_car = area_per_car.where(np.isfinite(area_per_car), 0)
    return area_per_car.rename('GarageAreaPerCar')
# Wrap calc_garage_per_car in a ballet SimpleFunctionTransformer, build the
# Feature from it, and register the Feature in all_features.
transformer = ballet.eng.SimpleFunctionTransformer(
    func=calc_garage_per_car,
)
cars = Feature(
    input=input,
    transformer=transformer,
    name='Garage Area Per Car',
)
all_features.append(cars)