In [1]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

import data_utils

In [2]:
# Obtain the train and test frames from the project-local data_utils helper.
# NOTE(review): how the split is made (ratio, stratification, seed) is not
# visible from this notebook -- confirm in data_utils.
df_train, df_test = data_utils.train_test_split()

In [3]:
# Map every training-frame column name to its positional index so that a
# plain NumPy array taken from the frame can be addressed by column name.
col_idx = {name: position for position, name in enumerate(df_train.columns)}
col_idx

{'longitude': 0,
 'latitude': 1,
 'housing_median_age': 2,
 'total_rooms': 3,
 'total_bedrooms': 4,
 'population': 5,
 'households': 6,
 'median_income': 7,
 'median_house_value': 8,
 'ocean_proximity': 9,
 'income_cat': 10}

In [4]:
# Show the (rows, columns) shape of the training frame.
df_train.shape

(16512, 11)

In [5]:
class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Append ratio features computed from the housing count columns.

    Column positions are resolved through the notebook-level ``col_idx``
    mapping, so ``X`` must be a NumPy array that supports ``X[:, i]`` and
    whose columns are ordered like the frame ``col_idx`` was built from.

    Parameters
    ----------
    add_bedrooms_per_room : bool, default=True
        When true, ``total_bedrooms / total_rooms`` is appended as a third
        derived column.
    """

    def __init__(self, add_bedrooms_per_room=True):
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Learn nothing from the data; return ``self`` unchanged."""
        return self

    def transform(self, X):
        """Return ``X`` with the derived ratio columns appended on the right.

        The appended columns are, in order: ``rooms_per_household``,
        ``population_per_household`` and, only when
        ``add_bedrooms_per_room`` is true, ``bedrooms_per_room``.
        """
        # Read each shared column once instead of repeating the lookup.
        households = X[:, col_idx['households']]
        total_rooms = X[:, col_idx['total_rooms']]

        rooms_per_household = total_rooms / households
        population_per_household = X[:, col_idx['population']] / households

        if self.add_bedrooms_per_room:
            # The divisor has to be addressed through col_idx as well: a NumPy
            # array cannot be indexed with the column name itself, which made
            # this branch raise IndexError before.
            bedrooms_per_room = X[:, col_idx['total_bedrooms']] / total_rooms
            return np.c_[X, rooms_per_household, population_per_household, bedrooms_per_room]
        else:
            return np.c_[X, rooms_per_household, population_per_household]

In [6]:
# Build the transformer with the optional bedrooms_per_room column disabled,
# so only the two per-household ratios are appended.
attr_adder = CombinedAttributesAdder(add_bedrooms_per_room=False)
# transform() indexes its input positionally (X[:, i]), so it is given the
# frame's underlying array rather than the frame itself. fit() is not called
# first because it only returns self.
extra = attr_adder.transform(df_train.values)
# Show the shape of the transformed array; it should have two more columns
# than df_train.
extra.shape

(16512, 13)

In [7]:
# Wrap the transformed array in a DataFrame again: reuse the training frame's
# index and column names, then name the two appended ratio columns.
df_extra = pd.DataFrame(
    extra,
    index=df_train.index,
    columns=[*df_train.columns, 'rooms_per_household', 'population_per_household'],
)
df_extra.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity,income_cat,rooms_per_household,population_per_household
17606,-121.89,37.29,38,1568,351,710,339,2.7042,286600,<1H OCEAN,2,4.62537,2.0944
18632,-121.93,37.05,14,679,108,306,113,6.4214,340600,<1H OCEAN,5,6.00885,2.70796
14650,-117.2,32.77,31,1952,471,936,462,2.8621,196900,NEAR OCEAN,2,4.22511,2.02597
3230,-119.61,36.31,25,1847,371,1460,353,1.8839,46300,INLAND,2,5.23229,4.13598
3555,-118.59,34.23,17,6592,1525,4459,1463,3.0347,254500,<1H OCEAN,3,4.50581,3.04785
