### Featuretools Installation

Before running this notebook, you need to install featuretools on your system.

**Use the following command:**

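$ python -m pip install "featuretools<1.0"

Note: this notebook uses the pre-1.0 featuretools API (`entity_from_dataframe`, `target_entity`), which was replaced in featuretools 1.0, so the version is pinned.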

### Load the Dataset

In [1]:
import pandas as pd
import featuretools as ft



In [2]:
# Load the BigMart training data. The path is relative to the notebook's
# working directory and uses a forward slash so it resolves on Windows,
# Linux and macOS alike (the original backslash only worked on Windows).
df = pd.read_csv('datasets/train_bm.csv')
df.shape

(8523, 12)

In [3]:
# Separate the prediction target from the predictor columns; `df` itself
# is left untouched and still contains 'Item_Outlet_Sales'.
y = df['Item_Outlet_Sales']
features = df.drop(columns='Item_Outlet_Sales')

### Create an empty EntitySet


In [10]:
# Start from an empty EntitySet identified as 'bigmart'; the bare `es`
# below displays its (currently empty) contents.
es = ft.EntitySet(id='bigmart')
es

Entityset: bigmart
  Entities:
  Relationships:
    No relationships

### Add the data to the EntitySet

In [5]:
# Register the predictor frame as entity 'data_1'. `make_index=True`
# asks featuretools to create the 'index' column itself rather than
# expecting it to exist in `features`.
es.entity_from_dataframe(
    entity_id='data_1',
    dataframe=features,
    index='index',
    make_index=True,
)

es

Entityset: bigmart
  Entities:
    data_1 [Rows: 8523, Columns: 12]
  Relationships:
    No relationships

### Feature Engineering 

In [6]:
# Run deep feature synthesis with transformation primitives
# Depth-1 deep feature synthesis on the single entity 'data_1', using only
# the pairwise numeric transform primitives (sum and product of columns).
# The call sits inside parentheses on the assignment line: a line ending in
# a bare `=` followed by the call on the next line is a SyntaxError.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='data_1',
    max_depth=1,
    trans_primitives=['add_numeric', 'multiply_numeric'],
)

feature_matrix.head()

Unnamed: 0_level_0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,...,Item_Visibility + Outlet_Establishment_Year,Item_MRP + Item_Weight,Item_MRP + Outlet_Establishment_Year,Item_Visibility + Item_Weight,Item_MRP * Item_Visibility,Item_Weight * Outlet_Establishment_Year,Item_Visibility * Outlet_Establishment_Year,Item_MRP * Item_Weight,Item_MRP * Outlet_Establishment_Year,Item_Visibility * Item_Weight
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,...,1999.016047,259.1092,2248.8092,9.316047,4.008763,18590.7,32.078555,2323.22556,499368.5908,0.14924
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,...,2009.019278,54.1892,2057.2692,5.939278,0.930544,11893.28,38.729936,285.753664,96972.8228,0.114127
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,...,1999.01676,159.118,2140.618,17.51676,2.373528,34982.5,33.50339,2478.315,283094.382,0.293301
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,...,1998.0,201.295,2180.095,19.2,0.0,38361.6,0.0,3496.224,363825.81,0.0
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,...,1987.0,62.7914,2040.8614,8.93,0.0,17743.91,0.0,480.982302,107022.6018,0.0


In [7]:
# List the feature definitions returned by the ft.dfs call above
# (original columns plus the generated sum/product features).
feature_defs

[<Feature: Item_Identifier>,
 <Feature: Item_Weight>,
 <Feature: Item_Fat_Content>,
 <Feature: Item_Visibility>,
 <Feature: Item_Type>,
 <Feature: Item_MRP>,
 <Feature: Outlet_Identifier>,
 <Feature: Outlet_Establishment_Year>,
 <Feature: Outlet_Size>,
 <Feature: Outlet_Location_Type>,
 <Feature: Outlet_Type>,
 <Feature: Item_MRP + Item_Visibility>,
 <Feature: Item_Weight + Outlet_Establishment_Year>,
 <Feature: Item_Visibility + Outlet_Establishment_Year>,
 <Feature: Item_MRP + Item_Weight>,
 <Feature: Item_MRP + Outlet_Establishment_Year>,
 <Feature: Item_Visibility + Item_Weight>,
 <Feature: Item_MRP * Item_Visibility>,
 <Feature: Item_Weight * Outlet_Establishment_Year>,
 <Feature: Item_Visibility * Outlet_Establishment_Year>,
 <Feature: Item_MRP * Item_Weight>,
 <Feature: Item_MRP * Outlet_Establishment_Year>,
 <Feature: Item_Visibility * Item_Weight>]

In [9]:
# Inspect the non-object columns of the original frame. Note that `df`
# (unlike `features`) still includes the target 'Item_Outlet_Sales'.
df.select_dtypes(exclude=[object])

Unnamed: 0,Item_Weight,Item_Visibility,Item_MRP,Outlet_Establishment_Year,Item_Outlet_Sales
0,9.300,0.016047,249.8092,1999,3735.1380
1,5.920,0.019278,48.2692,2009,443.4228
2,17.500,0.016760,141.6180,1999,2097.2700
3,19.200,0.000000,182.0950,1998,732.3800
4,8.930,0.000000,53.8614,1987,994.7052
...,...,...,...,...,...
8518,6.865,0.056783,214.5218,1987,2778.3834
8519,8.380,0.046982,108.1570,2002,549.2850
8520,10.600,0.035186,85.1224,2004,1193.1136
8521,7.210,0.145221,103.1332,2009,1845.5976


In [8]:
# (rows, columns) of the feature matrix produced by the preceding ft.dfs run.
feature_matrix.shape

(8523, 23)

In [9]:
# Run deep feature synthesis with transformation primitives
# Same synthesis as before but with max_depth=2, so the add/multiply
# primitives can also be stacked on top of the depth-1 features.
# Note: this rebinds `feature_matrix` and `feature_defs`.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='data_1',
    max_depth=2,
    trans_primitives=['add_numeric', 'multiply_numeric'],
)

feature_matrix.head()

Unnamed: 0_level_0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,...,Item_MRP + Item_Weight * Item_Weight + Outlet_Establishment_Year,Item_MRP + Outlet_Establishment_Year * Item_Weight + Outlet_Establishment_Year,Item_MRP * Item_Visibility + Item_Weight,Item_MRP + Outlet_Establishment_Year * Item_Weight,Item_Visibility + Outlet_Establishment_Year * Item_Weight,Item_Weight + Outlet_Establishment_Year * Outlet_Establishment_Year,Item_Visibility * Item_Visibility + Outlet_Establishment_Year,Item_MRP + Item_Visibility * Item_Visibility + Item_Weight,Item_Visibility * Item_Visibility + Item_Weight,Item_MRP + Item_Weight * Item_Visibility + Item_Weight
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,...,520369.00636,4516284.0,2327.234323,20913.92556,18590.84924,4014591.7,32.078812,2327.383821,0.149497,2413.873563
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,...,109186.902864,4145233.0,286.684208,12179.033664,11893.394127,4047974.28,38.730308,286.798707,0.114499,321.844735
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,...,320861.447,4316556.0,2480.688528,37460.815,34982.793301,4030983.5,33.503671,2480.982111,0.293582,2787.23183
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,...,406052.274,4397688.0,3496.224,41857.824,38361.6,4030365.6,0.0,3496.224,0.0,3864.864
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,...,125327.239002,4073416.0,480.982302,18224.892302,17743.91,3965912.91,0.0,480.982302,0.0,560.727202


In [10]:
# List the feature definitions from the max_depth=2 run, including the
# stacked features built from the depth-1 sums and products.
feature_defs

[<Feature: Item_Identifier>,
 <Feature: Item_Weight>,
 <Feature: Item_Fat_Content>,
 <Feature: Item_Visibility>,
 <Feature: Item_Type>,
 <Feature: Item_MRP>,
 <Feature: Outlet_Identifier>,
 <Feature: Outlet_Establishment_Year>,
 <Feature: Outlet_Size>,
 <Feature: Outlet_Location_Type>,
 <Feature: Outlet_Type>,
 <Feature: Item_MRP + Item_Visibility>,
 <Feature: Item_Weight + Outlet_Establishment_Year>,
 <Feature: Item_Visibility + Outlet_Establishment_Year>,
 <Feature: Item_MRP + Item_Weight>,
 <Feature: Item_MRP + Outlet_Establishment_Year>,
 <Feature: Item_Visibility + Item_Weight>,
 <Feature: Item_Visibility * Outlet_Establishment_Year>,
 <Feature: Item_MRP * Item_Visibility>,
 <Feature: Item_Visibility * Item_Weight>,
 <Feature: Item_Weight * Outlet_Establishment_Year>,
 <Feature: Item_MRP * Item_Weight>,
 <Feature: Item_MRP * Outlet_Establishment_Year>,
 <Feature: Item_MRP * Item_MRP + Item_Weight>,
 <Feature: Item_MRP + Outlet_Establishment_Year * Outlet_Establishment_Year>,
 <Fea