### Load the Dataset

In [1]:
import pandas as pd
import featuretools as ft

In [2]:
# Load the training data; the path is relative to the notebook's working directory.
df = pd.read_csv('Data/train_bm.csv')
# Show (rows, columns) as a quick sanity check on the load.
df.shape

(8523, 12)

In [3]:
# Separate the prediction target from the predictor columns.
y = df['Item_Outlet_Sales']
features = df.drop(columns='Item_Outlet_Sales')

### Create an empty entityset


In [4]:
# Create an EntitySet with id 'bigmart'; no entities have been added to it yet.
es = ft.EntitySet(id = 'bigmart')
# Display the entityset summary.
es

Entityset: bigmart
  Entities:
  Relationships:
    No relationships

### Add the data to the entityset

In [5]:
# Register the predictor frame as entity 'data_1'. make_index=True requests a
# new 'index' column to serve as the entity's unique row identifier.
es.entity_from_dataframe(
    entity_id='data_1',
    dataframe=features,
    make_index=True,
    index='index',
)

es

Entityset: bigmart
  Entities:
    data_1 [Rows: 8523, Columns: 12]
  Relationships:
    No relationships

### Feature Engineering 

In [7]:
# Deep feature synthesis at depth 1: the add/multiply transform primitives are
# applied directly to the original columns of 'data_1'.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='data_1',
    trans_primitives=['add_numeric', 'multiply_numeric'],
    max_depth=1,
)

feature_matrix.head()

Unnamed: 0_level_0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,...,Item_MRP + Item_Weight,Item_Visibility + Outlet_Establishment_Year,Item_Weight + Outlet_Establishment_Year,Item_MRP + Item_Visibility,Item_Visibility * Item_Weight,Item_MRP * Outlet_Establishment_Year,Item_MRP * Item_Weight,Item_Visibility * Outlet_Establishment_Year,Item_Weight * Outlet_Establishment_Year,Item_MRP * Item_Visibility
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,...,259.1092,1999.016047,2008.3,249.825247,0.14924,499368.5908,2323.22556,32.078555,18590.7,4.008763
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,...,54.1892,2009.019278,2014.92,48.288478,0.114127,96972.8228,285.753664,38.729936,11893.28,0.930544
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,...,159.118,1999.01676,2016.5,141.63476,0.293301,283094.382,2478.315,33.50339,34982.5,2.373528
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,...,201.295,1998.0,2017.2,182.095,0.0,363825.81,3496.224,0.0,38361.6,0.0
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,...,62.7914,1987.0,1995.93,53.8614,0.0,107022.6018,480.982302,0.0,17743.91,0.0


In [8]:
# List the feature definitions returned by the preceding ft.dfs call.
feature_defs

[<Feature: Item_Identifier>,
 <Feature: Item_Weight>,
 <Feature: Item_Fat_Content>,
 <Feature: Item_Visibility>,
 <Feature: Item_Type>,
 <Feature: Item_MRP>,
 <Feature: Outlet_Identifier>,
 <Feature: Outlet_Establishment_Year>,
 <Feature: Outlet_Size>,
 <Feature: Outlet_Location_Type>,
 <Feature: Outlet_Type>,
 <Feature: Item_Visibility + Item_Weight>,
 <Feature: Item_MRP + Outlet_Establishment_Year>,
 <Feature: Item_MRP + Item_Weight>,
 <Feature: Item_Visibility + Outlet_Establishment_Year>,
 <Feature: Item_Weight + Outlet_Establishment_Year>,
 <Feature: Item_MRP + Item_Visibility>,
 <Feature: Item_Visibility * Item_Weight>,
 <Feature: Item_MRP * Outlet_Establishment_Year>,
 <Feature: Item_MRP * Item_Weight>,
 <Feature: Item_Visibility * Outlet_Establishment_Year>,
 <Feature: Item_Weight * Outlet_Establishment_Year>,
 <Feature: Item_MRP * Item_Visibility>]

In [9]:
# (rows, columns) of the feature matrix returned by the preceding ft.dfs call.
feature_matrix.shape

(8523, 23)

In [10]:
# Deep feature synthesis at depth 2: the same add/multiply primitives may now be
# stacked on top of depth-1 features. This reassigns feature_matrix and
# feature_defs, replacing the depth-1 results.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='data_1',
    trans_primitives=['add_numeric', 'multiply_numeric'],
    max_depth=2,
)

feature_matrix.head()

Unnamed: 0_level_0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,...,Item_Visibility + Outlet_Establishment_Year * Outlet_Establishment_Year,Item_MRP + Outlet_Establishment_Year * Item_Visibility + Outlet_Establishment_Year,Item_Visibility + Item_Weight * Outlet_Establishment_Year,Item_MRP + Item_Visibility * Outlet_Establishment_Year,Item_Visibility * Item_Weight + Outlet_Establishment_Year,Item_MRP * Item_MRP + Item_Weight,Item_Weight * Item_Weight + Outlet_Establishment_Year,Item_MRP + Item_Weight * Item_MRP + Outlet_Establishment_Year,Item_MRP * Item_Visibility + Item_Weight,Item_MRP * Item_MRP + Outlet_Establishment_Year
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,...,3996033.0,4495406.0,18622.778555,499400.669355,32.227795,64727.861965,18677.19,582687.152765,2327.234323,561773.227205
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,...,4036120.0,4133093.0,11932.009936,97011.552736,38.844063,2615.669333,11928.3264,111481.772133,286.684208,99302.738469
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,...,3996035.0,4279131.0,35016.00339,283127.88539,33.796691,22533.972924,35288.75,340610.854924,2480.688528,303150.039924
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,...,3992004.0,4355830.0,38361.6,363825.81,0.0,36654.813025,38730.24,438842.223025,3496.224,396984.399025
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,...,3948169.0,4055192.0,17743.91,107022.6018,0.0,3382.032712,17823.6549,128148.544512,480.982302,109923.65221


In [11]:
# List the feature definitions from the max_depth=2 run, including stacked features.
feature_defs

[<Feature: Item_Identifier>,
 <Feature: Item_Weight>,
 <Feature: Item_Fat_Content>,
 <Feature: Item_Visibility>,
 <Feature: Item_Type>,
 <Feature: Item_MRP>,
 <Feature: Outlet_Identifier>,
 <Feature: Outlet_Establishment_Year>,
 <Feature: Outlet_Size>,
 <Feature: Outlet_Location_Type>,
 <Feature: Outlet_Type>,
 <Feature: Item_Visibility + Item_Weight>,
 <Feature: Item_MRP + Outlet_Establishment_Year>,
 <Feature: Item_MRP + Item_Weight>,
 <Feature: Item_Visibility + Outlet_Establishment_Year>,
 <Feature: Item_Weight + Outlet_Establishment_Year>,
 <Feature: Item_MRP + Item_Visibility>,
 <Feature: Item_MRP * Outlet_Establishment_Year>,
 <Feature: Item_Visibility * Outlet_Establishment_Year>,
 <Feature: Item_MRP * Item_Visibility>,
 <Feature: Item_MRP * Item_Weight>,
 <Feature: Item_Visibility * Item_Weight>,
 <Feature: Item_Weight * Outlet_Establishment_Year>,
 <Feature: Item_MRP + Item_Weight * Item_Visibility>,
 <Feature: Item_MRP + Item_Visibility * Item_Visibility + Outlet_Establishme

Source: www.analyticsvidhya.com