### Featuretools Installation

Before running this notebook, you need to install featuretools on your system.

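**Use the following command** (this notebook uses the pre-1.0 featuretools API, i.e. `entity_from_dataframe` and `target_entity`, so the version is pinned below 1.0):

$ python -m pip install "featuretools<1.0"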

### Load the Dataset

In [1]:
import pandas as pd
import featuretools as ft

ModuleNotFoundError: No module named 'featuretools'

In [3]:
# Read the training CSV; the path is resolved relative to the notebook's
# working directory.
df = pd.read_csv(filepath_or_buffer='train_bm.csv')
# Show (rows, columns) of the raw frame as the cell output.
df.shape

(8523, 12)

In [4]:
# Separate the prediction target from the predictors. `drop` returns a new
# frame, so `df` itself still contains the target column afterwards.
y = df['Item_Outlet_Sales']
features = df.drop(columns='Item_Outlet_Sales')

### Make an entityset and add the entity


In [5]:
# Start with an empty EntitySet; data is attached to it in a later cell.
es = ft.EntitySet(id='bigmart')
# Display the (still empty) EntitySet summary.
es

Entityset: bigmart
  Entities:
  Relationships:
    No relationships

### Add the data to the entityset

In [6]:
# Register the predictor frame as the entity 'data_1'.
# make_index=True asks featuretools to create the 'index' column itself
# rather than expecting it to exist in `features` already.
# NOTE(review): `entity_from_dataframe` is the pre-1.0 featuretools API;
# confirm the installed version before running.
es.entity_from_dataframe(
    entity_id='data_1',
    dataframe=features,
    index='index',
    make_index=True,
)

es

Entityset: bigmart
  Entities:
    data_1 [Rows: 8523, Columns: 12]
  Relationships:
    No relationships

### Feature Engineering 

In [7]:
# Deep feature synthesis on 'data_1' with two transform primitives.
# max_depth=1 applies each primitive once, directly to the original columns.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='data_1',
    trans_primitives=['add_numeric', 'multiply_numeric'],
    max_depth=1,
)

feature_matrix.head()

Unnamed: 0_level_0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,...,Item_MRP + Item_Visibility,Item_MRP + Outlet_Establishment_Year,Item_Weight + Outlet_Establishment_Year,Item_Visibility + Item_Weight,Item_Visibility * Outlet_Establishment_Year,Item_MRP * Item_Weight,Item_MRP * Item_Visibility,Item_MRP * Outlet_Establishment_Year,Item_Weight * Outlet_Establishment_Year,Item_Visibility * Item_Weight
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,...,249.825247,2248.8092,2008.3,9.316047,32.078555,2323.22556,4.008763,499368.5908,18590.7,0.14924
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,...,48.288478,2057.2692,2014.92,5.939278,38.729936,285.753664,0.930544,96972.8228,11893.28,0.114127
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,...,141.63476,2140.618,2016.5,17.51676,33.50339,2478.315,2.373528,283094.382,34982.5,0.293301
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,...,182.095,2180.095,2017.2,19.2,0.0,3496.224,0.0,363825.81,38361.6,0.0
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,...,53.8614,2040.8614,1995.93,8.93,0.0,480.982302,0.0,107022.6018,17743.91,0.0


In [8]:
# List the feature definitions returned by the max_depth=1 `ft.dfs` call.
feature_defs

[<Feature: Item_Identifier>,
 <Feature: Item_Weight>,
 <Feature: Item_Fat_Content>,
 <Feature: Item_Visibility>,
 <Feature: Item_Type>,
 <Feature: Item_MRP>,
 <Feature: Outlet_Identifier>,
 <Feature: Outlet_Establishment_Year>,
 <Feature: Outlet_Size>,
 <Feature: Outlet_Location_Type>,
 <Feature: Outlet_Type>,
 <Feature: Item_Visibility + Outlet_Establishment_Year>,
 <Feature: Item_MRP + Item_Weight>,
 <Feature: Item_MRP + Item_Visibility>,
 <Feature: Item_MRP + Outlet_Establishment_Year>,
 <Feature: Item_Weight + Outlet_Establishment_Year>,
 <Feature: Item_Visibility + Item_Weight>,
 <Feature: Item_Visibility * Outlet_Establishment_Year>,
 <Feature: Item_MRP * Item_Weight>,
 <Feature: Item_MRP * Item_Visibility>,
 <Feature: Item_MRP * Outlet_Establishment_Year>,
 <Feature: Item_Weight * Outlet_Establishment_Year>,
 <Feature: Item_Visibility * Item_Weight>]

In [9]:
# (rows, columns) of the feature matrix from the max_depth=1 run.
feature_matrix.shape

(8523, 23)

In [10]:
# Repeat deep feature synthesis with max_depth=2, so the primitives are also
# applied on top of the depth-1 features (e.g. a product of two sums).
# This rebinds `feature_matrix` and `feature_defs`; cells executed after this
# one see the depth-2 results.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='data_1',
    trans_primitives=['add_numeric', 'multiply_numeric'],
    max_depth=2,
)

feature_matrix.head()

Unnamed: 0_level_0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,...,Item_MRP + Item_Visibility * Item_Visibility,Item_MRP * Item_MRP + Item_Visibility,Item_Visibility * Item_Weight + Outlet_Establishment_Year,Item_MRP + Item_Visibility * Item_MRP + Item_Weight,Item_MRP + Item_Weight * Item_Weight + Outlet_Establishment_Year,Item_MRP * Item_Visibility + Item_Weight,Item_Visibility + Outlet_Establishment_Year * Item_Weight + Outlet_Establishment_Year,Item_Weight + Outlet_Establishment_Year * Outlet_Establishment_Year,Item_MRP * Item_MRP + Outlet_Establishment_Year,Item_MRP + Item_Visibility * Item_Weight + Outlet_Establishment_Year
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,...,4.009021,62408.645168,32.227795,64732.019968,520369.00636,2327.234323,4014624.0,4014591.7,561773.227205,501724.044155
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,...,0.930916,2330.846213,38.844063,2616.714004,109186.902864,286.684208,4048013.0,4047974.28,99302.738469,97297.420527
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,...,2.373809,20058.031452,33.796691,22536.639754,320861.447,2480.688528,4031017.0,4030983.5,303150.039924,285606.493691
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,...,0.0,33158.589025,0.0,36654.813025,406052.274,3496.224,4030366.0,4030365.6,396984.399025,367322.034
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,...,0.0,2901.05041,0.0,3382.032712,125327.239002,480.982302,3965913.0,3965912.91,109923.65221,107503.584102


In [11]:
# List the feature definitions returned by the max_depth=2 `ft.dfs` call.
feature_defs

[<Feature: Item_Identifier>,
 <Feature: Item_Weight>,
 <Feature: Item_Fat_Content>,
 <Feature: Item_Visibility>,
 <Feature: Item_Type>,
 <Feature: Item_MRP>,
 <Feature: Outlet_Identifier>,
 <Feature: Outlet_Establishment_Year>,
 <Feature: Outlet_Size>,
 <Feature: Outlet_Location_Type>,
 <Feature: Outlet_Type>,
 <Feature: Item_Visibility + Outlet_Establishment_Year>,
 <Feature: Item_MRP + Item_Weight>,
 <Feature: Item_MRP + Item_Visibility>,
 <Feature: Item_MRP + Outlet_Establishment_Year>,
 <Feature: Item_Weight + Outlet_Establishment_Year>,
 <Feature: Item_Visibility + Item_Weight>,
 <Feature: Item_MRP * Item_Visibility>,
 <Feature: Item_Visibility * Outlet_Establishment_Year>,
 <Feature: Item_Weight * Outlet_Establishment_Year>,
 <Feature: Item_Visibility * Item_Weight>,
 <Feature: Item_MRP * Item_Weight>,
 <Feature: Item_MRP * Outlet_Establishment_Year>,
 <Feature: Item_Visibility + Outlet_Establishment_Year * Outlet_Establishment_Year>,
 <Feature: Item_MRP + Outlet_Establishment_Ye