In [2]:
# https://qiita.com/Hyperion13fleet/items/4eaca365f28049fe11c7
import featuretools as ft

In [3]:
# Load the featuretools mock-customer demo dataset. The result is indexed by
# table name; the cells below read 'customers', 'sessions' and 'transactions'.
data = ft.demo.load_mock_customer()

In [4]:
# Preview the first rows of the customers table (one row per customer_id).
data['customers'].head()

Unnamed: 0,customer_id,zip_code,join_date,date_of_birth
0,1,60091,2011-04-17 10:48:33,1994-07-18
1,2,13244,2012-04-15 23:31:04,1986-08-18
2,3,13244,2011-08-13 15:42:34,2003-11-21
3,4,60091,2011-04-08 20:08:14,2006-08-15
4,5,60091,2010-07-17 05:27:50,1984-07-28


In [7]:
# Preview the sessions table; each session carries the customer_id it belongs to.
data['sessions'].head()

Unnamed: 0,session_id,customer_id,device,session_start
0,1,2,desktop,2014-01-01 00:00:00
1,2,5,mobile,2014-01-01 00:17:20
2,3,4,mobile,2014-01-01 00:28:10
3,4,1,mobile,2014-01-01 00:44:25
4,5,4,mobile,2014-01-01 01:11:30


In [8]:
# Preview the transactions table; each transaction carries the session_id it belongs to.
data['transactions'].head()

Unnamed: 0,transaction_id,session_id,transaction_time,product_id,amount
0,298,1,2014-01-01 00:00:00,5,127.64
1,2,1,2014-01-01 00:01:05,2,109.48
2,308,1,2014-01-01 00:02:10,3,95.06
3,116,1,2014-01-01 00:03:15,4,78.92
4,371,1,2014-01-01 00:04:20,3,31.54


In [14]:
# Create an empty EntitySet (id 'demodat'); entities and relationships
# are added to it in the following cells.
es = ft.EntitySet(id='demodat')

In [15]:
# Register the three demo tables as entities of `es`.
# Each call is left as a bare expression so the notebook echoes the
# EntitySet after every addition.

# Customers: indexed by customer_id (no time index is declared here).
es.entity_from_dataframe(
    entity_id='cust',
    dataframe=data['customers'],
    index='customer_id',
)

# Sessions: indexed by session_id, time-indexed by session_start.
es.entity_from_dataframe(
    entity_id='session',
    dataframe=data['sessions'],
    index='session_id',
    time_index='session_start',
)

# Transactions: indexed by transaction_id, time-indexed by transaction_time.
es.entity_from_dataframe(
    entity_id='trans',
    dataframe=data['transactions'],
    index='transaction_id',
    time_index='transaction_time',
)

Entityset: demodat
  Entities:
    cust [Rows: 5, Columns: 4]
  Relationships:
    No relationships

Entityset: demodat
  Entities:
    cust [Rows: 5, Columns: 4]
    session [Rows: 35, Columns: 4]
  Relationships:
    No relationships

Entityset: demodat
  Entities:
    cust [Rows: 5, Columns: 4]
    session [Rows: 35, Columns: 4]
    trans [Rows: 500, Columns: 5]
  Relationships:
    No relationships

In [17]:
# Build the two relationships linking the entities.
# Argument order is (parent variable, child variable): once added, the
# EntitySet reports them as child -> parent, e.g.
# "session.customer_id -> cust.customer_id".

# One customer has many sessions.
r_cust_session = ft.Relationship(
    es['cust']['customer_id'],
    es['session']['customer_id'],
)

# One session has many transactions.
r_session_trans = ft.Relationship(
    es['session']['session_id'],
    es['trans']['session_id'],
)

In [19]:
# Attach both relationships to the EntitySet; the echoed EntitySet then
# lists them under "Relationships".
es.add_relationships(
    relationships=[r_cust_session, r_session_trans],
)

Entityset: demodat
  Entities:
    cust [Rows: 5, Columns: 4]
    session [Rows: 35, Columns: 4]
    trans [Rows: 500, Columns: 5]
  Relationships:
    session.customer_id -> cust.customer_id
    trans.session_id -> session.session_id

In [20]:
# Inspect the variables of the 'trans' entity and the variable type assigned to each column.
es['trans'].variables

[<Variable: transaction_id (dtype = index)>,
 <Variable: session_id (dtype = id)>,
 <Variable: transaction_time (dtype: datetime_time_index, format: None)>,
 <Variable: product_id (dtype = categorical)>,
 <Variable: amount (dtype = numeric)>]

In [21]:
# Aggregation primitives: computed over the child rows of the target entity
# (here each session's transactions, e.g. SUM(trans.amount), COUNT(trans)).
list_agg = ['sum', 'min', 'max', 'count']

# Transform primitives: computed on columns of the target entity itself
# (here the datetime parts of session_start).
list_trans = ['year', 'month', 'day']

# Run Deep Feature Synthesis with 'session' as the target entity.
# It returns the feature matrix and the list of feature definitions.
# With max_depth=1 the resulting columns each apply at most one primitive.
df_feature, features_defs = ft.dfs(
    entityset=es,
    target_entity='session',
    agg_primitives=list_agg,
    trans_primitives=list_trans,
    max_depth=1,
)

# Preview the generated feature matrix (indexed by session_id).
df_feature.head()

Unnamed: 0_level_0,customer_id,device,SUM(trans.amount),MIN(trans.amount),MAX(trans.amount),COUNT(trans),YEAR(session_start),MONTH(session_start),DAY(session_start),cust.zip_code
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,2,desktop,1229.01,20.91,141.66,16,2014,1,1,13244
2,5,mobile,746.96,9.32,135.25,10,2014,1,1,60091
3,4,mobile,1329.0,8.7,147.73,15,2014,1,1,60091
4,1,mobile,1613.93,6.29,129.0,25,2014,1,1,60091
5,4,mobile,777.02,7.43,139.2,11,2014,1,1,60091
