In [45]:
import featuretools as ft
import pandas as pd
from datetime import datetime

In [46]:
# Toy transaction log: one row per purchase. The buyer's name and credit score
# are repeated on every row for that user_id (denormalised), which is what the
# later normalize_entity step splits out into its own table.
transactions_df = pd.DataFrame(
    {
        # Zero-padded string ids "001" .. "008".
        "transaction_id": [str(n).zfill(3) for n in range(1, 9)],
        "user_id": [1, 2, 1, 3, 2, 5, 5, 2],
        "amount": [100.40, 20.63, 33.32, 13.12, 67.22, 1.00, 3.00, 20.99],
        # Eight timestamps spaced 12 hours apart, starting 2014-01-01 08:00:50.
        "transaction_time": pd.date_range(
            start="2014-01-01 08:00:50", periods=8, freq="12h"
        ),
        "name": [
            "Diana Marshall",
            "Stewart Alsop",
            "Diana Marshall",
            "Emma Ball",
            "Stewart Alsop",
            "Penelope Walsh",
            "Penelope Walsh",
            "Stewart Alsop",
        ],
        "user_credit": [810, 730, 810, 690, 730, 520, 520, 730],
    }
)

In [47]:
# Display the whole frame; it has only 8 rows, so no .head() is needed.
transactions_df

Unnamed: 0,transaction_id,user_id,amount,transaction_time,name,user_credit
0,1,1,100.4,2014-01-01 08:00:50,Diana Marshall,810
1,2,2,20.63,2014-01-01 20:00:50,Stewart Alsop,730
2,3,1,33.32,2014-01-02 08:00:50,Diana Marshall,810
3,4,3,13.12,2014-01-02 20:00:50,Emma Ball,690
4,5,2,67.22,2014-01-03 08:00:50,Stewart Alsop,730
5,6,5,1.0,2014-01-03 20:00:50,Penelope Walsh,520
6,7,5,3.0,2014-01-04 08:00:50,Penelope Walsh,520
7,8,2,20.99,2014-01-04 20:00:50,Stewart Alsop,730


In [61]:
# Check the dtypes pandas inferred for each column (transaction_id is stored as
# object/string, transaction_time as datetime64[ns]).
transactions_df.dtypes

transaction_id              object
user_id                      int64
amount                     float64
transaction_time    datetime64[ns]
name                        object
user_credit                  int64
dtype: object

In [53]:
# Start from an empty EntitySet; the following cells add entities to it and it
# is later passed to ft.dfs.
es = ft.EntitySet()

In [54]:
# Register the raw transaction log as the "transactions" entity.
es = es.entity_from_dataframe(
    entity_id="transactions",
    dataframe=transactions_df,
    index="transaction_id",          # column used as the entity's index
    time_index="transaction_time",   # column used as the entity's time index
)

In [55]:
# Derive a "users" entity from "transactions", keyed by user_id. The per-user
# columns (name, user_credit) are handed to the new entity via
# additional_variables.
es = es.normalize_entity(
    base_entity_id="transactions",
    new_entity_id="users",
    index="user_id",
    # No time index is created for users.
    # NOTE(review): in the feature matrix shown further down, user 5 already has
    # name/user_credit at a cutoff that precedes all of their transactions --
    # confirm that is the intended behaviour for this demo.
    make_time_index=False,
    additional_variables=["name", "user_credit"],
)

In [56]:
# Cutoff-time table handed to ft.dfs: one row per user with the timestamp to
# compute features at, plus a `label` column that shows up unchanged as the
# last column of the resulting feature matrix.
ct = pd.DataFrame(
    {
        "user_id": [1, 2, 3, 5],
        "time": pd.to_datetime(
            [
                "2014-01-02 08:00:50",
                "2014-01-02 08:00:50",
                "2014-01-02 20:00:50",
                "2014-01-03 12:00:50",
            ]
        ),
        "label": [False, False, False, True],
    }
)

# This notebook demos normalize_entity starting from a single entity.


In [59]:
# Alternative run, kept for reference (disabled): depth-1 features computed
# directly on the transactions entity, without cutoff times.
# fm, features = ft.dfs(entityset=es,
#                       target_entity='transactions',
#                       trans_primitives=["year", "month", "weekday", "percentile"],
#                       max_depth=1)

# Deep feature synthesis on the users entity, evaluated once per row of `ct`.
# Returns the feature matrix (`fm`) and the list of feature definitions
# (`features`).
fm, features = ft.dfs(
    entityset=es,
    target_entity='users',
    trans_primitives=["year", "month", "weekday", "percentile"],
    cutoff_time=ct,
    # max_depth=1,  # left disabled, so stacked features such as
    #               # PERCENTILE(SUM(transactions.amount)) are generated
    cutoff_time_in_index=True,  # result is indexed by (user_id, time)
)

In [60]:
# Show the feature matrix: one row per (user_id, time) pair from `ct`, with the
# `label` column from `ct` carried through as the last column.
fm

Unnamed: 0_level_0,Unnamed: 1_level_0,name,user_credit,SUM(transactions.amount),STD(transactions.amount),MAX(transactions.amount),SKEW(transactions.amount),MIN(transactions.amount),MEAN(transactions.amount),COUNT(transactions),PERCENTILE(user_credit),...,MODE(transactions.MONTH(transaction_time)),MODE(transactions.WEEKDAY(transaction_time)),PERCENTILE(SUM(transactions.amount)),PERCENTILE(STD(transactions.amount)),PERCENTILE(MAX(transactions.amount)),PERCENTILE(SKEW(transactions.amount)),PERCENTILE(MIN(transactions.amount)),PERCENTILE(MEAN(transactions.amount)),PERCENTILE(COUNT(transactions)),label
user_id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2014-01-02 08:00:50,Diana Marshall,810,133.72,47.432723,100.4,,33.32,66.86,2.0,1.0,...,1.0,2.0,1.0,1.0,1.0,,1.0,1.0,1.0,False
2,2014-01-02 08:00:50,Stewart Alsop,730,20.63,,20.63,,20.63,20.63,1.0,0.75,...,1.0,2.0,0.75,,0.5,,0.5,0.5,0.75,False
3,2014-01-02 20:00:50,Emma Ball,690,13.12,,13.12,,13.12,13.12,1.0,0.5,...,1.0,3.0,0.5,,0.333333,,0.333333,0.333333,0.625,False
5,2014-01-03 12:00:50,Penelope Walsh,520,0.0,,,,,,0.0,0.25,...,,,0.25,,,,,,0.25,True
