In [23]:
import featuretools as ft
import pandas as pd
from datetime import datetime

In [24]:
# Parent table: one row per user. "id" is used as the entity index further down;
# "anotherid" carries the same values and exists for the relationship experiment.
users = pd.DataFrame(
    {
        "id": list(range(1, 5)),
        "anotherid": list(range(1, 5)),
        "name": ["Diana Marshall", "Stewart Alsop", "Emma Ball", "Penelope Walsh"],
        "credit": [10, 5, 2, 1],
    }
)

# Child table: one row per transaction, one transaction per day from 2014-01-01.
# Transaction 7 has no amount (None), so the column is stored as float with a NaN.
txns = pd.DataFrame(
    {
        "transaction_id": list(range(1, 9)),
        "user_id": [1, 1, 2, 1, 2, 2, 3, 3],
        "amount": [500, 600, 300, 100, 80, 120, None, 20],
        "transaction_time": pd.date_range(
            start="2014-01-01 00:00:00", periods=8, freq="1D"
        ),
    }
)

In [25]:
# Display the users frame to check its contents before building entities.
users

Unnamed: 0,id,anotherid,name,credit
0,1,1,Diana Marshall,10
1,2,2,Stewart Alsop,5
2,3,3,Emma Ball,2
3,4,4,Penelope Walsh,1


In [26]:
# Display the transactions frame; the amount for transaction 7 is missing (NaN).
txns

Unnamed: 0,transaction_id,user_id,amount,transaction_time
0,1,1,500.0,2014-01-01
1,2,1,600.0,2014-01-02
2,3,2,300.0,2014-01-03
3,4,1,100.0,2014-01-04
4,5,2,80.0,2014-01-05
5,6,2,120.0,2014-01-06
6,7,3,,2014-01-07
7,8,3,20.0,2014-01-08


In [27]:
# Start from an empty EntitySet; the "users" and "txns" entities and their
# relationship are attached to it in the following cells.
es = ft.EntitySet()

In [28]:
# Register the users frame as the "users" entity, using its "id" column as the index.
es = es.entity_from_dataframe(
    entity_id="users",
    dataframe=users,
    index="id",
)

In [29]:
# Register the transactions frame as the "txns" entity. "transaction_id" is the
# index and "transaction_time" is the time index of each row.
es = es.entity_from_dataframe(
    entity_id="txns",
    dataframe=txns,
    index="transaction_id",
    time_index="transaction_time",
)

In [30]:
# Link each transaction to its user: users.id (parent) <- txns.user_id (child).
# Per the original author's note, using users["anotherid"] as the parent raises
# an exception: it holds the same values as "id" but is not the entity's index.
new_relationship = ft.Relationship(
    es["users"]["id"],
    es["txns"]["user_id"],
)

es = es.add_relationship(new_relationship)

In [31]:
# Show the entity set summary: its entities and the relationships between them.
es

Entityset: None
  Entities:
    users [Rows: 4, Columns: 4]
    txns [Rows: 8, Columns: 4]
  Relationships:
    txns.user_id -> users.id

In [93]:
# Per-instance cutoff times plus a label column. This frame can be passed to
# ft.dfs as `cutoff_time` instead of a single timestamp.
ct = pd.DataFrame(
    {
        "id": [1, 2, 3],
        "time": pd.to_datetime(["2014-01-02", "2014-01-03", "2014-01-08"]),
        "label": [True, True, False],
    }
)

In [98]:
# Run deep feature synthesis over the entity set.
# Knobs to vary when experimenting:
#   * target_entity - change it to build features for a different entity.
#   * cutoff_time   - a single timestamp (as here), or the `ct` frame for
#                     per-instance cutoffs; change it to observe different behaviour.
# NOTE(review): training_window presumably limits the data used to the 1 day
# before the cutoff - confirm against the featuretools documentation.
fm, features = ft.dfs(
    entityset=es,
    target_entity="txns",
    cutoff_time=pd.to_datetime("2014-01-08"),
    training_window=ft.Timedelta(1, "d"),
    cutoff_time_in_index=True,
    max_depth=4,
)



In [99]:
# Display the feature matrix. In the recorded run it is indexed by
# (transaction_id, time) and only transactions 7 and 8 carry values; the rest
# are NaN (COUNT/SUM show 0).
# NOTE(review): presumably a consequence of the 1-day training window - confirm.
fm

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id,amount,DAY(transaction_time),YEAR(transaction_time),MONTH(transaction_time),WEEKDAY(transaction_time),users.anotherid,users.name,users.credit,users.SUM(txns.amount),...,users.MEAN(txns.amount),users.COUNT(txns),users.NUM_UNIQUE(txns.DAY(transaction_time)),users.NUM_UNIQUE(txns.YEAR(transaction_time)),users.NUM_UNIQUE(txns.MONTH(transaction_time)),users.NUM_UNIQUE(txns.WEEKDAY(transaction_time)),users.MODE(txns.DAY(transaction_time)),users.MODE(txns.YEAR(transaction_time)),users.MODE(txns.MONTH(transaction_time)),users.MODE(txns.WEEKDAY(transaction_time))
transaction_id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2014-01-08,,,,,,,,,,0.0,...,,0,,,,,,,,
2,2014-01-08,,,,,,,,,,0.0,...,,0,,,,,,,,
3,2014-01-08,,,,,,,,,,0.0,...,,0,,,,,,,,
4,2014-01-08,,,,,,,,,,0.0,...,,0,,,,,,,,
5,2014-01-08,,,,,,,,,,0.0,...,,0,,,,,,,,
6,2014-01-08,,,,,,,,,,0.0,...,,0,,,,,,,,
7,2014-01-08,3.0,,7.0,2014.0,1.0,1.0,3.0,Emma Ball,2.0,20.0,...,20.0,2,2.0,1.0,1.0,2.0,7.0,2014.0,1.0,1.0
8,2014-01-08,3.0,20.0,8.0,2014.0,1.0,2.0,3.0,Emma Ball,2.0,20.0,...,20.0,2,2.0,1.0,1.0,2.0,7.0,2014.0,1.0,1.0


In [100]:
# List the feature definitions returned by ft.dfs alongside the feature matrix.
features

[<Feature: user_id>,
 <Feature: amount>,
 <Feature: DAY(transaction_time)>,
 <Feature: YEAR(transaction_time)>,
 <Feature: MONTH(transaction_time)>,
 <Feature: WEEKDAY(transaction_time)>,
 <Feature: users.anotherid>,
 <Feature: users.name>,
 <Feature: users.credit>,
 <Feature: users.SUM(txns.amount)>,
 <Feature: users.STD(txns.amount)>,
 <Feature: users.MAX(txns.amount)>,
 <Feature: users.SKEW(txns.amount)>,
 <Feature: users.MIN(txns.amount)>,
 <Feature: users.MEAN(txns.amount)>,
 <Feature: users.COUNT(txns)>,
 <Feature: users.NUM_UNIQUE(txns.DAY(transaction_time))>,
 <Feature: users.NUM_UNIQUE(txns.YEAR(transaction_time))>,
 <Feature: users.NUM_UNIQUE(txns.MONTH(transaction_time))>,
 <Feature: users.NUM_UNIQUE(txns.WEEKDAY(transaction_time))>,
 <Feature: users.MODE(txns.DAY(transaction_time))>,
 <Feature: users.MODE(txns.YEAR(transaction_time))>,
 <Feature: users.MODE(txns.MONTH(transaction_time))>,
 <Feature: users.MODE(txns.WEEKDAY(transaction_time))>]

In [101]:
# Count how many feature definitions were generated.
len(features)

24