In [3]:
import featuretools as ft
import pandas as pd
from datetime import datetime

In [4]:
# Parent table: one row per user, identified by "id".
# Written row-wise so each user's attributes can be read together.
user_records = [
    (1, "Diana Marshall", 10),
    (2, "Stewart Alsop", 5),
    (3, "Emma Ball", 2),
    (4, "Penelope Walsh", 1),
]
user_df = pd.DataFrame(user_records, columns=["id", "name", "credit"])

# Child table: one row per transaction; "user_id" refers to user_df["id"].
# The first eight transactions are spaced exactly 12 hours apart.
transactions_df = pd.DataFrame({
    "transaction_id": [1, 2, 3, 4, 5, 6, 7, 8],
    "user_id": [1, 1, 2, 1, 2, 2, 3, 3],
    "amount": [500, 600, 300, 100, 80, 120, 30, 20],
    "transaction_time": pd.date_range('2014-01-01 08:00:50', periods=8, freq='12h')
    })

# Add a ninth transaction for user 1 that does not fall on the 12-hour grid.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# extra row is wrapped in its own frame and joined with pd.concat, which
# works on both old and new pandas. The float amount upcasts the "amount"
# column to float64, exactly as the original append did.
extra_transaction = pd.DataFrame([{
    "transaction_id": 9,
    "user_id": 1,
    "amount": 10.0,
    "transaction_time": datetime(2014, 1, 5, 9, 0, 0)
}])
transactions_df = pd.concat([transactions_df, extra_transaction], ignore_index=True)

In [5]:
# Preview the users table; it has only 4 rows, so head(10) shows all of them.
user_df.head(10)

Unnamed: 0,id,name,credit
0,1,Diana Marshall,10
1,2,Stewart Alsop,5
2,3,Emma Ball,2
3,4,Penelope Walsh,1


In [6]:
# Preview the transactions table; all 9 rows fit within head(10).
transactions_df.head(10)

Unnamed: 0,transaction_id,user_id,amount,transaction_time
0,1,1,500.0,2014-01-01 08:00:50
1,2,1,600.0,2014-01-01 20:00:50
2,3,2,300.0,2014-01-02 08:00:50
3,4,1,100.0,2014-01-02 20:00:50
4,5,2,80.0,2014-01-03 08:00:50
5,6,2,120.0,2014-01-03 20:00:50
6,7,3,30.0,2014-01-04 08:00:50
7,8,3,20.0,2014-01-04 20:00:50
8,9,1,10.0,2014-01-05 09:00:00


In [7]:
# Start from an empty EntitySet. No id is passed, so its repr reads "Entityset: None".
es = ft.EntitySet()

In [8]:
# Register the users table as an entity; each row is identified by its "id" column.
es = es.entity_from_dataframe(
    entity_id="users",
    dataframe=user_df,
    index="id",
)

In [10]:
# Register the transactions table as an entity. "transaction_time" is declared
# as the time index, i.e. the timestamp associated with each transaction row.
es = es.entity_from_dataframe(
    entity_id="transactions",
    dataframe=transactions_df,
    index="transaction_id",
    time_index="transaction_time",
)

In [12]:
# Link the two entities: transactions.user_id refers to users.id.
# ft.Relationship takes the users-side key first, then the transactions-side key.
users_key = es["users"]["id"]
transactions_key = es["transactions"]["user_id"]
new_relationship = ft.Relationship(users_key, transactions_key)

es = es.add_relationship(new_relationship)

In [13]:
# Display the EntitySet summary to check that both entities and the relationship are registered.
es

Entityset: None
  Entities:
    users [Rows: 4, Columns: 3]
    transactions [Rows: 9, Columns: 4]
  Relationships:
    transactions.user_id -> users.id

In [14]:
# Cutoff-time table: one row per user id, giving the timestamp at which that
# user's features are computed, plus a "label" column for that row.
cutoff_timestamps = pd.to_datetime([
    '2014-01-01 20:00:50',
    '2014-01-02 08:00:50',
    '2014-01-04 20:00:50',
])
ct = pd.DataFrame({
    'id': [1, 2, 3],
    'time': cutoff_timestamps,
    'label': [True, True, False],
})

# This demonstrates that featuretools uses information up to and including (<=) the cutoff time

We can see that user 1 has a total amount of 1100 (500 + 600), which includes the transaction made exactly at that user's cutoff time, '2014-01-01 20:00:50'.

In [16]:
# Run Deep Feature Synthesis with "users" as the target entity.
# `ct` supplies a cutoff time per user id; cutoff_time_in_index=True puts that
# time into the feature matrix index alongside the id.
fm, features = ft.dfs(
    entityset=es,
    target_entity='users',
    cutoff_time=ct,
    cutoff_time_in_index=True,
)

In [17]:
# Display the resulting feature matrix (one row per id/cutoff-time pair from `ct`).
fm

Unnamed: 0_level_0,Unnamed: 1_level_0,name,credit,SUM(transactions.amount),STD(transactions.amount),MAX(transactions.amount),SKEW(transactions.amount),MIN(transactions.amount),MEAN(transactions.amount),COUNT(transactions),NUM_UNIQUE(transactions.DAY(transaction_time)),NUM_UNIQUE(transactions.YEAR(transaction_time)),NUM_UNIQUE(transactions.MONTH(transaction_time)),NUM_UNIQUE(transactions.WEEKDAY(transaction_time)),MODE(transactions.DAY(transaction_time)),MODE(transactions.YEAR(transaction_time)),MODE(transactions.MONTH(transaction_time)),MODE(transactions.WEEKDAY(transaction_time)),label
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,2014-01-01 20:00:50,Diana Marshall,10,1100.0,70.710678,600.0,,500.0,550.0,2,1,1,1,1,1,2014,1,2,True
2,2014-01-02 08:00:50,Stewart Alsop,5,300.0,,300.0,,300.0,300.0,1,1,1,1,1,2,2014,1,3,True
3,2014-01-04 20:00:50,Emma Ball,2,50.0,7.071068,30.0,,20.0,25.0,2,1,1,1,1,4,2014,1,5,False
