## Importing the packages

In [1]:
import pandas as pd
import featuretools as ft
# Let pandas render up to 100 columns so wide frames are not truncated.
pd.set_option("display.max_columns", 100)

import warnings
# Silence all warnings globally to keep cell output uncluttered.
# NOTE(review): this also hides deprecation notices from pandas/featuretools.
warnings.filterwarnings(action="ignore")

## Load the data

In [2]:
# Load featuretools' mock customer demo dataset; the cells below pull the
# "customers", "sessions" and "transactions" tables out of it by name.
data = ft.demo.load_mock_customer()

## Prepare the data

In [3]:
# Customers table, keyed by customer_id; shown in full rather than via .head().
customers_df = data["customers"]
display(customers_df)

Unnamed: 0,customer_id,zip_code,join_date,birthday
0,1,60091,2011-04-17 10:48:33,1994-07-18
1,2,13244,2012-04-15 23:31:04,1986-08-18
2,3,13244,2011-08-13 15:42:34,2003-11-21
3,4,60091,2011-04-08 20:08:14,2006-08-15
4,5,60091,2010-07-17 05:27:50,1984-07-28


In [4]:
# Sessions table; each row carries the customer_id of the customer it belongs to.
sessions_df = data["sessions"]
# Preview only the first rows.
display(sessions_df.head())

Unnamed: 0,session_id,customer_id,device,session_start
0,1,2,desktop,2014-01-01 00:00:00
1,2,5,mobile,2014-01-01 00:17:20
2,3,4,mobile,2014-01-01 00:28:10
3,4,1,mobile,2014-01-01 00:44:25
4,5,4,mobile,2014-01-01 01:11:30


In [5]:
# Transactions table; each row carries the session_id of the session it belongs to.
transactions_df = data["transactions"]
# Preview only the first rows.
display(transactions_df.head())

Unnamed: 0,transaction_id,session_id,transaction_time,product_id,amount
0,298,1,2014-01-01 00:00:00,5,127.64
1,2,1,2014-01-01 00:01:05,2,109.48
2,308,1,2014-01-01 00:02:10,3,95.06
3,116,1,2014-01-01 00:03:15,4,78.92
4,371,1,2014-01-01 00:04:20,3,31.54


### Note:
* The data above contains customer details and their transactions.
* The data is not ideal for machine learning as-is, because the same customer can have multiple sessions and each session has its own transactions.
* Featuretools helps us transform this data into a form suitable for machine learning.

First, we need to specify a dictionary with all the DataFrames in our dataset. Each DataFrame is passed in together with its index column and, if it has one, its time index column.

In [6]:
# Name -> (DataFrame, index column[, time index column]).
# customers has no time index, so its tuple only has two elements.
dataframes = dict(
    customers=(customers_df, "customer_id"),
    sessions=(sessions_df, "session_id", "session_start"),
    transactions=(transactions_df, "transaction_id", "transaction_time"),
)

Second, we specify how the DataFrames are related. When two DataFrames have a one-to-many relationship, we call the “one” DataFrame the “parent DataFrame” and the “many” DataFrame the “child DataFrame”. A relationship between a parent and child is defined like this:

**(parent_dataframe, parent_column, child_dataframe, child_column)**

In [7]:
# Both links join on a column that has the same name in parent and child,
# so each relationship is expanded from a (parent, child, shared key) triple
# into the (parent, parent column, child, child column) 4-tuple.
relationships = [
    (parent, key, child, key)
    for parent, child, key in (
        ("sessions", "transactions", "session_id"),
        ("customers", "sessions", "customer_id"),
    )
]

In [8]:
# Run Deep Feature Synthesis with "customers" as the target, so the resulting
# feature matrix is indexed by customer_id. The second return value holds the
# definitions of the generated features.
feature_matrix_customers, features_defs = ft.dfs(
    dataframes=dataframes,
    relationships=relationships,
    target_dataframe_name="customers",
)

# Preview up to the first ten customers of the generated feature matrix.
feature_matrix_customers.head(10)

Unnamed: 0_level_0,COUNT(sessions),MODE(sessions.device),NUM_UNIQUE(sessions.device),COUNT(transactions),MAX(transactions.amount),MEAN(transactions.amount),MIN(transactions.amount),MODE(transactions.product_id),NUM_UNIQUE(transactions.product_id),SKEW(transactions.amount),STD(transactions.amount),SUM(transactions.amount),DAY(birthday),DAY(join_date),MONTH(birthday),MONTH(join_date),WEEKDAY(birthday),WEEKDAY(join_date),YEAR(birthday),YEAR(join_date),MAX(sessions.COUNT(transactions)),MAX(sessions.MEAN(transactions.amount)),MAX(sessions.MIN(transactions.amount)),MAX(sessions.NUM_UNIQUE(transactions.product_id)),MAX(sessions.SKEW(transactions.amount)),MAX(sessions.STD(transactions.amount)),MAX(sessions.SUM(transactions.amount)),MEAN(sessions.COUNT(transactions)),MEAN(sessions.MAX(transactions.amount)),MEAN(sessions.MEAN(transactions.amount)),MEAN(sessions.MIN(transactions.amount)),MEAN(sessions.NUM_UNIQUE(transactions.product_id)),MEAN(sessions.SKEW(transactions.amount)),MEAN(sessions.STD(transactions.amount)),MEAN(sessions.SUM(transactions.amount)),MIN(sessions.COUNT(transactions)),MIN(sessions.MAX(transactions.amount)),MIN(sessions.MEAN(transactions.amount)),MIN(sessions.NUM_UNIQUE(transactions.product_id)),MIN(sessions.SKEW(transactions.amount)),MIN(sessions.STD(transactions.amount)),MIN(sessions.SUM(transactions.amount)),MODE(sessions.DAY(session_start)),MODE(sessions.MODE(transactions.product_id)),MODE(sessions.MONTH(session_start)),MODE(sessions.WEEKDAY(session_start)),MODE(sessions.YEAR(session_start)),NUM_UNIQUE(sessions.DAY(session_start)),NUM_UNIQUE(sessions.MODE(transactions.product_id)),NUM_UNIQUE(sessions.MONTH(session_start)),NUM_UNIQUE(sessions.WEEKDAY(session_start)),NUM_UNIQUE(sessions.YEAR(session_start)),SKEW(sessions.COUNT(transactions)),SKEW(sessions.MAX(transactions.amount)),SKEW(sessions.MEAN(transactions.amount)),SKEW(sessions.MIN(transactions.amount)),SKEW(sessions.NUM_UNIQUE(transactions.product_id)),SKEW(sessions.STD(transactions.amount)),SKEW
(sessions.SUM(transactions.amount)),STD(sessions.COUNT(transactions)),STD(sessions.MAX(transactions.amount)),STD(sessions.MEAN(transactions.amount)),STD(sessions.MIN(transactions.amount)),STD(sessions.NUM_UNIQUE(transactions.product_id)),STD(sessions.SKEW(transactions.amount)),STD(sessions.SUM(transactions.amount)),SUM(sessions.MAX(transactions.amount)),SUM(sessions.MEAN(transactions.amount)),SUM(sessions.MIN(transactions.amount)),SUM(sessions.NUM_UNIQUE(transactions.product_id)),SUM(sessions.SKEW(transactions.amount)),SUM(sessions.STD(transactions.amount)),MODE(transactions.sessions.device),NUM_UNIQUE(transactions.sessions.device)
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1
1,8,mobile,3,126,139.43,71.631905,5.81,4,5,0.019698,40.442059,9025.62,18,17,7,4,0,6,1994,2011,25.0,88.755625,26.36,5.0,0.640252,46.905665,1613.93,15.75,132.24625,72.77414,9.82375,5.0,-0.059515,39.093244,1128.2025,12.0,118.9,50.623125,5.0,-1.038434,30.450261,809.97,1,4,1,2,2014,1,4,1,1,1,1.946018,-0.780493,-0.424949,2.440005,0.0,-0.312355,0.77817,4.062019,7.322191,13.759314,6.954507,0.0,0.589386,279.510713,1057.97,582.193117,78.59,40.0,-0.476122,312.745952,mobile,3
2,7,desktop,3,93,146.81,77.422366,8.73,4,5,0.098259,37.705178,7200.28,18,15,8,4,0,6,1986,2012,18.0,96.581,56.46,5.0,0.755711,47.93592,1320.64,13.285714,133.09,78.415122,22.085714,5.0,-0.039663,36.957218,1028.611429,8.0,100.04,61.91,5.0,-0.763603,27.839228,634.84,1,3,1,2,2014,1,4,1,1,1,-0.303276,-1.539467,0.235296,2.154929,0.0,0.013087,-0.440929,3.450328,17.221593,11.477071,15.874374,0.0,0.509798,251.609234,931.63,548.905851,154.6,35.0,-0.27764,258.700528,desktop,3
3,6,desktop,3,93,149.15,67.06043,5.89,1,5,0.41823,43.683296,6236.62,21,13,11,8,4,5,2003,2011,18.0,82.109444,20.06,5.0,0.854976,50.11012,1477.97,15.5,141.271667,67.539577,11.035,4.833333,0.381014,42.883316,1039.436667,11.0,126.74,55.579412,4.0,-0.289466,35.70468,889.21,1,1,1,2,2014,1,4,1,1,1,-1.507217,-0.941078,0.678544,1.000771,-2.44949,-0.245703,2.246479,2.428992,10.724241,11.174282,5.424407,0.408248,0.429374,219.02142,847.63,405.237462,66.21,29.0,2.286086,257.299895,desktop,3
4,8,mobile,3,109,149.95,80.070459,5.73,2,5,-0.036348,45.068765,8727.68,15,8,8,4,1,4,2006,2011,18.0,110.45,54.83,5.0,0.382868,54.293903,1351.46,13.625,144.74875,81.207189,16.43875,4.625,0.000346,44.515729,1090.96,10.0,139.2,70.638182,4.0,-0.711744,29.026424,771.68,1,1,1,2,2014,1,5,1,1,1,0.282488,0.027256,1.980948,2.10351,-0.644061,-1.065663,-0.391805,3.335416,3.514421,13.027258,16.960575,0.517549,0.387884,235.992478,1157.99,649.657515,131.51,37.0,0.002764,356.125829,mobile,3
5,6,mobile,3,79,149.02,80.375443,7.55,5,5,-0.025941,44.09563,6349.66,28,17,7,7,5,5,1984,2010,18.0,94.481667,20.65,5.0,0.602209,51.14925,1700.67,13.166667,139.96,78.705187,14.415,5.0,0.002397,43.312326,1058.276667,8.0,128.51,66.666667,5.0,-0.53906,36.734681,543.18,1,3,1,2,2014,1,5,1,1,1,-0.317685,-0.333796,0.335175,-0.47041,0.0,0.204548,0.472342,3.600926,7.928001,11.007471,4.961414,0.0,0.415426,402.775486,839.76,472.231119,86.49,30.0,0.014384,259.873954,mobile,3


In [9]:
# Check the container type of the feature matrix returned by ft.dfs.
type(feature_matrix_customers)

pandas.core.frame.DataFrame