# Feature Engineering (Optional Local)
Mirror what Calculated Insights will compute in Data Cloud.


In [0]:
import pandas as pd
from pathlib import Path
# Build a per-customer feature table from the raw CSV extracts and write it
# out as a training dataset.
#
# Inputs  (read from `base`): customers.csv, accounts.csv, transactions.csv
# Output  (written to `output_path`): one row per customer with
#   product_count - number of distinct accounts held
#   fees_90d      - sum of `fee_flag` over the look-back window
#                   (presumably a 0/1 flag, i.e. a fee count - TODO confirm)
#   spend_90d     - sum of absolute transaction amounts over the window
#   avg_balance   - mean account balance
base = Path('../data')
output_path = '../ed/training_dataset.csv'
window_days = 90  # look-back window behind the *_90d features

customers = pd.read_csv(base/'customers.csv')
accounts = pd.read_csv(base/'accounts.csv')
transactions = pd.read_csv(base/'transactions.csv', parse_dates=['txn_date'])

# The window is anchored on the newest transaction in the extract rather than
# on today's date, so the result depends only on the data file.
cutoff = transactions['txn_date'].max() - pd.Timedelta(days=window_days)
recent = transactions[transactions['txn_date'] >= cutoff]

# Per-account activity inside the window.
fees = recent.groupby('account_id')['fee_flag'].sum().rename('fees_90d')
# Take absolute values once, vectorised, then sum per account; this avoids
# running a Python lambda for every group.
spend = recent['amount'].abs().groupby(recent['account_id']).sum().rename('spend_90d')

acct = accounts.join(fees, on='account_id').join(spend, on='account_id')
# Accounts with no recent transactions come out of the joins as NaN; those are
# true zeros. Fill ONLY these two columns: a blanket fillna(0) would also turn
# a missing balance into 0 (pulling avg_balance down) and a missing
# customer_id into a fabricated customer 0.
acct = acct.fillna({'fees_90d': 0, 'spend_90d': 0})

# Roll accounts up to customers. Named aggregation yields the final column
# names directly; 'mean' skips missing balances instead of counting them as 0.
cust_feat = acct.groupby('customer_id').agg(
    product_count=('account_id', 'nunique'),
    fees_90d=('fees_90d', 'sum'),
    spend_90d=('spend_90d', 'sum'),
    avg_balance=('balance', 'mean'),
).reset_index()

# Left merge keeps customers that own no accounts; their features default to 0.
features = customers.merge(cust_feat, on='customer_id', how='left').fillna(
    {'product_count': 0, 'fees_90d': 0, 'spend_90d': 0, 'avg_balance': 0}
)

# to_csv does not create missing directories, so make sure the target exists.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
features.to_csv(output_path, index=False)
print(f'Feature table written to {output_path}')
