In [1]:
import pandas as pd
import numpy as np

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas diagnostics (deprecations, chained-assignment
# warnings); consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
# Project root and the directory that holds the raw JSON inputs.
# NOTE(review): absolute, machine-specific path; the notebook only runs as-is
# where this directory exists.
BASE_PATH = '/home/marco/ifood-case'
DATA_RAW_PATH = f'{BASE_PATH}/data/raw/'

# profile

In [3]:
# Load the customer profiles and normalise their column types.
df_profile = pd.read_json(DATA_RAW_PATH + 'profile.json')

df_profile['age'] = df_profile['age'].astype(int)
# Keep only rows with age <= 101.
# NOTE(review): presumably larger ages are placeholder values; confirm against the data dictionary.
# .copy() makes the filtered result an independent frame, so the column
# assignments below do not write into a slice of the original frame.
df_profile = df_profile[df_profile['age'] <= 101].copy()
# registered_on is parsed with the compact YYYYMMDD format.
df_profile['registered_on'] = pd.to_datetime(df_profile['registered_on'], format='%Y%m%d')
# Upper-case the gender code; missing values become 'O'.
df_profile['gender'] = df_profile['gender'].str.upper().fillna('O')
df_profile['id'] = df_profile['id'].str.strip()
df_profile['credit_card_limit'] = df_profile['credit_card_limit'].astype(float)
# 'account_id' is the join key used by the event frames.
df_profile = df_profile.rename(columns={'id': 'account_id'})

# offers

In [4]:
# Load the offer catalogue, cast the numeric columns, trim the identifier and
# expose it as 'offer_id'.
df_offers = (
    pd.read_json(DATA_RAW_PATH + 'offers.json')
    .astype({'min_value': float, 'duration': int, 'discount_value': float})
    .assign(id=lambda offers: offers['id'].str.strip())
    .rename(columns={'id': 'offer_id'})
)

# transactions

In [5]:
# Load the raw event log and normalise the key and time columns.
df_transactions = pd.read_json(DATA_RAW_PATH + 'transactions.json')

df_transactions['account_id'] = df_transactions['account_id'].str.strip()
df_transactions['time_since_test_start'] = df_transactions['time_since_test_start'].astype(float)

# Expand the per-event 'value' payload into one column per key.
# Building the frame once from the list of payloads avoids creating a Series
# per row (as `.apply(pd.Series)` does), which is very slow on a large log.
# Passing the original index keeps the rows aligned for the concat below.
# NOTE(review): assumes every 'value' entry is a dict; confirm against the raw file.
value_expanded = pd.DataFrame(df_transactions['value'].tolist(), index=df_transactions.index)
df_transactions_s = pd.concat([df_transactions.drop(columns='value'), value_expanded], axis=1)

In [6]:
# Split the flattened event log into one frame per event type, keeping only
# the columns each downstream aggregation uses.
event_type = df_transactions_s['event']

df_transaction = df_transactions_s.loc[
    event_type == 'transaction',
    ['account_id', 'amount', 'time_since_test_start'],
]

# Received/viewed events are read from the 'offer id' column (with a space),
# while completed events are read from 'offer_id'; the rename gives all three
# offer frames the same 'offer_id' column.
df_offer_received = df_transactions_s.loc[
    event_type == 'offer received',
    ['account_id', 'offer id', 'time_since_test_start'],
].rename(columns={'offer id': 'offer_id'})

df_offer_viewed = df_transactions_s.loc[
    event_type == 'offer viewed',
    ['account_id', 'offer id', 'time_since_test_start'],
].rename(columns={'offer id': 'offer_id'})

df_offer_completed = df_transactions_s.loc[
    event_type == 'offer completed',
    ['account_id', 'offer_id', 'reward', 'time_since_test_start'],
]

In [7]:
# Inspect the purchase ('transaction') events: account, amount and time offset.
df_transaction

Unnamed: 0,account_id,amount,time_since_test_start
12654,02c083884c7d45b39cc68e1314fec56c,0.83,0.00
12657,9fa9ae8f57894cc9a3b8a9bbe0fc1b2f,34.56,0.00
12659,54890f68699049c2a04d415abc25e717,13.23,0.00
12670,b2f1cd155b864803ad8334cdf13c4bd2,19.51,0.00
12671,fe97aa22dd3e48c8b143116a8403dd52,18.97,0.00
...,...,...,...
306529,b3a1272bc9904337b331bf348c3e8c17,1.59,29.75
306530,68213b08d99a4ae1b0dcb72aebd9aa35,9.53,29.75
306531,a00058cf10334a308c68e7631c529907,3.61,29.75
306532,76ddbd6576844afe811f1a3c0fbb5bec,3.53,29.75


# Sample profile

In [8]:
# Account id of the single customer used to walk through the per-account
# filters and aggregations in the following cells.
profile_id_s = '0610b486422d4921ae7d2bf64640c50b'

## Filters

In [9]:
# Profile row(s) belonging to the sample account.
is_sample_account = df_profile['account_id'].eq(profile_id_s)
df_profile_s = df_profile.loc[is_sample_account]
df_profile_s

Unnamed: 0,age,registered_on,gender,account_id,credit_card_limit
1,55,2017-07-15,F,0610b486422d4921ae7d2bf64640c50b,112000.0


In [10]:
# Purchases made by the sample account, then the total amount spent per account.
df_transaction_s = df_transaction.loc[df_transaction['account_id'].eq(profile_id_s)]
df_transaction_g = (
    df_transaction_s
    .groupby('account_id')['amount']
    .sum()
    # NOTE(review): 'total_ammount' is misspelt but kept, since later cells use this column name.
    .reset_index(name='total_ammount')
)
df_transaction_g

Unnamed: 0,account_id,total_ammount
0,0610b486422d4921ae7d2bf64640c50b,77.01


In [11]:
# Offers received by the sample account, counted per account.
df_offer_received_s = df_offer_received.loc[df_offer_received['account_id'].eq(profile_id_s)]
df_offer_received_g = (
    df_offer_received_s
    .groupby('account_id')['offer_id']
    .count()
    .reset_index(name='offers_received')
)
df_offer_received_g

Unnamed: 0,account_id,offers_received
0,0610b486422d4921ae7d2bf64640c50b,2


In [12]:
# Offers viewed by the sample account, counted per account.
# The result has no rows when the account has no 'offer viewed' events.
df_offer_viewed_s = df_offer_viewed.loc[df_offer_viewed['account_id'].eq(profile_id_s)]
df_offer_viewed_g = (
    df_offer_viewed_s
    .groupby('account_id')['offer_id']
    .count()
    # NOTE(review): 'offers_viwed' is misspelt but kept, since later cells use this column name.
    .reset_index(name='offers_viwed')
)
df_offer_viewed_g

Unnamed: 0,account_id,offers_viwed


In [13]:
# Offers completed by the sample account, counted per account.
df_offer_completed_s = df_offer_completed.loc[df_offer_completed['account_id'].eq(profile_id_s)]
df_offer_completed_g = (
    df_offer_completed_s
    .groupby('account_id')['offer_id']
    .count()
    .reset_index(name='offers_completed')
)
df_offer_completed_g

Unnamed: 0,account_id,offers_completed
0,0610b486422d4921ae7d2bf64640c50b,1


## Merge

In [14]:
# Attach the per-account metrics to the sample profile row.
# Left joins keep the profile row even when an aggregate frame has no entry
# for the account; such gaps appear as NaN in the metric columns.
df_profile_m = (
    df_profile_s
    .merge(df_transaction_g, on=['account_id'], how='left')
    .merge(df_offer_received_g, on=['account_id'], how='left')
    .merge(df_offer_viewed_g, on=['account_id'], how='left')
    .merge(df_offer_completed_g, on=['account_id'], how='left')
    # Fill only the metric columns: a blanket fillna(0) would also overwrite
    # missing profile attributes (dates, gender, credit limit) with 0.
    .fillna({
        'total_ammount': 0.0,
        'offers_received': 0,
        'offers_viwed': 0,
        'offers_completed': 0,
    })
    # A metric that was missing after the join is float (NaN); cast the counts
    # back to integers so every count column has the same dtype.
    .astype({
        'offers_received': int,
        'offers_viwed': int,
        'offers_completed': int,
    })
)

df_profile_m

Unnamed: 0,age,registered_on,gender,account_id,credit_card_limit,total_ammount,offers_received,offers_viwed,offers_completed
0,55,2017-07-15,F,0610b486422d4921ae7d2bf64640c50b,112000.0,77.01,2,0.0,1
