In [1]:
import os
import warnings

import numpy as np
import pandas as pd

# Silence every warning for the rest of the session: the 'ignore' action is
# installed with no category or module restriction.
# NOTE(review): this also hides pandas warnings such as SettingWithCopyWarning.
warnings.filterwarnings('ignore')

In [2]:
# Project root. Defaults to the original developer location, but can be
# redirected without editing the notebook by exporting IFOOD_CASE_PATH.
BASE_PATH = os.environ.get('IFOOD_CASE_PATH', '/home/marco/ifood-case')
# Directory holding the raw JSON inputs. The trailing '' makes os.path.join
# end the result with a separator, so `DATA_RAW_PATH + 'file.json'` still works.
DATA_RAW_PATH = os.path.join(BASE_PATH, 'data', 'raw', '')

# profile

In [3]:
# Load the customer profiles and normalise them in a single chained
# expression. Every step returns a new frame, so no column is ever assigned
# onto a boolean-filtered slice of another frame.
df_profile = (
    pd.read_json(DATA_RAW_PATH + 'profile.json')
    .assign(age=lambda d: d['age'].astype(int))
    # Keep only rows whose age is at most 101.
    # NOTE(review): presumably larger ages are placeholder values - confirm.
    .loc[lambda d: d['age'] <= 101]
    .assign(
        # Dates are stored as YYYYMMDD.
        registered_on=lambda d: pd.to_datetime(d['registered_on'], format='%Y%m%d'),
        # Upper-case the gender code; missing values become 'O'.
        gender=lambda d: d['gender'].str.upper().fillna('O'),
        id=lambda d: d['id'].str.strip(),
        credit_card_limit=lambda d: d['credit_card_limit'].astype(float),
    )
    .rename(columns={'id': 'account_id'})
)

# offers

In [4]:
# Offer catalogue: read the raw JSON, cast the numeric fields, trim the
# identifier and expose it under the name `offer_id`.
df_offers = (
    pd.read_json(DATA_RAW_PATH + 'offers.json')
    .astype({'min_value': float, 'duration': int, 'discount_value': float})
    .assign(id=lambda d: d['id'].str.strip())
    .rename(columns={'id': 'offer_id'})
)

# transactions

In [5]:
# Raw event log: one row per event, with an event-specific payload in 'value'.
df_transactions = pd.read_json(DATA_RAW_PATH + 'transactions.json')

df_transactions['account_id'] = df_transactions['account_id'].str.strip()
df_transactions['time_since_test_start'] = df_transactions['time_since_test_start'].astype(float)

# Expand the 'value' payload into one column per key. Building a single
# DataFrame from the list of records avoids constructing one Series per row,
# which is what `.apply(pd.Series)` does. The original index is passed through
# so the concat below aligns row by row.
# NOTE(review): assumes every 'value' entry is a dict - confirm against the raw file.
value_expanded = pd.DataFrame(df_transactions['value'].tolist(), index=df_transactions.index)
df_transactions_s = pd.concat([df_transactions.drop(columns='value'), value_expanded], axis=1)

In [6]:
# Split the expanded event log into one frame per event type, keeping only the
# columns relevant to that event and giving the timestamp a role-specific name.

# Purchases: who paid how much, and when.
df_transaction = (
    df_transactions_s
    .loc[
        df_transactions_s['event'] == 'transaction',
        ['account_id', 'amount', 'time_since_test_start'],
    ]
    .rename(columns={'time_since_test_start': 'transaction_time'})
)

# Offers delivered to an account. The raw payload key is 'offer id' (with a
# space), so it is renamed to match the other frames.
df_offer_received = (
    df_transactions_s
    .loc[
        df_transactions_s['event'] == 'offer received',
        ['account_id', 'offer id', 'time_since_test_start'],
    ]
    .rename(columns={
        'offer id': 'offer_id',
        'time_since_test_start': 'received_time',
    })
)

# Offer completions. The timestamp is named 'transaction_time' so it can be
# joined against the purchases on (account_id, transaction_time).
df_offer_completed = (
    df_transactions_s
    .loc[
        df_transactions_s['event'] == 'offer completed',
        ['account_id', 'offer_id', 'reward', 'time_since_test_start'],
    ]
    .rename(columns={'time_since_test_start': 'transaction_time'})
)

## Merge

In [7]:
# Reference date used to turn `registered_on` into a tenure measured in days.
REFERENCE_DATE = pd.Timestamp('2019-01-01')

# Build the analysis table in one chain, so every step works on a fresh frame
# and no column is assigned onto a filtered slice.
df_transaction_m = (
    df_transaction
    # Attach the offer completion (if any) recorded for the same account at
    # the same time as the purchase.
    .merge(
        df_offer_completed,
        on=['account_id', 'transaction_time'],
        how='left'
    )
    # Attach offer receipts. The outer join also keeps receipts that never led
    # to a completion (their purchase columns are NaN).
    # NOTE(review): the join key is (account_id, offer_id) only, so a
    # completion is paired with EVERY receipt of that offer by the account,
    # including receipts later than the purchase or outside the offer's
    # duration. This fans rows out; consider restricting to the receipt whose
    # validity window contains transaction_time.
    .merge(
        df_offer_received,
        on=['account_id', 'offer_id'],
        how='outer'
    )
    .merge(
        df_offers,
        on='offer_id',
        how='left'
    )
    # Inner join: accounts dropped during profile cleaning disappear here too.
    .merge(
        df_profile,
        on='account_id',
        how='inner'
    )
    # Keep discount and bogo offers only. Rows without any offer (NaN
    # offer_type) are excluded by this filter as well, so no fill value for a
    # missing offer_type is needed afterwards.
    .loc[lambda d: d['offer_type'].isin(['discount', 'bogo'])]
    .assign(
        # Time elapsed between receiving the offer and the purchase.
        offer_conv_days=lambda d: d['transaction_time'] - d['received_time'],
        # Days between registration and the reference date.
        registered_days=lambda d: (REFERENCE_DATE - d['registered_on']).dt.days,
    )
    .drop(columns=['channels', 'discount_value', 'registered_on'])
    .drop_duplicates()
)

df_transaction_m

Unnamed: 0,account_id,amount,transaction_time,offer_id,reward,received_time,min_value,duration,offer_type,age,gender,credit_card_limit,offer_conv_days,registered_days
0,0009655768c64bdeb2e877511632db8f,10.27,24.00,2906b810c7d4411798c6938adc9daaa5,2.0,24.0,10.0,7.0,discount,33,M,72000.0,0.00,620
3,0009655768c64bdeb2e877511632db8f,8.57,17.25,f19421c1d4aa40978ebb69ca19b0e20d,5.0,17.0,5.0,5.0,bogo,33,M,72000.0,0.25,620
4,0009655768c64bdeb2e877511632db8f,14.11,22.00,fafdcd668e3743c1bb461111dcafc2a4,2.0,21.0,10.0,10.0,discount,33,M,72000.0,1.00,620
10,0011e0d4e6b944f998e987f904e8c1e5,22.05,24.00,0b1e1539f2cc45b7b9fa7c272da2e1d7,5.0,17.0,20.0,10.0,discount,40,O,57000.0,7.00,357
11,0011e0d4e6b944f998e987f904e8c1e5,11.93,10.50,2298d6c36e964ae4a3e7e9706d1fb8c2,3.0,7.0,7.0,7.0,discount,40,O,57000.0,3.50,357
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170831,ffff82501cea40309d5fdd7edcca4a07,14.23,24.00,2906b810c7d4411798c6938adc9daaa5,2.0,14.0,10.0,7.0,discount,45,F,62000.0,10.00,767
170832,ffff82501cea40309d5fdd7edcca4a07,14.23,24.00,2906b810c7d4411798c6938adc9daaa5,2.0,17.0,10.0,7.0,discount,45,F,62000.0,7.00,767
170833,ffff82501cea40309d5fdd7edcca4a07,14.23,24.00,2906b810c7d4411798c6938adc9daaa5,2.0,24.0,10.0,7.0,discount,45,F,62000.0,0.00,767
170834,ffff82501cea40309d5fdd7edcca4a07,7.79,21.00,9b98b8c7a33c4b65b9aebfe6a799e6d9,5.0,21.0,5.0,7.0,bogo,45,F,62000.0,0.00,767


In [8]:
# Rows whose offer_conv_days is present and larger than the offer's duration.
df_transaction_m.loc[
    lambda d: d['offer_conv_days'].notna() & d['offer_conv_days'].gt(d['duration'])
]

Unnamed: 0,account_id,amount,transaction_time,offer_id,reward,received_time,min_value,duration,offer_type,age,gender,credit_card_limit,offer_conv_days,registered_days
23,0020c2b971eb4e9188eac86d93036a77,17.24,21.25,fafdcd668e3743c1bb461111dcafc2a4,2.0,0.0,10.0,10.0,discount,59,F,90000.0,21.25,1033
49,003d66b6608740288d6cc97a6903f4f0,5.01,21.00,fafdcd668e3743c1bb461111dcafc2a4,2.0,7.0,10.0,10.0,discount,26,F,73000.0,14.00,559
97,004c5799adbf42868b9cff0396190900,35.91,23.25,f19421c1d4aa40978ebb69ca19b0e20d,5.0,17.0,5.0,5.0,bogo,54,M,99000.0,6.25,1006
101,004c5799adbf42868b9cff0396190900,33.67,24.00,fafdcd668e3743c1bb461111dcafc2a4,2.0,7.0,10.0,10.0,discount,54,M,99000.0,17.00,1006
126,00715b6e55c3431cb56ff7307eb19675,21.59,27.75,0b1e1539f2cc45b7b9fa7c272da2e1d7,5.0,7.0,20.0,10.0,discount,58,F,119000.0,20.75,390
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170761,fff29fb549084123bd046dbc5ceb4faa,13.72,21.50,4d5c57ea9a6940dd891ad53e9dbe8da0,10.0,14.0,10.0,5.0,bogo,59,F,93000.0,7.50,488
170765,fff29fb549084123bd046dbc5ceb4faa,39.59,28.50,ae264e3637204a6fb9bb56bc8210ddfd,10.0,0.0,10.0,7.0,bogo,59,F,93000.0,28.50,488
170795,fff7576017104bcc8677a8d63322b5e1,8.01,24.75,fafdcd668e3743c1bb461111dcafc2a4,2.0,0.0,10.0,10.0,discount,71,M,73000.0,24.75,427
170813,fffad4f4828548d1b5583907f2e9906b,12.18,21.50,f19421c1d4aa40978ebb69ca19b0e20d,5.0,0.0,5.0,5.0,bogo,34,M,34000.0,21.50,708


In [9]:
# Rows that carry no transaction_time.
df_transaction_m.loc[df_transaction_m['transaction_time'].isna()]


Unnamed: 0,account_id,amount,transaction_time,offer_id,reward,received_time,min_value,duration,offer_type,age,gender,credit_card_limit,offer_conv_days,registered_days
20,0020c2b971eb4e9188eac86d93036a77,,,ae264e3637204a6fb9bb56bc8210ddfd,,7.0,10.0,7.0,bogo,59,F,90000.0,,1033
66,00426fe3ffde4c6b9cb9ad6d077a13ea,,,0b1e1539f2cc45b7b9fa7c272da2e1d7,,14.0,20.0,10.0,discount,19,F,65000.0,,875
67,00426fe3ffde4c6b9cb9ad6d077a13ea,,,2906b810c7d4411798c6938adc9daaa5,,17.0,10.0,7.0,discount,19,F,65000.0,,875
68,00426fe3ffde4c6b9cb9ad6d077a13ea,,,2906b810c7d4411798c6938adc9daaa5,,24.0,10.0,7.0,discount,19,F,65000.0,,875
110,005500a7188546ff8a767329a2f7c76a,,,2906b810c7d4411798c6938adc9daaa5,,17.0,10.0,7.0,discount,56,M,47000.0,,388
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170752,fff0f0aac6c547b9b263080f09a5586a,,,4d5c57ea9a6940dd891ad53e9dbe8da0,,14.0,10.0,5.0,bogo,67,M,80000.0,,565
170790,fff7576017104bcc8677a8d63322b5e1,,,4d5c57ea9a6940dd891ad53e9dbe8da0,,7.0,10.0,5.0,bogo,71,M,73000.0,,427
170792,fff7576017104bcc8677a8d63322b5e1,,,ae264e3637204a6fb9bb56bc8210ddfd,,17.0,10.0,7.0,bogo,71,M,73000.0,,427
170802,fff8957ea8b240a6b5e634b6ee8eafcf,,,4d5c57ea9a6940dd891ad53e9dbe8da0,,24.0,10.0,5.0,bogo,71,M,56000.0,,317
