In [1]:
import pandas as pd

In [2]:
def create_dummy_columns(df, column_name, dummy_na=False):
    """One-hot encode a single column of a DataFrame.

    The column is replaced by one indicator column per distinct value, named
    ``<column_name>_<value>``. The remaining columns are carried over
    unchanged and ``df`` itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column to encode.
    column_name : str
        Name of the column to encode; also used as the prefix of the new
        indicator columns.
    dummy_na : bool, default False
        When True, also emit a ``<column_name>_nan`` indicator so rows whose
        value is missing stay distinguishable. With the default, such rows
        are falsy in every indicator column.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``column_name`` replaced by its indicator columns.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df``.
    """
    # Fail early with a readable message; pandas would raise KeyError here
    # anyway, but with a far less obvious description.
    if column_name not in df.columns:
        raise KeyError(
            f"column {column_name!r} not found; available columns: {list(df.columns)}"
        )
    return pd.get_dummies(
        df,
        columns=[column_name],
        prefix=column_name,
        dummy_na=dummy_na,
    )

## Profile

In [3]:
# Load the customer profiles; the file is read as line-delimited JSON
# (one record per line).
profile = pd.read_json(
    '../data/profile.json',
    lines=True,
    orient='records',
)
profile.head()

Unnamed: 0,gender,age,id,became_member_on,income
0,,118,68be06ca386d4c31939f3a4f0e3dd783,20170212,
1,F,55,0610b486422d4921ae7d2bf64640c50b,20170715,112000.0
2,,118,38fe809add3b4fcf9315a9694bb96ff5,20180712,
3,F,75,78afa995795e4d85b5d9ceeca43f5fef,20170509,100000.0
4,,118,a03223e636434f42ac4c3df47e8bac43,20170804,


In [4]:
# Convert gender to dummies.
# The guard keeps this cell re-runnable: once 'gender' has been replaced by
# the gender_* indicator columns, encoding it a second time would raise a
# KeyError because the source column no longer exists.
# Note: rows with a missing gender are False in every gender_* column.
if "gender" in profile.columns:
    profile = create_dummy_columns(profile,"gender")
profile.head()

Unnamed: 0,age,id,became_member_on,income,gender_F,gender_M,gender_O
0,118,68be06ca386d4c31939f3a4f0e3dd783,20170212,,False,False,False
1,55,0610b486422d4921ae7d2bf64640c50b,20170715,112000.0,True,False,False
2,118,38fe809add3b4fcf9315a9694bb96ff5,20180712,,False,False,False
3,75,78afa995795e4d85b5d9ceeca43f5fef,20170509,100000.0,True,False,False
4,118,a03223e636434f42ac4c3df47e8bac43,20170804,,False,False,False


In [5]:
# Parse the YYYYMMDD values in became_member_on (e.g. 20170212) into
# datetimes, stored in a new column next to the original one.
profile['become_member_on_date'] = pd.to_datetime(
    profile['became_member_on'],
    format='%Y%m%d',
)
profile.head()

Unnamed: 0,age,id,became_member_on,income,gender_F,gender_M,gender_O,become_member_on_date
0,118,68be06ca386d4c31939f3a4f0e3dd783,20170212,,False,False,False,2017-02-12
1,55,0610b486422d4921ae7d2bf64640c50b,20170715,112000.0,True,False,False,2017-07-15
2,118,38fe809add3b4fcf9315a9694bb96ff5,20180712,,False,False,False,2018-07-12
3,75,78afa995795e4d85b5d9ceeca43f5fef,20170509,100000.0,True,False,False,2017-05-09
4,118,a03223e636434f42ac4c3df47e8bac43,20170804,,False,False,False,2017-08-04


In [6]:
# Get the latest membership date, i.e. the maximum of the parsed join dates.
# It serves as the reference point that each join date is subtracted from
# when computing days_since_last_member.
latest_date = profile['become_member_on_date'].max()
print(latest_date)

2018-07-26 00:00:00


In [7]:
# Number of whole days between each member's join date and the latest join
# date in the data (latest_date); the most recently joined member(s) get 0.
profile['days_since_last_member'] = (latest_date - profile['become_member_on_date']).dt.days
profile.head()

Unnamed: 0,age,id,became_member_on,income,gender_F,gender_M,gender_O,become_member_on_date,days_since_last_member
0,118,68be06ca386d4c31939f3a4f0e3dd783,20170212,,False,False,False,2017-02-12,529
1,55,0610b486422d4921ae7d2bf64640c50b,20170715,112000.0,True,False,False,2017-07-15,376
2,118,38fe809add3b4fcf9315a9694bb96ff5,20180712,,False,False,False,2018-07-12,14
3,75,78afa995795e4d85b5d9ceeca43f5fef,20170509,100000.0,True,False,False,2017-05-09,443
4,118,a03223e636434f42ac4c3df47e8bac43,20170804,,False,False,False,2017-08-04,356
