In [2]:
import pandas as pd
import numpy as np
import datetime
import gc


In [3]:
## Import up sound alert dependencies
from IPython.display import Audio, display

def allDone(url='https://sound.peal.io/ps/audios/000/000/537/original/woo_vu_luvub_dub_dub.wav'):
    """Play an audio clip in the notebook to signal that a long-running cell finished.

    Parameters
    ----------
    url : str, optional
        Location of the audio file to play. Defaults to the alert sound the
        notebook has always used, so existing ``allDone()`` calls are unchanged.
    """
    display(Audio(url=url, autoplay=True))
## Pass a different `url` to allDone() to play another audio file

In [4]:
def fix_str_float(ds, col):
    """Convert a column of numbers stored as text into ``float32``.

    Every character that is not a digit or a dot is removed from text values.
    Entries that end up empty, or that still cannot be parsed as a number,
    become NaN. Columns that are already numeric are only cast. The converted
    column is written back into ``ds``.

    Parameters
    ----------
    ds : pandas.DataFrame
        Frame that owns the column; it is modified in place.
    col : str
        Name of the column to convert.

    Returns
    -------
    pandas.Series
        The converted ``float32`` column (the same data now stored in ``ds[col]``).
    """
    values = ds[col]
    if not pd.api.types.is_numeric_dtype(values):
        # The pattern is a character class, so it must be applied as a regular
        # expression; newer pandas versions treat patterns as literal text
        # unless regex=True is passed explicitly.
        values = values.str.replace(r'[^0-9\.]', '', regex=True)
        # Values made only of stripped characters are now empty -> missing.
        values = values.mask(values == '')
    # errors='coerce' maps leftovers such as '1.2.3' to NaN instead of raising.
    ds[col] = pd.to_numeric(values, errors='coerce').astype('float32')
    return ds[col]

In [5]:
# Load the four raw tables with compact dtypes and parse their timestamps.

# clicks
clicks_df = pd.read_csv(
    'data/clicks.csv',
    low_memory=False,
    dtype={
        'advertiser_id': 'int32', 'action_id': 'float32', 'source_id': 'int32',
        'country_code': 'category', 'latitude': 'float32', 'longitude': 'float32',
        'carrier_id': 'float32', 'os_minor': 'category', 'os_major': 'category',
        'specs_brand': 'category', 'timeToClick': 'float32', 'ref_type': 'category',
        'ref_hash': 'object',
    },
)
# Touch coordinates are cleaned of non-numeric characters and cast to float32.
clicks_df['touchX'] = fix_str_float(clicks_df, 'touchX')
clicks_df['touchY'] = fix_str_float(clicks_df, 'touchY')
clicks_df['created'] = pd.to_datetime(clicks_df['created'])

# events
events_df = pd.read_csv(
    'data/events.csv',
    low_memory=False,
    dtype={
        'event_id': 'int32', 'ref_type': 'category', 'application_id': 'category',
        'attributed': 'bool', 'device_countrycode': 'category', 'device_city': 'category',
        'trans_id': 'category', 'carrier': 'category', 'device_os': 'category',
        'connection_type': 'category',
    },
)
events_df['date'] = pd.to_datetime(events_df['date'])
# NOTE: the original cell called events_df['wifi'].astype('bool', inplace=True).
# `astype` has no `inplace` option and its result was discarded, so the column
# was never changed; recent pandas versions reject the unknown keyword outright.
# The call is dropped so `wifi` keeps the dtype inferred by read_csv, which is
# what the features built later were derived from (a real cast to bool would
# also turn missing values into True).
events_df = events_df.drop(
    columns=['device_countrycode', 'session_user_agent', 'ip_address', 'device_language']
)

# installs
installs_df = pd.read_csv(
    'data/installs.csv',
    low_memory=False,
    dtype={
        'ref_type': 'category', 'application_id': 'category', 'device_brand': 'category',
        'ref_hash': 'object', 'wifi': 'category',
    },
)
# Lower-case the kind labels before turning them into a categorical.
installs_df['kind'] = installs_df['kind'].str.lower().astype('category')
installs_df['created'] = pd.to_datetime(installs_df['created'])
installs_df = installs_df.drop(
    columns=['session_user_agent', 'ip_address', 'device_language', 'device_model',
             'device_countrycode']
)

# auctions
auctions_df = pd.read_csv(
    'data/auctions.csv',
    low_memory=False,
    dtype={
        'country': 'category', 'platform': 'category', 'ref_type_id': 'category',
        'source_id': 'category', 'device_id': 'object',
    },
)
auctions_df['date'] = pd.to_datetime(auctions_df['date'])

# Audible and printed signal that the load finished.
allDone()
print('setup done')


setup done


In [6]:
# Load the competition ids, remove the '_sc' / '_st' markers from ref_hash and
# drop the rows that become exact duplicates.
# NOTE(review): the next cell reloads the same file and repeats these steps,
# so this cell looks redundant - confirm before removing it.
competencia_ids = pd.read_csv('data/target_competencia_ids.csv')
competencia_ids['ref_hash'] = (
    competencia_ids['ref_hash']
    .str.replace('_sc', '')
    .str.replace('_st', '')
)
competencia_ids = competencia_ids.drop_duplicates()

# Target Auctions

In [7]:
# Build the Series of competition device ids: load the file, remove the
# '_sc' / '_st' markers from ref_hash, drop duplicate rows and keep only the
# ref_hash column.
competencia_ids = (
    pd.read_csv('data/target_competencia_ids.csv')
    .assign(ref_hash=lambda ids: ids['ref_hash'].str.replace('_sc', '').str.replace('_st', ''))
    .drop_duplicates()
    .loc[:, 'ref_hash']
)

In [8]:
# Restrict every table to the rows that belong to the competition devices;
# the copies are independent of the full tables.
auctions_sample = auctions_df[auctions_df['device_id'].isin(competencia_ids)].copy()
installs_sample = installs_df[installs_df['ref_hash'].isin(competencia_ids)].copy()
events_sample = events_df[events_df['ref_hash'].isin(competencia_ids)].copy()

# The full auctions table is not used again below, so release its memory.
del auctions_df
gc.collect()

152

In [9]:
# Compute `last_seen` (seconds since the device's previous auction) and keep
# only the most recent auction of every device.

# Reference start of the observation window and the cap applied to the first
# auction of a device (values taken from the original hard-coded constants).
window_start = datetime.datetime(2019, 4, 18)
max_gap = pd.to_timedelta(3, unit='d')

auctions_sample = auctions_sample.drop_duplicates().sort_values(by=['device_id', 'date'])

# After sorting, the previous row is the previous auction of the same device
# unless the device id changes between the two rows.
prev_date = auctions_sample['date'].shift()
prev_device = auctions_sample['device_id'].astype('object').shift()

# First auction of a device: time elapsed since the window start, capped at max_gap.
first_gap = np.where(
    auctions_sample['date'] - max_gap < window_start,
    auctions_sample['date'] - window_start,
    max_gap,
)
gap = pd.to_timedelta(
    np.where(prev_device == auctions_sample['device_id'],
             auctions_sample['date'] - prev_date,
             first_gap)
)
# `gap` is positional (no index), so assign it as a plain array.
auctions_sample['last_seen'] = np.asarray(gap.total_seconds())
auctions_sample = auctions_sample.sort_values(by=['date'])

# get the last date of every device id and keep only the rows at that date
group_1 = auctions_sample.groupby(['device_id']).agg({'date': 'max'}).reset_index()
auctions_sample = pd.merge(auctions_sample, group_1, on=['device_id', 'date'], how='inner')

# Several auctions can share a device's latest timestamp, which would leave
# more than one row for that device. Keep a single row per device, preferring
# the largest last_seen (tied rows after the first have a gap of 0). When
# there are no ties this leaves the frame unchanged.
auctions_sample = (
    auctions_sample
    .sort_values('last_seen', ascending=False)
    .drop_duplicates('device_id')
    .sort_index()
    .reset_index(drop=True)
)



In [10]:
# Build the auction feature table: counts of applications / event kinds seen
# before the auction, plus the relative auction volume of its day and hour.
# The numbered print() calls are progress markers.

# Columns that identify one auction row; used as grouping and join keys below.
auct_cols = auctions_sample.columns.tolist()
print(1)
# installs and events
events_sample['kind'] = 'event'
print(2)
installs_sample['kind'] = installs_sample['kind'].str.replace(' ', '_').str.lower()
print(3)
# information about installs and events of the sampled devices
# (pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x)
applications = pd.concat(
    [
        installs_sample[['ref_hash', 'created', 'application_id', 'kind']],
        events_sample[['ref_hash', 'date', 'application_id', 'kind']].rename(columns={'date': 'created'}),
    ],
    ignore_index=True,
)
print(4)
data_1 = pd.merge(auctions_sample, applications, left_on='device_id', right_on='ref_hash', how='left')
print(5)
# only installs or events that happened before the auction (or no activity at all)
data_1 = data_1.loc[(data_1['date'] > data_1['created']) | data_1['created'].isnull()]
print(6)

# one indicator column per application id and per kind (NaN gets its own column)
app_id_1 = data_1[['application_id', 'kind']].copy()
print(7)
app_id_1 = pd.get_dummies(app_id_1, dummy_na=True, prefix_sep='=')
print(8)
data_1 = data_1.drop(columns=['application_id'])
print(9)
data_1 = pd.merge(data_1, app_id_1, left_index=True, right_index=True, how='inner')
print(10)

app_id_1_columns = app_id_1.columns.tolist()
print(11)

# Sum the indicators per auction. observed=True keeps only key combinations
# that actually occur; the categorical key columns would otherwise be expanded
# to unused category combinations, which can never match an auction row.
group_1 = data_1.groupby(auct_cols, observed=True).agg({col: 'sum' for col in app_id_1_columns})
print(12)
group_1 = group_1.reset_index()

del installs_sample
del events_sample
gc.collect()
print(13)
# Join on the same columns the counts were grouped by (the keys used to be a
# separate hard-coded list that could drift from auct_cols).
auctions_sample = pd.merge(auctions_sample, group_1, on=auct_cols, how='left')
print(14)
auctions_sample = auctions_sample.astype({col: 'float32' for col in app_id_1_columns})
print(15)

# Relative auction volume per (day of month, hour): count divided by the busiest slot.
auctions_sample['hora'] = auctions_sample['date'].dt.hour
auctions_sample['dia'] = auctions_sample['date'].dt.day
pivot_auctions = auctions_sample.groupby(['dia', 'hora']).size().reset_index(name='hour_day_dist')
# A single .max() already yields the scalar maximum.
max_auctions = pivot_auctions['hour_day_dist'].max()
pivot_auctions['hour_day_dist'] = pivot_auctions['hour_day_dist'] / max_auctions
display(pivot_auctions.head(5))
print(auctions_sample.shape)
auctions_sample = pd.merge(auctions_sample, pivot_auctions, on=['dia', 'hora'], how='left')
auctions_sample = auctions_sample.drop(columns=['hora', 'dia'])
print(auctions_sample.shape)

# Auctions without any previous activity: flag the "nan" indicators, zero the rest.
auctions_sample = auctions_sample.fillna(value={'application_id=nan': 1, 'kind=nan': 1})
print(16)
auctions_sample = auctions_sample.fillna(value={col: 0 for col in app_id_1_columns})
print(17)
auctions_sample = auctions_sample.astype({col: 'int32' for col in app_id_1_columns})
print(18)
auctions_sample = auctions_sample.reset_index(drop=True)
print(19)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15


Unnamed: 0,dia,hora,hour_day_dist
0,18,0,0.004796
1,18,1,0.01199
2,18,2,0.004796
3,18,3,0.01199
4,18,4,0.007194


(4037, 182)
(4037, 181)
16
17
18
19


In [11]:
# Column names, in order, that the exported auctions table must contain.
auctions_cols = [
    # previously seen applications
    'application_id=101', 'application_id=116', 'application_id=117', 'application_id=121',
    'application_id=122', 'application_id=123', 'application_id=126', 'application_id=128',
    'application_id=129', 'application_id=13', 'application_id=133', 'application_id=135',
    'application_id=14', 'application_id=140', 'application_id=143', 'application_id=145',
    'application_id=147', 'application_id=153', 'application_id=155', 'application_id=158',
    'application_id=159', 'application_id=161', 'application_id=163', 'application_id=165',
    'application_id=167', 'application_id=170', 'application_id=175', 'application_id=180',
    'application_id=185', 'application_id=187', 'application_id=188', 'application_id=19',
    'application_id=194', 'application_id=196', 'application_id=197', 'application_id=200',
    'application_id=203', 'application_id=21', 'application_id=210', 'application_id=212',
    'application_id=214', 'application_id=218', 'application_id=226', 'application_id=230',
    'application_id=232', 'application_id=256', 'application_id=26', 'application_id=261',
    'application_id=263', 'application_id=265', 'application_id=268', 'application_id=269',
    'application_id=27', 'application_id=274', 'application_id=279', 'application_id=289',
    'application_id=291', 'application_id=3', 'application_id=30', 'application_id=301',
    'application_id=302', 'application_id=305', 'application_id=308', 'application_id=309',
    'application_id=31', 'application_id=311', 'application_id=327', 'application_id=330',
    'application_id=339', 'application_id=34', 'application_id=35', 'application_id=36',
    'application_id=39', 'application_id=4', 'application_id=42', 'application_id=45',
    'application_id=49', 'application_id=54', 'application_id=55', 'application_id=56',
    'application_id=57', 'application_id=62', 'application_id=65', 'application_id=66',
    'application_id=68', 'application_id=70', 'application_id=71', 'application_id=72',
    'application_id=75', 'application_id=77', 'application_id=78', 'application_id=80',
    'application_id=81', 'application_id=86', 'application_id=87', 'application_id=94',
    'application_id=96', 'application_id=nan',
    # auction identity and timing
    'date', 'device_id', 'hour_day_dist', 'in_seconds',
    # previously seen install / event kinds
    'kind=01_launch', 'kind=account_summary_first_step', 'kind=added_personal_info',
    'kind=af_app_opend', 'kind=af_app_opened', 'kind=af_search', 'kind=af_view_product',
    'kind=app_open', 'kind=applaunch', 'kind=deeplink', 'kind=deeplink_open',
    'kind=list_view', 'kind=login_success', 'kind=mobilesignup', 'kind=nan', 'kind=open',
    'kind=pax_app_open', 'kind=product_view', 'kind=registration_complete',
    'kind=sessionbegin', 'kind=sign_in',
    # remaining columns
    'last_seen', 'ref_type_id', 'source_id', 'status_censored',
]


In [12]:
# Align the table with the expected layout: columns listed in auctions_cols
# that are missing are created and filled with 0, columns not listed are
# dropped, and the result follows the order of auctions_cols.
auctions_sample = auctions_sample.reindex(columns=auctions_cols, fill_value=0)

In [13]:
# Rows x columns of the final auctions feature table; compare the row count
# with the number of competition ids.
auctions_sample.shape

(4037, 127)

In [14]:
# Number of competition device ids, for comparison with the feature table's row count.
competencia_ids.shape

(4037,)

In [15]:
# Export the auctions feature table to CSV (the row index is not written).
auctions_sample.to_csv('data/auctions_target.csv', index=False) 

In [16]:
# Drop the auctions feature table and run the garbage collector; the value
# shown as the cell output is the return value of gc.collect().
del auctions_sample
gc.collect()

251

# Target Conversions

In [17]:
# Rebuild the per-device samples from the full tables, keeping only rows of
# the competition devices (independent copies).
installs_sample = installs_df[installs_df['ref_hash'].isin(competencia_ids)].copy()
events_sample = events_df[events_df['ref_hash'].isin(competencia_ids)].copy()

In [18]:
# Add `last_seen` (seconds since the device's previous install) and
# `hour_day_dist` (relative install volume of the row's day and hour).

# Reference start of the observation window and the cap applied to the first
# install of a device (values taken from the original hard-coded constants).
window_start = datetime.datetime(2019, 4, 18)
max_gap = pd.to_timedelta(3, unit='d')

installs_sample = installs_sample.sort_values(by=['ref_hash', 'created'])

# After sorting, the previous row is the previous install of the same device
# unless the device id changes between the two rows.
prev_date = installs_sample['created'].shift()
prev_device = installs_sample['ref_hash'].astype('object').shift()

# First install of a device: time elapsed since the window start, capped at max_gap.
first_gap = np.where(
    installs_sample['created'] - max_gap < window_start,
    installs_sample['created'] - window_start,
    max_gap,
)
gap = pd.to_timedelta(
    np.where(prev_device == installs_sample['ref_hash'],
             installs_sample['created'] - prev_date,
             first_gap)
)
# `gap` is positional (no index), so assign it as a plain array.
installs_sample['last_seen'] = np.asarray(gap.total_seconds())
installs_sample = installs_sample.sort_values(by=['created'])

# Relative volume per (day of month, hour): count divided by the busiest slot.
installs_sample['hora'] = installs_sample['created'].dt.hour
installs_sample['dia'] = installs_sample['created'].dt.day
pivot_auctions = installs_sample.groupby(['dia', 'hora']).size().reset_index(name='hour_day_dist')
# A single .max() already yields the scalar maximum; the former second call
# was redundant and failed when the sample was empty.
max_auctions = pivot_auctions['hour_day_dist'].max()
pivot_auctions['hour_day_dist'] = pivot_auctions['hour_day_dist'] / max_auctions
display(pivot_auctions.head(5))
installs_sample = pd.merge(installs_sample, pivot_auctions, on=['dia', 'hora'], how='left')
installs_sample = installs_sample.drop(columns=['hora', 'dia'])




Unnamed: 0,dia,hora,hour_day_dist
0,18,0,0.514706
1,18,1,0.529412
2,18,2,0.823529
3,18,3,0.720588
4,18,4,0.397059


In [19]:
# Add `last_seen` (seconds since the device's previous event) and
# `hour_day_dist` (relative event volume of the row's day and hour), and give
# events the same column names as installs.

# Reference start of the observation window and the cap applied to the first
# event of a device (values taken from the original hard-coded constants).
window_start = datetime.datetime(2019, 4, 18)
max_gap = pd.to_timedelta(3, unit='d')

events_sample = events_sample.rename(columns={'date': 'created'}).sort_values(by=['ref_hash', 'created'])

# After sorting, the previous row is the previous event of the same device
# unless the device id changes between the two rows.
prev_date = events_sample['created'].shift()
prev_device = events_sample['ref_hash'].astype('object').shift()

# First event of a device: time elapsed since the window start, capped at max_gap.
first_gap = np.where(
    events_sample['created'] - max_gap < window_start,
    events_sample['created'] - window_start,
    max_gap,
)
gap = pd.to_timedelta(
    np.where(prev_device == events_sample['ref_hash'],
             events_sample['created'] - prev_date,
             first_gap)
)
# `gap` is positional (no index), so assign it as a plain array.
events_sample['last_seen'] = np.asarray(gap.total_seconds())
events_sample = events_sample.sort_values(by=['created'])

# Events carry no user agent here; the column is added empty so both tables
# share the same layout.
events_sample['user_agent'] = np.nan
# Constant label instead of the raw kind (hashed, per the original note).
events_sample['kind'] = 'event'

# Relative volume per (day of month, hour): count divided by the busiest slot.
events_sample['hora'] = events_sample['created'].dt.hour
events_sample['dia'] = events_sample['created'].dt.day
pivot_auctions = events_sample.groupby(['dia', 'hora']).size().reset_index(name='hour_day_dist')
# A single .max() already yields the scalar maximum; the former second call
# was redundant and failed when the sample was empty.
max_auctions = pivot_auctions['hour_day_dist'].max()
pivot_auctions['hour_day_dist'] = pivot_auctions['hour_day_dist'] / max_auctions
display(pivot_auctions.head(5))
events_sample = pd.merge(events_sample, pivot_auctions, on=['dia', 'hora'], how='left')
events_sample = events_sample.drop(columns=['hora', 'dia'])



Unnamed: 0,dia,hora,hour_day_dist
0,18,0,0.332133
1,18,1,0.377138
2,18,2,0.50405
3,18,3,0.383438
4,18,4,0.372637


In [20]:
# Build the conversions feature table from installs and events: user-agent
# family, counts of applications used in the previous 7 days, and indicator
# columns for the current application, wifi, kind and user agent.
# The numbered print() calls are progress markers.
install_cols = installs_sample.columns.tolist()
print(1)
# Stack installs and events with a common set of columns
# (pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x).
conv_fields = ['ref_hash', 'created', 'application_id', 'user_agent', 'wifi', 'kind',
               'last_seen', 'hour_day_dist']
convertions = pd.concat(
    [installs_sample[conv_fields], events_sample[conv_fields]],
    ignore_index=True,
)

print(2)

# user agent: collapse the raw strings into 'Android' / 'iOS'.
# na=False matters: str.contains returns NaN for missing user agents and
# np.where treats NaN as true, so every missing agent used to be labelled
# 'Android'. Missing agents now stay missing.
# NOTE(review): this changes the user_agent indicators; apply the same rule
# wherever the training features are built.
is_android = convertions['user_agent'].str.contains('Android', regex=False, na=False)
convertions['user_agent'] = np.where(is_android, 'Android', convertions['user_agent'])
is_ios = (
    convertions['user_agent'].str.contains('Darwin', regex=False, na=False)
    | convertions['user_agent'].str.contains('iOS', regex=False, na=False)
)
convertions['user_agent'] = np.where(is_ios, 'iOS', convertions['user_agent'])
print(3)

# previous applications: pair every row with the other rows of the same device
app_id_1 = convertions[['application_id', 'created', 'ref_hash']].copy()
group_1 = pd.merge(convertions, app_id_1, on='ref_hash', how='inner')
# keep pairs whose second row happened strictly earlier; .copy() so the column
# assignments below act on an independent frame
group_1 = group_1.loc[(group_1['created_x'] > group_1['created_y']) | group_1['created_y'].isnull()].copy()
group_1['application_id'] = group_1['application_id_y']
group_1['created'] = group_1['created_x']
group_1 = group_1.drop(columns=['application_id_y', 'created_x', 'application_id_x'])
# only activity within the 7 days before the current row
group_1 = group_1.loc[group_1['created_y'] > group_1['created'] - pd.DateOffset(7)]
group_1 = pd.get_dummies(group_1, prefix_sep='=', dummy_na=True, columns=['application_id'], drop_first=True)
app_id_1_columns = [col for col in group_1.columns.tolist() if col.startswith('application_id')]
group_1 = group_1.groupby(['created', 'ref_hash']).agg({col: 'sum' for col in app_id_1_columns})
convertions = pd.merge(convertions, group_1, on=['created', 'ref_hash'], how='left')
# the row's own application becomes `curr_app_id`
convertions['curr_app_id'] = convertions['application_id']
convertions = convertions.drop(columns=['application_id'])
print(4)

convertions = pd.get_dummies(convertions, prefix_sep='=', dummy_na=True,
                             columns=['curr_app_id', 'wifi', 'kind', 'user_agent'], drop_first=True)

# Rows without previous activity: flag the "nan" indicators, zero the other counts.
convertions = convertions.fillna(value={'application_id=nan': 1, 'kind=nan': 1, 'curr_app_id=nan': 1})
print(6)
convertions = convertions.fillna(value={col: 0 for col in app_id_1_columns})
print(7)
convertions = convertions.astype({col: 'int32' for col in app_id_1_columns})
print(8)
convertions = convertions.reset_index(drop=True)
print(9)


1
2
3
4
6
7
8
9


In [21]:
print(convertions.shape)

# Keep only the most recent record of every device.
convertions['ref_hash'] = convertions['ref_hash'].map(str)
group_1 = convertions.groupby(['ref_hash'])['created'].max().reset_index()
print(group_1.shape)
convertions2 = convertions.merge(group_1, on=['ref_hash', 'created'], how='inner')

# Several records can share a device's latest timestamp: keep the one with the
# largest last_seen, then restore the previous row order.
convertions2 = (
    convertions2
    .sort_values('last_seen', ascending=False)
    .drop_duplicates('ref_hash')
    .sort_index()
)

print(convertions2.shape)

(83967, 315)
(4037, 2)
(4037, 315)


In [22]:
# Column names, in order, that the exported conversions table must contain.
convertions_cols = [
    # applications used before the current record
    'application_id=116', 'application_id=117', 'application_id=121', 'application_id=122',
    'application_id=123', 'application_id=124', 'application_id=126', 'application_id=128',
    'application_id=13', 'application_id=133', 'application_id=135', 'application_id=136',
    'application_id=14', 'application_id=140', 'application_id=145', 'application_id=147',
    'application_id=148', 'application_id=149', 'application_id=150', 'application_id=154',
    'application_id=155', 'application_id=157', 'application_id=158', 'application_id=159',
    'application_id=16', 'application_id=161', 'application_id=163', 'application_id=164',
    'application_id=167', 'application_id=170', 'application_id=178', 'application_id=182',
    'application_id=185', 'application_id=187', 'application_id=19', 'application_id=190',
    'application_id=194', 'application_id=195', 'application_id=196', 'application_id=197',
    'application_id=200', 'application_id=204', 'application_id=21', 'application_id=210',
    'application_id=213', 'application_id=214', 'application_id=218', 'application_id=219',
    'application_id=221', 'application_id=226', 'application_id=230', 'application_id=231',
    'application_id=235', 'application_id=239', 'application_id=243', 'application_id=248',
    'application_id=256', 'application_id=26', 'application_id=263', 'application_id=265',
    'application_id=268', 'application_id=269', 'application_id=274', 'application_id=279',
    'application_id=289', 'application_id=291', 'application_id=3', 'application_id=302',
    'application_id=305', 'application_id=308', 'application_id=309', 'application_id=31',
    'application_id=311', 'application_id=320', 'application_id=323', 'application_id=327',
    'application_id=339', 'application_id=34', 'application_id=344', 'application_id=35',
    'application_id=36', 'application_id=39', 'application_id=41', 'application_id=45',
    'application_id=48', 'application_id=49', 'application_id=54', 'application_id=55',
    'application_id=56', 'application_id=57', 'application_id=62', 'application_id=65',
    'application_id=68', 'application_id=70', 'application_id=71', 'application_id=75',
    'application_id=77', 'application_id=78', 'application_id=79', 'application_id=80',
    'application_id=86', 'application_id=87', 'application_id=93', 'application_id=94',
    'application_id=97', 'application_id=99', 'application_id=nan',
    # record timestamp
    'created',
    # application of the current record
    'curr_app_id=112', 'curr_app_id=116', 'curr_app_id=117', 'curr_app_id=121',
    'curr_app_id=122', 'curr_app_id=123', 'curr_app_id=124', 'curr_app_id=126',
    'curr_app_id=128', 'curr_app_id=129', 'curr_app_id=13', 'curr_app_id=133',
    'curr_app_id=135', 'curr_app_id=136', 'curr_app_id=14', 'curr_app_id=140',
    'curr_app_id=143', 'curr_app_id=145', 'curr_app_id=146', 'curr_app_id=147',
    'curr_app_id=148', 'curr_app_id=149', 'curr_app_id=150', 'curr_app_id=154',
    'curr_app_id=155', 'curr_app_id=157', 'curr_app_id=158', 'curr_app_id=159',
    'curr_app_id=16', 'curr_app_id=161', 'curr_app_id=163', 'curr_app_id=164',
    'curr_app_id=165', 'curr_app_id=167', 'curr_app_id=170', 'curr_app_id=174',
    'curr_app_id=175', 'curr_app_id=178', 'curr_app_id=180', 'curr_app_id=182',
    'curr_app_id=185', 'curr_app_id=187', 'curr_app_id=19', 'curr_app_id=190',
    'curr_app_id=191', 'curr_app_id=194', 'curr_app_id=195', 'curr_app_id=196',
    'curr_app_id=197', 'curr_app_id=200', 'curr_app_id=203', 'curr_app_id=204',
    'curr_app_id=205', 'curr_app_id=21', 'curr_app_id=210', 'curr_app_id=212',
    'curr_app_id=213', 'curr_app_id=214', 'curr_app_id=218', 'curr_app_id=219',
    'curr_app_id=221', 'curr_app_id=226', 'curr_app_id=230', 'curr_app_id=231',
    'curr_app_id=232', 'curr_app_id=234', 'curr_app_id=235', 'curr_app_id=237',
    'curr_app_id=239', 'curr_app_id=243', 'curr_app_id=247', 'curr_app_id=248',
    'curr_app_id=256', 'curr_app_id=26', 'curr_app_id=261', 'curr_app_id=263',
    'curr_app_id=265', 'curr_app_id=267', 'curr_app_id=268', 'curr_app_id=269',
    'curr_app_id=27', 'curr_app_id=274', 'curr_app_id=279', 'curr_app_id=282',
    'curr_app_id=289', 'curr_app_id=291', 'curr_app_id=3', 'curr_app_id=30',
    'curr_app_id=301', 'curr_app_id=302', 'curr_app_id=304', 'curr_app_id=305',
    'curr_app_id=308', 'curr_app_id=309', 'curr_app_id=31', 'curr_app_id=311',
    'curr_app_id=320', 'curr_app_id=323', 'curr_app_id=327', 'curr_app_id=336',
    'curr_app_id=339', 'curr_app_id=34', 'curr_app_id=341', 'curr_app_id=344',
    'curr_app_id=35', 'curr_app_id=36', 'curr_app_id=39', 'curr_app_id=4',
    'curr_app_id=41', 'curr_app_id=45', 'curr_app_id=47', 'curr_app_id=48',
    'curr_app_id=49', 'curr_app_id=54', 'curr_app_id=55', 'curr_app_id=56',
    'curr_app_id=57', 'curr_app_id=62', 'curr_app_id=65', 'curr_app_id=66',
    'curr_app_id=68', 'curr_app_id=70', 'curr_app_id=71', 'curr_app_id=72',
    'curr_app_id=75', 'curr_app_id=77', 'curr_app_id=78', 'curr_app_id=79',
    'curr_app_id=80', 'curr_app_id=81', 'curr_app_id=86', 'curr_app_id=87',
    'curr_app_id=89', 'curr_app_id=93', 'curr_app_id=94', 'curr_app_id=96',
    'curr_app_id=97', 'curr_app_id=99', 'curr_app_id=nan',
    # timing
    'hour_day_dist', 'in_seconds',
    # kind of the current record
    'kind=account_summary_first_step', 'kind=add_payment', 'kind=add_to_cart',
    'kind=adjust_reinstall', 'kind=adjust_uninstall', 'kind=af app open',
    'kind=af_app_opend', 'kind=af_app_opened', 'kind=af_content_view', 'kind=af_search',
    'kind=af_view_list', 'kind=af_view_product', 'kind=app first start', 'kind=app launch',
    'kind=app open', 'kind=app_alive', 'kind=app_launch', 'kind=app_open',
    'kind=app_opened', 'kind=background_session', 'kind=content view', 'kind=deeplink',
    'kind=deeplink_open', 'kind=event', 'kind=event_listingview_flight',
    'kind=event_open_app', 'kind=install', 'kind=list view', 'kind=login',
    'kind=login_success', 'kind=mobilesignup', 'kind=nan', 'kind=open', 'kind=opened_app',
    'kind=pax_app_open', 'kind=pre_checkout', 'kind=product view', 'kind=product_open',
    'kind=product_preview', 'kind=reattribution', 'kind=reengagement',
    'kind=registration_complete', 'kind=session begin', 'kind=sessionbegin',
    'kind=sign in', 'kind=startsessionplayback', 'kind=terms_agree_split',
    # remaining columns
    'last_seen', 'ref_hash', 'status_censored',
    'user_agent=iOS', 'user_agent=nan',
    'wifi=True', 'wifi=false', 'wifi=nan', 'wifi=true',
]


In [23]:
# Align the table with the expected layout: columns listed in convertions_cols
# that are missing are created and filled with 0, columns not listed are
# dropped, and the result follows the order of convertions_cols.
convertions2 = convertions2.reindex(columns=convertions_cols, fill_value=0)

In [24]:
# Export the conversions feature table to CSV (the row index is not written).
convertions2.to_csv('data/convertions_target.csv', index=False) 