Copyright (c) 2020, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

In [5]:
import os, time
start = time.time()

In [6]:
import pandas as pd, numpy as np, gc
from datetime import datetime
import joblib

import matplotlib.pyplot as plt
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [7]:
from numba import jit, njit, prange
from sklearn.metrics import precision_recall_curve, auc, log_loss

def compute_prauc(gt, pred, nafill=True):
    """Return the area under the precision-recall curve of ``pred`` against ``gt``.

    Parameters
    ----------
    gt : array-like
        Ground-truth labels, handed unchanged to ``precision_recall_curve``.
    pred : array-like
        Predicted scores. The caller's object is never modified: when NaNs
        have to be filled, the fill is applied to a fresh array.
    nafill : bool, default True
        When true, NaN predictions are replaced by the mean of the non-NaN
        predictions before scoring.

    Returns
    -------
    float
        ``auc(recall, precision)`` over the curve returned by
        ``precision_recall_curve``.
    """
    if nafill:
        nan_mask = np.isnan(pred)
        if nan_mask.any():
            # np.where allocates a new array, so the caller's predictions keep
            # their NaNs instead of being silently overwritten.
            pred = np.where(nan_mask, np.nanmean(pred), pred)
    prec, recall, _ = precision_recall_curve(gt, pred)
    return auc(recall, prec)

@jit
def fast_auc(y_true, y_prob):
    """Pairwise ranking score of ``y_prob`` against the labels ``y_true``.

    The labels are reordered by ascending ``y_prob``; walking that order, each
    element adds ``1 - label`` to a running count and ``label * count`` to the
    score, which is finally divided by ``count * (n - count)``.  For 0/1
    labels this is the share of (positive, negative) pairs in which the
    positive is ranked higher.  Equal scores are ordered by ``np.argsort``
    rather than given half credit.

    NOTE(review): the divisor is zero when only one class is present.
    """
    ranked = np.asarray(y_true)[np.argsort(y_prob)]
    n = len(ranked)
    negatives = 0
    score = 0
    for label in ranked:
        negatives += (1 - label)
        score += label * negatives
    score /= (negatives * (n - negatives))
    return score

@njit
def numba_log_loss(y,x):
    """Mean negative log-likelihood of predictions ``x`` given labels ``y``.

    Elements whose label is ``<= 0`` contribute ``log(1 - x + 1e-15)``; every
    other element contributes ``log(x + 1e-15)``.  The 1e-15 offset keeps the
    logarithm finite at exactly 0 or 1.  The sum is negated and divided by
    ``x.shape[0]``.
    """
    count = x.shape[0]
    total = 0.
    for idx in prange(count):
        if y[idx] <= 0.:
            likelihood = 1 - x[idx]
        else:
            likelihood = x[idx]
        total += np.log(likelihood + 1e-15)
    return -total / count

def compute_rce(gt , pred, nafill=True, verbose=0):
    """Return the relative cross-entropy improvement of ``pred``, in percent.

    The score is ``(1 - CE(pred) / CE(strawman)) * 100`` where the strawman
    predicts the positive rate ``mean(gt > 0)`` for every row.

    Parameters
    ----------
    gt : array
        Ground-truth labels; values ``> 0`` count as positive.
    pred : array
        Predicted probabilities. The caller's array is never modified: when
        NaNs have to be filled, the fill is applied to a fresh array.
    nafill : bool, default True
        When true, NaN predictions are replaced by the mean of the non-NaN
        predictions before scoring.
    verbose : int, default 0
        When truthy, print both cross-entropies, their ratio, and the mean
        prediction next to the positive rate.

    Returns
    -------
    float
        The percentage improvement.  NOTE(review): when ``gt`` holds a single
        class the strawman entropy involves ``log(0)`` and the result is not
        a finite number.
    """
    if nafill:
        nan_mask = np.isnan(pred)
        if nan_mask.any():
            # np.where allocates a new array, so the caller's predictions keep
            # their NaNs instead of being silently overwritten.
            pred = np.where(nan_mask, np.nanmean(pred), pred)

    cross_entropy = numba_log_loss( gt, pred  )

    yt = np.mean(gt>0)
    strawman_cross_entropy = -(yt*np.log(yt) + (1 - yt)*np.log(1 - yt))

    if verbose:
        print( "logloss: {0:.5f} / {1:.5f} = {2:.5f}".format(cross_entropy, strawman_cross_entropy, cross_entropy/strawman_cross_entropy))
        print( 'mean:    {0:.5f} / {1:.5f}'.format( np.nanmean( pred ) , yt  ) )

    return (1.0 - cross_entropy/strawman_cross_entropy)*100.0


In [8]:
def save_memory( df ):
    """Downcast the columns of ``df`` in place to narrower dtypes.

    Conversions: ``uint8``/``bool`` -> ``int8``, ``uint32``/``int64`` ->
    ``int32``, ``float64`` -> ``float32``.  Columns of any other dtype are
    left alone.

    An integer column is converted only when every value fits the target
    type; otherwise it keeps its dtype, because ``astype`` would wrap the
    out-of-range values silently.  ``float64`` -> ``float32`` is always
    applied and may lose precision.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink; modified in place.  Column labels are assumed to be
        unique.

    Returns
    -------
    None
    """
    targets = {
        'uint8': np.int8,
        'bool': np.int8,
        'uint32': np.int32,
        'int64': np.int32,
        'float64': np.float32,
    }
    # Look columns up by label: positional ``df.dtypes[i]`` is deprecated and
    # picks the wrong column when the labels are themselves integers.
    for name in list(df.columns):
        column = df[name]
        target = targets.get(str(column.dtype))
        if target is None:
            continue
        if column.dtype.kind in 'iu' and len(column):
            limits = np.iinfo(target)
            if column.min() < limits.min or column.max() > limits.max:
                # Narrowing would overflow; keep the wider dtype.
                continue
        df[name] = column.astype(target)
        gc.collect()

# Load Train

In [9]:
# Read the three parquet splits.
train = pd.read_parquet('train.parquet')
test0 = pd.read_parquet('valid.parquet')
test1 = pd.read_parquet('test.parquet')
gc.collect()

# `tr` records where a row came from: 0 for train.parquet, 1 for the
# valid/test files.
for split, marker in ((train, 0), (test0, 1), (test1, 1)):
    split['tr'] = marker

train.shape, test0.shape, test1.shape

((106723416, 28), (9811488, 28), (9815713, 28))

In [10]:
# Stack the three splits into one frame, then release the originals.
train = pd.concat([train, test0, test1], sort=False)
del test0, test1
gc.collect()

train.shape

(126350617, 28)

In [11]:
%%time
# Put the rows in time order and renumber the index 0..n-1.
train = train.sort_values(by='timestamp')
train = train.reset_index(drop=True)
gc.collect()

CPU times: user 1min 30s, sys: 15 s, total: 1min 45s
Wall time: 1min 45s


0

In [12]:
# Collapse every positive label value to 1.
label_columns = ['reply', 'retweet', 'retweet_comment', 'like']
for label in label_columns:
    train.loc[train[label] > 0, label] = 1

# `engage` is 1 when at least one of the four labels is positive.
engaged = train[label_columns[0]] > 0
for label in label_columns[1:]:
    engaged = engaged | (train[label] > 0)
train['engage'] = 0
train.loc[engaged, 'engage'] = 1
gc.collect()

0

In [13]:
# Display the first rows of the time-ordered frame.
train.head()

Unnamed: 0,timestamp,a_follower_count,a_following_count,a_is_verified,a_account_creation,b_follower_count,b_following_count,b_is_verified,b_account_creation,b_follows_a,reply,retweet,retweet_comment,like,id,language,tweet_type,media,tweet_id,a_user_id,b_user_id,len_hashtags,len_domains,len_links,domains,links,hashtags,tr,engage
0,1580947200,438860,159,True,1480270328,63,351,False,1505350202,False,0,0,0,0,78208872,54,2,12,13853657,13488905,4009871,1,1,1,266516,4141906,191970,0,0
1,1580947200,2828662,217,True,1244233304,37,43,False,1573587136,False,0,0,0,0,83895315,59,2,4,30382844,4768532,20049830,0,1,1,103730,2712364,0,0,0
2,1580947200,9794149,90,True,1321282652,0,36,False,1574692024,False,0,0,0,0,8177999,54,2,4,46335397,17612981,25428544,0,0,0,0,0,0,0,0
3,1580947200,7230468,758,True,1184948409,408,231,False,1289164476,False,0,0,0,0,92868826,3,2,12,56738631,22760498,26051552,0,1,1,115975,2460396,0,0,0
4,1580947200,53032,12,True,1530316237,2,68,False,1541192342,False,0,0,0,0,4824533,54,2,8,56380015,24605653,975650,0,0,0,0,0,0,0,0


In [14]:
# Downcast the columns of `train` in place (see save_memory), then collect garbage.
save_memory(train)
gc.collect()

0

In [15]:
# Display the column dtypes after the save_memory call.
train.dtypes

timestamp             int32
a_follower_count      int32
a_following_count     int32
a_is_verified          int8
a_account_creation    int32
b_follower_count      int32
b_following_count     int32
b_is_verified          int8
b_account_creation    int32
b_follows_a            int8
reply                 int32
retweet               int32
retweet_comment       int32
like                  int32
id                    int32
language               int8
tweet_type             int8
media                  int8
tweet_id              int32
a_user_id             int32
b_user_id             int32
len_hashtags          int32
len_domains           int32
len_links             int32
domains               int32
links                 int32
hashtags              int32
tr                    int32
engage                int32
dtype: object

In [16]:
# Build `dt`: one row per (user, counts, timestamp) observation.  The a_*
# columns are taken as they are; the b_* columns are stacked underneath under
# the a_* names with id = -1.
a_side = ['tr', 'a_user_id', 'a_follower_count', 'a_following_count', 'timestamp', 'id']
b_side = ['tr', 'b_user_id', 'b_follower_count', 'b_following_count', 'timestamp', 'id']

dt0 = train[a_side].copy()
dt1 = train[b_side].rename(columns=dict(zip(b_side, a_side))).assign(id=-1)
dt = pd.concat([dt0, dt1], sort=False)
del dt0, dt1
_ = gc.collect()
dt.head()

Unnamed: 0,tr,a_user_id,a_follower_count,a_following_count,timestamp,id
0,0,13488905,438860,159,1580947200,78208872
1,0,4768532,2828662,217,1580947200,83895315
2,0,17612981,9794149,90,1580947200,8177999
3,0,22760498,7230468,758,1580947200,92868826
4,0,24605653,53032,12,1580947200,4824533


In [17]:
%%time
# For every (tr, user, follower count) group, measure the span between the
# first and last timestamp at which that count was observed.
# a_fc_dif_flag:  1 -> the span equals the longest span of that (tr, user)
#                 0 -> a shorter span
#                -1 -> zero span (overrides the 1/0 value)
value_keys = ['tr', 'a_user_id', 'a_follower_count']
user_keys = ['tr', 'a_user_id']

ts_per_value = dt.groupby(value_keys)['timestamp']
dt['a_fc_dif'] = ts_per_value.transform('max') - ts_per_value.transform('min')
_ = gc.collect()

dt['a_fc_dif_max'] = dt.groupby(user_keys)['a_fc_dif'].transform('max')
_ = gc.collect()

dt['a_fc_dif_flag'] = (dt['a_fc_dif'] == dt['a_fc_dif_max']).astype('int64')
_ = gc.collect()
dt.loc[dt['a_fc_dif'] == 0, 'a_fc_dif_flag'] = -1

dt.drop(columns=['a_fc_dif', 'a_fc_dif_max'], inplace=True)
_ = gc.collect()
dt['a_fc_dif_flag'].value_counts()

CPU times: user 5min 55s, sys: 46 s, total: 6min 41s
Wall time: 6min 41s


 1    168330593
 0     54991842
-1     29378799
Name: a_fc_dif_flag, dtype: int64

In [18]:
# Copy the flag back onto `train` by id.  Only the id and flag columns take
# part in the merge, and the id = -1 rows of `dt` are left out because they
# can never match a train row.
flag_by_id = dt.loc[dt['id'] != -1, ['id', 'a_fc_dif_flag']]
# Assign by position: a left merge keeps the left row order, and a row-count
# mismatch (duplicated ids) raises instead of misaligning silently.
train['a_user_fer_count_delta_time'] = train[['id']].merge( flag_by_id, on='id', how='left' )['a_fc_dif_flag'].values ; _=gc.collect()

del flag_by_id, dt['a_fc_dif_flag'] ; _=gc.collect()

train['a_user_fer_count_delta_time'].value_counts()

 1    71960799
 0    41783581
-1    12606237
Name: a_user_fer_count_delta_time, dtype: int64

In [19]:
# Mean of each label per (tr, flag value).
label_columns = ['reply', 'retweet', 'retweet_comment', 'like']
train.groupby(['tr', 'a_user_fer_count_delta_time'])[label_columns].agg(['mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,reply,retweet,retweet_comment,like
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean
tr,a_user_fer_count_delta_time,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
0,-1,0.024673,0.106663,0.006928,0.431959
0,0,0.024845,0.107117,0.006964,0.434257
0,1,0.024677,0.105458,0.006942,0.424089
1,-1,0.0,0.0,0.0,0.0
1,0,0.0,0.0,0.0,0.0
1,1,0.0,0.0,0.0,0.0


In [20]:
%%time
# For every (tr, user, following count) group, measure the span between the
# first and last timestamp at which that count was observed.
# a_fc_dif_flag:  1 -> the span equals the longest span of that (tr, user)
#                 0 -> a shorter span
#                -1 -> zero span (overrides the 1/0 value)
value_keys = ['tr', 'a_user_id', 'a_following_count']
user_keys = ['tr', 'a_user_id']

ts_per_value = dt.groupby(value_keys)['timestamp']
dt['a_fc_dif'] = ts_per_value.transform('max') - ts_per_value.transform('min')
_ = gc.collect()

dt['a_fc_dif_max'] = dt.groupby(user_keys)['a_fc_dif'].transform('max')
_ = gc.collect()

dt['a_fc_dif_flag'] = (dt['a_fc_dif'] == dt['a_fc_dif_max']).astype('int64')
_ = gc.collect()
dt.loc[dt['a_fc_dif'] == 0, 'a_fc_dif_flag'] = -1

dt.drop(columns=['a_fc_dif', 'a_fc_dif_max'], inplace=True)
_ = gc.collect()
dt['a_fc_dif_flag'].value_counts()

CPU times: user 5min 49s, sys: 44.2 s, total: 6min 33s
Wall time: 6min 33s


 1    182423773
 0     40959468
-1     29317993
Name: a_fc_dif_flag, dtype: int64

In [21]:
# Copy the flag back onto `train` by id.  Only the id and flag columns take
# part in the merge, and the id = -1 rows of `dt` are left out because they
# can never match a train row.
flag_by_id = dt.loc[dt['id'] != -1, ['id', 'a_fc_dif_flag']]
# Assign by position: a left merge keeps the left row order, and a row-count
# mismatch (duplicated ids) raises instead of misaligning silently.
train['a_user_fing_count_delta_time'] = train[['id']].merge( flag_by_id, on='id', how='left' )['a_fc_dif_flag'].values ; _=gc.collect()

del flag_by_id, dt['a_fc_dif_flag'] ; _=gc.collect()

train['a_user_fing_count_delta_time'].value_counts()

 1    89384386
 0    26810151
-1    10156080
Name: a_user_fing_count_delta_time, dtype: int64

In [22]:
# Mean of each label per (tr, flag value).
label_columns = ['reply', 'retweet', 'retweet_comment', 'like']
train.groupby(['tr', 'a_user_fing_count_delta_time'])[label_columns].agg(['mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,reply,retweet,retweet_comment,like
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean
tr,a_user_fing_count_delta_time,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
0,-1,0.024624,0.106455,0.006939,0.431908
0,0,0.024741,0.106891,0.006938,0.435143
0,1,0.024742,0.105861,0.006953,0.425745
1,-1,0.0,0.0,0.0,0.0
1,0,0.0,0.0,0.0,0.0
1,1,0.0,0.0,0.0,0.0


In [23]:
%%time
# For every (tr, user, follower count, following count) group, measure the
# span between the first and last timestamp at which that pair was observed.
# a_fc_dif_flag:  1 -> the span equals the longest span of that (tr, user)
#                 0 -> a shorter span
#                -1 -> zero span (overrides the 1/0 value)
value_keys = ['tr', 'a_user_id', 'a_follower_count', 'a_following_count']
user_keys = ['tr', 'a_user_id']

ts_per_value = dt.groupby(value_keys)['timestamp']
dt['a_fc_dif'] = ts_per_value.transform('max') - ts_per_value.transform('min')
_ = gc.collect()

dt['a_fc_dif_max'] = dt.groupby(user_keys)['a_fc_dif'].transform('max')
_ = gc.collect()

dt['a_fc_dif_flag'] = (dt['a_fc_dif'] == dt['a_fc_dif_max']).astype('int64')
_ = gc.collect()
dt.loc[dt['a_fc_dif'] == 0, 'a_fc_dif_flag'] = -1

dt.drop(columns=['a_fc_dif', 'a_fc_dif_max'], inplace=True)
_ = gc.collect()
dt['a_fc_dif_flag'].value_counts()

CPU times: user 6min 28s, sys: 48.8 s, total: 7min 16s
Wall time: 7min 17s


 1    155319323
 0     57334326
-1     40047585
Name: a_fc_dif_flag, dtype: int64

In [24]:
# Copy the flag back onto `train` by id.  Only the id and flag columns take
# part in the merge, and the id = -1 rows of `dt` are left out because they
# can never match a train row.
flag_by_id = dt.loc[dt['id'] != -1, ['id', 'a_fc_dif_flag']]
# Assign by position: a left merge keeps the left row order, and a row-count
# mismatch (duplicated ids) raises instead of misaligning silently.
train['a_user_fering_count_delta_time'] = train[['id']].merge( flag_by_id, on='id', how='left' )['a_fc_dif_flag'].values ; _=gc.collect()

del flag_by_id, dt['a_fc_dif_flag'] ; _=gc.collect()

train['a_user_fering_count_delta_time'].value_counts()

 1    70226253
 0    41983233
-1    14141131
Name: a_user_fering_count_delta_time, dtype: int64

In [25]:
# Mean of each label per (tr, flag value).
label_columns = ['reply', 'retweet', 'retweet_comment', 'like']
train.groupby(['tr', 'a_user_fering_count_delta_time'])[label_columns].agg(['mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,reply,retweet,retweet_comment,like
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean
tr,a_user_fering_count_delta_time,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
0,-1,0.024678,0.106771,0.006938,0.432342
0,0,0.024842,0.107138,0.006971,0.43427
0,1,0.024677,0.105393,0.006937,0.423803
1,-1,0.0,0.0,0.0,0.0
1,0,0.0,0.0,0.0,0.0
1,1,0.0,0.0,0.0,0.0


In [26]:
%%time
# Count how many rows share each (tr, user, following count) value.
# a_fc_count_flag:  1 -> the value is the most frequent one for that (tr, user)
#                   0 -> a less frequent value
#                  -1 -> the value occurs at most once (overrides the 1/0 value)
value_keys = ['tr', 'a_user_id', 'a_following_count']
user_keys = ['tr', 'a_user_id']

dt['a_fc_count'] = dt.groupby(value_keys)['id'].transform('count')
_ = gc.collect()
dt['a_fc_max'] = dt.groupby(user_keys)['a_fc_count'].transform('max')
_ = gc.collect()
dt['a_fc_count_flag'] = (dt['a_fc_count'] == dt['a_fc_max']).astype('int64')
_ = gc.collect()
dt.loc[dt['a_fc_count'] <= 1, 'a_fc_count_flag'] = -1

dt.drop(columns=['a_fc_count', 'a_fc_max'], inplace=True)
dt['a_fc_count_flag'].value_counts()

CPU times: user 3min 42s, sys: 26.5 s, total: 4min 9s
Wall time: 4min 9s


 1    189320859
 0     35481218
-1     27899157
Name: a_fc_count_flag, dtype: int64

In [27]:
# Copy the flag back onto `train` by id.  Only the id and flag columns take
# part in the merge, and the id = -1 rows of `dt` are left out because they
# can never match a train row.
flag_by_id = dt.loc[dt['id'] != -1, ['id', 'a_fc_count_flag']]
# Assign by position: a left merge keeps the left row order, and a row-count
# mismatch (duplicated ids) raises instead of misaligning silently.
train['a_user_fing_count_mode'] = train[['id']].merge( flag_by_id, on='id', how='left' )['a_fc_count_flag'].values ; _=gc.collect()
del flag_by_id, dt['a_fc_count_flag'] ; _=gc.collect()
train['a_user_fing_count_mode'].value_counts()

 1    92456564
 0    25205378
-1     8688675
Name: a_user_fing_count_mode, dtype: int64

In [28]:
# Mean of each label per (tr, flag value).
label_columns = ['reply', 'retweet', 'retweet_comment', 'like']
train.groupby(['tr', 'a_user_fing_count_mode'])[label_columns].agg(['mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,reply,retweet,retweet_comment,like
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean
tr,a_user_fing_count_mode,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
0,-1,0.024709,0.10658,0.006947,0.431366
0,0,0.024723,0.106605,0.006942,0.433759
0,1,0.024739,0.105961,0.006951,0.426445
1,-1,0.0,0.0,0.0,0.0
1,0,0.0,0.0,0.0,0.0
1,1,0.0,0.0,0.0,0.0


In [29]:
%%time
# Count how many rows share each (tr, user, follower count) value.
# a_fc_count_flag:  1 -> the value is the most frequent one for that (tr, user)
#                   0 -> a less frequent value
#                  -1 -> the value occurs at most once (overrides the 1/0 value)
value_keys = ['tr', 'a_user_id', 'a_follower_count']
user_keys = ['tr', 'a_user_id']

dt['a_fc_count'] = dt.groupby(value_keys)['id'].transform('count')
_ = gc.collect()
dt['a_fc_max'] = dt.groupby(user_keys)['a_fc_count'].transform('max')
_ = gc.collect()
dt['a_fc_count_flag'] = (dt['a_fc_count'] == dt['a_fc_max']).astype('int64')
_ = gc.collect()
dt.loc[dt['a_fc_count'] <= 1, 'a_fc_count_flag'] = -1

dt.drop(columns=['a_fc_count', 'a_fc_max'], inplace=True)
dt['a_fc_count_flag'].value_counts()

CPU times: user 3min 45s, sys: 26.3 s, total: 4min 11s
Wall time: 4min 11s


 1    174484263
 0     51461287
-1     26755684
Name: a_fc_count_flag, dtype: int64

In [30]:
# Copy the flag back onto `train` by id.  Only the id and flag columns take
# part in the merge, and the id = -1 rows of `dt` are left out because they
# can never match a train row.
flag_by_id = dt.loc[dt['id'] != -1, ['id', 'a_fc_count_flag']]
# Assign by position: a left merge keeps the left row order, and a row-count
# mismatch (duplicated ids) raises instead of misaligning silently.
train['a_user_fer_count_mode'] = train[['id']].merge( flag_by_id, on='id', how='left' )['a_fc_count_flag'].values ; _=gc.collect()
del flag_by_id, dt['a_fc_count_flag'] ; _=gc.collect()
train['a_user_fer_count_mode'].value_counts()

 1    74842482
 0    41649512
-1     9858623
Name: a_user_fer_count_mode, dtype: int64

In [31]:
# Mean of each label per (tr, flag value).
label_columns = ['reply', 'retweet', 'retweet_comment', 'like']
train.groupby(['tr', 'a_user_fer_count_mode'])[label_columns].agg(['mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,reply,retweet,retweet_comment,like
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean
tr,a_user_fer_count_mode,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
0,-1,0.024733,0.106725,0.006915,0.431278
0,0,0.024847,0.107024,0.006962,0.433781
0,1,0.024669,0.105551,0.006944,0.424737
1,-1,0.0,0.0,0.0,0.0
1,0,0.0,0.0,0.0,0.0
1,1,0.0,0.0,0.0,0.0


In [32]:
%%time
# Count how many rows share each (tr, user, follower count, following count)
# combination.
# a_fc_count_flag:  1 -> the pair is the most frequent one for that (tr, user)
#                   0 -> a less frequent pair
#                  -1 -> the pair occurs at most once (overrides the 1/0 value)
value_keys = ['tr', 'a_user_id', 'a_follower_count', 'a_following_count']
user_keys = ['tr', 'a_user_id']

dt['a_fc_count'] = dt.groupby(value_keys)['id'].transform('count')
_ = gc.collect()
dt['a_fc_max'] = dt.groupby(user_keys)['a_fc_count'].transform('max')
_ = gc.collect()
dt['a_fc_count_flag'] = (dt['a_fc_count'] == dt['a_fc_max']).astype('int64')
_ = gc.collect()
dt.loc[dt['a_fc_count'] <= 1, 'a_fc_count_flag'] = -1

dt.drop(columns=['a_fc_count', 'a_fc_max'], inplace=True)
dt['a_fc_count_flag'].value_counts()

CPU times: user 3min 58s, sys: 28.7 s, total: 4min 27s
Wall time: 4min 27s


 1    162510206
 0     52931747
-1     37259281
Name: a_fc_count_flag, dtype: int64

In [33]:
# Copy the flag back onto `train` by id.  Only the id and flag columns take
# part in the merge, and the id = -1 rows of `dt` are left out because they
# can never match a train row.
flag_by_id = dt.loc[dt['id'] != -1, ['id', 'a_fc_count_flag']]
# Assign by position: a left merge keeps the left row order, and a row-count
# mismatch (duplicated ids) raises instead of misaligning silently.
train['a_user_fering_count_mode'] = train[['id']].merge( flag_by_id, on='id', how='left' )['a_fc_count_flag'].values ; _=gc.collect()
del flag_by_id, dt['a_fc_count_flag'] ; _=gc.collect()
train['a_user_fering_count_mode'].value_counts()

 1    73102990
 0    41947310
-1    11300317
Name: a_user_fering_count_mode, dtype: int64

In [34]:
# Mean of each label per (tr, flag value).
label_columns = ['reply', 'retweet', 'retweet_comment', 'like']
train.groupby(['tr', 'a_user_fering_count_mode'])[label_columns].agg(['mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,reply,retweet,retweet_comment,like
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean
tr,a_user_fering_count_mode,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
0,-1,0.024733,0.106831,0.006933,0.431824
0,0,0.02484,0.107031,0.006967,0.433806
0,1,0.024671,0.105504,0.00694,0.424478
1,-1,0.0,0.0,0.0,0.0
1,0,0.0,0.0,0.0,0.0
1,1,0.0,0.0,0.0,0.0


In [35]:
# The helper frame is no longer needed; drop it and collect garbage.
del dt; _=gc.collect()

In [36]:
# Pack the six flag columns into a single base-3 number: each flag is shifted
# by +1 and weighted by 3**position.  pd.factorize then replaces the packed
# values by codes numbered in order of first appearance.
flag_columns = [
    'a_user_fer_count_delta_time',
    'a_user_fing_count_delta_time',
    'a_user_fering_count_delta_time',
    'a_user_fing_count_mode',
    'a_user_fer_count_mode',
    'a_user_fering_count_mode',
]
packed = (1 + train[flag_columns[0]]) * 3**0
for power, column in enumerate(flag_columns[1:], start=1):
    packed = packed + (1 + train[column]) * 3**power
train['a_count_combined'] = pd.factorize(packed)[0]
train['a_count_combined'].value_counts()

3      66991766
0      20387443
1      15273172
15      7602051
9       2033543
16      1561398
4       1191337
6       1088480
5       1062254
31       924708
2        822177
35       703491
14       699061
10       629797
23       510132
13       444927
36       419450
12       398809
8        309550
29       287107
25       280533
11       242631
34       218248
53       201779
20       185429
32       157232
27       104155
59        95363
28        92965
7         80155
50        77484
46        70525
42        67542
17        62926
19        59986
52        56691
61        53284
75        45573
54        44418
21        40930
24        40587
55        40223
40        36739
44        36577
39        35814
41        34691
57        32732
48        31839
89        28205
22        26392
79        23569
45        22445
72        22333
71        20863
63        18500
51        16562
49        16330
92        16267
78        14362
81        13903
64        13504
65        11793
69      

In [37]:
# Mean of each label per combined code, over the tr == 0 rows only.
label_columns = ['reply', 'retweet', 'retweet_comment', 'like']
train_rows = train.loc[train['tr'] == 0]
train_rows.groupby('a_count_combined')[label_columns].agg(['mean'])

Unnamed: 0_level_0,reply,retweet,retweet_comment,like
Unnamed: 0_level_1,mean,mean,mean,mean
a_count_combined,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,0.024772,0.10689,0.006934,0.435416
1,0.025008,0.10759,0.007014,0.432782
2,0.024938,0.10765,0.007095,0.434224
3,0.024689,0.105501,0.006936,0.424183
4,0.024608,0.107427,0.007053,0.432707
5,0.024364,0.101566,0.006976,0.410747
6,0.024497,0.106961,0.006882,0.433673
7,0.024353,0.107623,0.006657,0.434342
8,0.025312,0.105458,0.007182,0.428473
9,0.024789,0.106566,0.006891,0.435839


In [38]:
# Display the first rows again, now including the new flag columns.
train.head()

Unnamed: 0,timestamp,a_follower_count,a_following_count,a_is_verified,a_account_creation,b_follower_count,b_following_count,b_is_verified,b_account_creation,b_follows_a,reply,retweet,retweet_comment,like,id,language,tweet_type,media,tweet_id,a_user_id,b_user_id,len_hashtags,len_domains,len_links,domains,links,hashtags,tr,engage,a_user_fer_count_delta_time,a_user_fing_count_delta_time,a_user_fering_count_delta_time,a_user_fing_count_mode,a_user_fer_count_mode,a_user_fering_count_mode,a_count_combined
0,1580947200,438860,159,1,1480270328,63,351,0,1505350202,0,0,0,0,0,78208872,54,2,12,13853657,13488905,4009871,1,1,1,266516,4141906,191970,0,0,0,0,0,0,0,0,0
1,1580947200,2828662,217,1,1244233304,37,43,0,1573587136,0,0,0,0,0,83895315,59,2,4,30382844,4768532,20049830,0,1,1,103730,2712364,0,0,0,0,1,0,1,0,0,1
2,1580947200,9794149,90,1,1321282652,0,36,0,1574692024,0,0,0,0,0,8177999,54,2,4,46335397,17612981,25428544,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1
3,1580947200,7230468,758,1,1184948409,408,231,0,1289164476,0,0,0,0,0,92868826,3,2,12,56738631,22760498,26051552,0,1,1,115975,2460396,0,0,0,0,1,0,1,0,0,1
4,1580947200,53032,12,1,1530316237,2,68,0,1541192342,0,0,0,0,0,4824533,54,2,8,56380015,24605653,975650,0,0,0,0,0,0,0,0,-1,1,-1,1,0,0,2


In [39]:
# Keep the id plus the seven derived columns, order the rows by id,
# downcast, and write the result to parquet.
export_columns = [
    'id',
    'a_count_combined',
    'a_user_fer_count_delta_time',
    'a_user_fing_count_delta_time',
    'a_user_fering_count_delta_time',
    'a_user_fing_count_mode',
    'a_user_fer_count_mode',
    'a_user_fering_count_mode',
]
dt = train[export_columns].sort_values('id').reset_index(drop=True)
save_memory(dt)
dt.to_parquet('a_count_combined-final.parquet')
dt.tail(5)

Unnamed: 0,id,a_count_combined,a_user_fer_count_delta_time,a_user_fing_count_delta_time,a_user_fering_count_delta_time,a_user_fing_count_mode,a_user_fer_count_mode,a_user_fering_count_mode
126350612,125780463,3,1,1,1,1,1,1
126350613,125780464,1,0,1,0,1,0,0
126350614,125780465,3,1,1,1,1,1,1
126350615,125780466,3,1,1,1,1,1,1
126350616,125780467,0,0,0,0,0,0,0
