In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
%matplotlib inline

In [2]:
path = './'
teamname = 'team_foo'
out_name = path + teamname + '_submission.csv'
df_usage = pd.read_csv(path + "train_usage_data.csv")

In [4]:
df_usage.head()

Unnamed: 0,id,feature_name,usage_duration,end_use_date,accepted
0,30000429392689,Relight,5.659,2018-04-10 11:18:16,False
1,440000407817536,Crop,0.984,2018-04-06 16:44:19,False
2,540000265662400,Crop,1.583,2018-04-21 18:46:24,False
3,220000444951653,Vignette,4.668,2018-04-30 14:11:22,False
4,220000444725338,Retouch,185.154,2018-04-29 23:37:34,False


In [5]:
df_usage.isnull().any()

id                False
feature_name      False
usage_duration    False
end_use_date      False
accepted          False
dtype: bool

In [6]:
df_users = pd.read_csv(path + "train_users_data.csv",parse_dates = [ 'installation_date','subscripiton_date'], index_col=0)
joined_df = df_usage.join(df_users.set_index('id'),on='id')
joined_df.head()

Unnamed: 0,id,feature_name,usage_duration,end_use_date,accepted,installation_date,subscripiton_date,country,initial_app_version,initial_ios_version,initial_device,days_until_churn,churned
0,30000429392689,Relight,5.659,2018-04-10 11:18:16,False,2018-02-26 07:01:08,2018-04-08 09:38:19,Australia,1.9.6,11.2.6,UIDeviceKindIPhoneX,,0
1,440000407817536,Crop,0.984,2018-04-06 16:44:19,False,2017-07-21 20:04:15,2018-04-05 22:39:32,United Kingdom,1.9.6,11.2.5,UIDeviceKindIPhoneX,,0
2,540000265662400,Crop,1.583,2018-04-21 18:46:24,False,2018-04-21 02:01:23,2018-04-21 02:14:14,United States,1.9.6,11.2,UIDeviceKindIPhone8,,0
3,220000444951653,Vignette,4.668,2018-04-30 14:11:22,False,2018-04-30 12:45:44,2018-04-30 12:57:44,Australia,1.9.6,11.2.5,UIDeviceKindIPhone7Plus,,0
4,220000444725338,Retouch,185.154,2018-04-29 23:37:34,False,2018-04-29 21:14:23,2018-04-29 22:07:56,United States,1.9.6,11.2.6,UIDeviceKindIPhone6Plus,31.0,0


In [7]:
#usage duration by chruned
print(joined_df.groupby('churned')['usage_duration'].mean())
print(joined_df.groupby('churned')['usage_duration'].std())

churned
0    56.191571
1    58.626024
Name: usage_duration, dtype: float64
churned
0    593.080063
1    463.218981
Name: usage_duration, dtype: float64


In [None]:
joined_df.groupby('feature_name')['usage_duration'].mean().plot.bar()
plt.ylabel('usage_duration mean')

In [None]:
joined_df.groupby('churned')['feature_name'].value_counts(normalize =True).unstack(0).plot.bar()
plt.ylabel("normalized usage of features")

In [None]:
joined_df.groupby('churned')['usage_duration'].mean().plot.bar()

In [None]:
joined_df.groupby(['churned','feature_name'])['usage_duration'].mean().unstack(0).plot.bar()
plt.ylabel('mean usage_duration')

In [None]:
ax = joined_df[joined_df['accepted'] == 0].groupby(['churned'])['feature_name'].value_counts(normalize=True).unstack(0).plot.bar(alpha=0.5,figsize=(15,5), color=['g','r'])
joined_df[joined_df['accepted'] == 1].groupby(['churned'])['feature_name'].value_counts(normalize=True).unstack(0).plot.bar(ax=ax, alpha=0.5,figsize=(15,5))

In [None]:
joined_df.groupby(['churned', 'accepted'])['feature_name'].value_counts(normalize=True).unstack([2]).plot.bar(figsize=(15,5))

## Here is te code for clustering the sessions

In [8]:
import datetime
joined_df['end_use_date'] = pd.to_datetime(joined_df['end_use_date'])
joined_df['start_use_date'] = joined_df['end_use_date'] - joined_df['usage_duration'].apply(lambda x: datetime.timedelta(seconds=x))

In [59]:
def cluster(g):
    soted_g = g.sort_values(by='end_use_date')
    soted_g['delta'] = soted_g['start_use_date'].shift(-1) - soted_g['end_use_date']
    soted_g['delta'] = soted_g['delta'].apply(lambda x: x.total_seconds())
    soted_g['delta'] = soted_g['delta'].fillna(0)
    soted_g['class'] = 0
    soted_g.loc[soted_g['delta'] > 1000,'class'] = 1
    soted_g['class'] = soted_g['class'].cumsum()
    return pd.DataFrame(soted_g.index, soted_g['class'])
clustered_group = joined_df.groupby(['id']).apply(cluster)
clustered_group = clustered_group.reset_index()

In [64]:
clustered_group = clustered_group.set_index(0)

In [65]:
clustered_group.merge(joined_df, left_index=True, right_index=True, how='inner')

Unnamed: 0,id_x,class,id_y,feature_name,usage_duration,end_use_date,accepted,installation_date,subscripiton_date,country,initial_app_version,initial_ios_version,initial_device,days_until_churn,churned,start_use_date,new_id
123481,20000443246052,0,20000443246052,Vignette,399.849,2018-04-01 11:32:07,True,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 11:25:27.151,20000443246052
100827,20000443246052,0,20000443246052,Crop,59.463,2018-04-01 11:33:10,False,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 11:32:10.537,20000443246052
126858,20000443246052,0,20000443246052,Crop,6.961,2018-04-01 11:33:19,True,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 11:33:12.039,20000443246052
111938,20000443246052,0,20000443246052,Retouch,73.048,2018-04-01 11:34:57,True,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 11:33:43.952,20000443246052
122873,20000443246052,0,20000443246052,Prism,6.905,2018-04-01 11:35:09,False,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 11:35:02.095,20000443246052
124160,20000443246052,1,20000443246052,Touch Up,7.568,2018-04-01 11:35:25,True,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 11:35:17.432,20000443246052
109467,20000443246052,1,20000443246052,Light FX,29.128,2018-04-01 18:24:31,False,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 18:24:01.872,20000443246052
122064,20000443246052,1,20000443246052,Crop,18.784,2018-04-01 18:24:53,True,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 18:24:34.216,20000443246052
124840,20000443246052,1,20000443246052,Retouch,682.491,2018-04-01 18:36:26,True,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 18:25:03.509,20000443246052
126331,20000443246052,1,20000443246052,Retouch,50.600,2018-04-01 18:37:20,True,2017-11-29 11:16:40,2018-04-01 11:30:41,Brazil,1.9.5,11.2.6,UIDeviceKindIPhone6S,,0,2018-04-01 18:36:29.400,20000443246052
