In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

# 한글 폰트 설정
plt.rcParams['font.family'] = 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False ## 마이너스 부호 깨짐 방지

pd.set_option('display.max_columns', None)                       # 출력할 열의 최대개수
pd.set_option('display.max_colwidth', None)                      # 출력할 열의 너비
# pd.reset_option('display.max_rows', None)                    # 출력할 행의 최대개수  None
pd.set_option('display.unicode.east_asian_width', True) 

### 컬럼 설명
### train_users_2.csv
- id : 사용자 아이디
- date_account_created: 계정 생성 날짜
- timestamp_first_active: 첫 번째 활동의 타임스탬프입니다. 사용자가 가입하기 전에 검색할 수 있으므로 date_account_created 또는 date_first_booking보다 이전일 수 있습니다.
- date_first_booking: 첫 예약 날짜
- 성별
- 나이
- signup_method - 가입_방법
- signup_flow: 사용자가 가입하기 위해 온 페이지
- language: 국제 언어 기본 설정
- affili_channel: 유료 마케팅의 종류
- affiliate_provider: 마케팅이 이루어지는 곳. 구글, 크레이그리스트, 기타
- first_affiliate_tracked: 가입하기 전에 사용자가 처음으로 상호작용한 마케팅은 무엇입니까?
- signup_app - 가입_앱
- first_device_type - 첫 번째_장치_유형
- first_browser - 첫 번째_브라우저
- country_destination: 예측할 대상 변수입니다.
### session.csv - 사용자에 대한 웹 세션 로그
- user_id: users 테이블의 'id' 컬럼과 조인됩니다.
- action - 행동 
- action_type
- action_detail - 액션_디테일
- device_type - 기기 종류
- secs_elapsed - 초_경과 
#### country.csv - 이 데이터세트의 목적지 국가와 해당 위치에 대한 요약 통계
#### age_gender_bkts.csv - 사용자의 연령대, 성별, 목적지 국가에 대한 요약 통계
#### Sample_submission.csv - 예측 제출을 위한 올바른 형식

In [4]:
## sessions
sessions = pd.read_csv('sessions.csv')
sessions.head()

Unnamed: 0,user_id,action,action_type,action_detail,device_type,secs_elapsed
0,d1mm9tcy42,lookup,,,Windows Desktop,319.0
1,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,67753.0
2,d1mm9tcy42,lookup,,,Windows Desktop,301.0
3,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,22141.0
4,d1mm9tcy42,lookup,,,Windows Desktop,435.0


In [5]:
sessions['user_flow'] = sessions['action'].astype('str') + '+' + sessions['action_type'].astype('str') + '+' + sessions['action_detail'].astype('str')

In [6]:
## train_users_2
train = pd.read_csv('train_users_2.csv')
train.rename(columns={'id':'user_id'},inplace=True)
train.head()

Unnamed: 0,user_id,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination
0,gxn3p5htnn,2010-06-28,20090319043255,,-unknown-,,facebook,0,en,direct,direct,untracked,Web,Mac Desktop,Chrome,NDF
1,820tgsjxq7,2011-05-25,20090523174809,,MALE,38.0,facebook,0,en,seo,google,untracked,Web,Mac Desktop,Chrome,NDF
2,4ft3gnwmtx,2010-09-28,20090609231247,2010-08-02,FEMALE,56.0,basic,3,en,direct,direct,untracked,Web,Windows Desktop,IE,US
3,bjjt8pjhuk,2011-12-05,20091031060129,2012-09-08,FEMALE,42.0,facebook,0,en,direct,direct,untracked,Web,Mac Desktop,Firefox,other
4,87mebub9p4,2010-09-14,20091208061105,2010-02-18,-unknown-,41.0,basic,0,en,direct,direct,untracked,Web,Mac Desktop,Chrome,US


In [7]:
train['timestamp_first_active'] = pd.to_datetime(train['timestamp_first_active'], format = '%Y%m%d%H%M%S').dt.strftime('%Y-%m-%d')
train['date_account_created'] = pd.to_datetime(train['date_account_created'])
train['date_first_booking'] = pd.to_datetime(train['date_first_booking'], errors='coerce')

In [8]:
train.isnull().sum()

user_id                         0
date_account_created            0
timestamp_first_active          0
date_first_booking         124543
gender                          0
age                         87990
signup_method                   0
signup_flow                     0
language                        0
affiliate_channel               0
affiliate_provider              0
first_affiliate_tracked      6065
signup_app                      0
first_device_type               0
first_browser                   0
country_destination             0
dtype: int64

### Train_Session 병합

In [9]:
df = pd.merge(sessions, train, on='user_id', how='inner')
df

Unnamed: 0,user_id,action,action_type,action_detail,device_type,secs_elapsed,user_flow,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination
0,d1mm9tcy42,lookup,,,Windows Desktop,319.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other
1,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,67753.0,search_results+click+view_search_results,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other
2,d1mm9tcy42,lookup,,,Windows Desktop,301.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other
3,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,22141.0,search_results+click+view_search_results,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other
4,d1mm9tcy42,lookup,,,Windows Desktop,435.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5537952,nw9fwlyb5f,index,data,reservations,iPhone,245.0,index+data+reservations,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF
5537953,nw9fwlyb5f,unavailabilities,data,unavailable_dates,iPhone,286.0,unavailabilities+data+unavailable_dates,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF
5537954,nw9fwlyb5f,notifications,submit,notifications,iPhone,830.0,notifications+submit+notifications,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF
5537955,nw9fwlyb5f,search,click,view_search_results,iPhone,101961.0,search+click+view_search_results,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF


# 예약 (1)  vs 비예약 (0) 구분

In [10]:
df['binary_target'] = df['country_destination'].replace({'NDF': 0}, inplace=False)
df.loc[df['binary_target'] != 0, 'binary_target'] = 1

# 가입하지 않고 예약한 자 확인
- 로그 기록된 인원은 예약을 먼저하고 가입한 인원이 존재하지 않는다. 따라서 세션 데이터 유저는 가입 후 행동이 기록된다. 

In [11]:
df['booking_creation_delay'] = df['date_first_booking'] - df['date_account_created']

In [12]:
df[df['booking_creation_delay'] < pd.Timedelta(0)]

Unnamed: 0,user_id,action,action_type,action_detail,device_type,secs_elapsed,user_flow,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination,binary_target,booking_creation_delay


## action 계층 분석 

In [13]:
df['user_flow']

0                                    lookup+nan+nan
1          search_results+click+view_search_results
2                                    lookup+nan+nan
3          search_results+click+view_search_results
4                                    lookup+nan+nan
                             ...                   
5537952                     index+data+reservations
5537953     unavailabilities+data+unavailable_dates
5537954          notifications+submit+notifications
5537955            search+click+view_search_results
5537956            search+click+view_search_results
Name: user_flow, Length: 5537957, dtype: object

In [22]:
df[ (df['action_type'].isnull()) & (df['action_detail'].isnull()) ]['action'].unique()


array(['lookup', 'show', 'track_page_view', 'widget', 'uptodate', 'index',
       'campaigns', 'currencies', 'localization_settings', 'update',
       'phone_verification', 'satisfy', 'track_activity', 'check',
       'signed_out_modal', 'disaster_action', 'similar_listings_v2'],
      dtype=object)

In [26]:
df

Unnamed: 0,user_id,action,action_type,action_detail,device_type,secs_elapsed,user_flow,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination,binary_target,booking_creation_delay
0,d1mm9tcy42,lookup,,,Windows Desktop,319.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days
1,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,67753.0,search_results+click+view_search_results,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days
2,d1mm9tcy42,lookup,,,Windows Desktop,301.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days
3,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,22141.0,search_results+click+view_search_results,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days
4,d1mm9tcy42,lookup,,,Windows Desktop,435.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5537952,nw9fwlyb5f,index,data,reservations,iPhone,245.0,index+data+reservations,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT
5537953,nw9fwlyb5f,unavailabilities,data,unavailable_dates,iPhone,286.0,unavailabilities+data+unavailable_dates,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT
5537954,nw9fwlyb5f,notifications,submit,notifications,iPhone,830.0,notifications+submit+notifications,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT
5537955,nw9fwlyb5f,search,click,view_search_results,iPhone,101961.0,search+click+view_search_results,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT


In [25]:
df.isnull().sum()

user_id                          0
action                       51532
action_type                 619860
action_detail               619860
device_type                      0
secs_elapsed                 73815
user_flow                        0
date_account_created             0
timestamp_first_active           0
date_first_booking         3057710
gender                           0
age                        2115437
signup_method                    0
signup_flow                      0
language                         0
affiliate_channel                0
affiliate_provider               0
first_affiliate_tracked       1341
signup_app                       0
first_device_type                0
first_browser                    0
country_destination              0
binary_target                    0
booking_creation_delay     3057710
dtype: int64

In [31]:
nan_ac = df[(df['action_type'].isnull())&(df['action_detail'].isnull())]['action'].unique()

In [32]:
un_ac = df[(df['action_type']=='-unknown-')&(df['action_detail']=='-unknown-')]['action'].unique()

In [34]:
len(set(nan_ac) ^ set(un_ac))

239

In [86]:
set(nan_ac) & set(un_ac)

{'campaigns', 'currencies', 'index', 'localization_settings', 'show', 'update'}

In [36]:
set(nan_ac) & set(un_ac)

{'campaigns', 'currencies', 'index', 'localization_settings', 'show', 'update'}

## session에 유저의 인덱스가 섞여 있는지 확인

In [37]:
def count_user_chages(df):
    count=0
    prev_user_id = None

    for user_id in df['user_id']:
        if prev_user_id is None:
            prev_user_id = user_id
        elif user_id != prev_user_id:
            count+=1
            prev_user_id = user_id
    if prev_user_id is not None:
        count +=1
    return count

result = count_user_chages(df)
print(result)

73815


In [64]:
len(df['user_id'].unique())

73815

## 유저별 action order 생성

In [None]:
df['action_order'] = df.groupby('user_id').cumcount() + 1

In [66]:
df

Unnamed: 0,user_id,action,action_type,action_detail,device_type,secs_elapsed,user_flow,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination,binary_target,booking_creation_delay,action_order
0,d1mm9tcy42,lookup,,,Windows Desktop,319.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,1
1,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,67753.0,search_results+click+view_search_results,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,2
2,d1mm9tcy42,lookup,,,Windows Desktop,301.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,3
3,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,22141.0,search_results+click+view_search_results,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,4
4,d1mm9tcy42,lookup,,,Windows Desktop,435.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5537952,nw9fwlyb5f,index,data,reservations,iPhone,245.0,index+data+reservations,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,37
5537953,nw9fwlyb5f,unavailabilities,data,unavailable_dates,iPhone,286.0,unavailabilities+data+unavailable_dates,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,38
5537954,nw9fwlyb5f,notifications,submit,notifications,iPhone,830.0,notifications+submit+notifications,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,39
5537955,nw9fwlyb5f,search,click,view_search_results,iPhone,101961.0,search+click+view_search_results,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,40


In [77]:
df[df['user_flow'].str.contains('nan')]

Unnamed: 0,user_id,action,action_type,action_detail,device_type,secs_elapsed,user_flow,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination,binary_target,booking_creation_delay,action_order
0,d1mm9tcy42,lookup,,,Windows Desktop,319.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,1
2,d1mm9tcy42,lookup,,,Windows Desktop,301.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,3
4,d1mm9tcy42,lookup,,,Windows Desktop,435.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,5
6,d1mm9tcy42,lookup,,,Windows Desktop,115.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,7
9,d1mm9tcy42,lookup,,,Windows Desktop,683.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5537921,nw9fwlyb5f,campaigns,,,iPhone,59032.0,campaigns+nan+nan,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,6
5537924,nw9fwlyb5f,campaigns,,,iPhone,10.0,campaigns+nan+nan,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,9
5537928,nw9fwlyb5f,campaigns,,,iPhone,22.0,campaigns+nan+nan,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,13
5537930,nw9fwlyb5f,campaigns,,,iPhone,33.0,campaigns+nan+nan,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,15


In [79]:
nan_index = df[df['user_flow'].str.contains('nan')].index

# 선택된 인덱스 주변의 행을 포함하는 새로운 데이터프레임 생성
start_index = max(0, nan_index.min() - 1)
end_index = min(len(df), nan_index.max() + 2)
selected_df = df.iloc[start_index:end_index]

selected_df


Unnamed: 0,user_id,action,action_type,action_detail,device_type,secs_elapsed,user_flow,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination,binary_target,booking_creation_delay,action_order
0,d1mm9tcy42,lookup,,,Windows Desktop,319.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,1
1,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,67753.0,search_results+click+view_search_results,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,2
2,d1mm9tcy42,lookup,,,Windows Desktop,301.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,3
3,d1mm9tcy42,search_results,click,view_search_results,Windows Desktop,22141.0,search_results+click+view_search_results,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,4
4,d1mm9tcy42,lookup,,,Windows Desktop,435.0,lookup+nan+nan,2014-01-01,2014-01-01,2014-01-04,MALE,62.0,basic,0,en,sem-non-brand,google,omg,Web,Windows Desktop,Chrome,other,1,3 days,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5537941,nw9fwlyb5f,active,-unknown-,-unknown-,iPhone,159.0,active+-unknown-+-unknown-,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,26
5537942,nw9fwlyb5f,create,submit,signup,iPhone,,create+submit+signup,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,27
5537943,nw9fwlyb5f,active,-unknown-,-unknown-,iPhone,681.0,active+-unknown-+-unknown-,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,28
5537944,nw9fwlyb5f,campaigns,,,iPhone,100.0,campaigns+nan+nan,2014-06-30,2014-06-30,NaT,-unknown-,,basic,25,en,direct,direct,untracked,iOS,iPhone,-unknown-,NDF,0,NaT,29


In [88]:
# 'binary_target' = 1

max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
result = df.loc[max_wait_time_index]


action_counts = result['user_flow'].value_counts()

print(action_counts.index)
print(action_counts.values)

  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_wait_time_index = df.groupby('user_id')['secs_elapsed'].idxmax()
  max_

KeyboardInterrupt: 

In [92]:
df.groupby('action_type')['action_detail'].list()


AttributeError: 'SeriesGroupBy' object has no attribute 'list'

In [109]:
x = df['action_detail'].unique().tolist()
for i in x[40:60]:
    print(i)

at_checkpoint
manage_listing
create_listing
your_listings
profile_references
list_your_space
popular_wishlists
listing_reviews_page
apply_coupon
user_tax_forms
account_payout_preferences
guest_itinerary
guest_receipt
account_privacy_settings
lookup_message_thread
friends_wishlists
host_guarantee
delete_phone_numbers
account_transaction_history
set_password


In [125]:
df[df['action_type']=='click']['user_flow'].unique()

len(df[df['action_type']=='click']['user_flow'].unique())


35

In [119]:
df['action_type'].unique()


array([nan, 'click', 'data', 'view', 'submit', 'message_post',
       '-unknown-', 'booking_request', 'partner_callback',
       'booking_response'], dtype=object)

In [136]:
max_length = max(len(str(i)) for i in df['action_type'].unique())

for i in df['action_type'].unique():
    print('action type:',str(i).ljust(max_length),' ','action combination count =',len(df[df['action_type']==i]['user_flow'].unique()))


    

action type: nan                action combination count = 0
action type: click              action combination count = 35
action type: data               action combination count = 19
action type: view               action combination count = 52
action type: submit             action combination count = 30
action type: message_post       action combination count = 10
action type: -unknown-          action combination count = 234
action type: booking_request    action combination count = 2
action type: partner_callback   action combination count = 1
action type: booking_response   action combination count = 1
