In [98]:
import pandas as pd
from datetime import datetime
from collections import namedtuple
from util import cfg, load_file, read_csv, gmfname

import time
import numpy as np
import pickle

In [99]:
# Second version: preprocess each month separately, then combine the results.
class SeqPreProcess():
    """Month-by-month preprocessing of app-log, member and goal-mission data.

    Parameters
    ----------
    start : str
        First month, in ``YYYYMM`` form.
    end : str, optional
        Upper bound, in ``YYYYMM`` form. When given, ``date_ranges`` holds one
        month-end timestamp per month from ``start`` onwards. ``end`` is parsed
        as the first day of its month, so the ``end`` month itself is NOT part
        of the range (``'202001'``..``'202103'`` yields Jan 2020 - Feb 2021).
        When omitted, only the ``start`` month is processed.
    """

    def __init__(self, start, end=None):

        startmth = pd.to_datetime(start, format='%Y%m')
        self.date_ranges = None
        if end is not None:
            endmth = pd.to_datetime(end, format='%Y%m')
            # A MonthEnd offset object is what the 'M' alias stands for; passing
            # the object avoids the alias, which newer pandas deprecates.
            self.date_ranges = pd.date_range(
                start=startmth, end=endmth, freq=pd.offsets.MonthEnd()
            )
        else:
            self.date_ranges = [startmth]

    def preprocess(self):
        """Preprocess every month in ``date_ranges`` and return the combined frame.

        Returns
        -------
        pandas.DataFrame
            The single month's frame when one month was processed, the
            row-wise concatenation of all monthly frames otherwise (columns
            missing in a month are filled with NaN by ``pd.concat``), or an
            empty frame when ``date_ranges`` is empty.
        """
        # Load the page-code table and the labels
        self.pcd, self.code2name = prep_pagecd(read_csv(cfg.pgcd))
        self.lbl = read_csv(cfg.label)

        # Keep every month's result: previously each iteration overwrote the
        # last one, so all months but the final one were computed and discarded.
        monthly_frames = []
        for date in self.date_ranges:
            print(f"### {date} 데이터 전처리 시작")
            monthly_frames.append(self._preprocess_per_mth(date))
            print(f'### {date} 데이터 전처리 완료')
            print()

        if not monthly_frames:
            # An empty range used to raise UnboundLocalError on the return.
            return pd.DataFrame()
        if len(monthly_frames) == 1:
            return monthly_frames[0]
        return pd.concat(monthly_frames, axis=0, ignore_index=True, sort=False)

    def load_dataset(self, date):
        """Return whatever ``load_file`` yields for the year/month of ``date``."""
        return load_file(date.year, date.month)

    def _preprocess_per_mth(self, date):
        """Preprocess and merge the three data sources for the month of ``date``."""
        appdf, mbrdf, gmdf = self.load_dataset(date)
        # goal_mission additionally needs the previous month's data
        prev_date = date - pd.DateOffset(months=1)
        gmdf_prev = read_csv(gmfname(prev_date.year, prev_date.month))

        print('label 전처리 중')
        lblmth = prep_lbl_per_mth(self.lbl, date)
        print('label 전처리 완료')
        print()

        print('applog 전처리 중')
        appdf = prep_applog_per_mth(appdf, self.pcd, lblmth)
        print('applog 전처리 완료')
        print()

        print('member 전처리 중')
        mbrdf = prep_mbrlog_per_mth(mbrdf)
        print('member 전처리 완료')
        print()

        print('goal_mission 전처리 중')
        gmdf = pd.concat([gmdf_prev, gmdf], axis=0)
        gmdf = gmdf.sort_values(['party_id','p_event_apl_dte'])
        gmdf = prep_gmlog_per_mth(gmdf)
        print('goal_mission 전처리 완료')
        print()

        print('세 데이터 합치는 중')
        merged_df = merge_app_and_mbr(appdf, mbrdf)
        merged_df = merge_app_and_gm(merged_df, gmdf)

        return merged_df
        
class SeqDataSet():

    """Create a sequence dataset after preprocessing.

    Only the configuration is stored so far; ``create_seq_dataset`` is a stub.
    """

    def __init__(self, year, month, maxlen=4):
        # `self` was missing from the signature, so instantiation bound the
        # instance to `year` and then failed on the undefined name `self`.
        self.year = year
        self.month = month
        self.maxlen = maxlen

    def create_seq_dataset(self):
        """Placeholder: not implemented yet, returns None."""
        return
    
    
def basic_prep_applog_per_mth(df, pcd, lblmth):
    """Clean one month of raw app-log rows and attach menu categories and labels.

    Steps: drop rows containing NaN, parse ``vst_dtm``, promote ``new_sesn_id`` /
    ``new_sty_tms`` to ``sesn_id`` / ``sty_tms``, drop rows dated 1970 and rows
    whose session id is ``'#'``, add ``month`` / ``dt`` period columns, sort, then
    merge with the page-code table (left join) and the month's labels (inner join).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw app-log rows; must contain ``vst_dtm``, ``new_sesn_id``,
        ``new_sty_tms``, ``party_id`` and ``page_cd``.
    pcd : pandas.DataFrame
        Page-code table passed to ``merge_app_and_pcd``.
    lblmth : pandas.DataFrame
        Labels of the month passed to ``merge_app_and_lbl``.

    Returns
    -------
    pandas.DataFrame
        The cleaned, merged app-log rows.
    """
    # Drop rows with missing values
    print('Orig. data len:', len(df))
    df = df.dropna()
    print('Aft. drop-nan:', len(df))

    # Parse the visit timestamp from the raw column. The earlier code also built
    # a copy with the last three characters stripped, but that value was
    # overwritten before use; the dead computation is removed and the parse
    # result is unchanged.
    df['vst_dtm'] = pd.to_datetime(df['vst_dtm'], format='%Y-%m-%d %H:%M:%S')

    # Replace sesn_id / sty_tms with their recomputed "new_" counterparts
    df['sty_tms'] = df['new_sty_tms']
    df['sesn_id'] = df['new_sesn_id']
    df = df.drop(columns=['new_sesn_id','new_sty_tms'])

    # Exclude rows dated 1970
    df = df[df['vst_dtm'].dt.year != 1970]
    df = df.reset_index(drop=True)

    # Remove rows whose session id is the '#' placeholder
    df = df.loc[df['sesn_id'] != '#'].copy()

    # Add 'month' and 'dt' (day) period columns
    df['month'] = df['vst_dtm'].dt.to_period('M')
    df['dt'] = df['vst_dtm'].dt.to_period('D')

    # Sort
    df = df.sort_values(['party_id', 'vst_dtm', 'sesn_id'])

    df = merge_app_and_pcd(df, pcd)
    df = merge_app_and_lbl(df, lblmth)

    return df

    
def prep_applog_per_mth(appdf, pcd, lblmth):
    """Aggregate one month of app-log rows into one feature row per session.

    After ``basic_prep_applog_per_mth`` the function removes sessions that have
    no menu category at all, then builds, per ``(party_id, sesn_id)``:
    the session date and the gap to the member's previous session (``diff_dt``),
    the number of pages, visit counts per ``menu_nm_1`` / ``menu_nm_2``,
    mean ``sty_tms`` per ``menu_nm_1`` / ``menu_nm_2`` and a one-hot flag for the
    ``menu_nm_1`` of the session's last row. All pieces are merged on
    ``party_id`` and ``sesn_id``.

    Parameters
    ----------
    appdf : pandas.DataFrame
        Raw app-log rows of the month.
    pcd : pandas.DataFrame
        Page-code table (forwarded to ``basic_prep_applog_per_mth``).
    lblmth : pandas.DataFrame
        Labels of the month (forwarded to ``basic_prep_applog_per_mth``).

    Returns
    -------
    pandas.DataFrame
        One row per session.

    Raises
    ------
    AssertionError
        If the per-session pieces do not all have the same number of rows.
    """
    appdf = basic_prep_applog_per_mth(appdf, pcd, lblmth)

    print('Before appdf len', len(appdf))
    # 1. Remove sessions that consist only of rows with NaN menu_nm_1, then
    #    those with only NaN menu_nm_2. Sessions are identified by the
    #    (party_id, sesn_id) PAIR; the previous check tested the two ids
    #    independently and could drop a session whose id merely coincided with
    #    a removed session of a different member.
    appdf = _drop_sessions_without_category(appdf, 'menu_nm_1')
    print('after removing nan in category1', len(appdf))

    appdf = _drop_sessions_without_category(appdf, 'menu_nm_2')
    print('after removing nan in category2', len(appdf))

    # Gap in visit date between consecutive sessions of the same member
    seqdf = appdf[['party_id','page_cd','sesn_id','dt']].groupby(['party_id','sesn_id']).last()['dt']
    seqdf = seqdf.reset_index()
    seqdf = seqdf.sort_values(['party_id','dt'])
    seqdf = seqdf.reset_index(drop=True)

    # Per-member difference to the previous session; the first session of each
    # member gets 0 days. groupby.diff is index-aligned, unlike groupby.apply,
    # which returns a wide frame when every member has the same session count.
    visit_ts = seqdf['dt'].dt.to_timestamp()
    seqdf['diff_dt'] = visit_ts.groupby(seqdf['party_id']).diff().fillna(pd.Timedelta(0))

    # Number of pages per session
    pglen_perse = appdf.groupby(['party_id','sesn_id']).count().reset_index()[['party_id','sesn_id','page_cd']]

    # Visit counts per category
    uv_per_d1 = appdf.groupby(['party_id','sesn_id','menu_nm_1']).count().reset_index()[['party_id','sesn_id','menu_nm_1','page_cd']]
    uv_per_d2 = appdf.groupby(['party_id','sesn_id','menu_nm_2']).count().reset_index()[['party_id','sesn_id','menu_nm_2','page_cd']]

    uv_per_d1 = uv_per_d1.pivot(index=['party_id','sesn_id'], columns='menu_nm_1', values='page_cd')
    uv_per_d1 = uv_per_d1.fillna(0).reset_index()

    uv_per_d2 = uv_per_d2.pivot(index=['party_id','sesn_id'], columns='menu_nm_2', values='page_cd')
    uv_per_d2 = uv_per_d2.fillna(0).reset_index()

    # Mean stay time per category
    stydepth1 = appdf[['party_id','sesn_id','menu_nm_1','sty_tms']].groupby(['party_id','sesn_id','menu_nm_1']).mean()
    stydepth1 = stydepth1.reset_index()
    stydepth1 = stydepth1.pivot(index=['party_id','sesn_id'], columns=['menu_nm_1'], values=['sty_tms'])
    stydepth1 = stydepth1.fillna(0).reset_index()

    stydepth2 = appdf[['party_id','sesn_id','menu_nm_2','sty_tms']].groupby(['party_id','sesn_id','menu_nm_2']).mean()
    stydepth2 = stydepth2.reset_index()
    stydepth2 = stydepth2.pivot(index=['party_id','sesn_id'], columns=['menu_nm_2'],values=['sty_tms'])
    stydepth2 = stydepth2.fillna(0).reset_index()

    # Exit-category flag: category (menu_nm_1) of the last row of each session
    endmenu = appdf[['party_id','page_cd','sesn_id','menu_nm_1']].groupby(['party_id','sesn_id']).last().reset_index()
    endmenu['value'] = 1
    endmenu = endmenu.pivot(index=['party_id','sesn_id'], columns=['menu_nm_1'], values=['value'])
    endmenu = endmenu.fillna(0).reset_index()

    assert len(seqdf) == len(pglen_perse) == len(uv_per_d1) ==len(uv_per_d2) == len(stydepth1) == len(stydepth2) == len(endmenu), 'All of them should have same length'

    # Merge all per-session pieces. NOTE(review): the list-valued pivots above
    # produce two-level columns, and the resulting column names after this merge
    # (suffixes, tuple names) depend on the pandas version - left untouched on
    # purpose because downstream cells rely on those exact names.
    cand_df = [pglen_perse, uv_per_d1, uv_per_d2, stydepth1, stydepth2, endmenu]
    for cand in cand_df:
        beflen = len(seqdf)
        seqdf = pd.merge(seqdf, cand, on=['party_id','sesn_id'])
        assert beflen == len(seqdf), 'they should have same length'

    return seqdf


def _drop_sessions_without_category(appdf, menu_col):
    """Drop every (party_id, sesn_id) session whose rows are all NaN in ``menu_col``."""
    has_category = (
        appdf[menu_col].notna()
        .groupby([appdf['party_id'], appdf['sesn_id']])
        .transform('any')
    )
    return appdf.loc[has_category]
    

def prep_lbl_per_mth(lbl, date):
    """Return the label rows belonging to the year/month of ``date``.

    Parameters
    ----------
    lbl : pandas.DataFrame
        Full label table with columns ``'Unnamed: 0'``, ``'PartyId'`` and
        ``'month'`` (``YYYY-MM``) plus the label columns. It is not modified.
    date : datetime-like
        Any object exposing ``.year`` and ``.month``.

    Returns
    -------
    pandas.DataFrame
        Rows of that month without ``'Unnamed: 0'``, ``'PartyId'`` and
        ``'month'``; the member id is exposed as ``party_id`` (last column).
    """
    # assign() builds a new frame, so the caller's table no longer gains a
    # 'party_id' column as a side effect; the new column still lands last.
    lbl = lbl.assign(party_id=lbl['PartyId']).drop(columns=['Unnamed: 0', 'PartyId'])
    lbl['month'] = pd.to_datetime(lbl['month'], format='%Y-%m')
    in_month = (lbl['month'].dt.year == date.year) & (lbl['month'].dt.month == date.month)
    return lbl.loc[in_month].drop(columns=['month'])

def prep_mbrlog_per_mth(mbrdf):
    """Clean one month of member snapshots and derive member-level features.

    Derived columns: ``count_vtlt_age_dt`` (running number of distinct
    ``vtlt_age_eff_dt`` values seen per member, ignoring 99991231),
    ``diff_age`` (``vtlt_age - age``), ``achv_rat`` (achieved / allocated goal
    count) and ``active_dur`` (``dt`` minus signup date). The membership grade
    is mapped to 1-4. The input frame is not modified.

    Fixes relative to the earlier implementation:

    * the caller's ``dt`` column is no longer converted in place;
    * ``count_vtlt_age_dt`` is index-aligned instead of relying on the group
      order matching the row order;
    * members with 99991231 signup rows are repaired whenever at least ONE real
      signup date exists (previously two distinct real dates were required, and
      the repaired value could be written to the wrong member or wrong rows);
      members with no real signup date at all are dropped, as before.

    Parameters
    ----------
    mbrdf : pandas.DataFrame
        Raw member rows; ``dt`` in ``YYYYMMDD`` form.

    Returns
    -------
    pandas.DataFrame
        Cleaned rows sorted by ``party_id`` and ``dt``.

    Raises
    ------
    KeyError
        If ``cur_mbrsh_rwrd_st_cd`` holds a grade other than Bronze, Silver,
        Gold, Platinum or ``'#'``.
    """
    # 99991231 is treated as "no date" throughout this function.
    no_date = 99991231

    # Convert dt to datetime on a new frame (the caller's frame stays raw)
    mbrdf = mbrdf.assign(dt=pd.to_datetime(mbrdf['dt'], format='%Y%m%d'))

    # Sort by dt within each party_id
    mbrdf = mbrdf.sort_values(['party_id', 'dt'])

    # Drop unused columns
    mbrdf = mbrdf.drop(columns=cfg.unused_mbrcol)

    # Drop rows with missing values
    print('Orig. data len:', len(mbrdf))
    mbrdf = mbrdf.dropna()
    print('Aft. drop-nan:', len(mbrdf), '\n')

    # party_id -> int
    mbrdf['party_id'] = mbrdf['party_id'].astype('int32')

    # Running count of vitality-age measurements: +1 from the first row on
    # which each distinct (non-placeholder) effective date appears.
    has_eff_dt = mbrdf['vtlt_age_eff_dt'] != no_date
    first_seen = ~mbrdf.duplicated(['party_id', 'vtlt_age_eff_dt'])
    mbrdf['count_vtlt_age_dt'] = (
        (has_eff_dt & first_seen).astype('float32')
        .groupby(mbrdf['party_id']).cumsum()
        .astype('float32')
    )

    # Vitality age vs. real age
    mbrdf = mbrdf.reset_index(drop=True)
    not_enough = mbrdf['vtlt_age'] == 'NOT_ENOUGH_DATA'
    if not_enough.any():
        mbrdf.loc[not_enough, 'vtlt_age'] = '0'
    mbrdf['vtlt_age'] = mbrdf['vtlt_age'].astype('int32')
    mbrdf['diff_age'] = mbrdf['vtlt_age'] - mbrdf['age']

    # Weekly mission achievement rate
    mbrdf['achv_rat'] = mbrdf['cur_mbrsh_pd_goal_achv_cnt'] / mbrdf['cur_mbrsh_pd_goal_alct_cnt']

    # Days since signup: repair members that have placeholder signup dates
    scrb_dt = mbrdf['mbr_scrb_dt']
    is_placeholder = scrb_dt == no_date
    if is_placeholder.any():
        by_party = mbrdf['party_id']
        has_placeholder = is_placeholder.groupby(by_party).transform('any')
        # Smallest real signup date of each member (NaN when there is none)
        earliest_real = scrb_dt.mask(is_placeholder).groupby(by_party).transform('min')
        unrecoverable = has_placeholder & earliest_real.isna()
        fixable = has_placeholder & ~unrecoverable
        # As before, every row of an affected member receives that single date.
        mbrdf.loc[fixable, 'mbr_scrb_dt'] = earliest_real[fixable].astype(scrb_dt.dtype)
        mbrdf = mbrdf.loc[~unrecoverable].copy()

    mbrdf['mbr_scrb_dt'] = pd.to_datetime(mbrdf['mbr_scrb_dt'], format='%Y%m%d')
    mbrdf['active_dur'] = mbrdf['dt'] - mbrdf['mbr_scrb_dt']

    # Membership grade -> 1, 2, 3, 4 ('#' is mapped like Bronze)
    mbrsh_dic = {'Bronze': 1, 'Silver': 2, 'Gold': 3, 'Platinum': 4, '#':1}
    mbrdf['cur_mbrsh_rwrd_st_cd'] = mbrdf['cur_mbrsh_rwrd_st_cd'].map(lambda grade: mbrsh_dic[grade])

    # Drop helper columns that are no longer needed
    mbrdf = mbrdf.drop(columns=['vtlt_age_eff_dt', 'mbr_scrb_dt', 'cur_mbrsh_pd_goal_alct_cnt','cur_mbrsh_pd_goal_achv_cnt'])

    return mbrdf

def prep_gmlog_per_mth(gmdf):
    """Clean goal-mission point rows.

    Keeps the four point-related columns, treats ``'#'`` as missing and drops
    incomplete rows, casts ``party_id`` to int32, parses both ``YYYYMMDD`` date
    columns, sorts by member and event date, and keeps the rows where
    ``p_event_apl_dte - points_effective_dte`` is at most 10 days.
    """
    wanted = ['party_id', 'p_event_apl_dte','points_value','points_effective_dte']
    points = gmdf[wanted].replace('#', np.nan)

    print('Orig. data len:', len(points))
    points = points.dropna()
    print('Aft. drop-nan:', len(points))

    # party_id -> int
    points['party_id'] = points['party_id'].astype('int32')

    # Both date columns -> datetime
    for date_col in ('p_event_apl_dte', 'points_effective_dte'):
        points[date_col] = pd.to_datetime(points[date_col], format='%Y%m%d')

    points = points.sort_values(['party_id', 'p_event_apl_dte'])

    # Keep rows with (p_event_apl_dte - points_effective_dte) <= 10 days
    lag_days = (points['p_event_apl_dte'] - points['points_effective_dte']).dt.days
    return points.loc[lag_days <= 10]

def prep_pagecd(pcd):
    """Tidy the page-code table and build a page-code -> display-name lookup.

    Returns the table with a fresh index and without the ``No`` column, plus a
    dict mapping each ``page_cd`` to its ``page_nm`` (or to the code itself
    when the name is missing).
    """
    table = pcd.reset_index(drop=True).drop(columns=['No'])

    codes = table['page_cd'].values
    names = table['page_nm'].values
    code2name = {
        code: (code if pd.isnull(name) else name)
        for code, name in zip(codes, names)
    }
    return table, code2name

def merge_app_and_pcd(df, pcd):
    """Left-join the two menu category columns of ``pcd`` onto ``df`` by ``page_cd``."""
    menu_lookup = pcd[['page_cd','menu_nm_1','menu_nm_2']]
    return df.merge(menu_lookup, how='left', on=['page_cd'], sort=False)

def merge_app_and_lbl(df, lbl):
    """Inner-join ``lbl`` onto ``df`` by ``party_id`` (rows without a label are dropped)."""
    return df.merge(lbl, how='inner', on=['party_id'], sort=False)

def merge_app_and_mbr(seqdf, mbrdf):
    """Inner-join session rows with member rows on ``party_id`` and ``dt``.

    ``seqdf['dt']`` holds periods and is converted to timestamps for the join.
    The conversion is done on a new frame: the earlier in-place conversion
    changed the caller's column and made a second call on the same frame fail.
    """
    seq_ts = seqdf.assign(dt=seqdf['dt'].dt.to_timestamp())
    return pd.merge(seq_ts, mbrdf, on=['party_id','dt'], how='inner')

def merge_app_and_gm(seqdf, gmdf):
    """Attach the points earned per member and day to the session rows.

    ``points_value`` is summed per ``(party_id, p_event_apl_dte)`` and
    left-joined onto ``seqdf`` by ``party_id`` and ``dt``; missing
    ``achv_rat`` / ``points_value`` entries are then filled with 0.
    """
    # assign() works on a new frame: the earlier code wrote into a column slice
    # of gmdf, which triggers pandas' SettingWithCopy warning.
    pointsdf = gmdf[['party_id','p_event_apl_dte','points_value']].assign(
        points_value=lambda frame: frame['points_value'].astype('float32')
    )
    pointsdf = pointsdf.groupby(['party_id','p_event_apl_dte']).sum().reset_index()
    # The event date becomes the join key 'dt'
    pointsdf = pointsdf.rename(columns={'p_event_apl_dte': 'dt'})

    mergeddf = pd.merge(seqdf, pointsdf, on=['party_id','dt'], how='left')
    mergeddf[['achv_rat','points_value']] = mergeddf[['achv_rat','points_value']].fillna(value=0)
    return mergeddf

In [100]:
from util import *
from IPython.display import display, clear_output

In [4]:
# Month to inspect (YYYYMM) and the path of its per-month sequence CSV.
# NOTE: `date` is reused as a loop variable by a later cell, which overwrites this string.
date = '202002'
seqname = f'sunhwa/seq_{date}.csv'

In [5]:
# Month-end stamps from 2020-01 onwards. `end` is the first day of 2021-03, so
# the last stamp produced is 2021-02-28 (the end month itself is excluded).
start = pd.to_datetime('202001', format='%Y%m')
end = pd.to_datetime('202103', format='%Y%m')
# MonthEnd() is the offset behind the 'M' alias; the alias is deprecated in newer pandas.
dateranges = pd.date_range(start=start, end=end, freq=pd.offsets.MonthEnd())
dateranges

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31',
               '2021-01-31', '2021-02-28'],
              dtype='datetime64[ns]', freq='M')

- 1월 -> 148
- 2월 -> 148
- 3월 -> 150
- 4월 -> 150
- 5월 -> 158
- 6월 -> 158
- 7월 -> 162
- 8월 -> 157
- 9월 -> 165
- 10월 -> 170
- 11월 -> 186
- 12월 -> 
- 1월 ->
- 2월 -> 191

In [70]:
# Number of distinct non-null values in each column of the page-code table.
# NOTE(review): relies on `pcd` already existing in the kernel; the cell that loads it appears further down.
pcd.nunique()

menu_nm_1      23
menu_nm_2      59
page_nm      1291
page_cd      2242
No           2242
dtype: int64

In [7]:
# Load two saved per-month sequence tables (files named for 2020-12 and 2020-01) for comparison.
dec = read_csv('sunhwa/seq_202012.csv')
jan = read_csv('sunhwa/seq_202001.csv')

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):


In [14]:
# Compare the first 50 column names of the two months (January first, then December).
for frame in (jan, dec):
    print(frame.columns[:50])

Index(['Unnamed: 0', 'party_id', 'sesn_id', 'dt', 'diff_dt', 'page_cd',
       '가입/로그인/비밀번호', '건강', '건강걷기_x', '건강도전_x', '공통', '꿀팁', '내바이탈리티_x',
       '등급리워드_x', '디바이스연동_x', '마음챙김_x', '바이탈리티', '바이탈리티 나이_x', '보험', '설정',
       '이벤트_x', '주간 리워드_x', '주간미션_x', '헬시푸드_x', '활동', 'FAQ', '개인설정', '건강걷기_y',
       '건강도전_y', '건강메인', '공지사항', '공지팝업', '금연선언', '기초건강검진', '꿀팁 메인', '꿀팁-컨텐츠',
       '내바이탈리티_y', '다이렉트보험', '등급리워드_y', '디바이스연동_y', '로그인', '마음건강진단', '마음챙김_y',
       '멀티팝업', '바이탈리티 나이_y', '바이탈리티 메인', '비밀번호재설정', '서비스가이드', '시작', '약관'],
      dtype='object')
Index(['Unnamed: 0', 'party_id', 'sesn_id', 'dt', 'diff_dt', 'page_cd',
       '가입/로그인/비밀번호', '건강', '건강걷기_x', '건강도전_x', '공통', '꿀팁', '내바이탈리티_x',
       '등급리워드_x', '디바이스연동_x', '마음챙김_x', '바이탈리티', '바이탈리티 나이_x', '바이탈리티 할인_x',
       '보험', '사이트맵_x', '설정', '이벤트_x', '주간 리워드_x', '주간미션_x', '헬시푸드_x', '활동',
       'FAQ', '개인설정', '건강-배너', '건강걷기_y', '건강도전_y', '건강메인', '건강증진형보험', '공지사항',
       '공지팝업', '금연선언', '기초건강검진', '꿀팁 메인', '꿀팁-상단배너', '꿀팁-영양', '꿀팁-예방', '꿀

In [9]:
# Full column index of the January table (its length shows the feature count for that month).
jan.columns

Index(['Unnamed: 0', 'party_id', 'sesn_id', 'dt', 'diff_dt', 'page_cd',
       '가입/로그인/비밀번호', '건강', '건강걷기_x', '건강도전_x',
       ...
       'age', 'vtlt_age', 'cur_mbrsh_rwrd_st_cd', 'cur_mbrsh_pd_acqr_pt',
       'push_alarm_yn', 'count_vtlt_age_dt', 'diff_age', 'achv_rat',
       'active_dur', 'points_value'],
      dtype='object', length=148)

In [17]:
# Load the raw page-code table from the path configured in cfg.pgcd.
pcd = read_csv(cfg.pgcd)

In [118]:
# Keep only page codes whose depth-1 and depth-2 menu names are both different from '위젯'.
not_widget = (pcd['menu_nm_1'] != '위젯') & (pcd['menu_nm_2'] != '위젯')
pcd = pcd.loc[not_widget]

In [119]:
# Unique depth-1 / depth-2 category names without the NaN entry.
# dropna() removes NaN wherever it occurs; slicing with [1:] only worked
# while NaN happened to be the first unique value.
cat1 = pcd['menu_nm_1'].dropna().unique()
cat2 = pcd['menu_nm_2'].dropna().unique()

In [123]:
# Number of depth-1 and depth-2 categories.
print(len(cat1), len(cat2))

22 58


In [185]:
def find_sty_ind(columns):
    """Return the position of the first column whose name contains 'sty_tms', or None."""
    return next((pos for pos, name in enumerate(columns) if 'sty_tms' in name), None)

def find_end_ind(columns):
    """Return the position of the first column whose name contains 'value', or None."""
    return next((pos for pos, name in enumerate(columns) if 'value' in name), None)
        
def appcol_names_pg(appcolnms):
    """Append the expected visit-count column names to ``appcolnms`` and return it.

    Depth-1 categories (``cat1``) come first, then depth-2 (``cat2``). A name
    present in ``samecat`` gets the suffix ``_x`` (depth 1) or ``_y`` (depth 2).
    """
    appcolnms.extend(name + '_x' if name in samecat else name for name in cat1)
    appcolnms.extend(name + '_y' if name in samecat else name for name in cat2)
    return appcolnms

def appcol_names_sty(appcolnms):
    """Append the expected stay-time column names to ``appcolnms`` and return it.

    Depth-1 names look like ``('sty_tms', '<cat>')`` with an ``_x`` suffix when
    the category is in ``samecat``; every depth-2 name looks like
    ``('sty_tms_y', '<cat>')``.
    """
    for cat in cat1:
        appcolnms.append(
            f"('sty_tms', '{cat}')_x" if cat in samecat else f"('sty_tms', '{cat}')"
        )
    appcolnms.extend(f"('sty_tms_y', '{cat}')" for cat in cat2)
    return appcolnms

def appcol_names_end(appcolnms):
    """Append one ``('value', '<cat>')`` end-page column name per depth-1 category and return the list."""
    appcolnms += [f"('value', '{cat}')" for cat in cat1]
    return appcolnms

In [130]:
# Category names that occur in both the depth-1 (cat1) and depth-2 (cat2) lists
samecat = [cat for cat in cat2 if cat in cat1]
print(samecat)

['이벤트', '마음챙김', '디바이스연동', '갤럭시프로그램', '사이트맵', '바이탈리티 할인', '건강도전', '바이탈리티 나이', '내바이탈리티', '등급리워드', '헬시푸드', '주간 리워드', '주간미션', '건강걷기']


- 페이지방문횟수 -> 카테고리 1 : _x, 카테고리 2 : _y, 
- 체류시간 -> 카테고리 1 : ('sty_tms', '건강걷기')_x, 카테고리 2 : ('sty_tms', '건강걷기')_y
- 종료페이지 -> 카테고리 1. ('value', '가입/로그인/비밀번호')

In [197]:
# Every expected app-log feature column, in order: visit counts, stay times, end-page flags.
appcolnms = np.asarray(
    appcol_names_end(appcol_names_sty(appcol_names_pg([])))
)

In [198]:
# One flag per expected column; True means "not found in the table yet".
# The builtin `bool` replaces the `np.bool` alias, which NumPy 1.24 removed.
masks = np.ones(len(appcolnms), dtype=bool)

In [214]:
# Column positions that delimit the feature groups inside `dec`:
# 'page_cd', the first stay-time column, the first end-page column and 'gender_cd'.
pcd_ind = np.where(dec.columns == 'page_cd')[0][0]
sty_ind = find_sty_ind(dec.columns)
end_ind = find_end_ind(dec.columns)
gen_ind = np.where(dec.columns == 'gender_cd')[0][0]

In [215]:
# Slice the column index into the three feature groups used for masking:
# visit counts (after page_cd), stay times, and end-page flags (up to gender_cd).
pgcols = dec.columns[pcd_ind+1:sty_ind]
stycols = dec.columns[sty_ind:end_ind]
endcols = dec.columns[end_ind:gen_ind]

In [216]:
# Clear the flag of every expected column that is present in one of the three groups.
for present_cols in (pgcols, stycols, endcols):
    inds = np.where(np.isin(appcolnms, present_cols))
    masks[inds] = False

In [217]:
# Expected feature columns that are absent from `dec` (flag still True).
cols_notexist = appcolnms[masks]
cols_notexist

array(['갤럭시프로그램_x', '갤럭시프로그램_y', '팝업', '디지털다이렉트', '보험-상단배너', '보험-헤더메시지',
       "('sty_tms', '갤럭시프로그램')_x", "('sty_tms_y', '갤럭시프로그램')",
       "('sty_tms_y', '팝업')", "('sty_tms_y', '디지털다이렉트')",
       "('sty_tms_y', '보험-상단배너')", "('sty_tms_y', '보험-헤더메시지')",
       "('value', '갤럭시프로그램')"], dtype='<U28')

In [219]:
# Add the absent feature columns to `dec`, filled with float32 zeros.
dec[cols_notexist] = np.zeros(shape=(len(dec), len(cols_notexist)), dtype=np.float32)

In [220]:
# Reorder `dec`: non-feature columns first, then the feature columns in the order of `appcolnms`.
deccol_woapp = dec.columns[~np.isin(dec.columns, appcolnms)]
dec = dec[np.concatenate([deccol_woapp, appcolnms])]

In [213]:
# Inspect the January table (renders the whole frame; `jan.head()` would keep the output small).
jan

Unnamed: 0.1,Unnamed: 0,party_id,sesn_id,dt,diff_dt,page_cd,gender_cd,age,vtlt_age,cur_mbrsh_rwrd_st_cd,...,"('value', '건강도전')","('value', '건강')","('value', '바이탈리티 나이')","('value', '가입/로그인/비밀번호')","('value', '내바이탈리티')","('value', '등급리워드')","('value', '헬시푸드')","('value', '주간 리워드')","('value', '주간미션')","('value', '건강걷기')"
0,0,861497,7888CA43-60E4-4C39-8620-ACD561D08BEC,2020-01-14,0 days,4,1,32,31,3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,861497,74B0683B-2022-4C67-A73A-0AE7868C5FC3,2020-01-16,2 days,6,1,32,31,3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,861497,A4BCBC70-1AB8-4D64-AFC7-BD7FF6B75D5C,2020-01-16,0 days,4,1,32,31,3,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,861497,E3CE03CB-106E-47E1-AD28-FD6CD312A1DE,2020-01-16,0 days,12,1,32,31,3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,861497,1B238934-D379-40E2-856A-7B8F35304DB3,2020-01-20,4 days,5,1,32,31,3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2350971,2350971,1200055780,daee0ae9-210c-4c79-ae12-b65b9cf45247,2020-01-31,0 days,22,0,43,47,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2350972,2350972,1200055801,262d966f-f863-4755-8057-3a821262206f,2020-01-31,0 days,24,0,35,38,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2350973,2350973,1200055802,6fc09324-fbe9-467c-90f3-afedb051d07a,2020-01-31,0 days,2,1,51,0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2350974,2350974,1200055803,e30b888c-0498-4b82-a183-a3302e9a4f88,2020-01-31,0 days,25,1,45,55,1,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [221]:
# Inspect the December table (renders the whole frame; `dec.head()` would keep the output small).
dec

Unnamed: 0.1,Unnamed: 0,party_id,sesn_id,dt,diff_dt,page_cd,gender_cd,age,vtlt_age,cur_mbrsh_rwrd_st_cd,...,"('value', '건강도전')","('value', '건강')","('value', '바이탈리티 나이')","('value', '가입/로그인/비밀번호')","('value', '내바이탈리티')","('value', '등급리워드')","('value', '헬시푸드')","('value', '주간 리워드')","('value', '주간미션')","('value', '건강걷기')"
0,0,861497,A78A5053-C5FD-49B4-843A-D49F089E1CE6,2020-12-01,0 days,36,1,32,31,1,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,861497,5670F599-FE2C-4C46-8D48-301AC1702A74,2020-12-11,10 days,4,1,32,31,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,861497,90471820-22CD-41B4-A16E-ADE7193F28DE,2020-12-11,0 days,6,1,32,31,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,861497,45EAEDCB-D975-4A74-919B-68AF85206FB5,2020-12-18,7 days,1,1,32,31,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,861497,2C760084-85C5-4D3E-99D3-0196D8712863,2020-12-21,3 days,2,1,32,31,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
909532,909532,1200313654,fa539f99-05cb-4dcc-9ed5-b0de16051065,2020-12-31,0 days,51,1,65,72,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
909533,909533,1200313655,EE6A7878-7EC5-420D-BE67-55F555AA6C26,2020-12-31,0 days,37,1,48,51,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
909534,909534,1200313656,fbae8e73-1353-4374-845c-274766614bf7,2020-12-31,0 days,33,1,37,45,1,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
909535,909535,1200313676,2eac0c68-6598-4f8d-b9b8-4a1782b1abc4,2020-12-31,0 days,1,0,65,0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Masking related to page-visit counts
# TODO: this assignment was left without a right-hand side. On its own it is a
# SyntaxError and stops "Run All", so it is parked as a comment until finished.
# applog_cols =

- raw data
- min-max scaler
- standard scaler

다 해보기

In [81]:
# Number of rows (sessions) in the December table.
len(dec['sesn_id'])

909537

In [82]:
# Inspect the December table (renders the whole frame; `dec.head()` would keep the output small).
dec

Unnamed: 0.1,Unnamed: 0,party_id,sesn_id,dt,diff_dt,page_cd,가입/로그인/비밀번호,건강,건강걷기_x,건강도전_x,...,age,vtlt_age,cur_mbrsh_rwrd_st_cd,cur_mbrsh_pd_acqr_pt,push_alarm_yn,count_vtlt_age_dt,diff_age,achv_rat,active_dur,points_value
0,0,861497,A78A5053-C5FD-49B4-843A-D49F089E1CE6,2020-12-01,0 days,36,0.0,0.0,0.0,6.0,...,32,31,1,3650,1,1.0,-1,0.333333,858 days,0.0
1,1,861497,5670F599-FE2C-4C46-8D48-301AC1702A74,2020-12-11,10 days,4,0.0,0.0,0.0,0.0,...,32,31,1,3700,1,1.0,-1,0.328000,868 days,0.0
2,2,861497,90471820-22CD-41B4-A16E-ADE7193F28DE,2020-12-11,0 days,6,0.0,0.0,0.0,0.0,...,32,31,1,3700,1,1.0,-1,0.328000,868 days,0.0
3,3,861497,45EAEDCB-D975-4A74-919B-68AF85206FB5,2020-12-18,7 days,1,0.0,0.0,0.0,0.0,...,32,31,1,3750,1,1.0,-1,0.325397,875 days,0.0
4,4,861497,2C760084-85C5-4D3E-99D3-0196D8712863,2020-12-21,3 days,2,0.0,0.0,0.0,0.0,...,32,31,1,3750,1,1.0,-1,0.325397,878 days,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
909532,909532,1200313654,fa539f99-05cb-4dcc-9ed5-b0de16051065,2020-12-31,0 days,51,1.0,0.0,0.0,0.0,...,65,72,1,500,1,1.0,7,0.000000,0 days,0.0
909533,909533,1200313655,EE6A7878-7EC5-420D-BE67-55F555AA6C26,2020-12-31,0 days,37,1.0,0.0,0.0,0.0,...,48,51,1,500,1,1.0,3,0.000000,0 days,0.0
909534,909534,1200313656,fbae8e73-1353-4374-845c-274766614bf7,2020-12-31,0 days,33,1.0,0.0,0.0,0.0,...,37,45,1,500,1,1.0,8,0.000000,0 days,0.0
909535,909535,1200313676,2eac0c68-6598-4f8d-b9b8-4a1782b1abc4,2020-12-31,0 days,1,0.0,0.0,0.0,0.0,...,65,0,1,0,1,0.0,-65,0.000000,0 days,0.0


In [84]:
# Mean number of sessions per member in December.
dec[['party_id','sesn_id']].groupby(['party_id']).count().mean()

sesn_id    11.248432
dtype: float64

In [86]:
# Mean number of sessions per member in January.
jan[['party_id','sesn_id']].groupby(['party_id']).count().mean()

sesn_id    14.529867
dtype: float64

In [89]:
# Mean number of sessions per member and visit day in December.
dec[['party_id','dt','sesn_id']].groupby(['party_id','dt']).count().mean()

sesn_id    1.843015
dtype: float64

In [90]:
# Mean number of sessions per member and visit day in January.
jan[['party_id','dt','sesn_id']].groupby(['party_id','dt']).count().mean()

sesn_id    1.971976
dtype: float64

- 기간기준
    - 2달
    
- session?
    - 20개 ~ 30개
    
- train/test
    - chronological cross-validation
    - 1/2, 2/3, 3/4, 4/5, 5/6 ||,  7/8 
    - 1/2, 2/3, 3/4, 4/5, 5/6, 6/7 || 8/9
    - 1/2, 2/3, 3/4, 4/5, 5/6, 6/7, 7/8 || 8/9
    - 1/2, 2/3, 3/4, 4/5, 5/6, 6/7, 7/8,

In [95]:
# Load the full label table from the path configured in cfg.label.
lbl = read_csv(cfg.label)

In [96]:
# Inspect the label table (renders the whole frame; `lbl.head()` would keep the output small).
lbl

Unnamed: 0.1,Unnamed: 0,PartyId,month,class,리워드만료일,리워드유효여부
0,0,861497,2020-01,0,2019-01-11,N
1,1,861500,2020-01,0,2019-01-12,N
2,2,863497,2020-01,0,2019-01-11,N
3,3,870485,2020-01,0,2019-01-12,N
4,4,870486,2020-01,0,2019-01-13,N
...,...,...,...,...,...,...
1593487,1593597,1200298751,2021-02,0,2021-03-06,Y
1593488,1593598,200429863,2021-02,0,2019-06-28,N
1593489,1593599,1200252274,2021-02,0,2021-03-31,Y
1593490,1593600,201076298,2021-02,0,2019-11-15,N


In [104]:
# Print every column name of `dec` that also exists in the label table.
for col in (name for name in dec.columns if name in lbl.columns):
    print(col)

Unnamed: 0


In [101]:
# Inspect the January table (renders the whole frame; `jan.head()` would keep the output small).
jan

Unnamed: 0.1,Unnamed: 0,party_id,sesn_id,dt,diff_dt,page_cd,가입/로그인/비밀번호,건강,건강걷기_x,건강도전_x,...,age,vtlt_age,cur_mbrsh_rwrd_st_cd,cur_mbrsh_pd_acqr_pt,push_alarm_yn,count_vtlt_age_dt,diff_age,achv_rat,active_dur,points_value
0,0,861497,7888CA43-60E4-4C39-8620-ACD561D08BEC,2020-01-14,0 days,4,0.0,0.0,0.0,0.0,...,32,31,3,4800,1,1.0,-1,0.384615,536 days,100.0
1,1,861497,74B0683B-2022-4C67-A73A-0AE7868C5FC3,2020-01-16,2 days,6,0.0,0.0,0.0,0.0,...,32,31,3,4800,1,1.0,-1,0.384615,538 days,150.0
2,2,861497,A4BCBC70-1AB8-4D64-AFC7-BD7FF6B75D5C,2020-01-16,0 days,4,0.0,0.0,0.0,1.0,...,32,31,3,4800,1,1.0,-1,0.384615,538 days,150.0
3,3,861497,E3CE03CB-106E-47E1-AD28-FD6CD312A1DE,2020-01-16,0 days,12,0.0,0.0,0.0,1.0,...,32,31,3,4800,1,1.0,-1,0.384615,538 days,150.0
4,4,861497,1B238934-D379-40E2-856A-7B8F35304DB3,2020-01-20,4 days,5,0.0,0.0,0.0,0.0,...,32,31,3,4800,1,1.0,-1,0.384615,542 days,150.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2350971,2350971,1200055780,daee0ae9-210c-4c79-ae12-b65b9cf45247,2020-01-31,0 days,22,1.0,1.0,0.0,2.0,...,43,47,1,500,1,1.0,4,0.000000,0 days,0.0
2350972,2350972,1200055801,262d966f-f863-4755-8057-3a821262206f,2020-01-31,0 days,24,1.0,1.0,0.0,3.0,...,35,38,1,500,1,1.0,3,0.000000,0 days,0.0
2350973,2350973,1200055802,6fc09324-fbe9-467c-90f3-afedb051d07a,2020-01-31,0 days,2,1.0,0.0,0.0,0.0,...,51,0,1,0,0,0.0,-51,0.000000,0 days,0.0
2350974,2350974,1200055803,e30b888c-0498-4b82-a183-a3302e9a4f88,2020-01-31,0 days,25,2.0,0.0,0.0,0.0,...,45,55,1,500,1,1.0,10,0.000000,0 days,0.0


In [None]:
# Show the current value of `date` (a string from the config cell, or the last loop timestamp).
date

In [107]:
# Show the label rows selected for each month in `dateranges`.
# NOTE: the loop variable overwrites the earlier string `date`, and after the
# loop `lblmth` holds only the last month's labels.
for date in dateranges:
    print(date)
    lblmth = prep_lbl_per_mth(lbl, date)
    display(lblmth)

2020-01-31 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
0,0,2019-01-11,N,861497
1,0,2019-01-12,N,861500
2,0,2019-01-11,N,863497
3,0,2019-01-12,N,870485
4,0,2019-01-13,N,870486
...,...,...,...,...
161821,1,2020-07-17,Y,1200055780
161822,1,2020-07-17,Y,1200055801
161823,1,2020-07-17,Y,1200055802
161824,1,2020-07-17,Y,1200055803


2020-02-29 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
161826,0,2019-01-11,N,861497
161827,0,2019-01-12,N,861500
161828,0,2019-01-11,N,863497
161829,1,2019-01-11,N,863498
161830,0,2019-01-12,N,870485
...,...,...,...,...
317836,0,2020-08-15,Y,1200089978
317837,0,2020-08-15,Y,1200089979
317838,0,2020-08-15,Y,1200090001
317839,0,2020-08-15,Y,1200090002


2020-03-31 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
317841,0,2019-01-11,N,861497
317842,0,2019-01-12,N,861500
317843,0,2019-01-11,N,863496
317844,0,2019-01-11,N,863497
317845,0,2019-01-12,N,870485
...,...,...,...,...
474631,0,2020-09-15,Y,1200123881
474632,1,2020-09-15,Y,1200123901
474633,0,2020-09-15,Y,1200123902
474634,1,2020-09-15,Y,1200123903


2020-04-30 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
474636,0,2019-01-11,N,861497
474637,0,2019-01-12,N,861500
474638,0,2019-01-11,N,863496
474639,0,2019-01-11,N,863497
474640,0,2019-01-12,N,870485
...,...,...,...,...
617825,0,2020-10-15,Y,1200150227
617826,0,2020-10-15,Y,1200150228
617827,0,2020-10-15,Y,1200150229
617828,0,2020-10-15,Y,1200150230


2020-05-31 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
617830,0,2019-01-11,N,861497
617831,0,2019-01-12,N,861500
617832,0,2019-01-11,N,863496
617833,0,2019-01-11,N,863497
617834,0,2019-01-12,N,870485
...,...,...,...,...
749740,1,2020-11-15,Y,1200170708
749741,1,2020-11-15,Y,1200170726
749742,0,2020-11-15,Y,1200170727
749743,0,2020-11-15,Y,1200170728


2020-06-30 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
749745,1,2019-01-11,N,861497
749746,0,2019-01-12,N,861500
749747,1,2019-01-11,N,863496
749748,0,2019-01-11,N,863497
749749,0,2019-01-12,N,870485
...,...,...,...,...
873481,1,2020-12-15,Y,1200195676
873482,1,2020-12-15,Y,1200195677
873483,1,2020-12-15,Y,1200195678
873484,1,2020-12-15,Y,1200195701


2020-07-31 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
873486,0,2019-01-12,N,861500
873487,0,2019-01-11,N,863497
873488,0,2019-01-12,N,870485
873489,0,2019-01-13,N,870486
873490,0,2019-01-18,N,870494
...,...,...,...,...
984821,1,2021-01-15,Y,1200216027
984822,0,2021-01-15,Y,1200216028
984823,1,2021-01-15,Y,1200216029
984824,1,2021-01-15,Y,1200216051


2020-08-31 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
984826,0,2019-01-11,N,861497
984827,0,2019-01-12,N,861500
984828,1,2019-01-11,N,863496
984829,0,2019-01-11,N,863497
984830,0,2019-01-12,N,870485
...,...,...,...,...
1072318,0,2021-02-15,Y,1200228557
1072319,1,2021-02-15,Y,1200228558
1072320,0,2021-02-15,Y,1200228559
1072321,0,2021-02-15,Y,1200228560


2020-09-30 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
1072323,0,2019-01-11,N,861497
1072324,0,2019-01-12,N,861500
1072325,0,2019-01-11,N,863497
1072326,0,2019-01-12,N,870485
1072327,0,2019-01-13,N,870486
...,...,...,...,...
1152494,0,2021-03-17,Y,1200245843
1152495,0,2021-03-17,Y,1200245844
1152496,0,2021-03-17,Y,1200245845
1152497,0,2021-03-17,Y,1200245851


2020-10-31 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
1152499,0,2019-01-11,N,861497
1152500,0,2019-01-12,N,861500
1152501,0,2019-01-11,N,863497
1152502,0,2019-01-12,N,870485
1152503,0,2019-01-13,N,870486
...,...,...,...,...
1247266,0,2021-04-17,Y,1200266683
1247267,0,2021-04-17,Y,1200266685
1247268,0,2021-04-17,Y,1200266701
1247269,1,2021-04-17,Y,1200266702


2020-11-30 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
1247271,1,2019-01-18,N,968482
1247272,1,2019-01-25,N,1252992
1247273,1,2019-02-07,N,1896983
1247274,0,2019-02-08,N,2188497
1247275,1,2019-02-07,N,2354485
...,...,...,...,...
1363447,0,2021-02-24,Y,1200233762
1363448,0,2019-11-01,N,201041682
1363449,0,2021-02-21,Y,1200288761
1363450,0,2019-05-13,N,200223213


2020-12-31 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
1363452,1,2021-02-21,Y,1200288761
1363453,0,2020-08-22,N,1200100018
1363454,1,2019-05-05,N,200154581
1363455,1,2019-08-06,N,200701128
1363456,1,2021-02-24,Y,1200233828
...,...,...,...,...
1444466,0,2020-08-02,N,1200073589
1444467,0,2019-04-29,N,200084423
1444468,0,2020-02-26,N,201337745
1444469,0,2021-01-31,Y,1200273985


2021-01-31 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
1444471,1,2019-07-19,N,200516310
1444472,1,2019-12-27,N,201167103
1444473,1,2019-09-03,N,200807031
1444474,1,2019-07-22,N,200529637
1444475,1,2020-04-19,N,201415544
...,...,...,...,...
1525777,0,2020-12-03,N,1200187098
1525778,0,2019-07-18,N,200514583
1525779,0,2021-04-25,Y,1200338421
1525780,0,2019-05-23,N,200284759


2021-02-28 00:00:00


Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
1525782,0,2020-06-24,N,1200035760
1525783,1,2019-10-30,N,201034762
1525784,0,2021-04-08,Y,1200259102
1525785,1,2021-03-13,Y,1200304489
1525786,1,2019-04-07,N,257371388
...,...,...,...,...
1593487,0,2021-03-06,Y,1200298751
1593488,0,2019-06-28,N,200429863
1593489,0,2021-03-31,Y,1200252274
1593490,0,2019-11-15,N,201076298


In [108]:
# Label rows for December 2020 only.
lblmth = prep_lbl_per_mth(lbl, pd.to_datetime(202012, format='%Y%m'))

In [109]:
# Inspect the December labels (renders the whole frame; `lblmth.head()` would keep the output small).
lblmth

Unnamed: 0,class,리워드만료일,리워드유효여부,party_id
1363452,1,2021-02-21,Y,1200288761
1363453,0,2020-08-22,N,1200100018
1363454,1,2019-05-05,N,200154581
1363455,1,2019-08-06,N,200701128
1363456,1,2021-02-24,Y,1200233828
...,...,...,...,...
1444466,0,2020-08-02,N,1200073589
1444467,0,2019-04-29,N,200084423
1444468,0,2020-02-26,N,201337745
1444469,0,2021-01-31,Y,1200273985


In [112]:
# Attach the December labels to the December sessions; members without a label are dropped (inner join).
aa = pd.merge(dec, lblmth, on=['party_id'], how='inner')

In [113]:
# Inspect the labelled December sessions (renders the whole frame; `aa.head()` would keep the output small).
aa

Unnamed: 0.1,Unnamed: 0,party_id,sesn_id,dt,diff_dt,page_cd,가입/로그인/비밀번호,건강,건강걷기_x,건강도전_x,...,cur_mbrsh_pd_acqr_pt,push_alarm_yn,count_vtlt_age_dt,diff_age,achv_rat,active_dur,points_value,class,리워드만료일,리워드유효여부
0,0,861497,A78A5053-C5FD-49B4-843A-D49F089E1CE6,2020-12-01,0 days,36,0.0,0.0,0.0,6.0,...,3650,1,1.0,-1,0.333333,858 days,0.0,0,2019-01-11,N
1,1,861497,5670F599-FE2C-4C46-8D48-301AC1702A74,2020-12-11,10 days,4,0.0,0.0,0.0,0.0,...,3700,1,1.0,-1,0.328000,868 days,0.0,0,2019-01-11,N
2,2,861497,90471820-22CD-41B4-A16E-ADE7193F28DE,2020-12-11,0 days,6,0.0,0.0,0.0,0.0,...,3700,1,1.0,-1,0.328000,868 days,0.0,0,2019-01-11,N
3,3,861497,45EAEDCB-D975-4A74-919B-68AF85206FB5,2020-12-18,7 days,1,0.0,0.0,0.0,0.0,...,3750,1,1.0,-1,0.325397,875 days,0.0,0,2019-01-11,N
4,4,861497,2C760084-85C5-4D3E-99D3-0196D8712863,2020-12-21,3 days,2,0.0,0.0,0.0,0.0,...,3750,1,1.0,-1,0.325397,878 days,0.0,0,2019-01-11,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
909532,909532,1200313654,fa539f99-05cb-4dcc-9ed5-b0de16051065,2020-12-31,0 days,51,1.0,0.0,0.0,0.0,...,500,1,1.0,7,0.000000,0 days,0.0,0,2021-03-25,Y
909533,909533,1200313655,EE6A7878-7EC5-420D-BE67-55F555AA6C26,2020-12-31,0 days,37,1.0,0.0,0.0,0.0,...,500,1,1.0,3,0.000000,0 days,0.0,0,2021-03-25,Y
909534,909534,1200313656,fbae8e73-1353-4374-845c-274766614bf7,2020-12-31,0 days,33,1.0,0.0,0.0,0.0,...,500,1,1.0,8,0.000000,0 days,0.0,0,2021-03-25,Y
909535,909535,1200313676,2eac0c68-6598-4f8d-b9b8-4a1782b1abc4,2020-12-31,0 days,1,0.0,0.0,0.0,0.0,...,0,1,0.0,-65,0.000000,0 days,0.0,0,2021-03-25,Y


In [116]:
# Sanity check: rows whose label columns contain a missing value (an empty result means none).
aa.loc[aa[['class','리워드만료일','리워드유효여부']].isnull().any(axis=1)]

Unnamed: 0.1,Unnamed: 0,party_id,sesn_id,dt,diff_dt,page_cd,가입/로그인/비밀번호,건강,건강걷기_x,건강도전_x,...,cur_mbrsh_pd_acqr_pt,push_alarm_yn,count_vtlt_age_dt,diff_age,achv_rat,active_dur,points_value,class,리워드만료일,리워드유효여부
