In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import gc

import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from matplotlib import rcParams 
from plotly.subplots import make_subplots
from datetime import timedelta

# Korean font setup: register a Hangul-capable font as matplotlib's default
# family so that the Korean category / column names used in plots can render.
import os
from matplotlib import font_manager,rc

# Malgun Gothic at its usual Windows location. The path is machine-specific,
# so it is checked before use instead of letting the first cell crash on a
# machine where the file does not exist.
KOREAN_FONT_PATH = "c:/Windows/Fonts/malgun.ttf"
# Family name taken from the previously commented-out alternative:
#   matplotlib.rc('font', family="NanumBarunGothic")
FALLBACK_FONT_FAMILY = "NanumBarunGothic"

if os.path.exists(KOREAN_FONT_PATH):
    font_name = font_manager.FontProperties(fname=KOREAN_FONT_PATH).get_name()
else:
    warnings.warn(
        "Korean font file not found at " + KOREAN_FONT_PATH
        + "; falling back to font family " + FALLBACK_FONT_FAMILY
    )
    font_name = FALLBACK_FONT_FAMILY
rc('font', family=font_name)

# NOTE(review): the star import hides which names come from eda_lib.dataset;
# `load_member` (used when loading the members table) is presumably one of
# them -- confirm and switch to explicit imports.
from eda_lib.dataset import *

print(pd.__version__) 

1.1.5


# 0. Load (segment & members & pagecode_info) data

In [2]:
# Load the monthly segment table (one row per PartyId and month with its
# 'class' label) and turn the 'YYYY-MM' strings into monthly Periods so they
# can be matched against the 'month' column derived from the app logs.
seg_df = pd.read_csv('./data/partyid_monthly_class.csv', index_col=0)
seg_df['month'] = pd.to_datetime(seg_df['month'], format='%Y-%m').dt.to_period('M')
# Cap the number of rows pandas renders when a frame is displayed.
pd.set_option('display.max_rows', 50)
seg_df

Unnamed: 0,PartyId,month,class
0,861497,2020-01,0
1,861500,2020-01,0
2,863497,2020-01,0
3,870485,2020-01,0
4,870486,2020-01,0
...,...,...,...
1537301,1200362290,2021-02,0
1537302,1200362291,2021-02,0
1537303,1200362301,2021-02,0
1537304,1200362302,2021-02,1


In [3]:
# Load members data
members_df = load_member('202103')

# '멤버십통합' (combined membership flag): 'Y' when either
# '유료멤버십보험가입여부' or '11/4이전유료멤버십' is 'Y', otherwise 'N'.
# Original note: 'fcip_yn', 'ip_insr_cd' = 'Y' are excluded.
# A "(members_df['멤버십비납부여부']=='Y') |" term used to be part of this rule
# and is intentionally left disabled.
#
# The flag is computed in a single vectorised assignment. The previous form,
# members_df['멤버십통합'][mask] = 'Y', is chained indexing: it raises
# SettingWithCopyWarning and silently stops updating the frame once pandas
# copy-on-write is in effect. It also built a Python list with one element
# per member first.
paid_member_mask = (members_df['유료멤버십보험가입여부']=='Y') | (members_df['11/4이전유료멤버십']=='Y')
members_df['멤버십통합'] = np.where(paid_member_mask, 'Y', 'N')
members_df

Orig. data len: 53158077
Aft. drop-nan: 53158077 





A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,가입일자,멤버십비납부여부,유료멤버십보험가입여부,11/4이전유료멤버십,마지막로그인일자,멤버십통합
PartyId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
892490,2018-08-03,N,N,N,2020-12-19,N
909992,2018-08-03,Y,N,N,2021-03-30,N
1072989,2018-08-06,N,N,N,2021-03-31,N
1252497,2018-08-10,N,N,N,2019-05-05,N
1263494,2018-08-10,N,N,N,2019-03-05,N
...,...,...,...,...,...,...
1200388851,2021-03-30,N,Y,Y,2021-03-31,Y
1200388971,2021-03-31,N,N,N,2021-03-31,N
1200389026,2021-03-31,N,N,N,2021-03-31,N
1200389035,2021-03-31,N,Y,Y,2021-03-31,Y


In [4]:
# Load the page-code reference table. The CSV's first column (named 'No') is
# read as the index, moved back into a regular column and then discarded,
# leaving four columns that are renamed to: category level 1, category level 2,
# page name, page code.
pagecode_df = (
    pd.read_csv('./data/pagecode_info_210421.csv', index_col=0)
    .reset_index()
    .drop(columns=['No'])
)
pagecode_df.columns = ['카테고리1', '카테고리2', '페이지이름', '페이지코드']

# Lookup page code -> page name; a page without a name maps to its own code.
code2name = {
    k: (k if pd.isnull(v) else v)
    for k, v in zip(pagecode_df['페이지코드'].values, pagecode_df['페이지이름'].values)
}

pagecode_df.describe(include=['O'])

Unnamed: 0,카테고리1,카테고리2,페이지이름,페이지코드
count,2029,1984,2239,2242
unique,23,59,1291,2242
top,꿀팁,꿀팁-컨텐츠,0,/setup/faq/detail/615
freq,381,283,415,1


In [27]:
# Load applog data
def load_applog(data_dir, start, periods): 
    """Load and clean consecutive monthly app-log CSV files.

    Files are read from ``data_dir + 'applog_' + YYYYMM + '.csv'`` for
    ``periods`` consecutive months starting at ``start`` and concatenated.

    Parameters
    ----------
    data_dir : str
        Path prefix of the log files; it is concatenated directly with the
        file name, so it must end with a path separator.
    start : str
        First month to load, in any form ``pd.period_range`` accepts
        (e.g. ``'2020/01'``).
    periods : int
        Number of consecutive months to load.

    Returns
    -------
    (DataFrame, int)
        The cleaned log with columns PartyId, 방문일시 (visit datetime),
        페이지코드 (page code), 체류시간 (dwell time), 세션ID (session id),
        and its row count. The returned index is not contiguous: rows with
        session id '#' are removed after the index has been reset.
    """
    # Load applog data: one CSV per month.
    months = pd.period_range(start=start, periods=periods, freq='M').strftime('%Y%m')
    df = pd.concat([pd.read_csv(data_dir + 'applog_' + month + '.csv') for month in months])

    # Basic preprocess
    # Rename the ten raw columns positionally.
    df.columns = ['PartyId', '방문일시', '페이지코드', '체류시간', '세션ID', '로그인여부', '신규방문여부', '통신회사코드', '이탈여부', '연월일']
    # Drop every row that has a missing value in any column.
    print('Orig. data len:', len(df))
    df = df.dropna()
    print('Aft. drop-nan:', len(df), '\n')
    # PartyId -> nullable integer.
    df['PartyId'] = df['PartyId'].astype('Int64')
    # Visit timestamp -> datetime. The last three characters of the raw value
    # are cut off before parsing as YYYYmmddHHMMSS (presumably milliseconds --
    # TODO confirm). Done with the vectorised .str accessor rather than a
    # per-row Python lambda.
    df['방문일시'] = pd.to_datetime(df['방문일시'].astype('str').str[:-3], format='%Y%m%d%H%M%S')
    # Exclude rows stamped in 1970.
    df = df[df['방문일시'].dt.year != 1970]
    df = df.reset_index(drop=True)
    # Drop the columns that are not used downstream. '신규방문여부' used to be
    # recoded Y/N -> 1/0 right before being dropped; that dead step is gone.
    df = df.drop(columns=['로그인여부', '신규방문여부', '통신회사코드', '이탈여부', '연월일'])
    # Remove rows whose session id is the placeholder '#'.
    df = df[df['세션ID']!='#']

    df_len = len(df)
    return df, df_len

# Filters
# 1. Keep only PartyIds whose members_df['멤버십통합'] flag is 'N'
# 2. Add the reward-period flag column ('리워드기간')
# 3. Drop page codes whose average monthly unique-visitor count is below uv_num
def filters(df, df_len, options, uv_num=50):
    """Apply the selected filters to an app-log frame.

    Reads the notebook-level frames ``members_df`` (indexed by PartyId) and
    ``pagecode_df``.

    Parameters
    ----------
    df : DataFrame
        App-log rows. Option 1 and 2 need 'PartyId' (option 2 also '방문일시');
        option 3 needs 'PartyId', '페이지코드', '카테고리1' and 'month'.
    df_len : int
        Expected row count of ``df``; used to check that the joins on
        ``members_df`` do not duplicate rows.
    options : container of int
        Which filters to apply (any of 1, 2, 3); they run in that order.
    uv_num : int, default 50
        Option 3 threshold: pages whose per-month unique-PartyId count,
        averaged over months, is below this value are removed.

    Returns
    -------
    (DataFrame, int)
        The filtered frame and its row count.

    Raises
    ------
    AssertionError
        If joining ``members_df`` changes the number of rows.
    """
    if 1 in options:
        df = df.join(members_df['멤버십통합'], on='PartyId')
        assert len(df) == df_len, 'Error! length change'
        # PartyIds missing from members_df get NaN here and are dropped too.
        df = df[df['멤버십통합']=='N']
        df_len = len(df)
        df = df.drop(columns=['멤버십통합'])
    if 2 in options:
        df = df.join(members_df['가입일자'], on='PartyId')
        assert len(df) == df_len, 'Error! length change'

        join_date = df['가입일자']
        # Whole days between the join date and the visit.
        diff_days = (df['방문일시'] - join_date).dt.floor('d').dt.days

        # Window length depends on the join date: 168 days for joins before
        # 2020-11-04, 84 days from that date on. Computed column-wise instead
        # of with a Python loop over every log row.
        limit_days = np.where(join_date < pd.Timestamp(2020, 11, 4), 168, 84)
        reward_flag = np.where(diff_days <= limit_days, 'Y', 'N').astype(object)
        # Rows without a join date keep a null marker instead of 'Y'/'N'.
        reward_flag[join_date.isna().to_numpy()] = pd.NaT
        df['리워드기간'] = reward_flag
        df = df.drop(columns='가입일자')
    if 3 in options:
        categories = pagecode_df['카테고리1'].dropna().unique()

        # Unique visitors per (category, page, month) ...
        pageUV_df = df.groupby(['카테고리1', '페이지코드', 'month'])['PartyId'].nunique().to_frame()
        pageUV_df.columns=['순페이지수']
        pageUV_df = pageUV_df.reset_index()
        # ... averaged over months per page. The value column is selected
        # explicitly: the frame also holds the Period-typed 'month' column,
        # and calling .mean() on it only worked through pandas' silent
        # nuisance-column dropping, which newer pandas versions removed.
        pageUV_df = pageUV_df.groupby(['카테고리1', '페이지코드'])[['순페이지수']].mean().round(1)
        pageUV_df = pageUV_df.reset_index()

        print('{0:<15} | {1:>3} - {2:>3} | {3:>3}'.format('카테고리', 'ORIG.#','Filter#', 'Remain#'), end='\n\n')

        below = pageUV_df['순페이지수'] < uv_num
        at_least = pageUV_df['순페이지수'] >= uv_num
        for c in categories:
            in_category = pageUV_df['카테고리1'] == c
            # Hangul and ASCII spaces have different display widths, so the
            # label is padded by hand to keep the table columns aligned.
            padding = ' ' * c.count(' ') + '  ' * max(10 - len(c), 0)
            print(c + padding + '| {0:>5} - {1:>5} | {2:>5}'.format(
                int(in_category.sum()),
                int((in_category & below).sum()),
                int((in_category & at_least).sum())))

        print()
        print('{3:^19} | {0:>5} - {1:>5} | {2:>5}'.format(len(pageUV_df), int(below.sum()), int(at_least.sum()), 'SUM'))

        # Pages without a category never enter pageUV_df (groupby drops NaN
        # keys), so they are never filtered out here.
        filter_pg = pageUV_df[below]['페이지코드'].values
        filter_df = df[~df.페이지코드.isin(filter_pg)]
        print('\nBef.filter df len:', len(df))
        print('Aft.filter df len:', len(filter_df), '\n')
        df = filter_df
    return df, len(df)

# Join the monthly 'class' label onto the applog df
def join_applog_class(df, df_len, seg_df):
    """Left-join ``seg_df`` onto the log rows by PartyId and visit month.

    A monthly-Period 'month' column derived from '방문일시' is added to the
    passed-in ``df`` itself (the caller's frame is modified), then ``seg_df``
    is merged on ['PartyId', 'month']. Rows without a match get a missing
    'class'; the column is cast to nullable 'Int64'.

    Raises AssertionError when the merge changes the row count away from
    ``df_len`` (i.e. ``seg_df`` has duplicate PartyId/month keys).
    """
    df['month'] = df['방문일시'].dt.to_period('M')
    merged = df.merge(seg_df, how='left', on=['PartyId', 'month'], sort=False)
    assert len(merged) == df_len, 'Error! length change'
    merged['class'] = merged['class'].astype('Int64')
    return merged

def join_applog_category(df, df_len, pagecode_df):
    """Left-join the two category columns of ``pagecode_df`` onto the log rows.

    The merge key is '페이지코드'; the columns '카테고리1' and '카테고리2' are
    appended, and page codes without a match get missing values.

    Raises AssertionError when the merge changes the row count away from
    ``df_len`` (i.e. a page code occurs more than once in ``pagecode_df``).
    """
    category_lookup = pagecode_df[['페이지코드', '카테고리1', '카테고리2']]
    joined = df.merge(category_lookup, how='left', on=['페이지코드'], sort=False)
    assert len(joined) == df_len, 'Error! length change'
    return joined

# 1. Generate depth-1 applog data (2020/01 - 2021/02) 

In [29]:
data_dir = './data/applog/'
start, periods = '2020/01', 14
st_months = pd.period_range(start=start, periods=periods, freq='M').strftime('%Y%m')

# For every month: load that month's app log, build one feature row per
# (PartyId, month) and write it to applog_df_depth1_<YYYYMM>.csv.
# "depth-1" features are broken down by the top-level category ('카테고리1').
for m in st_months:
    print('='*30, m, '='*30)
    
    # Load applog data
    df, df_len = load_applog(data_dir, m, 1)
    
    # Filters 
    # (1) keep only PartyIds whose '멤버십통합' flag is 'N'
    # (2) add the reward-period column
    df, df_len = filters(df, df_len, [1,2]) 
    
    # Join 'class' onto the applog df
    df = join_applog_class(df, df_len, seg_df)
    gc.collect()
    
    # 1. Pages per session (session length)
    # class: first non-null label per (PartyId, month)
    users_df = df[['PartyId', 'month', 'class']].groupby(['PartyId','month']).first()
    users_len = len(users_df)
    # Total page views (one log row = one page view)
    tmp_df = df[['PartyId', 'month', '방문일시']].groupby(['PartyId','month']).count()
    tmp_df.columns = ['전체페이지수']
    users_df = pd.merge(users_df, tmp_df, left_index=True, right_index=True)
    assert len(users_df) == users_len, 'Error! length change'
    # Total number of distinct sessions
    tmp_df = df[['PartyId', 'month','세션ID']].groupby(['PartyId','month'])['세션ID'].nunique().to_frame()
    tmp_df.columns = ['전체세션수']
    users_df = pd.merge(users_df, tmp_df, left_index=True, right_index=True)
    assert len(users_df) == users_len, 'Error! length change'
    # Pages per session; the two intermediate totals are not kept
    users_df['세션당페이지수'] = np.round(users_df['전체페이지수']/users_df['전체세션수'],1)
    users_df = users_df.reset_index()
    users_df = users_df.drop(columns=['전체페이지수', '전체세션수'])
    
    # Join the category columns onto the applog df
    df = join_applog_category(df, df_len, pagecode_df)
    
    # 2. Per-category visit flag (UV)
    # nunique of the grouping column itself is 1 for every group that exists,
    # so this yields a 1 for each (PartyId, month, category) that was visited.
    category_df = df[['카테고리1', 'month', 'PartyId']].groupby(['PartyId', 'month','카테고리1'], as_index=True)['카테고리1'].nunique().to_frame()
    category_df.columns=['방문여부']
    # Category rows -> columns; categories never visited become 0
    category_df = category_df.pivot_table('방문여부', ['PartyId', 'month'], '카테고리1')
    category_df = category_df.fillna(0)
    cols = category_df.columns.values
    category_df[cols] = category_df[cols].astype('Int64')
    # column prefix 'UV'
    category_df.columns = ['UV_'+name for name in cols]
    # Join onto users_df
    category_df = category_df.reset_index()
    users_df = pd.merge(left=users_df, right=category_df, on=['PartyId', 'month'], how='left', sort=False)
    assert len(users_df) == users_len, 'Error! length change'
    
    # 3. Per-category share of session exits
    # A row is a session exit (1) when the next row has a different session
    # id; the last row always counts as an exit. Computed with a shifted
    # comparison instead of a Python loop over every log row.
    # NOTE(review): this relies on rows of one session being adjacent in df --
    # confirm the raw logs are ordered that way.
    df['종료여부'] = df['세션ID'].ne(df['세션ID'].shift(-1)).astype('int64')
    # Number of exits per (PartyId, category)
    tmp_df = df[['PartyId', '카테고리1', '종료여부']].groupby(['PartyId', '카테고리1'], as_index=True).sum()
    tmp_df.columns = ['전체종료수']
    df = df.drop(columns = ['종료여부'])
    gc.collect()
    # Category rows -> columns
    tmp_df = tmp_df.reset_index()
    tmp_df = tmp_df.pivot(index='PartyId', columns='카테고리1', values='전체종료수')
    tmp_df = tmp_df.fillna(0)
    # Each category's share (%) of the user's exits: divide every row by its sum
    sum_df = tmp_df.sum(axis=1)
    tmp_df = tmp_df.div(sum_df, axis=0) * 100
    close_df = tmp_df.round(2)
    # column prefix 'CLOSE'
    cols = close_df.columns.values
    close_df.columns = ['CLOSE_'+name for name in cols]
    # Join onto users_df.
    # NOTE(review): keyed on PartyId only, so this assumes each monthly file
    # yields one users_df row per PartyId -- confirm.
    close_df = close_df.reset_index()
    users_df = pd.merge(left=users_df, right=close_df, on=['PartyId'], how='left', sort=False)
    assert len(users_df) == users_len, 'Error! length change'
    
    # Filter (3): drop pages whose average unique-visitor count is below the threshold
    filter_df, filter_len = filters(df, df_len, [3]) 
    
    # 4. Per-category dwell time
    # Mean dwell time per (PartyId, category) on the page-filtered log
    time_df = filter_df[['PartyId', '카테고리1', '체류시간']].groupby(['PartyId','카테고리1'], as_index=True).mean()
    time_df.columns = ['평균체류시간']
    time_df = time_df.reset_index()
    # Category rows -> columns; categories without visits become 0
    time_df = time_df.pivot(index='PartyId', columns='카테고리1', values='평균체류시간')
    time_df = time_df.fillna(0)
    time_df = time_df.round(2)
    # column prefix 'TIME'
    cols = time_df.columns.values
    time_df.columns = ['TIME_'+name for name in cols]
    # Join onto users_df
    time_df = time_df.reset_index()
    users_df = pd.merge(left=users_df, right=time_df, on=['PartyId'], how='left', sort=False)
    assert len(users_df) == users_len, 'Error! length change'
    print()
    
    users_df = users_df.reset_index(drop=True)
    users_df.to_csv('applog_df_depth1_'+m+'.csv')
    gc.collect()

Orig. data len: 15888536
Aft. drop-nan: 13715360 

카테고리            | ORIG.# - Filter# | Remain#

바이탈리티          |     9 -     1 |     8
이벤트              |    12 -     1 |    11
마음챙김            |    19 -    18 |     1
활동                |     2 -     0 |     2
디바이스연동        |     1 -     0 |     1
공통                |    31 -     5 |    26
보험                |     4 -     0 |     4
설정                |    56 -     6 |    50
갤럭시프로그램      |     0 -     0 |     0
사이트맵            |     0 -     0 |     0
꿀팁                |    63 -     0 |    63
바이탈리티 할인     |     0 -     0 |     0
건강도전            |     1 -     0 |     1
건강                |   184 -    22 |   162
바이탈리티 나이     |    69 -     0 |    69
가입/로그인/비밀번호|    20 -     6 |    14
내바이탈리티        |     4 -     0 |     4
등급리워드          |    16 -     2 |    14
헬시푸드            |    35 -     9 |    26
위젯                |     0 -     0 |     0
주간 리워드         |    20 -     1 |    19
주간미션            |     6 -     0 |     6
건강걷기            |     4 -    

In [5]:
# Stitch the per-month depth-1 feature files into a single CSV.
start, periods = '2020/01', 14
st_months = pd.period_range(start=start, periods=periods, freq='M').strftime('%Y%m')

save_df_lst = [
    pd.read_csv('applog_df_depth1_'+m+'.csv', index_col=0)
    for m in st_months
]

# Months are stacked in chronological order and renumbered from 0.
save_df = pd.concat(save_df_lst).reset_index(drop=True)
save_df.to_csv('applog_df_depth1_202001_202102.csv')

# 2. Generate depth-2 applog data (2020/01 - 2021/02) 

In [31]:
data_dir = './data/applog/'
start, periods = '2020/01', 14 
st_months = pd.period_range(start=start, periods=periods, freq='M').strftime('%Y%m')

# For every month: load that month's app log, build one feature row per
# (PartyId, month) and write it to applog_df_depth2_<YYYYMM>.csv.
# "depth-2" features are broken down by the second-level category ('카테고리2').
for m in st_months:
    print('='*30, m, '='*30)
    
    # Load applog data
    df, df_len = load_applog(data_dir, m, 1)
    
    # Filters 
    # (1) keep only PartyIds whose '멤버십통합' flag is 'N'
    # (2) add the reward-period column
    df, df_len = filters(df, df_len, [1,2]) 
    
    # Join 'class' onto the applog df
    df = join_applog_class(df, df_len, seg_df)
    gc.collect()
    
    # 1. Pages per session (session length)
    # class: first non-null label per (PartyId, month)
    users_df = df[['PartyId', 'month', 'class']].groupby(['PartyId','month']).first()
    users_len = len(users_df)
    # Total page views (one log row = one page view)
    tmp_df = df[['PartyId', 'month', '방문일시']].groupby(['PartyId','month']).count()
    tmp_df.columns = ['전체페이지수']
    users_df = pd.merge(users_df, tmp_df, left_index=True, right_index=True)
    assert len(users_df) == users_len, 'Error! length change'
    # Total number of distinct sessions
    tmp_df = df[['PartyId', 'month','세션ID']].groupby(['PartyId','month'])['세션ID'].nunique().to_frame()
    tmp_df.columns = ['전체세션수']
    users_df = pd.merge(users_df, tmp_df, left_index=True, right_index=True)
    assert len(users_df) == users_len, 'Error! length change'
    # Pages per session; the two intermediate totals are not kept
    users_df['세션당페이지수'] = np.round(users_df['전체페이지수']/users_df['전체세션수'],1)
    users_df = users_df.reset_index()
    users_df = users_df.drop(columns=['전체페이지수', '전체세션수'])
    
    # Join the category columns onto the applog df
    df = join_applog_category(df, df_len, pagecode_df)
    
    # 2. Per-category visit flag (UV)
    # nunique of the grouping column itself is 1 for every group that exists,
    # so this yields a 1 for each (PartyId, month, category) that was visited.
    category_df = df[['카테고리2', 'month', 'PartyId']].groupby(['PartyId', 'month','카테고리2'], as_index=True)['카테고리2'].nunique().to_frame()
    category_df.columns=['방문여부']
    # Category rows -> columns; categories never visited become 0
    category_df = category_df.pivot_table('방문여부', ['PartyId', 'month'], '카테고리2')
    category_df = category_df.fillna(0)
    cols = category_df.columns.values
    category_df[cols] = category_df[cols].astype('Int64')
    # column prefix 'UV'
    category_df.columns = ['UV_'+name for name in cols]
    # Join onto users_df
    category_df = category_df.reset_index()
    users_df = pd.merge(left=users_df, right=category_df, on=['PartyId', 'month'], how='left', sort=False)
    assert len(users_df) == users_len, 'Error! length change'
    
    # 3. Per-category share of session exits
    # A row is a session exit (1) when the next row has a different session
    # id; the last row always counts as an exit. Computed with a shifted
    # comparison instead of a Python loop over every log row.
    # NOTE(review): this relies on rows of one session being adjacent in df --
    # confirm the raw logs are ordered that way.
    df['종료여부'] = df['세션ID'].ne(df['세션ID'].shift(-1)).astype('int64')
    # Number of exits per (PartyId, category)
    tmp_df = df[['PartyId', '카테고리2', '종료여부']].groupby(['PartyId', '카테고리2'], as_index=True).sum()
    tmp_df.columns = ['전체종료수']
    df = df.drop(columns = ['종료여부'])
    gc.collect()
    # Category rows -> columns
    tmp_df = tmp_df.reset_index()
    tmp_df = tmp_df.pivot(index='PartyId', columns='카테고리2', values='전체종료수')
    tmp_df = tmp_df.fillna(0)
    # Each category's share (%) of the user's exits: divide every row by its sum
    sum_df = tmp_df.sum(axis=1)
    tmp_df = tmp_df.div(sum_df, axis=0) * 100
    close_df = tmp_df.round(2)
    # column prefix 'CLOSE'
    cols = close_df.columns.values
    close_df.columns = ['CLOSE_'+name for name in cols]
    # Join onto users_df.
    # NOTE(review): keyed on PartyId only, so this assumes each monthly file
    # yields one users_df row per PartyId -- confirm.
    close_df = close_df.reset_index()
    users_df = pd.merge(left=users_df, right=close_df, on=['PartyId'], how='left', sort=False)
    assert len(users_df) == users_len, 'Error! length change'
    
    # Filter (3): drop pages whose average unique-visitor count is below the threshold
    filter_df, filter_len = filters(df, df_len, [3]) 
    
    # 4. Per-category dwell time
    # Mean dwell time per (PartyId, category) on the page-filtered log
    time_df = filter_df[['PartyId', '카테고리2', '체류시간']].groupby(['PartyId','카테고리2'], as_index=True).mean()
    time_df.columns = ['평균체류시간']
    time_df = time_df.reset_index()
    # Category rows -> columns; categories without visits become 0
    time_df = time_df.pivot(index='PartyId', columns='카테고리2', values='평균체류시간')
    time_df = time_df.fillna(0)
    time_df = time_df.round(2)
    # column prefix 'TIME'
    cols = time_df.columns.values
    time_df.columns = ['TIME_'+name for name in cols]
    # Join onto users_df
    time_df = time_df.reset_index()
    users_df = pd.merge(left=users_df, right=time_df, on=['PartyId'], how='left', sort=False)
    assert len(users_df) == users_len, 'Error! length change'
    print()
    
    users_df = users_df.reset_index(drop=True)
    users_df.to_csv('applog_df_depth2_'+m+'.csv')
    gc.collect()

Orig. data len: 15888536
Aft. drop-nan: 13715360 

카테고리            | ORIG.# - Filter# | Remain#

바이탈리티          |     9 -     1 |     8
이벤트              |    12 -     1 |    11
마음챙김            |    19 -    18 |     1
활동                |     2 -     0 |     2
디바이스연동        |     1 -     0 |     1
공통                |    31 -     5 |    26
보험                |     4 -     0 |     4
설정                |    56 -     6 |    50
갤럭시프로그램      |     0 -     0 |     0
사이트맵            |     0 -     0 |     0
꿀팁                |    63 -     0 |    63
바이탈리티 할인     |     0 -     0 |     0
건강도전            |     1 -     0 |     1
건강                |   184 -    22 |   162
바이탈리티 나이     |    69 -     0 |    69
가입/로그인/비밀번호|    20 -     6 |    14
내바이탈리티        |     4 -     0 |     4
등급리워드          |    16 -     2 |    14
헬시푸드            |    35 -     9 |    26
위젯                |     0 -     0 |     0
주간 리워드         |    20 -     1 |    19
주간미션            |     6 -     0 |     6
건강걷기            |     4 -    

In [6]:
# Stitch the per-month depth-2 feature files into a single CSV.
start, periods = '2020/01', 14
st_months = pd.period_range(start=start, periods=periods, freq='M').strftime('%Y%m')

save_df_lst = [
    pd.read_csv('applog_df_depth2_'+m+'.csv', index_col=0)
    for m in st_months
]

# Months are stacked in chronological order and renumbered from 0.
save_df = pd.concat(save_df_lst).reset_index(drop=True)
save_df.to_csv('applog_df_depth2_202001_202102.csv')

# 별도 #1 Create new monthly segment_df 

In [None]:
# Accumulator for the per-month label frames: every call to generate_save_df
# appends one frame here, and they are concatenated once all months are done.
save_df_lst = []

def generate_save_df(y, i):
    """Build the class labels for one month and append them to ``save_df_lst``.

    Loads the app logs of the target month and the following month. Every
    PartyId active in the target month gets class 0 when it also appears in
    the following month and class 1 when it does not.

    Reads the notebook-level names ``data_dir``, ``members_df`` (indexed by
    PartyId) and ``load_applog``; appends to the notebook-level list
    ``save_df_lst``. Returns nothing.

    Parameters
    ----------
    y : str
        Year prefix including the separator, e.g. ``'2020/'``.
    i : int
        Month number within that year.

    The appended frame has the columns PartyId, month, class,
    리워드만료일 (reward expiry date) and 리워드유효여부 (reward validity:
    'Y', 'Y->N' or 'N').

    Raises
    ------
    AssertionError
        If joining ``members_df`` changes the number of rows.
    """
    first_month = y + str(i)
    df, df_len = load_applog(data_dir, first_month, 2)

    # Keep only PartyIds whose '멤버십통합' flag is 'N' (PartyIds missing from
    # members_df are dropped as well).
    df = df.join(members_df['멤버십통합'], on='PartyId')
    assert len(df) == df_len, 'Error! length change'
    df = df[df['멤버십통합']=='N']
    df = df.drop(columns=['멤버십통합'])

    # .assign returns a new frame, so the 'month' column is never written
    # into a slice of df (the earlier form raised SettingWithCopyWarning).
    month_df = df[['PartyId', '방문일시']].assign(month=df['방문일시'].dt.to_period('M'))

    # Target month and the month after it. The local names here no longer
    # reuse `i`, which used to shadow the function parameter.
    curr_m, next_m = pd.period_range(start=first_month, periods=2, freq='M')

    # One row per PartyId active in the target month (its last log row).
    curr_df = month_df[month_df['month']==curr_m]
    curr_df = curr_df[~curr_df['PartyId'].duplicated(keep='last')]
    # PartyIds active in the following month.
    next_ids = month_df.loc[month_df['month']==next_m, 'PartyId'].unique()

    # class 1: not seen in the following month; class 0: seen again.
    # Assigned in one step instead of the chained
    # save_df['class'][mask] = 1, which writes into a temporary object.
    save_df = curr_df.drop(columns=['방문일시'])
    save_df = save_df.assign(**{'class': (~save_df['PartyId'].isin(next_ids)).astype('int64')})

    # Add the join date ('가입일자')
    save_len = len(save_df)
    save_df = save_df.join(members_df['가입일자'], on='PartyId')
    assert len(save_df) == save_len, 'Error! length change'

    # Reward expiry date ('리워드만료일'): join date + 168 days for joins before
    # 2020-11-04, + 84 days from that date on. Missing join dates stay missing.
    join_date = pd.to_datetime(save_df['가입일자'])
    reward_days = pd.Series(
        np.where(join_date < pd.Timestamp(2020, 11, 4), 168, 84),
        index=save_df.index,
    )
    save_df['리워드만료일'] = join_date + pd.to_timedelta(reward_days, unit='D')
    save_df['리워드만료월'] = save_df['리워드만료일'].dt.to_period('M')

    # Reward validity ('리워드유효여부') relative to the target month:
    #   'Y'    expiry month is later than the target month
    #   'Y->N' expiry falls inside the target month
    #   'N'    already expired, or no expiry date known
    # The target month comes from the request rather than from the first row
    # of save_df, so a month without any active user no longer raises.
    reward_m = save_df['리워드만료월']
    save_df['리워드유효여부'] = np.select(
        [(reward_m > curr_m).to_numpy(), (reward_m == curr_m).to_numpy()],
        ['Y', 'Y->N'],
        default='N',
    )

    save_df = save_df.drop(columns=['가입일자', '리워드만료월'])
    save_df_lst.append(save_df)
    

# Label every month of 2020, then January and February of 2021. Each call
# appends one frame to save_df_lst.
for year_prefix, month_numbers in (('2020/', range(1, 13)), ('2021/', range(1, 3))):
    for i in month_numbers:
        generate_save_df(year_prefix, i)

# Stack the monthly frames, renumber the rows from 0 and save.
# NOTE(review): this writes to the working directory, while the segment table
# is loaded from './data/partyid_monthly_class.csv' -- confirm which location
# is intended.
save_df = pd.concat(save_df_lst).reset_index(drop=True)
save_df.to_csv('partyid_monthly_class.csv')