In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium 
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from glob import glob
import os
# Create a Dask ProgressBar and register it so it is active for the rest of the session
# (register() is presumably global for all Dask computations — confirm against the Dask diagnostics docs).
pbar = ProgressBar()
pbar.register()

from tqdm import tqdm_notebook as tqdm
import geopandas as gpd
from shapely.geometry import Point, Polygon

# Prevent broken Hangul (Korean) glyphs in plots: use a Korean-capable font and
# keep the minus sign renderable with that font.
# NOTE(review): 'Malgun Gothic' must be installed on the machine running the notebook.
plt.rc('font', family = 'Malgun Gothic')
plt.rcParams['axes.unicode_minus'] = False

In [None]:
# Load the 2016 and 2017 KT CSV extracts (paths are relative to the working directory).
# NOTE(review): "(5179)" in the file names presumably refers to the coordinate system (EPSG:5179) — confirm.
yearly_2016 = pd.read_csv('./data/KT 데이터/2016_wlk_통합(5179).csv')
yearly_2017 = pd.read_csv('./data/KT 데이터/2017_wlk_통합(5179).csv')

### 지역경계

In [None]:
# Boundaries of the 11 clustered zones (one geometry per label name).
# NOTE(review): this is an absolute, machine-specific path (E: drive), unlike the
# relative paths used for the CSV files; it will not resolve on another machine.
label_line = gpd.read_file('E:/python/data/신한카드데이터/라벨별_명칭_geo값.shp')

In [None]:
def near_park(df):
    '''Keep only the rows whose grid point lies in the area around the park.

    df must provide numeric `new_X` and `new_y` columns. Rows are kept when the
    point is inside the bounding box below (bounds inclusive) and is not in the
    excluded lower-left corner (the area behind Konkuk University's back gate).
    The returned frame keeps the original index and row order.
    '''
    # Bounding box around the park
    x_lo, x_hi = 962000, 964100
    y_lo, y_hi = 1949300, 1951100
    inside_box = df['new_X'].between(x_lo, x_hi) & df['new_y'].between(y_lo, y_hi)

    # Corner to drop: everything at or below/left of this point
    gun_x, gun_y = 962760, 1949560
    excluded_corner = (df['new_X'] <= gun_x) & (df['new_y'] <= gun_y)

    return df[inside_box & ~excluded_corner]

In [None]:
# Restrict both yearly frames to the area around the park.
n_2016 = near_park(yearly_2016)
n_2017 = near_park(yearly_2017)

### label 추가

In [None]:
def add_label(df):
    '''
    Add a `label` column naming the zone polygon that contains each grid point.

    df must provide `new_X` and `new_y` columns. Polygons are read from the
    module-level `label_line` GeoDataFrame (`names` and `geometry` columns).

    Returns a new frame: `df` left-merged with the per-coordinate labels, so
    every input row is kept and points outside all polygons get a NaN label.
    '''
    # name -> polygon lookup; only `names` and `geometry` are needed
    label_dict = label_line.set_index('names')['geometry'].to_dict()

    # Test each distinct coordinate once instead of once per row
    origin_axis = df[['new_X','new_y']].drop_duplicates()

    label_names = []
    for x, y in zip(origin_axis['new_X'], origin_axis['new_y']):
        # Build the point once per coordinate rather than once per polygon
        check_point = Point(x, y)
        label = np.nan
        for key, polygon in label_dict.items():
            if polygon.contains(check_point):
                # No break on purpose: if polygons overlap, the last match wins
                label = key
        label_names.append(label)
    origin_axis = origin_axis.assign(label = label_names)

    # Drop coordinates that matched no polygon (outside the labelled area);
    # the left merge below still keeps their rows, with a NaN label
    origin_axis = origin_axis[~origin_axis.label.isna()]

    label_df = pd.merge(df, origin_axis, how = 'left', on = ['new_X','new_y'])
    return label_df

In [None]:
# Attach the zone label to every row of the park-area frames.
n_2016_l = add_label(n_2016)
n_2017_l = add_label(n_2017)

### codebook

In [None]:
# Maps coded column values to the Korean display names used in plot titles and file names
code_book = {
    # age bands
    'under_20': "20세이하",
    'under_30': "21~30세",
    'under_40': "31~40세",
    'under_50': "41~50세",
    'under_60': "51~60세",
    'upper_60': "61세이상",
    # six-hour time slots
    't1': "00시00~05시59",
    't2': "06시00~11시59",
    't3': "12시00~17시59",
    't4': "18시00~23시59",
    # weekend flag
    "1": "주말",
    "0": "주중",
}

In [None]:
def add_col(df):
    '''
    Add the derived columns used by the plots, in place, and return the frame.

    Reads `WEEK`, `TIME` and the raw five-year age columns (M0_4 ... M70_,
    F0_4 ... F70_) and writes:
      weekend          "1" when WEEK > 5, "0" when WEEK < 6
      TIME_GAP         't1'..'t4' six-hour slot derived from TIME
      M_*/F_* bands    per-sex totals for each age band
      under_20..upper_60  both sexes combined
    The string codes are the keys of `code_book`.
    '''
    # Weekend flag
    day = df['WEEK']
    df.loc[day > 5, 'weekend'] = "1"
    df.loc[day < 6, 'weekend'] = "0"

    # Six-hour time slots, assigned in the same order t1 -> t4
    hour = df['TIME']
    slot_masks = (
        ('t1', hour < 6),
        ('t2', (hour > 5) & (hour < 12)),
        ('t3', (hour > 11) & (hour < 18)),
        ('t4', hour > 17),
    )
    for slot, mask in slot_masks:
        df.loc[mask, "TIME_GAP"] = slot

    # Age band -> suffixes of the raw five-year columns that make it up
    band_ranges = {
        'under_20': ['0_4', '5_9', '10_14', '15_19'],
        'under_30': ['20_24', '25_29'],
        'under_40': ['30_34', '35_39'],
        'under_50': ['40_44', '45_49'],
        'under_60': ['50_54', '55_59'],
        'upper_60': ['60_64', '65_69', '70_'],
    }

    # Per-sex totals: all male bands first, then all female bands
    for sex in ('M', 'F'):
        for band, ranges in band_ranges.items():
            raw_cols = [f'{sex}{r}' for r in ranges]
            df[f'{sex}_{band}'] = df[raw_cols].sum(axis = 1)

    # Both sexes combined
    for band in band_ranges:
        df[band] = df[f'F_{band}'] + df[f'M_{band}']
    return df

In [None]:
# Add the weekend / time-slot / age-band columns to the labelled frames.
# add_col() writes into the frame it is given, so only n_2016_l / n_2017_l
# (not n_2016 / n_2017) carry these columns afterwards.
n_2016_l = add_col(n_2016_l)
n_2017_l = add_col(n_2017_l)

### 클러스터별 그래프 

In [None]:
# Number of distinct grid points per label; cluset_axis_basic() divides each
# label's total by this count to get a per-point average.
# NOTE(review): counted from the 2017 frame only, yet used for both years.
size_label = n_2017_l[['new_X','new_y','label']].drop_duplicates()
size_label = size_label.label.value_counts().reset_index()
size_label.columns = ['label','label_count']

In [None]:
#Most basic graphs
def _save_cluster_lineplot(sample, value_col, title):
    '''
    Draw one monthly line per label and save the figure under ./output/KT/.

    sample    : frame with `MONTH`, `label` and `value_col` columns
    value_col : column plotted on the y axis
    title     : used both as the plot title and as the output file name
    '''
    plt.figure(figsize= (13,10))
    sns.lineplot(x = 'MONTH',y = value_col, hue = 'label', palette = 'tab20', data = sample)
    plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5),fontsize = 15)
    plt.title(title,fontsize= 15)
    plt.yticks(fontsize = 15)
    plt.xticks(fontsize = 15)
    plt.tight_layout()
    plt.savefig(f'./output/KT/{title}')
    plt.close()


def cluset_axis_basic(df):
    '''
    Plot monthly totals per label and export each label's monthly share.

    df must provide `YYYYMM`, `label` and `SUM` columns; the year used in
    titles and file names is taken from the first row's YYYYMM. Per-label point
    counts come from the module-level `size_label` frame.

    Writes to ./output/KT/:
      {year}_주변상권의 전체 유동인구   total SUM per label and month
      {year}_주변상권의 평균 유동인구   SUM divided by the label's point count
    and to ./output/KT/비율/:
      {year}_SUM_rate.csv, {year}_C_PER_SUM_rate.csv   percentage of each
      month's total per label (cp949 encoded).
    Returns None.
    '''
    # Create the output folders first: the figures are saved before the CSVs,
    # so ./output/KT/ must already exist when savefig runs.
    os.makedirs('./output/KT/비율/', exist_ok = True)

    # Positional access; a plain [0] is a label lookup and raises KeyError
    # when the frame's index does not contain 0.
    year = str(df['YYYYMM'].iloc[0])[:4]

    sample = df.groupby(['YYYYMM','label'])['SUM'].sum().reset_index()
    sample['MONTH'] = sample['YYYYMM'].astype(str).apply(lambda x: int(x[4:6]))

    # Inner merge on `label`: labels missing from size_label are dropped
    sample = sample.merge(size_label)
    sample['C_PER_SUM'] = sample['SUM']/sample['label_count']

    # Total per label
    _save_cluster_lineplot(sample, 'SUM', f'{year}_주변상권의 전체 유동인구')

    # Average per label (total divided by the number of points in the label)
    _save_cluster_lineplot(sample, 'C_PER_SUM', f'{year}_주변상권의 평균 유동인구')

    # Share of each label per month, as a percentage of the row total ('All')
    for c in ['SUM','C_PER_SUM']:
        pivot_t = sample.pivot_table(index= 'YYYYMM',columns = 'label', values = c,
                                     aggfunc = 'sum',margins= True)
        result = pivot_t.apply(lambda x : (x/pivot_t['All']*100),axis=0)
        result.to_csv(f'./output/KT/비율/{year}_{c}_rate.csv',encoding = 'cp949')

In [None]:
# Produce the line charts and rate CSVs for each year from the labelled frames.
cluset_axis_basic(n_2016_l)
cluset_axis_basic(n_2017_l)

### 주중주말 월평균 유동인구 

In [None]:
def col_monthly_n(df,col):
    '''
    Save one monthly-average map per distinct value of `col`.

    df  : frame with `new_X`, `new_y`, `YYYYMM`, `SUM` and `col` columns
    col : grouping column; its values must be keys of `code_book`

    SUM is totalled per grid point and `col` value and divided by 12
    (assumes the frame covers twelve months). Each map draws the `label_line`
    outlines with the points coloured by that average and is saved under
    ./output/KT/{col}/. Returns the aggregated frame.
    '''
    # Output folder
    os.makedirs(f'./output/KT/{col}',exist_ok=True)
    year = str(df['YYYYMM'].iloc[0])[:4]

    # Aggregate per point and group value
    keys = ['new_X','new_y',col]
    sample = df[keys + ['SUM']].groupby(keys)['SUM'].sum().div(12).reset_index()

    # sort=False keeps the groups in order of first appearance
    for c, sample_g in sample.groupby(col, sort = False):
        name = code_book[c]
        ax = label_line.plot(color = 'w', edgecolor = 'black',figsize= (15,15))
        m = ax.scatter(x = 'new_X', y= 'new_y', s = 150,
                       marker= 's',alpha = 0.8,
                       c= 'SUM',cmap = "PuBu", data= sample_g)
        plt.colorbar(m)
        ax.set_title(f'{name}_월평균 유동인구',fontsize= 15)
        ax.axis('off')
        plt.savefig(f'./output/KT/{col}/{year}_{name}_월평균 유동인구')
        plt.close()
    return sample

In [None]:
# Use the labelled frames: `weekend` is created by add_col(), which was applied
# to n_2016_l / n_2017_l only, so n_2016 / n_2017 do not have that column.
col_monthly_n(n_2016_l,"weekend")
col_monthly_n(n_2017_l,"weekend")

In [None]:
# Use the labelled frames: `TIME_GAP` is created by add_col(), which was applied
# to n_2016_l / n_2017_l only, so n_2016 / n_2017 do not have that column.
col_monthly_n(n_2016_l,"TIME_GAP")
col_monthly_n(n_2017_l,"TIME_GAP")

In [None]:
def col_monthly_add(df,col,add_col):
    '''
    Save one monthly-average map per (`col` value, `add_col` value) pair.

    df      : frame with `new_X`, `new_y`, `YYYYMM`, `SUM`, `col` and `add_col`
    col     : first grouping column (string); values must be keys of `code_book`
    add_col : second grouping column (string); values must be keys of `code_book`.
              Note that this parameter shadows the module-level add_col()
              function inside this body.

    SUM is totalled per grid point and value pair and divided by 12 (assumes the
    frame covers twelve months). Returns None.
    '''
    # Output folder
    # NOTE(review): this folder is created but the figures below are saved one
    # level up, in ./output/KT/ — confirm which location is intended.
    os.makedirs(f'./output/KT/{col}',exist_ok=True)
    year = str(df['YYYYMM'].iloc[0])[:4]

    # Aggregate per point and value pair
    group_keys = ['new_X','new_y',col,add_col]
    sample = (df[group_keys + ['SUM']].groupby(group_keys)['SUM'].sum()/12).reset_index()

    for c in sample[col].unique():
        for ac in sample[add_col].unique():
            # Apply both conditions together. Filtering `sample` twice in a row
            # would discard the `col` condition and mix every `col` value into
            # each map.
            sample_g = sample[(sample[col] == c) & (sample[add_col] == ac)]
            label_line.plot(color = 'w', edgecolor = 'black',figsize= (15,15))
            m = plt.scatter(x = 'new_X', y= 'new_y', s = 150,
                            marker= 's',alpha = 0.8,
                            c= sample_g['SUM'],cmap = "PuBu", data= sample_g)
            plt.colorbar(m)
            plt.title(f'{code_book[c]}_{code_book[ac]}_월평균 유동인구',fontsize= 15)
            plt.axis('off')
            plt.savefig(f'./output/KT/{year}_{code_book[c]}_{code_book[ac]}_월평균 유동인구')
            plt.close()

In [None]:
# Use the labelled frames: `weekend` and `TIME_GAP` are created by add_col(),
# which was applied to n_2016_l / n_2017_l only.
col_monthly_add(n_2016_l,"weekend","TIME_GAP")
col_monthly_add(n_2017_l,"weekend","TIME_GAP")

### 연령대별

In [None]:
#Plain version
def ageband_monthly_n(df):
    '''
    Save one monthly-average map per age band, with no further breakdown.

    df must provide `new_X`, `new_y`, `weekend`, `TIME_GAP`, `YYYYMM` and the
    six age-band totals (under_20 ... upper_60). Each band is totalled per grid
    point and divided by 12 (assumes the frame covers twelve months). Figures
    are saved under ./output/KT/. Returns None.
    '''
    # Output folder
    os.makedirs('./output/KT/',exist_ok=True)
    year = str(df['YYYYMM'].iloc[0])[:4]

    id_cols = ['new_X','new_y','weekend', 'TIME_GAP','YYYYMM']
    band_cols = ['under_20','under_30', 'under_40', 'under_50', 'under_60','upper_60']

    # Long format: one row per original row and age band
    n_df_m = df[id_cols + band_cols].melt(id_vars = id_cols, var_name = 'AGE_GAP')

    # Aggregate per point and age band
    sample = n_df_m.groupby(['new_X','new_y','AGE_GAP'])['value'].sum().div(12).reset_index()

    # sort=False keeps the bands in order of first appearance
    for gap, sample_g in sample.groupby('AGE_GAP', sort = False):
        band_name = code_book[gap]
        ax = label_line.plot(color = 'w', edgecolor = 'black',figsize= (15,15))
        m = ax.scatter(x = 'new_X', y= 'new_y', s = 150,
                       marker= 's',alpha = 0.8,
                       c = 'value',cmap = "PuBu", data= sample_g)
        plt.colorbar(m)
        ax.set_title(f'{band_name}__월평균 유동인구',fontsize= 15)
        ax.axis('off')
        plt.savefig(f'./output/KT/{year}_{band_name}_월평균 유동인구')
        plt.close()
    

In [None]:
# Use the labelled frames: the age-band totals, `weekend` and `TIME_GAP` are
# created by add_col(), which was applied to n_2016_l / n_2017_l only.
ageband_monthly_n(n_2016_l)
ageband_monthly_n(n_2017_l)

In [None]:
#Combined version
def ageband_monthly(df, col):
    '''
    Save one monthly-average map per (age band, `col` value) pair.

    df  : frame with `new_X`, `new_y`, `weekend`, `TIME_GAP`, `YYYYMM` and the
          raw five-year age columns (M0_4 ... M70_, F0_4 ... F70_)
    col : column to break each age band down by; it must be one of the id
          columns kept below and its values must be keys of `code_book`
          (the notebook passes 'weekend' and 'TIME_GAP')

    Each band is totalled per grid point and `col` value and divided by 12
    (assumes the frame covers twelve months). Figures are saved under
    ./output/KT/{col}/. The input frame is left unmodified: the age-band totals
    are computed on a local frame. Returns None.
    '''
    # Output folder
    os.makedirs(f'./output/KT/{col}',exist_ok=True)
    year = str(df['YYYYMM'].iloc[0])[:4]

    # Age band -> suffixes of the raw five-year columns that make it up
    band_ranges = {
        'under_20': ['0_4', '5_9', '10_14', '15_19'],
        'under_30': ['20_24', '25_29'],
        'under_40': ['30_34', '35_39'],
        'under_50': ['40_44', '45_49'],
        'under_60': ['50_54', '55_59'],
        'upper_60': ['60_64', '65_69', '70_'],
    }

    # Build the band totals on a copy of the id columns so the caller's frame
    # does not silently gain columns
    id_cols = ['new_X','new_y','weekend', 'TIME_GAP','YYYYMM']
    n_df = df[id_cols].copy()
    for band, ranges in band_ranges.items():
        female = df[[f'F{r}' for r in ranges]].sum(axis = 1)
        male = df[[f'M{r}' for r in ranges]].sum(axis = 1)
        n_df[band] = female + male

    # Long format: one row per original row and age band
    n_df_m = n_df.melt(id_vars = id_cols, var_name = 'AGE_GAP')

    # Aggregate per point, age band and `col` value
    group_col = ['new_X','new_y','AGE_GAP',col]
    sample = (n_df_m.groupby(group_col)['value'].sum()/12).reset_index()

    for gap in sample.AGE_GAP.unique():
        for c in sample[col].unique():
            sample_g = sample[(sample['AGE_GAP'] == gap) & (sample[col] == c)]
            label_line.plot(color = 'w', edgecolor = 'black',figsize= (15,15))
            m = plt.scatter(x = 'new_X', y= 'new_y', s = 150,
                            marker= 's',alpha = 0.8,
                            c = 'value',cmap = "PuBu", data= sample_g)
            plt.colorbar(m)
            plt.title(f'{code_book[gap]}_{code_book[c]}_월평균 유동인구',fontsize= 15)
            plt.axis('off')
            plt.savefig(f'./output/KT/{col}/{year}_{code_book[gap]}_{code_book[c]}_월평균 유동인구')
            plt.close()
    

In [None]:
# Use the labelled frames: `weekend` and `TIME_GAP` are created by add_col(),
# which was applied to n_2016_l / n_2017_l only, so n_2016 / n_2017 lack them.
ageband_monthly(n_2016_l,'weekend')
ageband_monthly(n_2017_l,'weekend')
ageband_monthly(n_2016_l,'TIME_GAP')
ageband_monthly(n_2017_l,'TIME_GAP')