In [1]:
import pandas as pd
import numpy as np
import scipy 

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import os
import gc
import pickle

import warnings
warnings.filterwarnings('ignore')

from tqdm import tqdm,trange

plt.style.use('ggplot')

mpl.rcParams['axes.unicode_minus'] = False
plt.rcParams["font.family"] = "Malgun Gothic"

os.chdir("../../")
root_path = os.getcwd()

raw_file_path = os.path.join(root_path, "Bigcon2020_raw_file")
csv_file_path = os.path.join(root_path, "BigCon_2020/csv_files")
pickle_file_path = os.path.join(root_path, "BigCon_2020/pickle_files")
image_path = 'C:/Users/rears/OneDrive/바탕 화면/dacon_covid19/'

  import pandas.util.testing as tm


## Prepare Files

* 필요파일
    * 인접 행정동, 행정동간 거리 파일
    * 행정동별 생활인구 파일 
    * 행정동별 교통편의성 파일

In [2]:
distance_file=[pd.read_csv(os.path.join(csv_file_path,file), index_col=[0]) for file in os.listdir(csv_file_path) if file.startswith('distance')]

In [3]:
real_dist = distance_file[0]
adjacent_dist = distance_file[1]

In [4]:
living_population=pd.read_csv(os.path.join(csv_file_path,'CTGG_HDNG_FLOW.csv'))

In [28]:
convenience_index = pd.read_csv(os.path.join(csv_file_path,'conv_index.csv'))

In [30]:
convenience_index.loc[:,'conv']=np.sqrt(convenience_index.conv)

In [35]:
living_population.loc[:,'value'] = np.sqrt(living_population.value)

In [52]:
convenience_index

Unnamed: 0,HDONG_NM,subway_station_num,bus_station_num,bus_route_num,subway_route_num,AREA,conv
0,신당동,5.0,21,23,2.0,544408.19,3.401794
1,명동,3.0,49,48,3.0,1053644.29,3.201584
2,을지로동,3.0,32,20,2.0,599091.52,3.164674
3,황학동,2.0,9,12,1.0,328678.23,3.120251
4,신당5동,2.0,11,6,1.0,321347.03,2.844458
5,청구동,2.0,5,4,1.0,336949.37,2.668845
6,회현동,1.0,42,39,1.0,847351.57,2.638725
7,중림동,2.0,23,12,1.0,482329.92,2.575745
8,소공동,2.0,28,36,1.0,893419.62,2.503608
9,약수동,2.0,8,4,1.0,490077.51,2.212958


## MCLP MODELING

In [44]:
living_population.max()

CTGG_NM             중구
HDNG_NM            회현동
Covid_class          E
dayofweek            1
variable           60U
value          216.218
dtype: object

In [129]:
HDONGS=adjacent_dist.columns.tolist()
adjacent_dong_dict = {}
    
for i,dong in tqdm(enumerate(HDONGS), desc='Finding Adjacent Dongs'):
    adjacent_dong_dict[dong] = list(adjacent_dist.loc[adjacent_dist[dong]==0,HDONGS[i]].drop(dong).index)

Finding Adjacent Dongs: 34it [00:00, 1407.47it/s]


In [37]:
pop_dict=dict(living_population.loc[(living_population.dayofweek==0) & (living_population.Covid_class=='A') & (living_population.variable=='3059')].set_index('HDNG_NM')['value'])
conv_dict=dict(convenience_index.set_index('HDONG_NM')['conv'])

In [11]:
import copy

In [38]:
def MCLP(dist_file, population_file, convenience_file, distance='adjacent', N=5, covid_class='A', Ages='3059'):
    HDONGS=dist_file.columns.tolist()
    #all_cases = list(combinations(HDONGS,N))
    
    # 평일 생활인구, 주말 생활인구
    weekday_living_pop_dict = dict(population_file.loc[(population_file.dayofweek==0) & (population_file.Covid_class==covid_class) & (population_file.variable==Ages)].set_index('HDNG_NM')['value'])
    weekend_living_pop_dict = dict(population_file.loc[(population_file.dayofweek==1) & (population_file.Covid_class==covid_class) & (population_file.variable==Ages)].set_index('HDNG_NM')['value'])
    
    conv_dict = dict(convenience_file.set_index(['HDONG_NM'])['conv'])
    
    """
    # case안의 행정동끼리는 인접하지 않도록 filtering
    filtered_case = []
    
    for case in tqdm(all_cases, desc='Filtering Cases'):
        if sum(sum((dist_file.loc[case,case]==0).values)) == N:
            filtered_case.append(case)
    """
    
    #각 행정동별 인접 행정동
    adjacent_dong_dict = {}
    
    for dong in tqdm(HDONGS, desc='Finding Adjacent Dongs'):
        adjacent_dong_dict[dong] = list(dist_file.loc[dist_file[dong]==0, dong].drop(dong).index)
    
    weekday_HDONG = []
    weekend_HDONG = []
    
    adjacent_dong_dict_copy = copy.deepcopy(adjacent_dong_dict)
    HDONGS_copy = copy.deepcopy(HDONGS)
    
    for i in trange(N, desc='Getting HDONG [Weekday]'):
        if i != 0:
            adjacent_dong_dict_copy = {}
            for dong in HDONGS_copy:
                tmp_df = dist_file.loc[HDONGS_copy,HDONGS_copy]
                adjacent_dong_dict_copy[dong] = list(tmp_df.loc[tmp_df[dong]==0, dong].drop(dong).index)
                
                
        hdong = HDONGS_copy[np.argmax([weekday_living_pop_dict.get(dong) + (conv_dict.get(dong)*(sum([weekday_living_pop_dict.get(adjacent_dong) for adjacent_dong in adjacent_dongs])))
                           for dong, adjacent_dongs in adjacent_dong_dict_copy.items()])]
        
        weekday_HDONG.append(hdong)

        drop_dongs = adjacent_dong_dict_copy.get(hdong)
        drop_dongs.append(hdong)
        #print(list(adjacent_dong_dict_copy.keys()))
        
        for dong in drop_dongs:
            adjacent_dong_dict_copy.pop(dong)
            HDONGS_copy.remove(dong)
        
    adjacent_dong_dict_copy = copy.deepcopy(adjacent_dong_dict)
    HDONGS_copy = copy.deepcopy(HDONGS)
    
    for i in trange(N, desc='Getting HDONG [Weekend]'):
        if i != 0:
            adjacent_dong_dict_copy = {}
            for dong in HDONGS_copy:
                tmp_df = dist_file.loc[HDONGS_copy,HDONGS_copy]
                adjacent_dong_dict_copy[dong] = list(tmp_df.loc[tmp_df[dong]==0, dong].drop(dong).index)
                
                
        hdong = HDONGS_copy[np.argmax([weekend_living_pop_dict.get(dong) + (conv_dict.get(dong)*(sum([weekend_living_pop_dict.get(adjacent_dong) for adjacent_dong in adjacent_dongs])))
                           for dong, adjacent_dongs in adjacent_dong_dict_copy.items()])]
        
        weekend_HDONG.append(hdong)

        drop_dongs = adjacent_dong_dict_copy.get(hdong)
        drop_dongs.append(hdong)
        #print(list(adjacent_dong_dict_copy.keys()))
        
        for dong in drop_dongs:
            adjacent_dong_dict_copy.pop(dong)
            HDONGS_copy.remove(dong)
            
    return weekday_HDONG, weekend_HDONG

In [39]:
MCLP(adjacent_dist, living_population, convenience_index,Ages='60U', N=4)

Finding Adjacent Dongs: 100%|████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 1045.08it/s]
Getting HDONG [Weekday]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 24.82it/s]
Getting HDONG [Weekend]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 21.59it/s]


(['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동'])

In [40]:
MCLP(adjacent_dist, living_population, convenience_index, N=4)

Finding Adjacent Dongs: 100%|█████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 895.86it/s]
Getting HDONG [Weekday]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 27.93it/s]
Getting HDONG [Weekend]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 36.59it/s]


(['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동'])

In [41]:
MCLP(adjacent_dist, living_population, convenience_index,Ages='30L', N=4)

Finding Adjacent Dongs: 100%|█████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 653.41it/s]
Getting HDONG [Weekday]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 26.70it/s]
Getting HDONG [Weekend]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 29.41it/s]


(['신당동', '명동', '월계3동', '하계1동'], ['신당동', '월계3동', '명동', '하계1동'])

In [49]:
result = {}
for class_ in living_population.Covid_class.unique():
    for var in living_population.variable.unique():
        result[(class_,var)] = MCLP(adjacent_dist, living_population, convenience_index, Ages=var, covid_class=class_, N=4)

Finding Adjacent Dongs: 100%|████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 1623.31it/s]
Getting HDONG [Weekday]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 50.16it/s]
Getting HDONG [Weekend]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 50.75it/s]
Finding Adjacent Dongs: 100%|████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 1707.64it/s]
Getting HDONG [Weekday]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 49.49it/s]
Getting HDONG [Weekend]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 47.73it/s]
Finding Adjacent Dongs: 100%|████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 1704.45it/s]
Getting HDONG [Weekday]: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 50.13it/s]
Getting HDONG [Weekend]: 100%|██████████

In [50]:
result

{('A', '3059'): (['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동']),
 ('A', '30L'): (['신당동', '명동', '월계3동', '하계1동'], ['신당동', '월계3동', '명동', '하계1동']),
 ('A', '60U'): (['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동']),
 ('B', '3059'): (['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동']),
 ('B', '30L'): (['신당동', '월계3동', '명동', '하계1동'], ['신당동', '월계3동', '명동', '하계1동']),
 ('B', '60U'): (['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동']),
 ('C', '3059'): (['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동']),
 ('C', '30L'): (['신당동', '월계3동', '명동', '하계1동'], ['신당동', '월계3동', '명동', '하계1동']),
 ('C', '60U'): (['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동']),
 ('D', '3059'): (['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동']),
 ('D', '30L'): (['신당동', '월계3동', '명동', '하계1동'], ['신당동', '월계3동', '명동', '하계1동']),
 ('D', '60U'): (['신당동', '명동', '월계3동', '상계2동'], ['신당동', '월계3동', '명동', '상계2동']),
 ('E', '3059'): (['신당동', '명동', '월계3동', '상계2동'], 

In [48]:
living_population.variable.unique()

array(['3059', '30L', '60U'], dtype=object)

In [16]:
conveniece_index

Unnamed: 0,HDONG_NM,subway_station_num,bus_station_num,bus_route_num,subway_route_num,AREA,conv
0,신당동,5.0,21,23,2.0,544408.19,11.572199
1,명동,3.0,49,48,3.0,1053644.29,10.250139
2,을지로동,3.0,32,20,2.0,599091.52,10.015164
3,황학동,2.0,9,12,1.0,328678.23,9.735966
4,신당5동,2.0,11,6,1.0,321347.03,8.090941
5,청구동,2.0,5,4,1.0,336949.37,7.122732
6,회현동,1.0,42,39,1.0,847351.57,6.962871
7,중림동,2.0,23,12,1.0,482329.92,6.634463
8,소공동,2.0,28,36,1.0,893419.62,6.268051
9,약수동,2.0,8,4,1.0,490077.51,4.897185
