- 데이터 : 750m 총합.csv + 3개.csv (어린이집명으로 병합)
- 파생변수 : '보육실 면적' / '정원'
- MinMax 스케일링 ([0.28802173, 0.29011101, 0.14582084, 0.27604642])
- x / sum(x) 평균 스케일링([0.10536745, 0.30839818, 0.25513354, 0.33110083])
-> '엔트로피 가중치 산정방법을 활용한 도시지역 홍수취약성 평가' minmax 참고

In [95]:
import os
import warnings
warnings.simplefilter(action='ignore',category=FutureWarning)
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from sklearn.preprocessing import MinMaxScaler

%matplotlib inline
import os
if os.name=='nt':
    font_family="Malgun Gothic"
else:
    font_family='AppleGothic'
    
sns.set(font=font_family,rc={"axes.unicode_minus":False})

In [96]:
df = pd.read_csv('../data/750m 총합.csv', sep = ",",encoding='cp949')

In [97]:
df2 = pd.read_csv('../data/3개.csv', sep = ",",encoding='cp949')

In [98]:
df['면적당 수용인원'] = df['보육실 면적'] /  df['정원'] 

In [99]:
df1 = df[['어린이집명','면적당 수용인원']]

In [100]:
df3 = pd.merge(df1, df2, how = 'inner' , on = '어린이집명')

In [101]:
df_final = df3[['면적당 수용인원','보육교사_1인당_아동수', '충족률', '어린이집개수']]
df_final.head(3)

Unnamed: 0,면적당 수용인원,보육교사_1인당_아동수,충족률,어린이집개수
0,2.789474,2.166667,0.684211,13
1,4.25,2.857143,1.0,12
2,3.052632,1.714286,0.631579,13


## MinMax 스케일링

In [102]:
# MinMax 스케일링
minmax = MinMaxScaler()
scale_minmax = minmax.fit_transform(df_final.iloc[:, :])

scale_df = pd.DataFrame(scale_minmax, columns = ["면적당 수용인원","보육교사_1인당_아동수","충족률","어린이집개수"])

In [103]:
scale_df

Unnamed: 0,면적당 수용인원,보육교사_1인당_아동수,충족률,어린이집개수
0,0.165713,0.206642,0.648120,0.722222
1,0.425307,0.313653,1.000000,0.666667
2,0.212486,0.136531,0.589474,0.722222
3,0.183383,0.232472,0.319048,0.722222
4,0.220906,0.309963,0.832857,0.722222
...,...,...,...,...
311,0.175067,0.269373,0.941353,0.444444
312,0.157051,0.289299,1.000000,0.388889
313,0.158697,0.490775,1.000000,0.611111
314,0.300889,0.361624,0.944286,0.555556


In [104]:
scale_p = scale_df.to_numpy()

In [105]:
entropy = []
for i in range(0,4):
    scale = scale_p[:,i]
    id_scale = np.where(scale)
    weight = -(1/np.log(len(scale))) * np.sum(scale[id_scale]*np.log(scale[id_scale]))
    entropy.append(weight)

In [106]:
# 지표 속성 값의 다양성 산정
div = np.ones(4) - entropy

weights = div / sum(div)

In [107]:
weights 

array([0.28802173, 0.29011101, 0.14582084, 0.27604642])

In [108]:
# weights min-max 가중치
# array([0.28802173, 0.29011101, 0.14582084, 0.27604642])

In [109]:
ID = np.array(["면적당 수용인원","보육교사 1인당 아동수","충족률","어린이집개수"])
data = pd.concat([pd.DataFrame(ID),pd.DataFrame(weights)], axis = 1)
data.columns = ["ID","가중치"]
data

Unnamed: 0,ID,가중치
0,면적당 수용인원,0.288022
1,보육교사 1인당 아동수,0.290111
2,충족률,0.145821
3,어린이집개수,0.276046


## x / sum(x) 평균

In [110]:
# x / sum(x) 평균
scale_df1 = df_final.iloc[:, :] / df_final.iloc[:, :].sum()
scale_df1

Unnamed: 0,면적당 수용인원,보육교사_1인당_아동수,충족률,어린이집개수
0,0.002688,0.002236,0.002869,0.004487
1,0.004096,0.002949,0.004192,0.004142
2,0.002942,0.001769,0.002648,0.004487
3,0.002784,0.002408,0.001630,0.004487
4,0.002987,0.002924,0.003564,0.004487
...,...,...,...,...
311,0.002739,0.002654,0.003972,0.002761
312,0.002641,0.002787,0.004192,0.002416
313,0.002650,0.004128,0.004192,0.003797
314,0.003421,0.003268,0.003983,0.003452


In [111]:
scale_df = pd.DataFrame(scale_df1, columns = ["면적당 수용인원","보육교사_1인당_아동수","충족률","어린이집개수"])

In [112]:
scale_p = scale_df.to_numpy()

In [113]:
entropy = []
for i in range(0,4):
    scale = scale_p[:,i]
    id_scale = np.where(scale)
    weight = -(1/np.log(len(scale))) * np.sum(scale[id_scale]*np.log(scale[id_scale]))
    entropy.append(weight)

In [114]:
# 지표 속성 값의 다양성 산정
div = np.ones(4) - entropy

weights = div / sum(div)

In [115]:
weights 

array([0.10536745, 0.30839818, 0.25513354, 0.33110083])

In [116]:
ID = np.array(["면적당 수용인원","보육교사 1인당 아동수","충족률","어린이집개수"])
data = pd.concat([pd.DataFrame(ID),pd.DataFrame(weights)], axis = 1)
data.columns = ["ID","가중치"]
data

Unnamed: 0,ID,가중치
0,면적당 수용인원,0.105367
1,보육교사 1인당 아동수,0.308398
2,충족률,0.255134
3,어린이집개수,0.331101
