In [45]:
import numpy as np
import pandas as pd

## 고도 및 경사도 데이터 전처리
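- 구간별 고도 및 경사도 비율 컬럼을 모두 유지한 데이터를 생성하여 저장 (`altitude_full_pp.csv`, `slope_full_pp.csv`)
- 구간의 중간값을 비율로 가중하여 법정동별 고도 및 경사도 평균을 산출하고 저장 (`altitude_avg_pp.csv`, `slope_avg_pp.csv`)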

### 법정동별로 고도 및 경사도의 평균 산출

In [46]:
# Load the raw altitude and slope tables (CP949-encoded CSV files) and append
# a float '평균' (average) column initialised to 0.0, which the following cell
# accumulates the weighted average into.
df_alt = pd.read_csv('data/raw/altitude.csv', encoding='cp949').assign(**{'평균': 0.})
df_slo = pd.read_csv('data/raw/slope.csv', encoding='cp949').assign(**{'평균': 0.})

In [47]:
# Average altitude / slope per row: for every range column, take the midpoint
# of the range named in its header and weight it by the column value / 100
# (the values are treated as percentage shares), accumulating into '평균'.
def _range_midpoint(label):
    """Return the mean of the integer bounds in a 'low~high<unit>' header.

    The last character of the label is discarded as the unit suffix
    (e.g. 'm' or '도') before splitting on '~'.
    """
    bounds = [int(part) for part in label[:-1].split("~")]
    return np.mean(bounds)


for frame in (df_alt, df_slo):
    # Skip the two leading columns and the trailing '평균' column itself.
    for range_col in frame.columns.to_list()[2:-1]:
        frame['평균'] += _range_midpoint(range_col) * frame[range_col] / 100

In [48]:
# Remove the per-range columns, keeping only the two leading columns and the
# trailing average column in each table.
alt_drop = list(df_alt.columns[2:-1])
df_alt.drop(alt_drop, axis='columns', inplace=True)

slo_drop = list(df_slo.columns[2:-1])
df_slo.drop(slo_drop, axis='columns', inplace=True)

In [49]:
# Rename the Korean headers to English names. The code/name columns use the
# same names in both tables; the average column gets a table-specific name.
df_alt.rename(columns={
    '법정동코드': 'EMD_CD', '법정동명': 'EMD_NAME', '평균': 'AVG_ALT'
}, inplace=True)

df_slo.rename(columns={
    '법정동코드': 'EMD_CD', '법정동명': 'EMD_NAME', '평균': 'AVG_SLOP'
}, inplace=True)

# Store the district code as text. astype(str) replaces the per-element
# apply(lambda x: str(x)) with the built-in column-wise conversion.
df_alt['EMD_CD'] = df_alt['EMD_CD'].astype(str)
df_slo['EMD_CD'] = df_slo['EMD_CD'].astype(str)

In [51]:
# Write the averaged tables to CSV without the DataFrame index.
for frame, out_path in (
    (df_alt, "data/altitude_avg_pp.csv"),
    (df_slo, "data/slope_avg_pp.csv"),
):
    frame.to_csv(out_path, index=False)

### 모든 고도 및 경사도 산출

In [52]:
# Re-read the raw altitude and slope tables (CP949-encoded CSV files) so that
# all range columns are present again for the full-detail outputs.
df_alt = pd.read_csv(filepath_or_buffer='data/raw/altitude.csv', encoding='cp949')
df_slo = pd.read_csv(filepath_or_buffer='data/raw/slope.csv', encoding='cp949')

In [55]:
# Inspect summary statistics of the numeric columns in the altitude table
df_alt.describe()

Unnamed: 0,법정동코드,0~10m,10~50m,50~100m,100~200m,200~300m,300~400m,400~500m,500~600m,600~700m,700~800m,800~900m,900~1000m,1000~1100m
count,285.0,285.0,285.0,285.0,285.0,285.0,285.0,285.0,285.0,285.0,285.0,285.0,285.0,285.0
mean,44353860.0,16.215333,41.239298,20.00986,14.047614,5.487298,2.036491,0.673404,0.215263,0.054632,0.017649,0.003158,0.0,0.0
std,269328.6,24.407359,26.642313,16.449471,16.43156,9.825722,5.089486,2.18145,0.830613,0.263691,0.104321,0.028207,0.0,0.0
min,44131100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,44150310.0,0.0,20.64,6.16,0.41,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,44210110.0,0.88,40.79,18.56,8.42,0.22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,44710380.0,25.41,59.49,28.99,22.55,6.11,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,44825360.0,100.0,100.0,88.14,92.89,50.82,40.45,17.37,6.84,2.34,1.08,0.37,0.0,0.0


In [57]:
# The summary statistics show the '900~1000m' and '1000~1100m' columns are 0
# for every row (max is 0.0), so they are dropped.
# The identifier columns are then renamed to English.
df_alt.drop(columns=['900~1000m', '1000~1100m'], inplace=True)
df_alt.rename(columns={'법정동코드':'EMD_CD', '법정동명': 'EMD_NAME'}, inplace=True)
# Store the district code as text. astype(str) replaces the per-element
# apply(lambda x: str(x)) with the built-in column-wise conversion.
df_alt['EMD_CD'] = df_alt['EMD_CD'].astype(str)

In [59]:
# Write the preprocessed full altitude table to CSV without the index.
df_alt.to_csv(path_or_buf='data/altitude_full_pp.csv', index=False)

In [60]:
# Inspect summary statistics of the numeric columns in the slope table
df_slo.describe()

Unnamed: 0,법정동코드,0~5도,5~10도,10~15도,15~20도,20~25도,25~30도,30~35도
count,285.0,285.0,285.0,285.0,285.0,285.0,285.0,285.0
mean,44353860.0,45.219439,12.577614,9.818877,9.755579,8.573544,6.418526,7.636421
std,269328.6,21.937671,3.628139,3.451307,4.687753,5.238215,5.065761,8.646844
min,44131100.0,6.77,4.83,1.05,0.31,0.03,0.0,0.0
25%,44150310.0,27.63,9.74,7.41,6.33,3.98,1.97,1.28
50%,44210110.0,43.46,12.36,10.18,10.44,8.42,5.45,3.86
75%,44710380.0,62.9,14.81,12.17,13.27,12.8,9.97,11.46
max,44825360.0,91.13,24.29,20.97,22.08,20.3,18.67,41.41


In [61]:
# Rename the identifier columns to English and replace the Korean degree
# suffix '도' in each slope-range header with 'deg'.
df_slo.rename(columns={
    '법정동코드':'EMD_CD', '법정동명': 'EMD_NAME',
    '0~5도': '0~5deg', '5~10도': '5~10deg', '10~15도': '10~15deg', '15~20도': '15~20deg',
    '20~25도': '20~25deg', '25~30도': '25~30deg', '30~35도': '30~35deg'
}, inplace=True)
# Store the district code as text. astype(str) replaces the per-element
# apply(lambda x: str(x)) with the built-in column-wise conversion.
df_slo['EMD_CD'] = df_slo['EMD_CD'].astype(str)

In [63]:
# Write the preprocessed full slope table to CSV without the index.
df_slo.to_csv(path_or_buf='data/slope_full_pp.csv', index=False)