# 동별 자동차 등록 현황 및 인구 밀도에 대한 밀도
- 동별 인구밀도를 구한다.
  - 저장 데이터셋 파일명
    - population_mildo_gu_basis.csv (행정동·법정동 매칭이 불가능하여 구 단위로 저장)
  #
- 자동차 등록 수 / 인구밀도
  - 저장 데이터셋 파일명
    - car_per_area_size_dong.csv (동별 면적 대비 자동차 등록 수)
  #
- 읍면동별 자동차 등록 현황
  - 2021년 기준
  - 파일명: 대구광역시_읍면동별 자동차 등록현황_20211031.csv
  - 공공데이터 포털: https://www.data.go.kr/data/15073712/fileData.do#layer_data_infomation
  #
- 읍면동별 인구 현황
  - 2021년 12월 기준
  - 폴더명: 인구현황_2021_12
  - 공공데이터 포털: https://www.data.go.kr/data/3033304/fileData.do
  #
- 행정구역 면적
  - 2018년 기준
  - 파일명: jiri_dong_area_size.csv
  - 대구 빅데이터 활용 센터 내부에서 1차 전처리함
  - 동별_사고_다발지역_밀도.ipynb 에서 2차 전처리 후 저장됨.

## 저장파일:
- 인구밀도:
  - 구별 인구밀도: population_mildo_gu_basis.csv

In [58]:
import geopandas as gpd
import pandas as pd

In [59]:
# Root directory that the raw input files (CSV / shapefile) are read from.
IMPORT_DATA_ROOT_PATH = './externals'
# Root directory that the derived CSV files are written to; the pre-processed
# area table (jiri_dong_area_size.csv) is also read from here.
EXPORT_DATA_ROOT_PATH = '../../main/data/origin/additionals'

In [60]:
# PATHS
car_path = f'{IMPORT_DATA_ROOT_PATH}/공공_데이터_포털/대구광역시_읍면동별 자동차 등록현황_20211031.csv'
area_size_path = f'{EXPORT_DATA_ROOT_PATH}/jiri_dong_area_size.csv'


def population_path(idx):
  """Return the path of the idx-th population CSV (files are numbered 0 through 7)."""
  return f'{IMPORT_DATA_ROOT_PATH}/공공_데이터_포털/인구현황_2021_12/202112_202112_연령별인구현황_연간_{idx}.csv'

In [61]:
# Vehicle registrations per dong (cp949-encoded source file).
car = pd.read_csv(car_path, encoding='cp949')
# Per-dong area table; its first column is the saved index.
area_size = pd.read_csv(area_size_path, index_col=0)
# Read the eight population files (indices 0-7) and stack them with a single
# concat instead of re-copying the growing frame on every loop iteration.
population = pd.concat(
  [pd.read_csv(population_path(idx), encoding='cp949') for idx in range(8)],
  ignore_index=True,
)
population.head()

Unnamed: 0,행정구역,2021년_계_총인구수,2021년_계_연령구간인구수,2021년_계_0~9세,2021년_계_10~19세,2021년_계_20~29세,2021년_계_30~39세,2021년_계_40~49세,2021년_계_50~59세,2021년_계_60~69세,2021년_계_70~79세,2021년_계_80~89세,2021년_계_90~99세,2021년_계_100세 이상
0,대구광역시 대구광역시 중구 (2711000000),74791,74791,5002,5003,11372,10583,10604,11025,10225,7000,3475,480,22
1,대구광역시 중구 동인동(2711051700),8053,8053,274,442,1406,1091,1020,1238,1213,861,434,67,7
2,대구광역시 중구 삼덕동(2711054500),6587,6587,400,376,1781,1246,889,699,632,347,191,25,1
3,대구광역시 중구 성내1동(2711056500),4851,4851,142,213,1411,775,512,588,576,393,197,42,2
4,대구광역시 중구 성내2동(2711057500),4368,4368,108,187,796,551,464,684,755,537,243,43,0


# 인구수 데이터의 행정구역 전처리
- 구와 동 두 컬럼 생성
- 나이별 컬럼명 수정 0대 10대 20대 ...
- 총인구수 및 구간 인구수 삭제

In [62]:
# Drop the two total columns and shorten the age-band headers:
# '2021년_계_0~9세' -> '0대', ..., '2021년_계_90~99세' -> '90대',
# '2021년_계_100세 이상' -> '100이상'.
population = (
  population
  .drop(columns=['2021년_계_총인구수', '2021년_계_연령구간인구수'])
  .rename(columns={
    **{f'2021년_계_{lo}~{lo + 9}세': f'{lo}대' for lo in range(0, 100, 10)},
    '2021년_계_100세 이상': '100이상',
  })
)
population.head()

Unnamed: 0,행정구역,0대,10대,20대,30대,40대,50대,60대,70대,80대,90대,100이상
0,대구광역시 대구광역시 중구 (2711000000),5002,5003,11372,10583,10604,11025,10225,7000,3475,480,22
1,대구광역시 중구 동인동(2711051700),274,442,1406,1091,1020,1238,1213,861,434,67,7
2,대구광역시 중구 삼덕동(2711054500),400,376,1781,1246,889,699,632,347,191,25,1
3,대구광역시 중구 성내1동(2711056500),142,213,1411,775,512,588,576,393,197,42,2
4,대구광역시 중구 성내2동(2711057500),108,187,796,551,464,684,755,537,243,43,0


In [63]:
# '행정구역' is whitespace-separated, e.g. '대구광역시 중구 동인동(2711051700)':
# token 0 is the city, token 1 becomes '구' and token 2 becomes '동'.
# Tokens 1 and 2 are selected explicitly; dropping labels [0, 3] raised
# KeyError unless some row happened to split into exactly four tokens.
population[['구', '동']] = population['행정구역'].str.split(expand=True)[[1, 2]]
population = population.drop(columns='행정구역')
population.head()

Unnamed: 0,0대,10대,20대,30대,40대,50대,60대,70대,80대,90대,100이상,구,동
0,5002,5003,11372,10583,10604,11025,10225,7000,3475,480,22,대구광역시,중구
1,274,442,1406,1091,1020,1238,1213,861,434,67,7,중구,동인동(2711051700)
2,400,376,1781,1246,889,699,632,347,191,25,1,중구,삼덕동(2711054500)
3,142,213,1411,775,512,588,576,393,197,42,2,중구,성내1동(2711056500)
4,108,187,796,551,464,684,755,537,243,43,0,중구,성내2동(2711057500)


In [64]:
# Discard rows whose '구' value is the city name '대구광역시' and continue with
# an independent copy of what remains.
population = population[~population['구'].eq('대구광역시')].copy()
population.head()

Unnamed: 0,0대,10대,20대,30대,40대,50대,60대,70대,80대,90대,100이상,구,동
1,274,442,1406,1091,1020,1238,1213,861,434,67,7,중구,동인동(2711051700)
2,400,376,1781,1246,889,699,632,347,191,25,1,중구,삼덕동(2711054500)
3,142,213,1411,775,512,588,576,393,197,42,2,중구,성내1동(2711056500)
4,108,187,796,551,464,684,755,537,243,43,0,중구,성내2동(2711057500)
5,561,216,524,920,717,632,641,389,169,20,2,중구,성내3동(2711058500)


In [65]:
# Strip the '(code)' suffix from the dong name and replace '.' with '·'.
# regex=False is passed explicitly: as a regular expression '.' matches any
# character, and the default of `regex` differs between pandas versions.
population['동'] = (
  population['동']
  .str.split('(')
  .str[0]
  .str.replace('.', '·', regex=False)
)
population.head()

Unnamed: 0,0대,10대,20대,30대,40대,50대,60대,70대,80대,90대,100이상,구,동
1,274,442,1406,1091,1020,1238,1213,861,434,67,7,중구,동인동
2,400,376,1781,1246,889,699,632,347,191,25,1,중구,삼덕동
3,142,213,1411,775,512,588,576,393,197,42,2,중구,성내1동
4,108,187,796,551,464,684,755,537,243,43,0,중구,성내2동
5,561,216,524,920,717,632,641,389,169,20,2,중구,성내3동


In [66]:
# Renumber the rows from 0 now that some rows have been removed.
population = population.reset_index(drop=True)

In [67]:
# Preview the first rows to confirm the index now starts at 0.
population.head()

Unnamed: 0,0대,10대,20대,30대,40대,50대,60대,70대,80대,90대,100이상,구,동
0,274,442,1406,1091,1020,1238,1213,861,434,67,7,중구,동인동
1,400,376,1781,1246,889,699,632,347,191,25,1,중구,삼덕동
2,142,213,1411,775,512,588,576,393,197,42,2,중구,성내1동
3,108,187,796,551,464,684,755,537,243,43,0,중구,성내2동
4,561,216,524,920,717,632,641,389,169,20,2,중구,성내3동


# 인구수 string 에서 int 로 변경

In [68]:
# Inspect the column dtypes before converting the count columns to integers.
population.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0대      144 non-null    object
 1   10대     144 non-null    object
 2   20대     144 non-null    object
 3   30대     144 non-null    object
 4   40대     144 non-null    object
 5   50대     144 non-null    object
 6   60대     144 non-null    object
 7   70대     144 non-null    object
 8   80대     144 non-null    object
 9   90대     144 non-null    object
 10  100이상   144 non-null    int64 
 11  구       144 non-null    object
 12  동       144 non-null    object
dtypes: int64(1), object(12)
memory usage: 14.8+ KB


In [69]:
# List the distinct values of '90대'; the output shows ints mixed with numeric strings.
population['90대'].unique()

array([67, 25, 42, 43, 20, 40, 41, 71, 54, 12, '70', '55', '40', '62',
       '29', '86', '50', '32', '68', '85', '30', '88', '63', '81', '91',
       '195', '54', '35', '143', 36, 45, 81, 61, 44, 30, 38, 35, 22, 49,
       87, 2, 24, 91, 126, 66, 118, 65, 53, 70, 72, 39, 57, 103, 60, '31',
       '121', '25', '67', '45', '47', '44', '48', '109', '23', '98', '34',
       '95', '99', '80', '89', '49', '76', '59', '43', '56', '119', '122',
       '93', '61', '123', '149', '104', '106', '71', '110', '145', '66',
       '69', '51', '73', '180', '28', '141', '84', '107', '150', '114',
       '82', 183, 120, 48, 56, 90, 105, 52], dtype=object)

In [70]:
# Convert every count column to integers. Columns are chosen by name (all
# except the '구'/'동' labels) instead of by position, and values are parsed
# through their string form so that ints, numeric strings and strings with
# thousands separators such as '1,234' are all handled the same way.
count_cols = [col for col in population.columns if col not in ('구', '동')]
population[count_cols] = population[count_cols].apply(
  lambda col: pd.to_numeric(col.astype(str).str.replace(',', '', regex=False))
)
population

Unnamed: 0,0대,10대,20대,30대,40대,50대,60대,70대,80대,90대,100이상,구,동
0,274,442,1406,1091,1020,1238,1213,861,434,67,7,중구,동인동
1,400,376,1781,1246,889,699,632,347,191,25,1,중구,삼덕동
2,142,213,1411,775,512,588,576,393,197,42,2,중구,성내1동
3,108,187,796,551,464,684,755,537,243,43,0,중구,성내2동
4,561,216,524,920,717,632,641,389,169,20,2,중구,성내3동
...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,2507,1767,2460,3832,3727,4224,3999,1832,794,90,0,달성군,옥포읍
140,2329,1747,3141,4190,3645,3093,2760,1389,683,71,2,달성군,현풍읍
141,388,484,635,588,898,1313,1690,1047,630,105,5,달성군,가창면
142,91,121,252,262,293,772,878,508,366,52,1,달성군,하빈면


In [71]:
# Re-check the dtypes after the conversion.
population.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0대      144 non-null    int64 
 1   10대     144 non-null    int64 
 2   20대     144 non-null    int64 
 3   30대     144 non-null    int64 
 4   40대     144 non-null    int64 
 5   50대     144 non-null    int64 
 6   60대     144 non-null    int64 
 7   70대     144 non-null    int64 
 8   80대     144 non-null    int64 
 9   90대     144 non-null    int64 
 10  100이상   144 non-null    int64 
 11  구       144 non-null    object
 12  동       144 non-null    object
dtypes: int64(11), object(2)
memory usage: 14.8+ KB


# 인구수 밀도 구하기
- 인구수 / 면적

In [72]:
# Preview the per-dong area table loaded from jiri_dong_area_size.csv.
area_size.head()

Unnamed: 0,dong_nm,geometry,area
0,동인동1가,POLYGON ((1099906.2690911961 1764700.534162934...,181647.159739
1,동인동2가,"POLYGON ((1099361.708782717 1764418.396578179,...",204218.234816
2,동인동3가,POLYGON ((1100221.3470132628 1764840.322008588...,338887.496128
3,동인동4가,POLYGON ((1100597.1235927364 1764210.862818123...,268684.326456
4,삼덕동1가,POLYGON ((1098895.4475602645 1764086.792736463...,114181.093757


In [73]:
# Distinct dong names present in the area table.
area_size.dong_nm.unique()

array(['동인동1가', '동인동2가', '동인동3가', '동인동4가', '삼덕동1가', '삼덕동2가', '삼덕동3가',
       '봉산동', '장관동', '상서동', '수동', '덕산동', '종로1가', '종로2가', '사일동', '동일동',
       '남일동', '전동', '동성로3가', '동문동', '문화동', '공평동', '동성로2가', '태평로1가', '교동',
       '용덕동', '상덕동', '완전동', '도원동', '수창동', '태평로3가', '인교동', '서야동', '서성로1가',
       '시장북로', '하서동', '남성로', '계산동1가', '계산동2가', '동산동', '서문로2가', '서성로2가',
       '포정동', '서문로1가', '서내동', '북성로2가', '대안동', '동성로1가', '태평로2가', '북성로1가',
       '화전동', '향촌동', '북내동', '대신동', '달성동', '남산동', '대봉동', '신암동', '신천동',
       '효목동', '평광동', '봉무동', '불로동', '도동', '지저동', '입석동', '검사동', '방촌동',
       '둔산동', '부동', '신평동', '서호동', '동호동', '신기동', '율하동', '용계동', '율암동',
       '상매동', '매여동', '각산동', '신서동', '동내동', '괴전동', '금강동', '대림동', '사복동',
       '숙천동', '내곡동', '능성동', '진인동', '도학동', '백안동', '미곡동', '용수동', '신무동',
       '미대동', '내동', '신용동', '중대동', '송정동', '덕곡동', '지묘동', '내당동', '비산동',
       '평리동', '상리동', '중리동', '이현동', '원대동1가', '원대동2가', '원대동3가', '이천동',
       '봉덕동', '대명동', '칠성동1가', '칠성동2가', '고성동1가', '고성동2가', '고성동3가', '침산동',
       

In [74]:
# Distinct dong names present in the population table.
population['동'].unique()

array(['동인동', '삼덕동', '성내1동', '성내2동', '성내3동', '대신동', '남산1동', '남산2동',
       '남산3동', '남산4동', '대봉1동', '대봉2동', '신암1동', '신암2동', '신암3동', '신암4동',
       '신암5동', '신천1·2동', '신천3동', '신천4동', '효목1동', '효목2동', '도평동', '불로·봉무동',
       '지저동', '동촌동', '방촌동', '해안동', '안심1동', '안심2동', '안심3동', '안심4동', '혁신동',
       '공산동', '내당1동', '내당2·3동', '내당4동', '비산1동', '비산2·3동', '비산4동', '비산5동',
       '비산6동', '비산7동', '평리1동', '평리2동', '평리3동', '평리4동', '평리5동', '평리6동',
       '상중이동', '원대동', '이천동', '봉덕1동', '봉덕2동', '봉덕3동', '대명1동', '대명2동',
       '대명3동', '대명4동', '대명5동', '대명6동', '대명9동', '대명10동', '대명11동', '고성동',
       '칠성동', '침산1동', '침산2동', '침산3동', '산격1동', '산격2동', '산격3동', '산격4동',
       '대현동', '복현1동', '복현2동', '검단동', '무태조야동', '관문동', '태전1동', '태전2동',
       '구암동', '관음동', '읍내동', '동천동', '노원동', '국우동', '범어1동', '범어2동', '범어3동',
       '범어4동', '만촌1동', '만촌2동', '만촌3동', '수성1가동', '수성2·3가동', '수성4가동',
       '황금1동', '황금2동', '중동', '상동', '파동', '두산동', '지산1동', '지산2동', '범물1동',
       '범물2동', '고산1동', '고산2동', '고산3동', '성당동', '두류1·2동', '두류3동', '감삼동',
    

In [75]:
# Number of distinct dong names in the area table.
len(area_size.dong_nm.unique())

209

In [76]:
# Number of distinct dong names in the population table.
len(population['동'].unique())

144

# 면적 존재하지 않는 동 제거
- 인구수의 경우, 행정동으로 되어있음
- area 의 데이터의 경우, 법정동으로 되어있어서 쉽게 매칭이 안됨
- 따라서 동 단위 매칭은 포기하고, 구 단위 면적으로 인구밀도를 계산한다.

# 인구밀도로 변환
- 인구 / area

In [77]:
# Load the district (시군구) boundary shapefile and keep everything from the
# third column onward.
gungu = gpd.read_file(
  f'{IMPORT_DATA_ROOT_PATH}/대구_빅데이터_활용센터/8_1_jiri_sigungu.shp',
  encoding='cp949',
).iloc[:, 2:]
gungu

Unnamed: 0,sigungu_nm,geometry
0,중구,"POLYGON ((1097857.218 1765177.561, 1097872.134..."
1,동구,"POLYGON ((1107777.596 1780522.263, 1107845.409..."
2,서구,"POLYGON ((1094929.282 1767244.458, 1094939.136..."
3,남구,"POLYGON ((1097010.572 1763145.859, 1097030.717..."
4,북구,"POLYGON ((1100413.419 1776575.012, 1100415.868..."
5,수성구,"POLYGON ((1103905.976 1765250.085, 1103921.812..."
6,달서구,"POLYGON ((1091387.729 1763977.918, 1091424.370..."
7,달성군,"MULTIPOLYGON (((1089667.472 1758560.378, 10896..."


In [78]:
# Compute every district's area with the vectorised GeoSeries accessor instead
# of a per-element Python lambda.
# NOTE(review): the result is in squared units of the layer's CRS — confirm
# the unit (presumably metres) before interpreting the densities.
gungu['area'] = gungu['geometry'].area
gungu

Unnamed: 0,sigungu_nm,geometry,area
0,중구,"POLYGON ((1097857.218 1765177.561, 1097872.134...",7042380.0
1,동구,"POLYGON ((1107777.596 1780522.263, 1107845.409...",181482600.0
2,서구,"POLYGON ((1094929.282 1767244.458, 1094939.136...",17403740.0
3,남구,"POLYGON ((1097010.572 1763145.859, 1097030.717...",17798930.0
4,북구,"POLYGON ((1100413.419 1776575.012, 1100415.868...",93748510.0
5,수성구,"POLYGON ((1103905.976 1765250.085, 1103921.812...",76787670.0
6,달서구,"POLYGON ((1091387.729 1763977.918, 1091424.370...",62516770.0
7,달성군,"MULTIPOLYGON (((1089667.472 1758560.378, 10896...",422804200.0


In [79]:
def calc_mildo_by_area(row: pd.Series, areas=None):
  """Divide one row's age-band population counts by the area of its district.

  Args:
    row: One row of the population table. It must provide the key '구' and
      the age-band keys '0대' ... '90대' and '100이상'.
    areas: Optional table with 'sigungu_nm' and 'area' columns. When omitted,
      the notebook-level ``gungu`` frame is used, so
      ``df.apply(calc_mildo_by_area, axis=1)`` keeps working unchanged.

  Returns:
    A new ``pd.Series`` with the same keys as ``row`` in which every age-band
    value has been divided by the area of the first district whose
    'sigungu_nm' equals ``row['구']``. If no district matches, the row is
    printed and the age-band values become NaN.
  """
  if areas is None:
    areas = gungu
  age_cols = ['0대', '10대', '20대', '30대', '40대', '50대', '60대', '70대', '80대', '90대', '100이상']
  result = dict(row)
  matched = areas.loc[areas['sigungu_nm'] == result['구'], 'area']
  if matched.empty:
    # Report the unmatched row and fall back to NaN; previously the empty
    # selection itself was used as the divisor and corrupted the row.
    print(result)
    area = float('nan')
  else:
    area = matched.iloc[0]
  for col in age_cols:
    result[col] = result[col] / area
  return pd.Series(result)

In [80]:
# Rebuild the table row by row from the Series returned by calc_mildo_by_area.
population = population.apply(calc_mildo_by_area, axis=1)
population

Unnamed: 0,0대,10대,20대,30대,40대,50대,60대,70대,80대,90대,100이상,구,동
0,3.890730e-05,6.276288e-05,1.996484e-04,1.549192e-04,1.448374e-04,0.000176,0.000172,0.000122,6.162690e-05,9.513830e-06,9.939822e-07,중구,동인동
1,5.679898e-05,5.339104e-05,2.528975e-04,1.769288e-04,1.262357e-04,0.000099,0.000090,0.000049,2.712151e-05,3.549936e-06,1.419975e-07,중구,삼덕동
2,2.016364e-05,3.024546e-05,2.003584e-04,1.100480e-04,7.270270e-05,0.000083,0.000082,0.000056,2.797350e-05,5.963893e-06,2.839949e-07,중구,성내1동
3,1.533573e-05,2.655352e-05,1.130300e-04,7.824060e-05,6.588682e-05,0.000097,0.000107,0.000076,3.450538e-05,6.105891e-06,0.000000e+00,중구,성내2동
4,7.966057e-05,3.067145e-05,7.440667e-05,1.306377e-04,1.018122e-04,0.000090,0.000091,0.000055,2.399757e-05,2.839949e-06,2.839949e-07,중구,성내3동
...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,5.929459e-06,4.179240e-06,5.818296e-06,9.063297e-06,8.814955e-06,0.000010,0.000009,0.000004,1.877938e-06,2.128645e-07,0.000000e+00,달성군,옥포읍
140,5.508460e-06,4.131936e-06,7.428971e-06,9.910025e-06,8.621012e-06,0.000007,0.000007,0.000003,1.615405e-06,1.679264e-07,4.730322e-09,달성군,현풍읍
141,9.176825e-07,1.144738e-06,1.501877e-06,1.390715e-06,2.123915e-06,0.000003,0.000004,0.000002,1.490051e-06,2.483419e-07,1.182581e-08,달성군,가창면
142,2.152297e-07,2.861845e-07,5.960206e-07,6.196722e-07,6.929922e-07,0.000002,0.000002,0.000001,8.656489e-07,1.229884e-07,2.365161e-09,달성군,하빈면


# 인구밀도 저장
- 동별 값을 구 단위로 합산하여 구별 인구밀도로 저장

In [81]:
# Discard the dong label, then total the remaining columns within each '구',
# keeping '구' as an ordinary column.
population = population.drop(columns='동').groupby('구', as_index=False).sum()

In [82]:
# Write the district-level table; with the default settings the index is
# written as the first, unnamed column.
population.to_csv(f'{EXPORT_DATA_ROOT_PATH}/population_mildo_gu_basis.csv')

# 인구밀도에 대한 차량 등록수
- 동 맞추기

In [83]:
# Preview the vehicle registration table.
car.head()

Unnamed: 0,구군,읍면동,승용,승합,화물,특수,소계
0,남구,대명동,29753,1244,4953,118,36068
1,남구,봉덕동,14627,418,1792,48,16885
2,남구,이천동,4207,116,504,14,4841
3,달서구,갈산동,1201,138,807,4,2150
4,달서구,감삼동,12204,289,1892,40,14425


In [84]:
# Distinct 읍면동 names present in the vehicle registration table.
car['읍면동'].unique()

array(['대명동', '봉덕동', '이천동', '갈산동', '감삼동', '대곡동', '대천동', '도원동', '두류동',
       '본동', '본리동', '상인동', '성당동', '송현동', '신당동', '용산동', '월성동', '월암동',
       '유천동', '이곡동', '장기동', '장동', '죽전동', '진천동', '파호동', '호림동', '호산동',
       '가창면', '구지면', '논공읍', '다사읍', '옥포읍', '유가읍', '하빈면', '현풍읍', '화원읍',
       '각산동', '검사동', '괴전동', '금강동', '내곡동', '내동', '능성동', '대림동', '덕곡동', '도동',
       '도학동', '동내동', '동호동', '둔산동', '매여동', '미곡동', '미대동', '방촌동', '백안동',
       '봉무동', '부동', '불로동', '사복동', '상매동', '서호동', '송정동', '숙천동', '신기동',
       '신무동', '신서동', '신암동', '신용동', '신천동', '신평동', '용계동', '용수동', '율암동',
       '율하동', '입석동', '중대동', '지묘동', '지저동', '진인동', '평광동', '효목동', '검단동',
       '고성동1가', '고성동2가', '고성동3가', '관음동', '구암동', '국우동', '금호동', '노곡동',
       '노원동1가', '노원동2가', '노원동3가', '대현동', '도남동', '동변동', '동천동', '매천동',
       '복현동', '사수동', '산격동', '서변동', '연경동', '읍내동', '조야동', '칠성동1가', '칠성동2가',
       '침산동', '태전동', '팔달동', '학정동', '내당동', '비산동', '상리동', '원대동1가', '원대동2가',
       '원대동3가', '이현동', '중리동', '평리동', '가천동', '고모동', '노변동', '대흥동', '두산동',
       '만촌

In [85]:
# Number of distinct 읍면동 names in the vehicle registration table.
len(car['읍면동'].unique())

201

In [86]:
def calc_mildo_by_area(row: pd.Series, areas=None):
  """Divide one row's vehicle counts by the area of its dong.

  This definition replaces the earlier function of the same name in this
  notebook once the cell has run.

  Args:
    row: One row of the vehicle registration table. It must provide the key
      '읍면동' and the count keys '승용', '승합', '화물', '특수', '소계'.
    areas: Optional table with 'dong_nm' and 'area' columns. When omitted,
      the notebook-level ``area_size`` frame is used, so
      ``df.apply(calc_mildo_by_area, axis=1)`` keeps working unchanged.

  Returns:
    A new ``pd.Series`` with the same keys as ``row`` in which every count
    has been divided by the area of the first entry whose 'dong_nm' equals
    ``row['읍면동']``. If no entry matches, the row is printed and the counts
    become NaN.
  """
  if areas is None:
    areas = area_size
  count_cols = ['승용', '승합', '화물', '특수', '소계']
  result = dict(row)
  # NOTE(review): the lookup uses the dong name only, so if the same name
  # occurs in more than one district the first entry's area is used — confirm
  # this is acceptable for the data.
  matched = areas.loc[areas['dong_nm'] == result['읍면동'], 'area']
  if matched.empty:
    # Report the unmatched row and fall back to NaN; previously the empty
    # selection itself was used as the divisor and corrupted the row.
    print(result)
    area = float('nan')
  else:
    area = matched.iloc[0]
  for col in count_cols:
    result[col] = result[col] / area
  return pd.Series(result)

In [87]:
# Rebuild the vehicle table row by row from the Series returned by calc_mildo_by_area.
car = car.apply(calc_mildo_by_area, axis=1)
car

Unnamed: 0,구군,읍면동,승용,승합,화물,특수,소계
0,남구,대명동,0.002920,0.000122,0.000486,0.000012,0.003540
1,남구,봉덕동,0.002260,0.000065,0.000277,0.000007,0.002608
2,남구,이천동,0.003937,0.000109,0.000472,0.000013,0.004530
3,달서구,갈산동,0.000579,0.000067,0.000389,0.000002,0.001037
4,달서구,감삼동,0.009020,0.000214,0.001398,0.000030,0.010661
...,...,...,...,...,...,...,...
199,중구,태평로3가,0.000961,0.000055,0.000307,0.000000,0.001323
200,중구,포정동,0.000742,0.000133,0.000685,0.000000,0.001561
201,중구,하서동,0.008736,0.000508,0.000102,0.000000,0.009346
202,중구,향촌동,0.002819,0.000092,0.000462,0.000000,0.003373


In [88]:
# Write the per-dong table; with the default settings the index is written as
# the first, unnamed column.
car.to_csv(f'{EXPORT_DATA_ROOT_PATH}/car_per_area_size_dong.csv')