In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import matplotlib.font_manager as fm
font_name = fm.FontProperties(fname="C:/Windows/Fonts/malgun.TTF").get_name()
plt.rc("font", family=font_name)

import matplotlib as mlp
mlp.rcParams["axes.unicode_minus"] = False

import urllib.request as req
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
import urllib.parse
from bs4 import BeautifulSoup
import requests
import pprint
import json

from pandas.io.json import json_normalize

# 1. 구 별 인구밀집도

## 1) Data Prepare

In [2]:
# 서울시 법정동별 인구현황 데이터 불러오기
pop_seoul = pd.read_excel("data/서울시 법정동별 인구현황.xls")
pop_seoul.head()

Unnamed: 0,기간,자치구,동,세대,인구,인구.1,인구.2,인구.3,인구.4,인구.5,인구.6,인구.7,인구.8,세대당인구,65세이상고령자
0,기간,자치구,동,세대,합계,합계,합계,한국인,한국인,한국인,외국인,외국인,외국인,세대당인구,65세이상고령자
1,기간,자치구,동,세대,계,남자,여자,계,남자,여자,계,남자,여자,세대당인구,65세이상고령자
2,2022.1/4,합계,합계,4451444,9732617,4716704,5015913,9506778,4613910,4892868,225839,102794,123045,2.14,1628980
3,2022.1/4,종로구,소계,73766,153684,74018,79666,144275,69911,74364,9409,4107,5302,1.96,28064
4,2022.1/4,종로구,사직동,4679,9630,4335,5295,9370,4189,5181,260,146,114,2,1787


## 2) EDA & Preprocessing

In [3]:
# 정제1. 필요없는 행,열 삭제
pop_seoul = pop_seoul.drop([0,1, 2])
pop_seoul = pop_seoul.drop(["기간", "세대", "인구.3", "인구.4", "인구.5", "인구.6", "인구.7", "인구.8", "세대당인구", "65세이상고령자"],axis=1)
pop_seoul.head()

Unnamed: 0,자치구,동,인구,인구.1,인구.2
3,종로구,소계,153684,74018,79666
4,종로구,사직동,9630,4335,5295
5,종로구,삼청동,2741,1312,1429
6,종로구,부암동,9702,4654,5048
7,종로구,평창동,18272,8518,9754


In [4]:
# 구별 데이터 중복이나 결측치 확인

print(len(pop_seoul["자치구"].unique()))
pop_seoul[["자치구"]].count()
pop_seoul[["자치구"]].value_counts()

pop_seoul[pop_seoul["자치구"].isnull()]

25


Unnamed: 0,자치구,동,인구,인구.1,인구.2


In [5]:
pop_seoul_gu = pop_seoul[pop_seoul['동'] == '소계']
pop_seoul_gu = pop_seoul_gu.drop(["동"], axis=1)
pop_seoul_gu.set_index("자치구")


Unnamed: 0_level_0,인구,인구.1,인구.2
자치구,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
종로구,153684,74018,79666
중구,131450,63869,67581
용산구,236518,114571,121947
성동구,291609,141662,149947
광진구,353110,169525,183585
동대문구,352592,172318,180274
중랑구,393600,193623,199977
성북구,442680,212277,230403
강북구,300824,145934,154890
도봉구,318100,154440,163660


In [6]:
pop_seoul_gu.to_csv("data/pop_seoul_gu.csv", encoding="utf-8")

In [7]:
pop_seoul_del = pop_seoul[pop_seoul['동'] == '소계'].index
pop_seoul_del

Int64Index([  3,  21,  37,  54,  72,  88, 103, 120, 141, 155, 170, 190, 207,
            222, 239, 258, 279, 296, 307, 326, 342, 364, 383, 406, 434],
           dtype='int64')

In [8]:
pop_seoul = pop_seoul.drop(pop_seoul_del)
pop_seoul

Unnamed: 0,자치구,동,인구,인구.1,인구.2
4,종로구,사직동,9630,4335,5295
5,종로구,삼청동,2741,1312,1429
6,종로구,부암동,9702,4654,5048
7,종로구,평창동,18272,8518,9754
8,종로구,무악동,8245,3823,4422
...,...,...,...,...,...
449,강동구,천호2동,33671,16467,17204
450,강동구,길동,45863,22374,23489
451,강동구,강일동,32947,15789,17158
452,강동구,상일1동,38832,19137,19695


In [9]:
pop_seoul_dong=pop_seoul.set_index(['자치구','동'])

In [10]:
pop_seoul_dong.columns= ["인구수", "여자", "남자"]

In [11]:
pop_seoul_dong

Unnamed: 0_level_0,Unnamed: 1_level_0,인구수,여자,남자
자치구,동,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
종로구,사직동,9630,4335,5295
종로구,삼청동,2741,1312,1429
종로구,부암동,9702,4654,5048
종로구,평창동,18272,8518,9754
종로구,무악동,8245,3823,4422
...,...,...,...,...
강동구,천호2동,33671,16467,17204
강동구,길동,45863,22374,23489
강동구,강일동,32947,15789,17158
강동구,상일1동,38832,19137,19695


In [12]:
pop_seoul_dong.to_csv("data/pop_seoul_dong.csv", encoding="utf-8")

## 3) 시각화

In [173]:
cols=["인구수", "여자", "남자"]

In [188]:
# ##### 히트맵

# plt.figure(figsize=(10,10))
# sns.heatmap(pop_seoul_gu["인구", "인구.1", "인구.2"], cmap="RdPu",
#            annot=True, fmt="f", linewidths=0.5)

# plt.title("구별 인구")

KeyError: ('인구', '인구.1', '인구.2')

<Figure size 720x720 with 0 Axes>

In [195]:
# import folium
# import json

In [199]:
# pop_seoul_gu
# pop_seoul_gu.set_index("자치구")

Unnamed: 0_level_0,인구,인구.1,인구.2
자치구,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
종로구,153684,74018,79666
중구,131450,63869,67581
용산구,236518,114571,121947
성동구,291609,141662,149947
광진구,353110,169525,183585
동대문구,352592,172318,180274
중랑구,393600,193623,199977
성북구,442680,212277,230403
강북구,300824,145934,154890
도봉구,318100,154440,163660


In [191]:
# 서울지역 지도 불러오기
map = folium.Map(location=[37.497955, 127.027619], zoom_start=10)
map

In [192]:
geo_path ="data/skorea_municipalities_geo_simple_seoul.json"
geo_str = json.load(open(geo_path, encoding="utf-8"))

In [194]:
# map = folium.Map(location=[37.497955, 127.027619], zoom_start=10)
# map.choropleth(geo_data = geo_str, data = pop_seoul_gu["인구"], 
#               key_on="feature.id", 
#                 columns=[pop_seoul_gu.index, pop_seoul_gu["인구"]],
#               fill_color="PuRd")
# map

