## 서울시 코로나 확진자 데이터
### 모듈 import

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

import warnings

# Ignore and hide every warning message for the rest of the session.
# NOTE(review): this also silences deprecation warnings raised by the
# libraries used below — confirm that hiding them is intended.
warnings.filterwarnings(action='ignore')

In [2]:
# Load the raw records from seoul.csv (path is relative to the working directory).
df = pd.read_csv('./seoul.csv')

In [3]:
# Print each column's name, non-null count and dtype to spot empty columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101411 entries, 0 to 101410
Data columns (total 14 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   연번      101411 non-null  int64  
 1   확진일     101411 non-null  object 
 2   환자번호    0 non-null       float64
 3   국적      0 non-null       float64
 4   환자정보    0 non-null       float64
 5   지역      101411 non-null  object 
 6   여행력     1729 non-null    object 
 7   접촉력     101411 non-null  object 
 8   조치사항    0 non-null       float64
 9   상태      101411 non-null  object 
 10  이동경로    10000 non-null   object 
 11  등록일     101411 non-null  object 
 12  수정일     101411 non-null  object 
 13  노출여부    101411 non-null  object 
dtypes: float64(4), int64(1), object(9)
memory usage: 10.8+ MB


### 빈 컬럼 제거
- 시간-지역별 확진자 수를 파악하기 위함이므로 결측치가 포함된 컬럼은 삭제

In [4]:
# Discard, in place, the six columns that are not needed for the
# time/district case counts.
df.drop(
    columns=['환자번호', '국적', '환자정보', '여행력', '조치사항', '이동경로'],
    inplace=True,
)

In [5]:
# Count the missing values left in each remaining column.
df.isnull().sum()

연번      0
확진일     0
지역      0
접촉력     0
상태      0
등록일     0
수정일     0
노출여부    0
dtype: int64

### 지역별 확진자 수를 파악하기 위해 count 컬럼 추가

In [6]:
# Constant 1 on every row, so summing this column over a group gives the
# number of rows in that group.
df['count'] = 1

### 구별 확진자 수 분류

In [7]:
# Per-district totals, ordered from the largest 'count' to the smallest.
# The numeric columns are selected explicitly before summing: aggregating the
# whole frame depends on pandas silently dropping the text columns, which
# pandas 2.x no longer does.
df_seoul = (
    df.groupby('지역')[['연번', 'count']]
    .sum()
    .sort_values(by='count', ascending=False)
)
df_seoul

Unnamed: 0_level_0,연번,count
지역,Unnamed: 1_level_1,Unnamed: 2_level_1
강남구,372105439,7028
송파구,347139814,6477
관악구,287623783,5492
타시도,242537259,4776
영등포구,250444647,4536
강서구,211743005,4526
은평구,221170979,4341
서초구,209226831,4311
구로구,249243106,4286
동작구,205210716,4176


### 서울시 구가 아닌 타시도와 기타 데이터는 삭제

In [8]:
# Filter with boolean masks: first remove the rows labelled '타시도' from df,
# then build corona_seoul from what is left, without the rows labelled '기타'.
df = df.loc[df['지역'] != '타시도'].copy()
corona_seoul = df.loc[df['지역'] != '기타'].copy()

### 지역명 영어로 변경

In [9]:
# English label -> Korean district name, as the name appears in the '지역' column.
province = {
    'Gangnam': '강남구',
    'Gwanak': '관악구',
    'Songpa': '송파구',
    'Ydp': '영등포구',
    'Gangseo': '강서구',
    'Ep': '은평구',
    'Seocho': '서초구',
    'Guro': '구로구',
    'Dongjak': '동작구',
    'Nowon': '노원구',
    'Gangdong': '강동구',
    'Seongbuk': '성북구',
    'Jungrang': '중랑구',
    'Mapo': '마포구',
    'Dongdaemun': '동대문구',
    'Gwangjin': '광진구',
    'Yangchun': '양천구',
    'Sungdong': '성동구',
    'Yongsan': '용산구',
    'Seodaemun': '서대문구',
    'Gangbuk': '강북구',
    'Dobong': '도봉구',
    'Geumchun': '금천구',
    'Jung': '중구',
    'Jongro': '종로구',
}

In [10]:
# Swap the Korean district names for their English labels in one vectorised
# pass. `province` maps English -> Korean, so it is inverted first; values
# without an entry in the mapping are left untouched.
korean_to_english = {korean: english for english, korean in province.items()}
corona_seoul['지역'] = corona_seoul['지역'].replace(korean_to_english)

### 구별로 데이터 나누기

In [11]:
# Sorted list of the distinct values found in the '지역' column.
province2 = sorted(corona_seoul['지역'].unique())

In [12]:
# Publish one DataFrame per region as a notebook-level variable named after
# the region (e.g. `Sungdong`), because later cells refer to those names.
for i in province2:
    # Store an explicit copy: the per-region frames are modified in place
    # further down, and mutating a filtered slice of corona_seoul is a
    # chained-assignment hazard.
    globals()[i] = corona_seoul[corona_seoul['지역'] == i].copy()

In [13]:
# Display the per-region frame stored under the name `Sungdong`.
Sungdong

Unnamed: 0,연번,확진일,지역,접촉력,상태,등록일,수정일,노출여부,count
17,101394,2021-09-30,Sungdong,감염경로 조사중,-,2021-10-01 10:57:21,2021-10-01 10:57:21,Y,1
172,101239,2021-09-30,Sungdong,기타 확진자 접촉,-,2021-10-01 10:57:21,2021-10-01 10:57:21,Y,1
213,101198,2021-09-30,Sungdong,기타 확진자 접촉,-,2021-10-01 10:57:21,2021-10-01 10:57:21,Y,1
512,100899,2021-09-30,Sungdong,감염경로 조사중,-,2021-10-01 10:57:20,2021-10-01 10:57:20,Y,1
547,100864,2021-09-30,Sungdong,감염경로 조사중,-,2021-10-01 10:57:20,2021-10-01 10:57:20,Y,1
...,...,...,...,...,...,...,...,...,...
101107,304,2020-03-20,Sungdong,해외유입,퇴원,2021-05-27 11:08:13,2021-05-27 11:08:13,Y,1
101119,292,2020-03-19,Sungdong,해외유입,퇴원,2021-05-27 11:08:13,2021-05-27 11:08:13,Y,1
101330,81,2020-02-28,Sungdong,감염경로 조사중,퇴원,2021-05-27 11:08:12,2021-05-27 11:08:12,Y,1
101391,20,2020-02-20,Sungdong,성동구 아파트 관리사무소,퇴원,2021-05-27 11:08:12,2021-05-27 11:08:12,Y,1


### 단위기간별로 확진자를 파악하기 위해 object 타입인 확진일을 datetime 타입으로 변경 후 인덱스로 설정

In [14]:
# Every per-district frame, gathered so that the same transformation can be
# applied to each of them.
location = [Gangnam, Gwanak, Songpa, Ydp, Gangseo, Ep,
            Seocho, Guro, Dongjak, Nowon, Gangdong, Seongbuk,
            Jungrang, Mapo, Dongdaemun, Gwangjin, Yangchun,
            Sungdong, Yongsan, Seodaemun, Gangbuk, Dobong,
            Geumchun, Jung, Jongro]

for dataset in location:
    # The frames are modified in place, so a frame whose '확진일' column is
    # already gone has been processed before; skipping it makes the cell
    # safe to re-run instead of failing with a KeyError.
    if '확진일' not in dataset.columns:
        continue

    # Parse the whole column in one vectorised call rather than invoking
    # pd.to_datetime once per cell.
    dataset['datetime'] = pd.to_datetime(dataset['확진일'].astype(str), format='%Y-%m-%d')
    dataset.set_index('datetime', inplace=True)
    dataset.drop(['확진일', '연번'], axis=1, inplace=True)

In [15]:
# Check the index of one per-district frame after the conversion above.
Gangnam.index

DatetimeIndex(['2021-09-30', '2021-09-30', '2021-09-30', '2021-09-30',
               '2021-09-30', '2021-09-30', '2021-09-30', '2021-09-30',
               '2021-09-30', '2021-09-30',
               ...
               '2020-03-05', '2020-03-02', '2020-03-01', '2020-02-29',
               '2020-02-28', '2020-02-28', '2020-02-28', '2020-02-27',
               '2020-02-26', '2020-02-26'],
              dtype='datetime64[ns]', name='datetime', length=7028, freq=None)

### resample 함수를 이용해서 주, 월, 분기별로 확진자 수 파악

In [16]:
# English district label -> the per-district frame bound to that name.
location_dict = dict(
    Gangnam=Gangnam,
    Gwanak=Gwanak,
    Songpa=Songpa,
    Ydp=Ydp,
    Gangseo=Gangseo,
    Ep=Ep,
    Seocho=Seocho,
    Guro=Guro,
    Dongjak=Dongjak,
    Nowon=Nowon,
    Gangdong=Gangdong,
    Seongbuk=Seongbuk,
    Jungrang=Jungrang,
    Mapo=Mapo,
    Dongdaemun=Dongdaemun,
    Gwangjin=Gwangjin,
    Yangchun=Yangchun,
    Sungdong=Sungdong,
    Yongsan=Yongsan,
    Seodaemun=Seodaemun,
    Gangbuk=Gangbuk,
    Dobong=Dobong,
    Geumchun=Geumchun,
    Jung=Jung,
    Jongro=Jongro,
)

In [17]:
# For every district, publish weekly / monthly / quarterly case totals as
# notebook-level variables named '<district>_weekly', '<district>_monthly'
# and '<district>_quarterly'; later cells refer to those names.
for name, frame in location_dict.items():
    # Aggregate only the numeric 'count' column; the other columns hold text
    # and are not meaningful to sum.
    cases = frame[['count']]

    # NOTE(review): pandas 2.2+ deprecates the 'M' and 'Q' aliases in favour
    # of 'ME' and 'QE'. They are kept here for compatibility with the pandas
    # version this notebook was run on — revisit when upgrading.
    globals()['{}_weekly'.format(name)] = cases.resample('W').sum()
    globals()['{}_monthly'.format(name)] = cases.resample('M').sum()
    globals()['{}_quarterly'.format(name)] = cases.resample('Q').sum()

In [18]:
# Display the monthly totals stored under the name `Ydp_monthly`.
Ydp_monthly

Unnamed: 0_level_0,count
datetime,Unnamed: 1_level_1
2020-02-29,1
2020-03-31,18
2020-04-30,4
2020-05-31,11
2020-06-30,29
2020-07-31,7
2020-08-31,67
2020-09-30,53
2020-10-31,18
2020-11-30,95


In [19]:
# Display the quarterly totals stored under the name `Ep_quarterly`.
Ep_quarterly

Unnamed: 0_level_0,count
datetime,Unnamed: 1_level_1
2020-03-31,21
2020-06-30,32
2020-09-30,198
2020-12-31,535
2021-03-31,581
2021-06-30,774
2021-09-30,2200


In [20]:
# Display the monthly totals stored under the name `Geumchun_monthly`.
Geumchun_monthly

Unnamed: 0_level_0,count
datetime,Unnamed: 1_level_1
2020-02-29,1
2020-03-31,11
2020-04-30,0
2020-05-31,3
2020-06-30,17
2020-07-31,7
2020-08-31,34
2020-09-30,20
2020-10-31,5
2020-11-30,27


<br>

--------------------------

<br>


<br>

### folium으로 맵에 시각화

In [21]:
pip install folium

Note: you may need to restart the kernel to use updated packages.


In [22]:
import folium

# Base map that the layers and markers below are added to; the initial view
# is set by the latitude/longitude pair and the zoom level.
map_osm = folium.Map(
    location=[37.529622, 126.984307],
    zoom_start=11,
)

In [23]:
# Move the '지역' group labels out of the index into an ordinary column.
df_seoul = df_seoul.reset_index()

### 서울시 구별 좌표 json 파일 불러오기

In [24]:
# URL of the GeoJSON file with the Seoul municipality outlines; it is passed
# as `geo_data` to the choropleth below.
state_geo = 'https://raw.githubusercontent.com/southkorea/seoul-maps/master/kostat/2013/json/seoul_municipalities_geo_simple.json'

### choropleth를 이용한 구별 누적 확진자 수 시각화

In [25]:
# Shade each district by its cumulative case count.
# `Map.choropleth` is deprecated and is no longer available in current folium
# releases; the `folium.Choropleth` class accepts the same arguments and is
# attached to the map with `add_to`.
# NOTE(review): `color` is not one of Choropleth's documented styling
# parameters (the outline colour is `line_color`). It is passed through
# unchanged to preserve the existing rendering — confirm what was intended.
folium.Choropleth(
    geo_data=state_geo,
    name='서울시 코로나 누적 확진자 수',
    data=df_seoul,
    columns=['지역', 'count'],
    key_on='feature.properties.name',
    fill_color='Blues',
    fill_opacity=0.7,
    line_opacity=0.3,
    color='gray',
    legend_name='확진자 수',
).add_to(map_osm)

map_osm

In [31]:
# Load location.csv and display it; the code below reads its '시군구명_한글',
# '위도' and '경도' columns.
gu = pd.read_csv('./location.csv')
gu

Unnamed: 0,순번,시군구코드,시군구명_한글,시군구명_영문,ESRI_PK,위도,경도
0,1,11320,도봉구,Dobong-gu,0,37.665861,127.031767
1,2,11380,은평구,Eunpyeong-gu,1,37.617612,126.9227
2,3,11230,동대문구,Dongdaemun-gu,2,37.583801,127.0507
3,4,11590,동작구,Dongjak-gu,3,37.496504,126.944307
4,5,11545,금천구,Geumcheon-gu,4,37.460097,126.900155
5,6,11530,구로구,Guro-gu,5,37.495486,126.858121
6,7,11110,종로구,Jongno-gu,6,37.5991,126.986149
7,8,11305,강북구,Gangbuk-gu,7,37.646995,127.014716
8,9,11260,중랑구,Jungnang-gu,8,37.595379,127.093967
9,10,11680,강남구,Gangnam-gu,9,37.495985,127.066409


In [32]:
# English label -> Korean district name; used below to relabel the
# '시군구명_한글' column of `gu`.
province = {
    'Gangnam': '강남구',
    'Gwanak': '관악구',
    'Songpa': '송파구',
    'Ydp': '영등포구',
    'Gangseo': '강서구',
    'Ep': '은평구',
    'Seocho': '서초구',
    'Guro': '구로구',
    'Dongjak': '동작구',
    'Nowon': '노원구',
    'Gangdong': '강동구',
    'Seongbuk': '성북구',
    'Jungrang': '중랑구',
    'Mapo': '마포구',
    'Dongdaemun': '동대문구',
    'Gwangjin': '광진구',
    'Yangchun': '양천구',
    'Sungdong': '성동구',
    'Yongsan': '용산구',
    'Seodaemun': '서대문구',
    'Gangbuk': '강북구',
    'Dobong': '도봉구',
    'Geumchun': '금천구',
    'Jung': '중구',
    'Jongro': '종로구',
}

In [34]:
# Relabel the district names in `gu` with the same English labels used in
# corona_seoul, so the two frames can be matched on the district name.
# `province` maps English -> Korean, so it is inverted first; names without
# an entry in the mapping are left untouched.
korean_to_english = {korean: english for english, korean in province.items()}
gu['시군구명_한글'] = gu['시군구명_한글'].replace(korean_to_english)

In [36]:
# Number of rows (cases) per district, computed once instead of re-filtering
# corona_seoul for every region.
cases_per_region = corona_seoul['지역'].value_counts()

# Latitude/longitude per district name; if a name appears more than once in
# `gu`, its first row is used.
coordinates = (
    gu.drop_duplicates(subset='시군구명_한글')
    .set_index('시군구명_한글')[['위도', '경도']]
)

for region, count in cases_per_region.items():
    # A region without a row in `gu` has no position to place a marker at.
    if region not in coordinates.index:
        continue

    # Pass plain floats to folium rather than one-element Series.
    latitude = float(coordinates.at[region, '위도'])
    longitude = float(coordinates.at[region, '경도'])

    marker = folium.Marker([latitude, longitude],  # position
                           popup=' '.join((region, str(count), '명')))  # popup text

    marker.add_to(map_osm)

In [37]:
# Render the map with the markers added above.
map_osm