# 서울시 생활인구
## 서울 생활인구 현황 (2022.10.05. 기준)
### url: https://data.seoul.go.kr/dataVisual/seoul/seoulLivingPopulation.do
### 서울시 생활인구란? 
    - 서울시와 KT가 공공빅데이터와 통신데이터를 이용하여 추계한 서울의 특정지역, 특정시점에 존재하는 모든 인구

#### 데이터: 행정동 단위 서울 생활인구(내국인)
    - url: https://data.seoul.go.kr/dataList/OA-14979/F/1/datasetView.do
    - 설명: 서울시가 보유한 공공데이터와 통신데이터로 측정한 특정시점에 서울의 특정 지역에 존재하는 인구 중 내국인

<!-- <br> -->



※ 개인정보 비식별화를 위하여 ‘3명’ 이하인 경우 “*” 처리


In [1]:
import os
import glob
import pandas as pd
import numpy as np

In [2]:
from tqdm.auto import tqdm, trange
from time import sleep

In [3]:
# Gather the names of the CSV files in the data folder, in sorted order
long_foreigner_list = []
temp_foreinger_list = []
local_people_list = []
pbar = tqdm(os.listdir(r"./local_people_dong/"))
for file in pbar:
    if not file.endswith(".csv"):
        continue
    # show the file currently being collected on the progress bar
    pbar.set_description(file)
    local_people_list.append(file)
local_people_list.sort()

print("no. of local_people >>", len(local_people_list))

  0%|          | 0/13 [00:00<?, ?it/s]

no. of local_people >> 12


In [4]:
def data_process(file_list, path):
    """Load every CSV in *file_list* and concatenate the per-file aggregates.

    Each file is read with "*" treated as a missing value, missing values are
    replaced by 0, date columns are derived with ``create_dates`` and the
    frame is aggregated with ``combine_ages``. A file that cannot be read is
    reported on stdout and skipped.

    Args:
        file_list: iterable of CSV file names.
        path: directory prefix; it is concatenated directly with each file
            name, so it must end with a path separator.

    Returns:
        The ``pd.concat`` of the aggregated frame of every readable file.

    Raises:
        ValueError: if no file at all could be read.
    """
    aggregated_frames = []
    problem_list = []
    pbar = tqdm(file_list)
    for file in pbar:
        pbar.set_description(file)  # show the file being processed on the bar
        try:
            df_temp = pd.read_csv(path + file, encoding="utf-8", na_values="*")
        except Exception as e:
            # NOTE: best-effort load - an unreadable file (e.g. not valid
            # UTF-8) is skipped, not retried with another encoding.
            print(file, ">>", e)
            problem_list.append(file)
            continue

        # rename() returns a new frame, so the result has to be kept;
        # a header that does not match is simply left untouched.
        df_temp = df_temp.rename(columns={'?"기준일ID"': '기준일ID'})
        # replace missing values (including the masked "*" cells) with 0
        df_temp.fillna(0, inplace=True)
        df_temp = create_dates(df_temp)

        # collapse the sex/age-band columns and aggregate
        aggregated_frames.append(combine_ages(df_temp))

    if not aggregated_frames:
        # pd.concat([]) would raise an unhelpful "No objects to concatenate"
        raise ValueError(
            f"no file could be loaded from {path!r}; failed files: {problem_list}"
        )

    # stack the per-file results
    return pd.concat(aggregated_frames)

In [5]:
# Parse the 기준일ID column into dates and derive year / month / weekend columns
def create_dates(df_temp):
    """Add date-derived leading columns to *df_temp* in place and return it.

    Inserts, at positions 0-3: 날짜 (기준일ID parsed as YYYYMMDD), 년 (year),
    월 (month) and 주말여부 ('주말' for weekend dates, '평일' otherwise).
    The 기준일ID and 시간대구분 columns are then removed.

    Args:
        df_temp: frame containing the 기준일ID and 시간대구분 columns.

    Returns:
        The same (mutated) DataFrame object.
    """
    dates = pd.to_datetime(df_temp['기준일ID'], format='%Y%m%d')
    # dayofweek counts Monday as 0, so values above 4 are Saturday / Sunday
    weekend_labels = [
        '주말' if is_weekend else '평일' for is_weekend in dates.dt.dayofweek > 4
    ]

    leading_columns = (
        ('날짜', dates),
        ('년', dates.dt.year),
        ('월', dates.dt.month),
        ('주말여부', weekend_labels),
    )
    for position, (name, values) in enumerate(leading_columns):
        df_temp.insert(position, name, values)

    # the raw date id and the time-slot column are no longer needed
    df_temp.drop(columns=['기준일ID', '시간대구분'], inplace=True)
    return df_temp

In [6]:
# Collapse the sex / 5-year-band columns into one column per age group
def combine_ages(df_temp):
    """Sum the sex/age-band columns into age groups and average per group key.

    For each age group the male and female band columns are added together
    into a new column; the 28 source columns are then dropped. *df_temp* is
    modified in place by these two steps. Finally the frame is grouped by
    (월, 주말여부, 행정동코드) and the mean of every numeric column is taken.

    Args:
        df_temp: frame holding the 28 band columns plus the 월, 주말여부 and
            행정동코드 columns.

    Returns:
        A new DataFrame indexed by (월, 주말여부, 행정동코드) containing the
        group means of the remaining numeric columns.
    """
    age_groups = {
        '10세 이하': [
            '남자0세부터9세생활인구수', '여자0세부터9세생활인구수',
        ],
        '10대': [
            '남자10세부터14세생활인구수', '남자15세부터19세생활인구수',
            '여자10세부터14세생활인구수', '여자15세부터19세생활인구수',
        ],
        '20대': [
            '남자20세부터24세생활인구수', '남자25세부터29세생활인구수',
            '여자20세부터24세생활인구수', '여자25세부터29세생활인구수',
        ],
        '30대': [
            '남자30세부터34세생활인구수', '남자35세부터39세생활인구수',
            '여자30세부터34세생활인구수', '여자35세부터39세생활인구수',
        ],
        '40대': [
            '남자40세부터44세생활인구수', '남자45세부터49세생활인구수',
            '여자40세부터44세생활인구수', '여자45세부터49세생활인구수',
        ],
        '50대': [
            '남자50세부터54세생활인구수', '남자55세부터59세생활인구수',
            '여자50세부터54세생활인구수', '여자55세부터59세생활인구수',
        ],
        '60대': [
            '남자60세부터64세생활인구수', '남자65세부터69세생활인구수',
            '여자60세부터64세생활인구수', '여자65세부터69세생활인구수',
        ],
        '70세 이상': [
            '남자70세이상생활인구수', '여자70세이상생활인구수',
        ],
    }

    source_cols = []
    for group, members in age_groups.items():
        # plain left-to-right addition, so a missing value in any band
        # still propagates into the group total
        total = df_temp[members[0]]
        for col in members[1:]:
            total = total + df_temp[col]
        df_temp[group] = total
        source_cols.extend(members)
    df_temp.drop(columns=source_cols, inplace=True)

    # numeric_only=True keeps the result independent of the pandas version:
    # non-numeric columns such as the 날짜 datetime are left out of the mean
    # instead of being aggregated or raising.
    return df_temp.groupby(['월', '주말여부', '행정동코드']).mean(numeric_only=True)

In [7]:
# Run the load-and-aggregate pipeline over the collected CSV file names
local_people = data_process(local_people_list, "./local_people_dong/")

  0%|          | 0/12 [00:00<?, ?it/s]

In [8]:
local_people

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,년,총생활인구수,Unnamed: 32,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
월,주말여부,행정동코드,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
10,주말,11110515,2021.0,17452.395658,0.0,1123.139024,1616.000590,2759.571150,2712.479976,3151.672946,2562.220863,1561.693502,1965.617702
10,주말,11110530,2021.0,17400.655807,0.0,837.091080,1108.639791,2807.027515,3345.219335,2988.166902,2592.035403,1827.479415,1894.996385
10,주말,11110540,2021.0,5366.844920,0.0,180.024785,377.619094,1161.239330,945.749068,881.286498,766.495053,570.405246,484.025791
10,주말,11110550,2021.0,14757.828239,0.0,786.197590,1509.365232,1753.201194,1883.564982,2560.226758,2435.996037,1826.377031,2002.899419
10,주말,11110560,2021.0,19151.517713,0.0,1234.832764,1844.368885,1981.303032,2187.662117,3071.521611,3409.897936,2708.560233,2713.371135
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,평일,11740650,2022.0,23874.350475,0.0,1464.560339,1348.279941,3271.869507,4209.800993,3835.563025,3782.828473,2990.370903,2971.077318
9,평일,11740660,2022.0,25991.086548,0.0,1449.580704,2316.839451,3519.352714,3977.498193,4448.904011,4176.656182,3135.247451,2967.007880
9,평일,11740685,2022.0,54291.025357,0.0,3852.301164,3810.418074,6657.611510,8493.841100,8537.393960,8564.054602,7159.445973,7215.959012
9,평일,11740690,2022.0,4498.447455,0.0,382.235267,908.984415,361.265348,486.742254,698.843266,640.092286,498.849340,521.435323


In [9]:
# Remove the helper columns that are not part of the analysis.
# errors='ignore' keeps the cell re-runnable and tolerates input files that
# did not produce the stray 'Unnamed: 32' column.
local_people.drop(columns=['Unnamed: 32', '년'], inplace=True, errors='ignore')
local_people

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,총생활인구수,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
월,주말여부,행정동코드,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10,주말,11110515,17452.395658,1123.139024,1616.000590,2759.571150,2712.479976,3151.672946,2562.220863,1561.693502,1965.617702
10,주말,11110530,17400.655807,837.091080,1108.639791,2807.027515,3345.219335,2988.166902,2592.035403,1827.479415,1894.996385
10,주말,11110540,5366.844920,180.024785,377.619094,1161.239330,945.749068,881.286498,766.495053,570.405246,484.025791
10,주말,11110550,14757.828239,786.197590,1509.365232,1753.201194,1883.564982,2560.226758,2435.996037,1826.377031,2002.899419
10,주말,11110560,19151.517713,1234.832764,1844.368885,1981.303032,2187.662117,3071.521611,3409.897936,2708.560233,2713.371135
...,...,...,...,...,...,...,...,...,...,...,...
9,평일,11740650,23874.350475,1464.560339,1348.279941,3271.869507,4209.800993,3835.563025,3782.828473,2990.370903,2971.077318
9,평일,11740660,25991.086548,1449.580704,2316.839451,3519.352714,3977.498193,4448.904011,4176.656182,3135.247451,2967.007880
9,평일,11740685,54291.025357,3852.301164,3810.418074,6657.611510,8493.841100,8537.393960,8564.054602,7159.445973,7215.959012
9,평일,11740690,4498.447455,382.235267,908.984415,361.265348,486.742254,698.843266,640.092286,498.849340,521.435323


In [10]:
# Render floats with thousands separators and no decimals (affects display only)
pd.options.display.float_format = '{:,.0f}'.format
# Turn the index levels back into ordinary columns
df_dong = local_people.reset_index()

In [11]:
# Rename the total-population column to 일일평균생활인구 (daily average living population)
df_dong.rename(columns={'총생활인구수' : '일일평균생활인구'}, inplace=True)

In [12]:
# Load the dong-code mapping sheet, discarding its row labelled 0
df_code = pd.read_excel(
    "../code/data/집계구 단위 서울 생활인구/행정동코드_매핑정보_20200325.xlsx"
).drop(index=0)

# Compare the join keys as strings on both sides
df_code['행자부행정동코드'] = df_code['행자부행정동코드'].astype(str)
df_dong['행정동코드'] = df_dong['행정동코드'].astype(str)

# Attach the mapping columns (inner join), then drop the now-duplicated key
df_dong = df_dong.merge(
    df_code,
    how='inner',
    left_on='행정동코드',
    right_on='행자부행정동코드',
).drop(columns='행정동코드')

In [13]:
# Final column layout: region names and codes, then month / weekend flag,
# then the total and the per-age-group population figures
col_order = ['시도명', '시군구명', '행정동명', '통계청행정동코드', '행자부행정동코드', '월', '주말여부', '일일평균생활인구', '10세 이하', '10대', '20대', '30대', '40대',
       '50대', '60대', '70세 이상']

In [14]:
# Keep only the columns listed in col_order, in that order
df_dong = df_dong.reindex(columns=col_order)

In [15]:
# Save the result as CSV in the current working directory
df_dong.to_csv("서울_월별_일일평균생활인구.csv")

In [16]:
df_dong

Unnamed: 0,시도명,시군구명,행정동명,통계청행정동코드,행자부행정동코드,월,주말여부,일일평균생활인구,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
0,서울,종로구,청운효자동,1101072,11110515,10,주말,17452,1123,1616,2760,2712,3152,2562,1562,1966
1,서울,종로구,청운효자동,1101072,11110515,10,평일,17811,1297,2005,2475,2655,3230,2631,1566,1951
2,서울,종로구,청운효자동,1101072,11110515,11,주말,18222,1173,1644,3060,2853,3200,2699,1611,1982
3,서울,종로구,청운효자동,1101072,11110515,11,평일,18457,1338,2029,2771,2789,3267,2698,1606,1959
4,서울,종로구,청운효자동,1101072,11110515,12,주말,16774,1122,1588,2572,2476,3002,2520,1556,1938
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10171,서울,강동구,둔촌2동,1125071,11740700,7,평일,27371,2389,2737,3107,3782,4026,4088,3467,3775
10172,서울,강동구,둔촌2동,1125071,11740700,8,주말,26256,2205,2583,3008,3637,3960,4098,3353,3412
10173,서울,강동구,둔촌2동,1125071,11740700,8,평일,27287,2361,2745,3076,3762,4024,4137,3403,3779
10174,서울,강동구,둔촌2동,1125071,11740700,9,주말,26403,2278,2595,2921,3652,4035,4091,3382,3448


In [17]:
# Per-dong total of the daily-average population (summed over every
# month / weekday-weekend row), sorted from largest to smallest
df_dong.pivot_table("일일평균생활인구", index=["시군구명","행정동명"],aggfunc = 'sum').sort_values(by=["일일평균생활인구"],ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,일일평균생활인구
시군구명,행정동명,Unnamed: 2_level_1
강남구,역삼1동,2266701
영등포구,여의동,1949972
마포구,서교동,1660425
은평구,진관동,1515651
서초구,서초3동,1483665
...,...,...
종로구,삼청동,121455
종로구,창신3동,118809
강동구,둔촌1동,110462
양천구,신월6동,109244


In [18]:
# Rank analysis: keep the district / dong names plus the age-group columns.
# The age-group labels are the only column names containing a digit, which is
# what \d selects; the pattern is a raw string so that \d is not parsed as an
# (invalid) string escape sequence.
age_lp = df_dong.filter(regex=r'\d|시군구명|행정동명')
age_lp

Unnamed: 0,시군구명,행정동명,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
0,종로구,청운효자동,1123,1616,2760,2712,3152,2562,1562,1966
1,종로구,청운효자동,1297,2005,2475,2655,3230,2631,1566,1951
2,종로구,청운효자동,1173,1644,3060,2853,3200,2699,1611,1982
3,종로구,청운효자동,1338,2029,2771,2789,3267,2698,1606,1959
4,종로구,청운효자동,1122,1588,2572,2476,3002,2520,1556,1938
...,...,...,...,...,...,...,...,...,...,...
10171,강동구,둔촌2동,2389,2737,3107,3782,4026,4088,3467,3775
10172,강동구,둔촌2동,2205,2583,3008,3637,3960,4098,3353,3412
10173,강동구,둔촌2동,2361,2745,3076,3762,4024,4137,3403,3779
10174,강동구,둔촌2동,2278,2595,2921,3652,4035,4091,3382,3448


In [19]:
# Top rank: average each age-group column per dong, then store the label of
# the largest one in the 1위 ("1st place") column
a=age_lp.groupby(["시군구명","행정동명"]).mean()
a["1위"]=a.idxmax(axis=1)
a

Unnamed: 0_level_0,Unnamed: 1_level_0,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상,1위
시군구명,행정동명,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,개포1동,731,953,694,1224,1663,1346,1064,983,40대
강남구,개포2동,1850,3779,2318,2865,4573,3792,2813,2829,40대
강남구,개포4동,1126,1677,1853,2594,2804,2583,1871,1406,40대
강남구,논현1동,1050,1632,9439,10726,7713,4757,2918,2591,30대
강남구,논현2동,1826,1932,7450,10137,8286,4960,2826,2784,30대
...,...,...,...,...,...,...,...,...,...,...
중랑구,상봉2동,1063,1245,4968,5238,3885,4310,3070,2646,30대
중랑구,신내1동,3774,3330,3810,4773,5429,5475,4323,4147,50대
중랑구,신내2동,1275,1750,1934,2507,2577,2864,2344,2466,50대
중랑구,중화1동,1538,1447,2115,2577,2354,2611,2191,2252,50대


In [21]:
# Full ranking: recompute the per-dong means (without the 1위 column), then
# get, for each row, the column positions ordered by ascending mean
a=age_lp.groupby(["시군구명","행정동명"]).mean()
a_list=a.apply(np.argsort,axis=1)
a_list.values

array([[2, 0, 1, ..., 3, 5, 4],
       [0, 2, 6, ..., 1, 5, 4],
       [0, 7, 1, ..., 5, 3, 4],
       ...,
       [0, 1, 2, ..., 3, 4, 5],
       [1, 0, 2, ..., 4, 3, 5],
       [0, 1, 2, ..., 7, 3, 5]])

In [22]:
# Reverse each row of positions (largest mean first) and translate the
# positions into the corresponding age-group column labels
a_cols=a.columns.to_numpy()[a_list.values[:,::-1]]
a_cols

array([['40대', '50대', '30대', ..., '10대', '10세 이하', '20대'],
       ['40대', '50대', '10대', ..., '60대', '20대', '10세 이하'],
       ['40대', '30대', '50대', ..., '10대', '70세 이상', '10세 이하'],
       ...,
       ['50대', '40대', '30대', ..., '20대', '10대', '10세 이하'],
       ['50대', '30대', '40대', ..., '20대', '10세 이하', '10대'],
       ['50대', '30대', '70세 이상', ..., '20대', '10대', '10세 이하']],
      dtype=object)