# 서울시 생활인구
## 서울 생활인구 현황 (2022.10.05. 기준)
### url: https://data.seoul.go.kr/dataVisual/seoul/seoulLivingPopulation.do
### 서울시 생활인구란? 
    - 서울시와 KT가 공공빅데이터와 통신데이터를 이용하여 추계한 서울의 특정지역, 특정시점에 존재하는 모든 인구

#### 데이터: 행정동 단위 서울 생활인구(내국인)
    - url: https://data.seoul.go.kr/dataList/OA-14979/F/1/datasetView.do
    - 설명: 서울시가 보유한 공공데이터와 통신데이터로 측정한 특정시점에 서울의 특정 지역에 존재하는 인구 중 내국인

<!-- <br> -->



※ 개인정보 비식별화를 위하여 ‘3명’ 이하인 경우 “*” 처리


In [1]:
import os
import glob
import pandas as pd
import numpy as np

In [2]:
from tqdm.auto import tqdm, trange
from time import sleep

In [3]:
# Gather the names of the CSV files found in the data folder.
local_people_list = []

# Not used in this cell — presumably reserved for other datasets; TODO confirm
# whether anything still needs these names.
long_foreigner_list = []
temp_foreinger_list = []

pbar = tqdm(os.listdir(r"./local_people_dong/"))
for file in pbar:
    if not file.endswith(".csv"):
        continue
    # Show the file currently being collected on the progress bar.
    pbar.set_description(file)
    local_people_list.append(file)

# Sort the collected names in place.
local_people_list.sort()

print("no. of local_people >>", len(local_people_list))

  0%|          | 0/13 [00:00<?, ?it/s]

no. of local_people >> 12


In [4]:
def data_process(file_list, path):
    """Read every CSV in ``file_list`` and stack the per-file summaries.

    Each file is read with ``*`` treated as a missing value, missing values
    are replaced by 0, and the frame is passed through ``create_dates`` and
    ``combine_ages`` before being collected. Files that cannot be read are
    reported and skipped.

    Args:
        file_list: File names to read.
        path: Prefix prepended to each file name as-is (no separator is
            inserted, so a directory must end with ``/``).

    Returns:
        pandas.DataFrame: Concatenation of the per-file results.

    Raises:
        ValueError: If no file could be processed.
    """
    df_sum_per_day_행정동_list = []
    problem_list = []
    pbar = tqdm(file_list)
    for file in pbar:
        pbar.set_description(file)  # show the file being processed on the progress bar
        try:
            df_temp = pd.read_csv(path + file, encoding="utf-8", na_values="*")
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(file, ">>", e)
            problem_list.append(file)
            continue
        # rename() returns a new frame; the result must be assigned for the
        # repaired header name to take effect.
        df_temp = df_temp.rename(columns={'?"기준일ID"': '기준일ID'})
        # Replace missing values with 0.
        df_temp = df_temp.fillna(0)
        df_temp = create_dates(df_temp)

        # Collapse the fine-grained age columns into broader age groups.
        df_sum_per_day_행정동_list.append(combine_ages(df_temp))

    if problem_list:
        print("skipped files >>", problem_list)
    if not df_sum_per_day_행정동_list:
        raise ValueError(f"no file could be processed: {problem_list}")

    # Stack the per-file results.
    return pd.concat(df_sum_per_day_행정동_list)

In [5]:
# Convert the 기준일ID column to real dates and derive year / month /
# weekend-flag columns from it.
def create_dates(df_temp):
    """Add date-derived columns to ``df_temp`` in place and return it.

    The '기준일ID' values are parsed as ``%Y%m%d`` dates. Four columns are
    inserted at positions 0-3: '날짜' (the parsed date), '년' (year),
    '월' (month) and '주말여부' ('주말' when the day of week is above 4,
    otherwise '평일'). The '기준일ID' and '시간대구분' columns are then
    removed.

    Args:
        df_temp: Frame holding '기준일ID' and '시간대구분' columns.

    Returns:
        The same frame object, modified in place.
    """
    dates = pd.to_datetime(df_temp['기준일ID'], format='%Y%m%d')

    # dayofweek counts Monday as 0, so 5 and 6 are Saturday and Sunday.
    weekend_labels = (dates.dt.dayofweek >= 5).apply(
        lambda is_weekend: '주말' if is_weekend else '평일'
    )

    leading_columns = [
        ('날짜', dates),
        ('년', dates.dt.year),
        ('월', dates.dt.month),
        ('주말여부', weekend_labels),
    ]
    for position, (name, values) in enumerate(leading_columns):
        df_temp.insert(position, name, values)

    # The raw date id and the time-slot column are no longer needed.
    df_temp.drop(columns=['기준일ID', '시간대구분'], inplace=True)
    return df_temp

In [6]:
# Collapse the sex/age-band columns into broader age-group columns.
def combine_ages(df_temp):
    """Merge sex/age-band columns into age groups and average per month.

    For every age group the male and female band columns are added up into
    one new column, and the source columns are dropped (``df_temp`` is
    modified in place). The rows are then summed per
    ('날짜', '월', '주말여부', '행정동코드') and those per-date sums are averaged
    per ('월', '주말여부', '행정동코드').

    NOTE(review): the per-date value is the SUM of all rows sharing a date
    and dong; if the input holds one row per time slot this is a total over
    the slots rather than a headcount — confirm that is intended.

    Args:
        df_temp: Frame with the 28 sex/age-band columns plus '날짜', '월',
            '주말여부' and '행정동코드'.

    Returns:
        A new frame indexed by ('월', '주말여부', '행정동코드').
    """
    # Age-group label -> source columns, in the order they are added up.
    age_groups = {
        '10세 이하': [
            '남자0세부터9세생활인구수', '여자0세부터9세생활인구수',
        ],
        '10대': [
            '남자10세부터14세생활인구수', '남자15세부터19세생활인구수',
            '여자10세부터14세생활인구수', '여자15세부터19세생활인구수',
        ],
        '20대': [
            '남자20세부터24세생활인구수', '남자25세부터29세생활인구수',
            '여자20세부터24세생활인구수', '여자25세부터29세생활인구수',
        ],
        '30대': [
            '남자30세부터34세생활인구수', '남자35세부터39세생활인구수',
            '여자30세부터34세생활인구수', '여자35세부터39세생활인구수',
        ],
        '40대': [
            '남자40세부터44세생활인구수', '남자45세부터49세생활인구수',
            '여자40세부터44세생활인구수', '여자45세부터49세생활인구수',
        ],
        '50대': [
            '남자50세부터54세생활인구수', '남자55세부터59세생활인구수',
            '여자50세부터54세생활인구수', '여자55세부터59세생활인구수',
        ],
        '60대': [
            '남자60세부터64세생활인구수', '남자65세부터69세생활인구수',
            '여자60세부터64세생활인구수', '여자65세부터69세생활인구수',
        ],
        '70세 이상': [
            '남자70세이상생활인구수', '여자70세이상생활인구수',
        ],
    }

    source_columns = []
    for label, members in age_groups.items():
        first, *rest = members
        subtotal = df_temp[first]
        for column in rest:
            subtotal = subtotal + df_temp[column]
        df_temp[label] = subtotal
        source_columns.extend(members)

    df_temp.drop(columns=source_columns, inplace=True)

    # First total everything per date and dong, then average those totals
    # over the dates of each month / weekend-flag / dong combination.
    per_date = df_temp.groupby(['날짜', '월', '주말여부', '행정동코드']).sum()
    return per_date.groupby(['월', '주말여부', '행정동코드']).mean()

In [7]:
# Run the loading/aggregation pipeline over every collected CSV file.
local_people = data_process(local_people_list, "./local_people_dong/")

  0%|          | 0/12 [00:00<?, ?it/s]

In [8]:
# Display the combined result.
local_people

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,년,총생활인구수,Unnamed: 32,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
월,주말여부,행정동코드,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
10,주말,11110515,48504.0,4.188575e+05,0.0,26955.336580,38784.014150,66229.707590,65099.519430,75640.150700,61493.300710,37480.644040,47174.824850
10,주말,11110530,48504.0,4.176157e+05,0.0,20090.185930,26607.354980,67368.660360,80285.264040,71716.005650,62208.849670,43859.505960,45479.913230
10,주말,11110540,48504.0,1.288043e+05,0.0,4320.594830,9062.858250,27869.743930,22697.977640,21150.875950,18395.881270,13689.725900,11616.618980
10,주말,11110550,48504.0,3.541879e+05,0.0,18868.742150,36224.765560,42076.828650,45205.559560,61445.442180,58463.904890,43833.048740,48069.586060
10,주말,11110560,48504.0,4.596364e+05,0.0,29635.986340,44264.853230,47551.272770,52503.890810,73716.518660,81837.550470,65005.445590,65120.907230
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,평일,11740650,48528.0,5.729844e+05,0.0,35149.448145,32358.718573,78524.868173,101035.223823,92053.512609,90787.883359,71768.901677,71305.855627
9,평일,11740660,48528.0,6.237861e+05,0.0,34789.936891,55604.146818,84464.465145,95459.956636,106773.696268,100239.748368,75245.938827,71208.189114
9,평일,11740685,48528.0,1.302985e+06,0.0,92455.227941,91450.033782,159782.676250,203852.186395,204897.455032,205537.310445,171826.703364,173183.016291
9,평일,11740690,48528.0,1.079627e+05,0.0,9173.646405,21815.625959,8670.368359,11681.814095,16772.238373,15362.214859,11972.384159,12514.447764


In [9]:
# Remove the helper columns that are not wanted in the result.
# errors="ignore" keeps this cell safe to re-run and tolerates input where
# the 'Unnamed: 32' column was never created.
local_people.drop(columns=['Unnamed: 32', '년'], inplace=True, errors="ignore")
local_people

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,총생활인구수,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
월,주말여부,행정동코드,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10,주말,11110515,4.188575e+05,26955.336580,38784.014150,66229.707590,65099.519430,75640.150700,61493.300710,37480.644040,47174.824850
10,주말,11110530,4.176157e+05,20090.185930,26607.354980,67368.660360,80285.264040,71716.005650,62208.849670,43859.505960,45479.913230
10,주말,11110540,1.288043e+05,4320.594830,9062.858250,27869.743930,22697.977640,21150.875950,18395.881270,13689.725900,11616.618980
10,주말,11110550,3.541879e+05,18868.742150,36224.765560,42076.828650,45205.559560,61445.442180,58463.904890,43833.048740,48069.586060
10,주말,11110560,4.596364e+05,29635.986340,44264.853230,47551.272770,52503.890810,73716.518660,81837.550470,65005.445590,65120.907230
...,...,...,...,...,...,...,...,...,...,...,...
9,평일,11740650,5.729844e+05,35149.448145,32358.718573,78524.868173,101035.223823,92053.512609,90787.883359,71768.901677,71305.855627
9,평일,11740660,6.237861e+05,34789.936891,55604.146818,84464.465145,95459.956636,106773.696268,100239.748368,75245.938827,71208.189114
9,평일,11740685,1.302985e+06,92455.227941,91450.033782,159782.676250,203852.186395,204897.455032,205537.310445,171826.703364,173183.016291
9,평일,11740690,1.079627e+05,9173.646405,21815.625959,8670.368359,11681.814095,16772.238373,15362.214859,11972.384159,12514.447764


In [10]:
# Display floats with thousands separators and no decimal places.
pd.options.display.float_format = '{:,.0f}'.format
# Move the index levels back into ordinary columns.
df_dong = local_people.reset_index()

In [11]:
# Rename the total-population column to '일일평균생활인구' (daily average living population).
df_dong.rename(columns={'총생활인구수' : '일일평균생활인구'}, inplace=True)

In [12]:
# Load the dong-code mapping table and discard its first data row.
df_code = pd.read_excel(
    "../code/data/집계구 단위 서울 생활인구/행정동코드_매핑정보_20200325.xlsx"
).drop(index=0)

# Compare the join keys as text on both sides.
df_dong['행정동코드'] = df_dong['행정동코드'].astype(str)
df_code['행자부행정동코드'] = df_code['행자부행정동코드'].astype(str)

# Keep only the rows whose code appears in both tables, then drop the
# left-hand key, which duplicates '행자부행정동코드' after the join.
df_dong = pd.merge(
    df_dong,
    df_code,
    how='inner',
    left_on='행정동코드',
    right_on='행자부행정동코드',
).drop(columns='행정동코드')

In [13]:
# Final column layout: location names, codes, grouping keys, total, age groups.
col_order = [
    '시도명', '시군구명', '행정동명',
    '통계청행정동코드', '행자부행정동코드',
    '월', '주말여부',
    '일일평균생활인구',
    '10세 이하', '10대', '20대', '30대', '40대', '50대', '60대', '70세 이상',
]

In [14]:
# Arrange the columns in the order given by col_order.
df_dong = df_dong.reindex(columns=col_order)

In [15]:
# Save the result as a CSV file in the working directory.
# NOTE(review): no index/encoding arguments are passed, so the pandas defaults
# apply — confirm that consumers expect the row index as the first column.
df_dong.to_csv("서울_월별_일일평균생활인구.csv")

In [16]:
# Display the final table.
df_dong

Unnamed: 0,시도명,시군구명,행정동명,통계청행정동코드,행자부행정동코드,월,주말여부,일일평균생활인구,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
0,서울,종로구,청운효자동,1101072,11110515,10,주말,418857,26955,38784,66230,65100,75640,61493,37481,47175
1,서울,종로구,청운효자동,1101072,11110515,10,평일,427452,31123,48128,59397,63722,77525,63144,37586,46827
2,서울,종로구,청운효자동,1101072,11110515,11,주말,437318,28148,39454,73439,68469,76793,64780,38666,47570
3,서울,종로구,청운효자동,1101072,11110515,11,평일,442959,32101,48700,66507,66928,78406,64761,38537,47020
4,서울,종로구,청운효자동,1101072,11110515,12,주말,402578,26925,38101,61724,59425,72048,60489,37347,46520
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10171,서울,강동구,둔촌2동,1125071,11740700,7,평일,656905,57326,65695,74560,90769,96631,98122,83208,90596
10172,서울,강동구,둔촌2동,1125071,11740700,8,주말,630142,52924,61990,72190,87278,95048,98348,80474,81892
10173,서울,강동구,둔촌2동,1125071,11740700,8,평일,654897,56665,65880,73829,90282,96577,99279,81680,90703
10174,서울,강동구,둔촌2동,1125071,11740700,9,주말,633667,54678,62292,70108,87657,96831,98176,81164,82762


In [23]:
# Overall mean of the daily living population per dong, largest first.
(
    df_dong
    .pivot_table(
        values="일일평균생활인구",
        index=["시군구명", "행정동명"],
        aggfunc="mean",
    )
    .sort_values(by=["일일평균생활인구"], ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,일일평균생활인구
시군구명,행정동명,Unnamed: 2_level_1
강남구,역삼1동,2266701
영등포구,여의동,1949972
마포구,서교동,1660425
은평구,진관동,1515651
서초구,서초3동,1483665
...,...,...
종로구,삼청동,121455
종로구,창신3동,118809
강동구,둔촌1동,110462
양천구,신월6동,109244


In [24]:
# Rank analysis: keep the district and dong names plus the age-group columns.
# The pattern is a raw string so that \d reaches the regex engine instead of
# being parsed as an (invalid) string escape; \d matches every column whose
# label contains a digit.
age_lp = df_dong.filter(regex=r'\d|시군구명|행정동명')
age_lp

Unnamed: 0,시군구명,행정동명,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
0,종로구,청운효자동,26955,38784,66230,65100,75640,61493,37481,47175
1,종로구,청운효자동,31123,48128,59397,63722,77525,63144,37586,46827
2,종로구,청운효자동,28148,39454,73439,68469,76793,64780,38666,47570
3,종로구,청운효자동,32101,48700,66507,66928,78406,64761,38537,47020
4,종로구,청운효자동,26925,38101,61724,59425,72048,60489,37347,46520
...,...,...,...,...,...,...,...,...,...,...
10171,강동구,둔촌2동,57326,65695,74560,90769,96631,98122,83208,90596
10172,강동구,둔촌2동,52924,61990,72190,87278,95048,98348,80474,81892
10173,강동구,둔촌2동,56665,65880,73829,90282,96577,99279,81680,90703
10174,강동구,둔촌2동,54678,62292,70108,87657,96831,98176,81164,82762


In [19]:
# Rank 1: for each dong, the age group with the largest mean.
a=age_lp.groupby(["시군구명","행정동명"]).mean()
a["1위"]=a.idxmax(axis=1)
a

Unnamed: 0_level_0,Unnamed: 1_level_0,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상,1위
시군구명,행정동명,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,개포1동,17540,22877,16668,29373,39909,32293,25548,23582,40대
강남구,개포2동,44411,90693,55632,68767,109744,91002,67523,67893,40대
강남구,개포4동,27023,40258,44466,62266,67303,61981,44894,33748,40대
강남구,논현1동,25207,39173,226528,257413,185112,114170,70034,62174,30대
강남구,논현2동,43829,46364,178802,243296,198869,119037,67817,66808,30대
...,...,...,...,...,...,...,...,...,...,...
중랑구,상봉2동,25514,29881,119234,125717,93236,103448,73681,63497,30대
중랑구,신내1동,90587,79908,91447,114561,130306,131412,103762,99518,50대
중랑구,신내2동,30597,41999,46413,60160,61858,68746,56253,59174,50대
중랑구,중화1동,36916,34735,50751,61848,56506,62661,52592,54053,50대


In [20]:
# Full ranking: recompute the per-dong means (without the rank column),
# then get, per row, the column positions ordered from smallest to largest value.
a=age_lp.groupby(["시군구명","행정동명"]).mean()
a_list=a.apply(np.argsort,axis=1)
a_list.values

array([[2, 0, 1, ..., 3, 5, 4],
       [0, 2, 6, ..., 1, 5, 4],
       [0, 7, 1, ..., 5, 3, 4],
       ...,
       [0, 1, 2, ..., 3, 4, 5],
       [1, 0, 2, ..., 4, 3, 5],
       [0, 1, 2, ..., 7, 3, 5]])

In [21]:
# Reverse each row of positions (largest value first) and look up the
# matching column labels.
a_cols=a.columns.to_numpy()[a_list.values[:,::-1]]
a_cols

array([['40대', '50대', '30대', ..., '10대', '10세 이하', '20대'],
       ['40대', '50대', '10대', ..., '60대', '20대', '10세 이하'],
       ['40대', '30대', '50대', ..., '10대', '70세 이상', '10세 이하'],
       ...,
       ['50대', '40대', '30대', ..., '20대', '10대', '10세 이하'],
       ['50대', '30대', '40대', ..., '20대', '10세 이하', '10대'],
       ['50대', '30대', '70세 이상', ..., '20대', '10대', '10세 이하']],
      dtype=object)

In [22]:
# Attach the ranked age-group labels as columns '1위' through '8위'.
a[[f"{rank}위" for rank in range(1, 9)]] = pd.DataFrame(a_cols, index=a.index)
a

Unnamed: 0_level_0,Unnamed: 1_level_0,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상,1위,2위,3위,4위,5위,6위,7위,8위
시군구명,행정동명,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
강남구,개포1동,17540,22877,16668,29373,39909,32293,25548,23582,40대,50대,30대,60대,70세 이상,10대,10세 이하,20대
강남구,개포2동,44411,90693,55632,68767,109744,91002,67523,67893,40대,50대,10대,30대,70세 이상,60대,20대,10세 이하
강남구,개포4동,27023,40258,44466,62266,67303,61981,44894,33748,40대,30대,50대,60대,20대,10대,70세 이상,10세 이하
강남구,논현1동,25207,39173,226528,257413,185112,114170,70034,62174,30대,20대,40대,50대,60대,70세 이상,10대,10세 이하
강남구,논현2동,43829,46364,178802,243296,198869,119037,67817,66808,30대,40대,20대,50대,60대,70세 이상,10대,10세 이하
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
중랑구,상봉2동,25514,29881,119234,125717,93236,103448,73681,63497,30대,20대,50대,40대,60대,70세 이상,10대,10세 이하
중랑구,신내1동,90587,79908,91447,114561,130306,131412,103762,99518,50대,40대,30대,60대,70세 이상,20대,10세 이하,10대
중랑구,신내2동,30597,41999,46413,60160,61858,68746,56253,59174,50대,40대,30대,70세 이상,60대,20대,10대,10세 이하
중랑구,중화1동,36916,34735,50751,61848,56506,62661,52592,54053,50대,30대,40대,70세 이상,60대,20대,10세 이하,10대
