# 서울시 생활인구
## 서울 생활인구 현황 (2022.10.05. 기준)
### url: https://data.seoul.go.kr/dataVisual/seoul/seoulLivingPopulation.do
### 서울시 생활인구란?
- 서울시와 KT가 공공빅데이터와 통신데이터를 이용하여 추계한, 서울의 특정 지역에 특정 시점에 존재하는 모든 인구

#### 데이터: 행정동 단위 서울 생활인구(내국인)
- url: https://data.seoul.go.kr/dataList/OA-14979/F/1/datasetView.do
- 설명: 서울시가 보유한 공공데이터와 통신데이터로 측정한, 특정 시점에 서울의 특정 지역에 존재하는 인구 중 내국인

<!-- <br> -->



※ 개인정보 비식별화를 위하여 ‘3명’ 이하인 경우 “*”로 처리


In [1]:
import os
import glob
import pandas as pd
import numpy as np

In [2]:
from tqdm.auto import tqdm, trange
from time import sleep

In [3]:
# Collect the names of the CSV files found in the data folder
local_people_list = []

long_foreigner_list = []
temp_foreinger_list = []
pbar = tqdm(os.listdir(r"./local_people_dong/"))
for file in pbar:
    # Entries that are not CSV files are skipped
    if not file.endswith(".csv"):
        continue
    pbar.set_description(file)  # show the current file name on the progress bar
    local_people_list.append(file)

# Sort the names so the files are processed in a deterministic order
local_people_list.sort()

print("no. of local_people >>", len(local_people_list))

  0%|          | 0/13 [00:00<?, ?it/s]

no. of local_people >> 12


In [4]:
def data_process(file_list, path):
    """Load every CSV in *file_list*, aggregate it, and stack the results.

    Each file is read as UTF-8 with ``"*"`` treated as missing, missing
    values are replaced by 0, and the frame is passed through
    ``create_dates`` and then ``combine_ages``. A file that fails to load
    is reported and skipped so one bad file does not abort the whole run.

    Args:
        file_list: File names, relative to *path*, to process.
        path: Directory that contains the files.

    Returns:
        The per-file outputs of ``combine_ages`` concatenated with
        ``pd.concat``.

    Raises:
        ValueError: If no file could be processed (empty *file_list* or
            every file failed to load).
    """
    df_sum_per_day_행정동_list = []
    problem_list = []
    pbar = tqdm(file_list)
    for file in pbar:
        pbar.set_description(file)  # show the file being processed on the progress bar
        try:
            df_temp = pd.read_csv(
                os.path.join(path, file), encoding="utf-8", na_values="*"
            )
        except Exception as e:
            # Best effort: report the failure and move on to the next file
            print(file, ">>", e)
            problem_list.append(file)
            continue

        # rename() returns a new frame, so the result has to be kept; labels
        # that are absent are ignored by default.
        # NOTE(review): the odd source label is presumably a header mangled by
        # a byte-order mark - confirm against the raw files.
        df_temp = df_temp.rename(columns={'?"기준일ID"': '기준일ID'})

        # Replace missing values (including the masked "*" cells) with 0
        df_temp.fillna(0, inplace=True)
        df_temp = create_dates(df_temp)

        # Collapse the detailed age/sex columns into age-group columns
        df_sum_per_day_행정동 = combine_ages(df_temp)
        df_sum_per_day_행정동_list.append(df_sum_per_day_행정동)

    if problem_list:
        print("skipped files >>", problem_list)

    if not df_sum_per_day_행정동_list:
        raise ValueError(
            f"No file could be processed (skipped: {problem_list})"
        )

    # Stack the per-file aggregates into a single frame
    df_concat_행정동 = pd.concat(df_sum_per_day_행정동_list)

    return df_concat_행정동

In [5]:
# Convert the 기준일ID column to datetime and derive the year, month and
# weekday/weekend columns from it
def create_dates(df_temp):
    """Add date-derived columns to *df_temp* and drop the raw ones.

    Inserts '날짜', '년', '월' and '주말여부' as the first four columns, then
    removes '기준일ID' and '시간대구분'. The frame is modified in place and
    also returned.

    Args:
        df_temp: Frame with a '기준일ID' column in ``YYYYMMDD`` form and a
            '시간대구분' column.

    Returns:
        The same frame object, after modification.
    """
    dates = pd.to_datetime(df_temp['기준일ID'], format='%Y%m%d')

    # dayofweek counts Monday as 0, so values above 4 are Saturday/Sunday
    day_type = (dates.dt.dayofweek > 4).map({True: '주말', False: '평일'})

    leading_columns = [
        ('날짜', dates),
        ('년', dates.dt.year),
        ('월', dates.dt.month),
        ('주말여부', day_type),
    ]
    for position, (name, values) in enumerate(leading_columns):
        df_temp.insert(position, name, values)

    # The raw date id and the hour-slot column are not needed downstream
    df_temp.drop(columns=['기준일ID', '시간대구분'], inplace=True)
    return df_temp

In [6]:
# Collapse the per-sex, per-age-band columns into one column per age group
def combine_ages(df_temp):
    """Merge the detailed age/sex columns into age groups and aggregate.

    For every age group the matching male and female band columns are added
    together into a new column, the source columns are dropped (the input
    frame is modified in place), and the numeric columns are summed per
    ('월', '주말여부', '행정동코드').

    Args:
        df_temp: Frame holding the 28 age/sex population columns plus the
            '월', '주말여부' and '행정동코드' columns.

    Returns:
        A new frame indexed by ('월', '주말여부', '행정동코드') containing the
        sum of every remaining numeric column.
    """
    age_groups = {
        '10세 이하': [
            '남자0세부터9세생활인구수', '여자0세부터9세생활인구수',
        ],
        '10대': [
            '남자10세부터14세생활인구수', '남자15세부터19세생활인구수',
            '여자10세부터14세생활인구수', '여자15세부터19세생활인구수',
        ],
        '20대': [
            '남자20세부터24세생활인구수', '남자25세부터29세생활인구수',
            '여자20세부터24세생활인구수', '여자25세부터29세생활인구수',
        ],
        '30대': [
            '남자30세부터34세생활인구수', '남자35세부터39세생활인구수',
            '여자30세부터34세생활인구수', '여자35세부터39세생활인구수',
        ],
        '40대': [
            '남자40세부터44세생활인구수', '남자45세부터49세생활인구수',
            '여자40세부터44세생활인구수', '여자45세부터49세생활인구수',
        ],
        '50대': [
            '남자50세부터54세생활인구수', '남자55세부터59세생활인구수',
            '여자50세부터54세생활인구수', '여자55세부터59세생활인구수',
        ],
        '60대': [
            '남자60세부터64세생활인구수', '남자65세부터69세생활인구수',
            '여자60세부터64세생활인구수', '여자65세부터69세생활인구수',
        ],
        '70세 이상': [
            '남자70세이상생활인구수', '여자70세이상생활인구수',
        ],
    }

    for label, source_cols in age_groups.items():
        # Add the columns one by one, left to right, so a missing value in
        # any source column still propagates to the group total
        total = df_temp[source_cols[0]]
        for col in source_cols[1:]:
            total = total + df_temp[col]
        df_temp[label] = total

    detailed_cols = [col for cols in age_groups.values() for col in cols]
    df_temp.drop(columns=detailed_cols, inplace=True)

    # numeric_only=True keeps non-numeric columns (e.g. a datetime column)
    # out of the sum; recent pandas versions raise on them instead of
    # silently dropping them
    return df_temp.groupby(['월', '주말여부', '행정동코드']).sum(numeric_only=True)

In [7]:
# Run the load/aggregate pipeline over every CSV name collected from ./local_people_dong/
local_people = data_process(local_people_list, "./local_people_dong/")

  0%|          | 0/12 [00:00<?, ?it/s]

In [8]:
# Display the aggregated frame (indexed by 월 / 주말여부 / 행정동코드)
local_people

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,년,총생활인구수,Unnamed: 32,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
월,주말여부,행정동코드,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
10,주말,11110515,485040,4.188575e+06,0.0,2.695534e+05,3.878401e+05,6.622971e+05,6.509952e+05,7.564015e+05,6.149330e+05,3.748064e+05,4.717482e+05
10,주말,11110530,485040,4.176157e+06,0.0,2.009019e+05,2.660735e+05,6.736866e+05,8.028526e+05,7.171601e+05,6.220885e+05,4.385951e+05,4.547991e+05
10,주말,11110540,485040,1.288043e+06,0.0,4.320595e+04,9.062858e+04,2.786974e+05,2.269798e+05,2.115088e+05,1.839588e+05,1.368973e+05,1.161662e+05
10,주말,11110550,485040,3.541879e+06,0.0,1.886874e+05,3.622477e+05,4.207683e+05,4.520556e+05,6.144544e+05,5.846390e+05,4.383305e+05,4.806959e+05
10,주말,11110560,485040,4.596364e+06,0.0,2.963599e+05,4.426485e+05,4.755127e+05,5.250389e+05,7.371652e+05,8.183755e+05,6.500545e+05,6.512091e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,평일,11740650,1067616,1.260566e+07,0.0,7.732879e+05,7.118918e+05,1.727547e+06,2.222775e+06,2.025177e+06,1.997333e+06,1.578916e+06,1.568729e+06
9,평일,11740660,1067616,1.372329e+07,0.0,7.653786e+05,1.223291e+06,1.858218e+06,2.100119e+06,2.349021e+06,2.205274e+06,1.655411e+06,1.566580e+06
9,평일,11740685,1067616,2.866566e+07,0.0,2.034015e+06,2.011901e+06,3.515219e+06,4.484748e+06,4.507744e+06,4.521821e+06,3.780187e+06,3.810026e+06
9,평일,11740690,1067616,2.375180e+06,0.0,2.018202e+05,4.799438e+05,1.907481e+05,2.569999e+05,3.689892e+05,3.379687e+05,2.633925e+05,2.753179e+05


In [9]:
# Remove two columns that are not meaningful after aggregation: '년' has been
# summed by the groupby, and 'Unnamed: 32' is 0.0 in every row displayed above
local_people.drop(['Unnamed: 32', '년'], axis=1, inplace=True)
local_people

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,총생활인구수,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
월,주말여부,행정동코드,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10,주말,11110515,4.188575e+06,2.695534e+05,3.878401e+05,6.622971e+05,6.509952e+05,7.564015e+05,6.149330e+05,3.748064e+05,4.717482e+05
10,주말,11110530,4.176157e+06,2.009019e+05,2.660735e+05,6.736866e+05,8.028526e+05,7.171601e+05,6.220885e+05,4.385951e+05,4.547991e+05
10,주말,11110540,1.288043e+06,4.320595e+04,9.062858e+04,2.786974e+05,2.269798e+05,2.115088e+05,1.839588e+05,1.368973e+05,1.161662e+05
10,주말,11110550,3.541879e+06,1.886874e+05,3.622477e+05,4.207683e+05,4.520556e+05,6.144544e+05,5.846390e+05,4.383305e+05,4.806959e+05
10,주말,11110560,4.596364e+06,2.963599e+05,4.426485e+05,4.755127e+05,5.250389e+05,7.371652e+05,8.183755e+05,6.500545e+05,6.512091e+05
...,...,...,...,...,...,...,...,...,...,...,...
9,평일,11740650,1.260566e+07,7.732879e+05,7.118918e+05,1.727547e+06,2.222775e+06,2.025177e+06,1.997333e+06,1.578916e+06,1.568729e+06
9,평일,11740660,1.372329e+07,7.653786e+05,1.223291e+06,1.858218e+06,2.100119e+06,2.349021e+06,2.205274e+06,1.655411e+06,1.566580e+06
9,평일,11740685,2.866566e+07,2.034015e+06,2.011901e+06,3.515219e+06,4.484748e+06,4.507744e+06,4.521821e+06,3.780187e+06,3.810026e+06
9,평일,11740690,2.375180e+06,2.018202e+05,4.799438e+05,1.907481e+05,2.569999e+05,3.689892e+05,3.379687e+05,2.633925e+05,2.753179e+05


In [10]:
# Show floats as whole numbers with thousands separators from here on
pd.set_option('display.float_format', '{:,.0f}'.format)
# Turn the (월, 주말여부, 행정동코드) index levels back into ordinary columns
df_dong = local_people.reset_index()

In [11]:
# Give the total-population column its reporting name.
# NOTE(review): the new name means "daily average living population", but the
# values come from a groupby sum in combine_ages - confirm whether an average
# was intended.
df_dong = df_dong.rename(columns={'총생활인구수': '일일평균생활인구'})

In [12]:
# Load the administrative-dong code mapping table
df_code = pd.read_excel("../code/data/집계구 단위 서울 생활인구/행정동코드_매핑정보_20200325.xlsx")
# Discard the row labelled 0 (presumably a secondary header row in the sheet - TODO confirm)
df_code = df_code.drop(index=0)

# Cast the codes on both sides to str so the join keys share one dtype
df_dong['행정동코드'] = df_dong['행정동코드'].astype(str)
df_code['행자부행정동코드'] = df_code['행자부행정동코드'].astype(str)

# Inner join: rows whose code is missing from the mapping table are dropped
df_dong = df_dong.merge(
    df_code,
    how='inner',
    left_on='행정동코드',
    right_on='행자부행정동코드',
)
# The join key from the left side duplicates 행자부행정동코드, so remove it
df_dong = df_dong.drop(columns='행정동코드')

In [13]:
# Output column order: location names, codes, grouping keys, then population columns
col_order = [
    '시도명',
    '시군구명',
    '행정동명',
    '통계청행정동코드',
    '행자부행정동코드',
    '월',
    '주말여부',
    '일일평균생활인구',
    '10세 이하',
    '10대',
    '20대',
    '30대',
    '40대',
    '50대',
    '60대',
    '70세 이상',
]

In [14]:
# Keep only the columns listed in col_order, in that order
df_dong = df_dong.reindex(col_order, axis="columns")

In [15]:
# Save the result to a CSV file in the working directory
# (the row index is written as well, since index=False is not passed)
df_dong.to_csv("서울_월별_일일평균생활인구.csv")

In [16]:
# Display the final per-dong table
df_dong

Unnamed: 0,시도명,시군구명,행정동명,통계청행정동코드,행자부행정동코드,월,주말여부,일일평균생활인구,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
0,서울,종로구,청운효자동,1101072,11110515,10,주말,4188575,269553,387840,662297,650995,756402,614933,374806,471748
1,서울,종로구,청운효자동,1101072,11110515,10,평일,8976499,653591,1010679,1247347,1338158,1628032,1326014,789301,983376
2,서울,종로구,청운효자동,1101072,11110515,11,주말,3498545,225182,315633,587509,547749,614348,518237,309324,380564
3,서울,종로구,청운효자동,1101072,11110515,11,평일,9745093,706218,1071396,1463148,1472419,1724930,1424740,847809,1034433
4,서울,종로구,청운효자동,1101072,11110515,12,주말,3220621,215402,304805,493789,475398,576381,483912,298773,372163
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10171,서울,강동구,둔촌2동,1125071,11740700,7,평일,13795005,1203844,1379589,1565751,1906143,2029248,2060555,1747364,1902512
10172,서울,강동구,둔촌2동,1125071,11740700,8,주말,5041135,423389,495918,577517,698223,760381,786785,643790,655133
10173,서울,강동구,둔촌2동,1125071,11740700,8,평일,15062622,1303303,1515236,1698056,2076496,2221282,2283428,1878644,2086178
10174,서울,강동구,둔촌2동,1125071,11740700,9,주말,5069338,437422,498334,560861,701254,774647,785411,649313,662096


In [17]:
# Total of the 일일평균생활인구 column per (district, dong), largest first
df_dong.pivot_table(
    values="일일평균생활인구",
    index=["시군구명", "행정동명"],
    aggfunc='sum',
).sort_values(by=["일일평균생활인구"], ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,일일평균생활인구
시군구명,행정동명,Unnamed: 2_level_1
강남구,역삼1동,898469531
영등포구,여의동,784421734
마포구,서교동,601451142
서초구,서초3동,573157078
은평구,진관동,548311447
...,...,...
종로구,삼청동,44892735
종로구,창신3동,43246904
강동구,둔촌1동,40640930
양천구,신월6동,39491343


In [18]:
# Ranking analysis: keep the district/dong names plus every column whose name
# contains a digit (the age-group columns).
# The pattern is a raw string so that \d is not parsed as a string escape.
age_lp = df_dong.filter(regex=r'\d|시군구명|행정동명')
age_lp

Unnamed: 0,시군구명,행정동명,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상
0,종로구,청운효자동,269553,387840,662297,650995,756402,614933,374806,471748
1,종로구,청운효자동,653591,1010679,1247347,1338158,1628032,1326014,789301,983376
2,종로구,청운효자동,225182,315633,587509,547749,614348,518237,309324,380564
3,종로구,청운효자동,706218,1071396,1463148,1472419,1724930,1424740,847809,1034433
4,종로구,청운효자동,215402,304805,493789,475398,576381,483912,298773,372163
...,...,...,...,...,...,...,...,...,...,...
10171,강동구,둔촌2동,1203844,1379589,1565751,1906143,2029248,2060555,1747364,1902512
10172,강동구,둔촌2동,423389,495918,577517,698223,760381,786785,643790,655133
10173,강동구,둔촌2동,1303303,1515236,1698056,2076496,2221282,2283428,1878644,2086178
10174,강동구,둔촌2동,437422,498334,560861,701254,774647,785411,649313,662096


In [19]:
# Rank 1: for each (district, dong), the age group with the highest mean value
a = age_lp.groupby(by=["시군구명", "행정동명"]).mean()
a["1위"] = a.idxmax(axis="columns")
a

Unnamed: 0_level_0,Unnamed: 1_level_0,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상,1위
시군구명,행정동명,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,개포1동,271405,353692,254430,448983,606593,497891,397010,360729,40대
강남구,개포2동,692000,1426232,835498,1033741,1639952,1370742,1030792,1030528,40대
강남구,개포4동,419520,620986,687812,962727,1038365,946433,687222,513803,40대
강남구,논현1동,371899,582811,3637276,4166664,3015055,1821386,1099417,965690,30대
강남구,논현2동,660170,697324,3003663,4057377,3306627,1927931,1074343,1043524,30대
...,...,...,...,...,...,...,...,...,...,...
중랑구,상봉2동,366848,437587,1771490,1872351,1407513,1574771,1123151,968129,30대
중랑구,신내1동,1364298,1198688,1390275,1733818,1975765,1990924,1570120,1510432,50대
중랑구,신내2동,467436,633410,701707,906151,934323,1035673,851116,900994,50대
중랑구,중화1동,586586,544592,764379,926231,850274,950546,796913,819637,50대


In [20]:
# All ranks: recompute the per-dong means, then get for every row the column
# positions that would sort its values in ascending order
a = age_lp.groupby(by=["시군구명", "행정동명"]).mean()
a_list = a.apply(np.argsort, axis="columns")
a_list.to_numpy()

array([[2, 0, 1, ..., 3, 5, 4],
       [0, 2, 7, ..., 5, 1, 4],
       [0, 7, 1, ..., 5, 3, 4],
       ...,
       [0, 1, 2, ..., 3, 4, 5],
       [1, 0, 2, ..., 4, 3, 5],
       [0, 1, 2, ..., 3, 7, 5]])

In [21]:
# Flip each row of sort positions left-to-right so the largest comes first,
# then use the positions to look up the matching age-group labels
a_cols = a.columns.to_numpy()[np.fliplr(a_list.to_numpy())]
a_cols

array([['40대', '50대', '30대', ..., '10대', '10세 이하', '20대'],
       ['40대', '10대', '50대', ..., '70세 이상', '20대', '10세 이하'],
       ['40대', '30대', '50대', ..., '10대', '70세 이상', '10세 이하'],
       ...,
       ['50대', '40대', '30대', ..., '20대', '10대', '10세 이하'],
       ['50대', '30대', '40대', ..., '20대', '10세 이하', '10대'],
       ['50대', '70세 이상', '30대', ..., '20대', '10대', '10세 이하']],
      dtype=object)

In [22]:
# Attach the ordered age-group labels as rank columns '1위' through '8위'
a[[f"{rank}위" for rank in range(1, 9)]] = pd.DataFrame(a_cols, index=a.index)
a

Unnamed: 0_level_0,Unnamed: 1_level_0,10세 이하,10대,20대,30대,40대,50대,60대,70세 이상,1위,2위,3위,4위,5위,6위,7위,8위
시군구명,행정동명,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
강남구,개포1동,271405,353692,254430,448983,606593,497891,397010,360729,40대,50대,30대,60대,70세 이상,10대,10세 이하,20대
강남구,개포2동,692000,1426232,835498,1033741,1639952,1370742,1030792,1030528,40대,10대,50대,30대,60대,70세 이상,20대,10세 이하
강남구,개포4동,419520,620986,687812,962727,1038365,946433,687222,513803,40대,30대,50대,20대,60대,10대,70세 이상,10세 이하
강남구,논현1동,371899,582811,3637276,4166664,3015055,1821386,1099417,965690,30대,20대,40대,50대,60대,70세 이상,10대,10세 이하
강남구,논현2동,660170,697324,3003663,4057377,3306627,1927931,1074343,1043524,30대,40대,20대,50대,60대,70세 이상,10대,10세 이하
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
중랑구,상봉2동,366848,437587,1771490,1872351,1407513,1574771,1123151,968129,30대,20대,50대,40대,60대,70세 이상,10대,10세 이하
중랑구,신내1동,1364298,1198688,1390275,1733818,1975765,1990924,1570120,1510432,50대,40대,30대,60대,70세 이상,20대,10세 이하,10대
중랑구,신내2동,467436,633410,701707,906151,934323,1035673,851116,900994,50대,40대,30대,70세 이상,60대,20대,10대,10세 이하
중랑구,중화1동,586586,544592,764379,926231,850274,950546,796913,819637,50대,30대,40대,70세 이상,60대,20대,10세 이하,10대
