# 지역별 연봉 분석
- https://www.data.go.kr/data/15063424/fileData.do

## 개요
- 근로소득자는 월급의 9%(기업과 근로소득자가 반반 부담)를 국민연금으로 납부한다. 그래서 기업이 납부하는 국민연금 데이터를 이용해 사업장별 평균 연봉을 역산하고, 이를 지역별로 집계해 비교한다.

In [1]:
import pandas as pd
import cufflinks as cf
import numpy as np

In [2]:
# Load the National Pension Service workplace-enrollment snapshot.
# The file is CP949-encoded; low_memory=False makes pandas read it in one pass.
data = pd.read_csv(
    './large_data/국민연금공단_국민연금 가입 사업장 내역_20220121.csv',
    low_memory=False,
    encoding='cp949',
)

In [3]:
# Remove leading/trailing whitespace from every column header.
data.columns = data.columns.map(str.strip)

In [4]:
# Display the cleaned column index to check the available field names.
data.columns

Index(['자료생성년월', '사업장명', '사업자등록번호', '사업장가입상태코드 1 등록 2 탈퇴', '우편번호', '사업장지번상세주소',
       '사업장도로명상세주소', '고객법정동주소코드', '고객행정동주소코드', '법정동주소광역시도코드', '법정동주소광역시시군구코드',
       '법정동주소광역시시군구읍면동코드', '사업장형태구분코드 1 법인 2 개인', '사업장업종코드', '사업장업종코드명',
       '적용일자', '재등록일자', '탈퇴일자', '가입자수', '당월고지금액', '신규취득자수', '상실가입자수'],
      dtype='object')

In [5]:
# Preprocessing: estimate the average annual salary per workplace.
# The monthly billed pension amount is treated as 9% of monthly wages
# (employer and employee halves combined), so the per-subscriber annual salary
# is billed / subscribers * 12 months * 100 / 9.
# Workplaces reporting zero subscribers get NaN instead of +/-inf, so later
# mean()/sum() aggregations skip them rather than being poisoned by infinities.
pay = data['당월고지금액'] / data['가입자수'].replace(0, np.nan)
pay = pay * 12 * 100 / 9
data['pay'] = pay

In [10]:
# Select workplaces whose lot-number address contains the target district.
# regex=False: the patterns are plain substrings, not regular expressions.
# na=False: rows with a missing address are treated as non-matching; without it
# the mask contains NaN and boolean indexing raises ValueError.
gasan = data[data['사업장지번상세주소'].str.contains('서울특별시 금천구 가산동', regex=False, na=False)]
bundang = data[data['사업장지번상세주소'].str.contains('경기도 성남시 분당구', regex=False, na=False)]

In [11]:
# Unweighted mean of the per-workplace salary estimate in Gasan-dong.
gasan.loc[:, 'pay'].mean()

35474168.37737428

In [12]:
# Unweighted mean of the per-workplace salary estimate in Bundang-gu.
bundang.loc[:, 'pay'].mean()

37507188.20297679

In [13]:
# Average number of pension subscribers per workplace in Gasan-dong.
gasan.loc[:, '가입자수'].mean()

17.883481040244078

In [14]:
# Average number of pension subscribers per workplace in Bundang-gu.
bundang.loc[:, '가입자수'].mean()

37.61972977372619

In [16]:
# Subscriber-weighted average salary for Gasan-dong:
# total estimated payroll divided by total subscribers.
gasan['pay'].mul(gasan['가입자수']).sum() / gasan['가입자수'].sum()

37856288.75962695

In [17]:
# Subscriber-weighted average salary for Bundang-gu:
# total estimated payroll divided by total subscribers.
bundang['pay'].mul(bundang['가입자수']).sum() / bundang['가입자수'].sum()

45639462.02332632

In [18]:
# Per-district averages of every numeric column.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError
# on the string columns (names, addresses) instead of silently dropping them.
# NOTE(review): this district code does not appear to be unique across
# provinces (some groups show a non-integer averaged province code), so a group
# may mix districts from different provinces — confirm before relying on it.
temp = data.groupby('법정동주소광역시시군구코드').mean(numeric_only=True)

In [19]:
# Show the grouped averages ordered from lowest to highest mean salary.
temp.sort_values(by='pay', ascending=True)

Unnamed: 0_level_0,사업자등록번호,사업장가입상태코드 1 등록 2 탈퇴,고객법정동주소코드,법정동주소광역시도코드,법정동주소광역시시군구읍면동코드,사업장형태구분코드 1 법인 2 개인,가입자수,당월고지금액,신규취득자수,상실가입자수,pay
법정동주소광역시시군구코드,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
920,477108.436782,1.218391,4.792030e+09,47.000000,295.103448,1.0,6.397701,1.443518e+06,0.335632,0.680460,2.661479e+07
900,460699.028986,1.148551,4.751020e+09,46.609903,297.898551,1.0,8.727053,2.047722e+06,0.400966,0.753623,2.774596e+07
890,488891.461027,1.117871,4.786558e+09,46.975285,298.498099,1.0,8.296578,1.962228e+06,0.316540,0.724335,2.787418e+07
780,352440.904398,1.164436,4.489883e+09,44.118547,279.440727,1.0,8.152964,1.913185e+06,0.343212,0.736138,2.790224e+07
280,472481.811350,1.141104,4.728020e+09,47.000000,196.483129,1.0,11.003067,2.658476e+06,0.473926,0.901840,2.798759e+07
...,...,...,...,...,...,...,...,...,...,...,...
560,283074.861020,1.018849,1.156012e+09,11.000000,117.026116,1.0,47.613376,1.630344e+07,1.391053,2.162144,3.612721e+07
199,163140.035714,1.000000,4.119911e+09,41.000000,105.107143,1.0,16.321429,4.462468e+06,0.392857,0.785714,3.646701e+07
173,280937.410178,1.024534,4.117310e+09,41.000000,102.537307,1.0,16.717762,5.073946e+06,0.567518,0.861111,3.659484e+07
680,337551.234826,1.032502,1.168011e+09,11.000000,105.564243,1.0,27.708611,8.454860e+06,1.115259,1.448766,3.681756e+07


In [20]:
# Region key: the first five characters of the legal-dong address code.
data['code'] = data['고객법정동주소코드'].astype(str).str.slice(stop=5)

In [21]:
# Per-region averages of every numeric column, keyed by the 5-character code.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError
# on the string columns instead of silently dropping them.
temp = data.groupby('code').mean(numeric_only=True)

In [23]:
# Estimated total annual payroll per workplace (salary estimate x subscribers).
data['pay_sum'] = data['pay'].mul(data['가입자수'])

In [24]:
# Recompute the per-region averages so the result includes the pay_sum column.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError
# on the string columns instead of silently dropping them.
temp = data.groupby('code').mean(numeric_only=True)

In [25]:
# Subscriber-weighted average salary per region: total estimated payroll
# divided by total subscribers, the same sum/sum form used for the
# Gasan/Bundang comparison. Dividing the group *means* is only equivalent when
# both means cover the same rows: mean() skips rows whose pay_sum is NaN but
# still counts them in the subscriber mean, which inflates the ratio.
_valid = data[data['pay_sum'].notna()]
_totals = _valid.groupby('code')[['pay_sum', '가입자수']].sum()
# Assignment aligns on the 'code' index shared with temp.
temp['pay_mean'] = _totals['pay_sum'] / _totals['가입자수']

In [27]:
# Load the nationwide legal-dong code table (CP949-encoded CSV).
address_code = pd.read_csv(
    './data/국토교통부_전국 법정동_20211217.csv',
    encoding='cp949',
)

In [28]:
# Replace missing values with empty strings so the name columns can be joined.
address_code = address_code.fillna(value='')

In [29]:
# Build a readable address label from the four administrative-level names.
# Empty levels are skipped so the label has no trailing or doubled spaces
# (joining all four columns unconditionally yields e.g. '제주도   ').
address_code['주소'] = address_code[['시도명', '시군구명', '읍면동명', '리명']].apply(
    lambda levels: ' '.join(name for name in levels if name), axis=1
)

In [30]:
# Character length of each address label; used below as the sort key.
address_code['주소길이'] = address_code['주소'].map(len)

In [31]:
# Order rows from the shortest address label to the longest.
address_code = address_code.sort_values(by='주소길이', ascending=True)

In [32]:
# Region key as a string: the legal-dong code with its last five digits dropped.
address_code['code'] = address_code['법정동코드'].floordiv(100000).astype(str)

In [33]:
# Keep the first row per region code (the shortest label after the sort above),
# then reduce the table to the code -> label lookup with a fresh 0..n-1 index.
address_code = address_code.drop_duplicates(subset='code', keep='first')
address_code = address_code.loc[:, ['code', '주소']].reset_index(drop=True)

In [34]:
# Display the resulting region-code -> address-label lookup table.
address_code

Unnamed: 0,code,주소
0,49000,제주도
1,41000,경기도
2,42000,강원도
3,45000,전라북도
4,47000,경상북도
...,...,...
482,28116,인천광역시 중구용유출장
483,27235,대구광역시 북구칠곡출장
484,27145,대구광역시 동구안심출장
485,48127,경상남도 창원시마산회원구


In [46]:
# Take the last ten rows after an ascending sort on pay_mean,
# i.e. the ten regions with the highest weighted average salary.
z = temp.sort_values(by='pay_mean').iloc[-10:]

In [47]:
# Move the 'code' index back into a regular column so it can be a merge key.
z = z.reset_index(drop=False)

In [49]:
# Attach the address label to each region via the shared 'code' column
# (default inner join).
z = pd.merge(z, address_code, on='code')

In [50]:
# Preview the first two merged rows.
z.iloc[:2]

Unnamed: 0,code,사업자등록번호,사업장가입상태코드 1 등록 2 탈퇴,고객법정동주소코드,법정동주소광역시도코드,법정동주소광역시시군구코드,법정동주소광역시시군구읍면동코드,사업장형태구분코드 1 법인 2 개인,가입자수,당월고지금액,신규취득자수,상실가입자수,pay,pay_sum,pay_mean,주소
0,42770,303314.529412,1.15213,4277028000.0,42.0,770.0,280.146045,1.0,18.547667,5788263.0,0.63286,0.975659,28792840.0,888976200.0,47929270.0,강원도 정선군
1,44825,356778.276276,1.153153,4482528000.0,44.0,825.0,276.426426,1.0,14.051051,4412706.0,0.411411,0.903904,30619060.0,682662500.0,48584440.0,충청남도 태안군


In [52]:
# Switch cufflinks to offline plotting before drawing charts.
cf.go_offline()
# Update the cufflinks configuration.
# NOTE(review): offline=False looks at odds with go_offline() above, and
# world_readable=True presumably makes any online-hosted chart public —
# confirm both settings are intended.
cf.set_config_file(offline=False, world_readable=True)

In [53]:
# Bar chart of the weighted average salary for the top-ten regions.
z.iplot(x='주소', y='pay_mean', kind='bar')

In [54]:
# Display the full top-ten table, including the merged address labels.
z

Unnamed: 0,code,사업자등록번호,사업장가입상태코드 1 등록 2 탈퇴,고객법정동주소코드,법정동주소광역시도코드,법정동주소광역시시군구코드,법정동주소광역시시군구읍면동코드,사업장형태구분코드 1 법인 2 개인,가입자수,당월고지금액,신규취득자수,상실가입자수,pay,pay_sum,pay_mean,주소
0,42770,303314.529412,1.15213,4277028000.0,42.0,770.0,280.146045,1.0,18.547667,5788263.0,0.63286,0.975659,28792840.0,888976200.0,47929270.0,강원도 정선군
1,44825,356778.276276,1.153153,4482528000.0,44.0,825.0,276.426426,1.0,14.051051,4412706.0,0.411411,0.903904,30619060.0,682662500.0,48584440.0,충청남도 태안군
2,31170,539525.369048,1.056122,3117010000.0,31.0,170.0,103.528912,1.0,55.794218,19548550.0,1.739796,2.0,33407720.0,2756486000.0,49404500.0,울산광역시 동구
3,44210,356099.799523,1.096062,4421023000.0,44.0,210.0,233.025656,1.0,20.745823,7019617.0,0.989857,1.26611,34570950.0,1028623000.0,49582180.0,충청남도 서산시
4,28140,273511.686916,1.067757,2814010000.0,28.0,140.0,104.646028,1.0,17.75,6217014.0,0.495327,0.745327,33805230.0,889183700.0,50094850.0,인천광역시 동구
5,44270,360566.780284,1.107896,4427026000.0,44.0,270.0,258.490436,1.0,20.931829,7057206.0,0.974497,1.070132,35463500.0,1051298000.0,50224860.0,충청남도 당진시
6,46170,426769.682645,1.056749,4617022000.0,46.0,170.0,219.070523,1.0,39.857851,14228530.0,0.641873,1.563636,31085090.0,2008929000.0,50402340.0,전라남도 나주시
7,41500,277806.915728,1.057029,4150026000.0,41.0,500.0,264.500908,1.0,28.336724,10174230.0,1.61097,0.990919,35388820.0,1436945000.0,50709640.0,경기도 이천시
8,47111,491092.405553,1.084981,4711117000.0,47.0,111.0,172.899874,1.0,31.645772,11688020.0,0.862432,1.093816,34734310.0,1702355000.0,53794070.0,경상북도 포항시남구
9,41117,317476.345181,1.043746,4111710000.0,41.0,117.0,103.739576,1.0,59.950786,24642710.0,0.91285,1.192413,35688340.0,3432325000.0,57252380.0,경기도 수원시영통구
