# Chapter4 - Example 3

![image](https://github.com/kks00/data-science/assets/68108664/f65d06a2-747e-4267-8ae1-02e29af3fa20)

In [39]:
import numpy as np
from datascience import *

## URL로부터 데이터 읽어오기

In [2]:
table = Table.read_table("http://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2010-2019/nc-est2019-agesex-res.csv")
table

SEX,AGE,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019
0,0,3944153,3944160,3951430,3963092,3926570,3931258,3954787,3983981,3954773,3893990,3815343,3783052
0,1,3978070,3978090,3957730,3966225,3977549,3942698,3948891,3973133,4002903,3972711,3908830,3829599
0,2,4096929,4096939,4090621,3970654,3978925,3991740,3958711,3966321,3991349,4020045,3987032,3922044
0,3,4119040,4119051,4111688,4101644,3981531,3991017,4005928,3974351,3982984,4006946,4033038,3998665
0,4,4063170,4063186,4077346,4121488,4111490,3992502,4004032,4020292,3989750,3997280,4018719,4043323
0,5,4056858,4056872,4064521,4087054,4131049,4121876,4004576,4017589,4035033,4003452,4008443,4028281
0,6,4066381,4066412,4072904,4074531,4096631,4141126,4133372,4017388,4031568,4048018,4014057,4017227
0,7,4030579,4030594,4042990,4082821,4084175,4106756,4152666,4145872,4030888,4044139,4058370,4022319
0,8,4046486,4046497,4025501,4052773,4092559,4094513,4118349,4165033,4158848,4042924,4054236,4066194
0,9,4148353,4148369,4125312,4035319,4062726,4103052,4106068,4130887,4177895,4170813,4053179,4061874


---

## Label 중 2014와 2019를 포함하는 라벨 찾기

In [8]:
target_labels = [i for i in table.labels if i.find("2014") >= 0 or i.find("2019") >= 0]
target_labels

['POPESTIMATE2014', 'POPESTIMATE2019']

---

## 연령, 성별, 2014년, 2019년 자료만 추출

In [14]:
selected_table = table.select(["SEX", "AGE"] + target_labels)
selected_table = selected_table.relabeled("POPESTIMATE2014", "2014").relabeled("POPESTIMATE2019", "2019")
selected_table

SEX,AGE,2014,2019
0,0,3954787,3783052
0,1,3948891,3829599
0,2,3958711,3922044
0,3,4005928,3998665
0,4,4004032,4043323
0,5,4004576,4028281
0,6,4133372,4017227
0,7,4152666,4022319
0,8,4118349,4066194
0,9,4106068,4061874


---

## 증가율, 감소율을 포함한 테이블 만들기

In [24]:
arr_2014 = selected_table.column("2014")
arr_2019 = selected_table.column("2019")

increase_data = (arr_2019 - arr_2014) / arr_2019
decrease_data = (arr_2014 - arr_2019) / arr_2019

selected_table = selected_table.with_columns("증가율", increase_data).set_format("증가율", PercentFormatter)
selected_table = selected_table.with_columns("감소율", decrease_data).set_format("감소율", PercentFormatter)
selected_table

SEX,AGE,2014,2019,증가율,감소율
0,0,3954787,3783052,-4.54%,4.54%
0,1,3948891,3829599,-3.11%,3.11%
0,2,3958711,3922044,-0.93%,0.93%
0,3,4005928,3998665,-0.18%,0.18%
0,4,4004032,4043323,0.97%,-0.97%
0,5,4004576,4028281,0.59%,-0.59%
0,6,4133372,4017227,-2.89%,2.89%
0,7,4152666,4022319,-3.24%,3.24%
0,8,4118349,4066194,-1.28%,1.28%
0,9,4106068,4061874,-1.09%,1.09%


---

## 해당 기간에서 가장 많이 증가한 연령은?

In [31]:
max_increased_index = selected_table.column("증가율").argmax()

selected_table.column("AGE")[max_increased_index]

100

---

## 해당 기간에서 가장 감소한 연령은?

In [32]:
max_decreased_index = selected_table.column("감소율").argmax()

selected_table.column("AGE")[max_decreased_index]

51

---

## 해당 기간에서 전체 인구는 증가 하였는가? 아님 감소 하였는가?
- #### SEX=0, AGE=999인 행 추출

In [36]:
selected_table.where("SEX", are.equal_to(0)).where("AGE", are.equal_to(999))

SEX,AGE,2014,2019,증가율,감소율
0,999,318301008,328239523,3.03%,-3.03%



####    증가율이 양수이므로 증가하였다.