#### OpenAPI 데이터 요청
- 요청 방식
    1. params 사용: URL 끝에 ?key=value로 추가되는 방식
        - response = requests.get(url, params=params)
    2. Path Parameter: URL 경로 내에 직접 데이터 타입, 값을 넣는 방식
        - response = requests.get(url)

- 데이터 유형별 라이브러리
    1. data
        - xml -> xmltodict
        - json -> json
    2. HTML
        - 정적 -> HTML
        - 동적 -> HTML(DB, csv, xls 등)
            - 동기식: 사용자가 요청 시 전체 페이지 로드
            - 비동기식: 필요한 데이터만 요청

#### 지역별 주민등록인구 API 받아오기
- https://www.data.go.kr/data/15107303/openapi.do#/API%20%EB%AA%A9%EB%A1%9D/getRegistrationPopulationByRegion

In [7]:
import requests
import os
from dotenv import load_dotenv

In [3]:
# Load key=value pairs from a local .env file into the process environment,
# so secrets such as the API service key are read at runtime instead of
# being written in the notebook.
load_dotenv()

True

- params 사용해 OpenAPI 데이터 받아오기

In [None]:
# Read the API key from the environment so its value never appears in the notebook.
service_key = os.environ.get('service_key')

# Endpoint of the "registration population by region" OpenAPI.
url = (
    'https://apis.data.go.kr/1741000/RegistrationPopulationByRegion'
    '/getRegistrationPopulationByRegion'
)

# Query-string values sent along with the request.
params = dict(ServiceKey=service_key, pageNo=1, numOfRows=30)

In [9]:
# Send the GET request; `params` is URL-encoded into the query string.
# The timeout keeps the cell from hanging forever if the server never answers,
# and raise_for_status() turns an HTTP 4xx/5xx reply into an immediate error
# instead of letting an error page flow into the later parsing cells.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()

In [10]:
# Show the response object; its repr includes the HTTP status code.
print(response)

<Response [200]>


In [11]:
# Peek at the raw response bytes (XML). Only the first 500 bytes are printed:
# that is enough to see the payload format without flooding the notebook
# output with the entire document.
print(response.content[:500])

b'<?xml version="1.0" encoding="UTF-8"?>\n\n<RegistrationPopulationByRegion>\n  <head>\n    <totalCount>284</totalCount>\n    <numOfRows>30</numOfRows>\n    <pageNo>1</pageNo>\n    <type>XML</type>\n    <RESULT>\n      <resultCode>INFO-0</resultCode>\n      <resultMsg>NOMAL SERVICE</resultMsg>\n    </RESULT>\n  </head>\n  <row>\n    <wrttimeid>2008</wrttimeid>\n    <seq>1</seq>\n    <regi>\xea\xb3\x84</regi>\n    <population_tot>49540367</population_tot>\n    <population_man>24822897</population_man>\n    <population_female>24717470</population_female>\n    <houshol>19005339</houshol>\n  </row>\n  <row>\n    <wrttimeid>2008</wrttimeid>\n    <seq>2</seq>\n    <regi>\xec\x84\x9c\xec\x9a\xb8</regi>\n    <population_tot>10200827</population_tot>\n    <population_man>5061809</population_man>\n    <population_female>5139018</population_female>\n    <houshol>4097562</houshol>\n  </row>\n  <row>\n    <wrttimeid>2008</wrttimeid>\n    <seq>3</seq>\n    <regi>\xeb\xb6\x80\xec\x82\xb0</regi>\n    

In [None]:
# Install the library that converts XML data into a dict
# (one-time setup; kept commented out so re-running the notebook does not reinstall it).
# !pip install xmltodict

Collecting xmltodict
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.14.2


In [13]:
# xml 데이터를 dict 형태로 바꿔주는 xmltodict 라이브러리 로드
import xmltodict

In [15]:
# Parse the XML payload into nested dicts.
# force_list guarantees that 'row' is always a list: without it xmltodict
# returns a bare dict when the response contains exactly one <row> element,
# which breaks code that expects a list of records (e.g. building a DataFrame).
dict_data = xmltodict.parse(response.content, force_list=('row',))
dict_data

{'RegistrationPopulationByRegion': {'head': {'totalCount': '284',
   'numOfRows': '30',
   'pageNo': '1',
   'type': 'XML',
   'RESULT': {'resultCode': 'INFO-0', 'resultMsg': 'NOMAL SERVICE'}},
  'row': [{'wrttimeid': '2008',
    'seq': '1',
    'regi': '계',
    'population_tot': '49540367',
    'population_man': '24822897',
    'population_female': '24717470',
    'houshol': '19005339'},
   {'wrttimeid': '2008',
    'seq': '2',
    'regi': '서울',
    'population_tot': '10200827',
    'population_man': '5061809',
    'population_female': '5139018',
    'houshol': '4097562'},
   {'wrttimeid': '2008',
    'seq': '3',
    'regi': '부산',
    'population_tot': '3564577',
    'population_man': '1773154',
    'population_female': '1791423',
    'houshol': '1311724'},
   {'wrttimeid': '2008',
    'seq': '4',
    'regi': '대구',
    'population_tot': '2492724',
    'population_man': '1246873',
    'population_female': '1245851',
    'houshol': '894969'},
   {'wrttimeid': '2008',
    'seq': '5',
   

In [16]:
import pprint

In [17]:
# Pretty-print the parsed response so the nesting is easier to read.
pprint.pprint(dict_data)

{'RegistrationPopulationByRegion': {'head': {'RESULT': {'resultCode': 'INFO-0',
                                                        'resultMsg': 'NOMAL '
                                                                     'SERVICE'},
                                             'numOfRows': '30',
                                             'pageNo': '1',
                                             'totalCount': '284',
                                             'type': 'XML'},
                                    'row': [{'houshol': '19005339',
                                             'population_female': '24717470',
                                             'population_man': '24822897',
                                             'population_tot': '49540367',
                                             'regi': '계',
                                             'seq': '1',
                                             'wrttimeid': '2008'},
                                

In [19]:
# Pretty-print only the list of records stored under the 'row' key.
pprint.pprint(dict_data['RegistrationPopulationByRegion']['row'])

[{'houshol': '19005339',
  'population_female': '24717470',
  'population_man': '24822897',
  'population_tot': '49540367',
  'regi': '계',
  'seq': '1',
  'wrttimeid': '2008'},
 {'houshol': '4097562',
  'population_female': '5139018',
  'population_man': '5061809',
  'population_tot': '10200827',
  'regi': '서울',
  'seq': '2',
  'wrttimeid': '2008'},
 {'houshol': '1311724',
  'population_female': '1791423',
  'population_man': '1773154',
  'population_tot': '3564577',
  'regi': '부산',
  'seq': '3',
  'wrttimeid': '2008'},
 {'houshol': '894969',
  'population_female': '1245851',
  'population_man': '1246873',
  'population_tot': '2492724',
  'regi': '대구',
  'seq': '4',
  'wrttimeid': '2008'},
 {'houshol': '1014755',
  'population_female': '1336223',
  'population_man': '1356473',
  'population_tot': '2692696',
  'regi': '인천',
  'seq': '5',
  'wrttimeid': '2008'},
 {'houshol': '513021',
  'population_female': '716885',
  'population_man': '705817',
  'population_tot': '1422702',
  'regi': 

In [20]:
import pandas as pd

In [21]:
# Build a DataFrame with one line per record found under the 'row' key.
population_rows = dict_data['RegistrationPopulationByRegion']['row']
df = pd.DataFrame(population_rows)
df.head()

Unnamed: 0,wrttimeid,seq,regi,population_tot,population_man,population_female,houshol
0,2008,1,계,49540367,24822897,24717470,19005339
1,2008,2,서울,10200827,5061809,5139018,4097562
2,2008,3,부산,3564577,1773154,1791423,1311724
3,2008,4,대구,2492724,1246873,1245851,894969
4,2008,5,인천,2692696,1356473,1336223,1014755


In [None]:
# Save the DataFrame as a CSV file in the current directory,
# without writing the index as an extra column.
df.to_csv('./인구정보.csv', index=False)

In [None]:
# Remove the seq column from df.
# Rebinding df (instead of inplace=True) together with errors='ignore' keeps
# this cell re-runnable: a second run no longer raises KeyError because
# 'seq' has already been dropped.
df = df.drop(columns='seq', errors='ignore')
df.head()

Unnamed: 0,wrttimeid,regi,population_tot,population_man,population_female,houshol
0,2008,계,49540367,24822897,24717470,19005339
1,2008,서울,10200827,5061809,5139018,4097562
2,2008,부산,3564577,1773154,1791423,1311724
3,2008,대구,2492724,1246873,1245851,894969
4,2008,인천,2692696,1356473,1336223,1014755


In [28]:
# Rename the wrttimeid column of df to year.
df.rename(columns={'wrttimeid': 'year'}, inplace=True)
df.head()

Unnamed: 0,year,regi,population_tot,population_man,population_female,houshol
0,2008,계,49540367,24822897,24717470,19005339
1,2008,서울,10200827,5061809,5139018,4097562
2,2008,부산,3564577,1773154,1791423,1311724
3,2008,대구,2492724,1246873,1245851,894969
4,2008,인천,2692696,1356473,1336223,1014755


In [29]:
# Summarize columns, non-null counts and dtypes of df.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   year               30 non-null     object
 1   regi               30 non-null     object
 2   population_tot     30 non-null     object
 3   population_man     30 non-null     object
 4   population_female  30 non-null     object
 5   houshol            30 non-null     object
dtypes: object(6)
memory usage: 1.5+ KB


In [30]:
# database 전체 가져오기
# import database    
# database 내의 특정 클래스만 가져오기
from database import MyDB

In [31]:
# Create the database helper from the local `database` module.
# NOTE(review): connection settings come from MyDB's defaults, which are not
# visible in this notebook — confirm which server/schema it targets.
db1 = MyDB()

In [34]:
# DDL for the `population` table.
# IF NOT EXISTS makes the cell safe to re-run: a plain CREATE TABLE fails
# once the table has already been created.
table_query = '''
    CREATE TABLE IF NOT EXISTS `population`
    (
        `No` int auto_increment primary key,
        `year` int not null,
        `regi` varchar(32) not null,
        `total` int not null,
        `man` int not null,
        `female` int not null,
        `houshol` int not null
    )
'''

In [35]:
# Execute the CREATE TABLE statement through the MyDB helper.
db1.sql_query(table_query)

Query OK!


In [46]:
# INSERT statement for the `population` table with six %s placeholders,
# one per listed column (year, regi, total, man, female, houshol).
# NOTE(review): the placeholders are presumably bound by MyDB.sql_query from
# the extra positional arguments — confirm against the database module.
insert_query = '''
    INSERT INTO
    `population`
    (`year`, `regi`, `total`, `man`, `female`, `houshol`)
    VALUES  
    (%s, %s, %s, %s, %s, %s)
'''

In [44]:
# Values of the first row as a plain Python list.
df.iloc[0].tolist()

['2008', '계', '49540367', '24822897', '24717470', '19005339']

In [None]:
# db1.sql_query(insert_query, *list(df.iloc[0, ]))
# The * in the call unpacks the list, so each value is passed as a separate
# positional argument instead of one list argument.

Query OK!


In [50]:
# Insert every DataFrame row as one record, committing after each insert.
for record in df.itertuples(index=False, name=None):
    db1.sql_query(insert_query, *record)
    db1.commit_db()

Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료
Query OK!
커밋 완료
Close 완료


In [None]:
# Install the library used to write a DataFrame into a SQL database
# (one-time setup; kept commented out so re-running the notebook does not reinstall it).
# !pip install sqlalchemy

Collecting sqlalchemy
  Downloading sqlalchemy-2.0.41-cp39-cp39-win_amd64.whl.metadata (9.8 kB)
Collecting greenlet>=1 (from sqlalchemy)
  Downloading greenlet-3.2.3-cp39-cp39-win_amd64.whl.metadata (4.2 kB)
Downloading sqlalchemy-2.0.41-cp39-cp39-win_amd64.whl (2.1 MB)
   ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
   ---------------------------------------- 2.1/2.1 MB 57.8 MB/s eta 0:00:00
Downloading greenlet-3.2.3-cp39-cp39-win_amd64.whl (296 kB)
Installing collected packages: greenlet, sqlalchemy

   ---------------------------------------- 0/2 [greenlet]
   -------------------- ------------------- 1/2 [sqlalchemy]
   -------------------- ------------------- 1/2 [sqlalchemy]
   -------------------- ------------------- 1/2 [sqlalchemy]
   -------------------- ------------------- 1/2 [sqlalchemy]
   -------------------- ------------------- 1/2 [sqlalchemy]
   -------------------- ------------------- 1/2 [sqlalchemy]
   -------------------- ------------------- 1

In [53]:
# Build the SQLAlchemy engine used to talk to the MySQL server.
# The password is read from the environment (.env), like the API service key,
# so it is not stored in the notebook. The other settings can be overridden
# through the environment and otherwise fall back to the local defaults.
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

db_password = os.getenv('DB_PASSWORD')
if db_password is None:
    # Fail early with a clear message rather than with an opaque
    # authentication error on the first query.
    raise RuntimeError('DB_PASSWORD is not set; add it to the .env file')

# URL.create escapes special characters in the credentials, which a
# hand-formatted connection string would not.
engine = create_engine(
    URL.create(
        drivername='mysql+pymysql',
        username=os.getenv('DB_USER', 'root'),
        password=db_password,
        host=os.getenv('DB_HOST', 'localhost'),
        port=int(os.getenv('DB_PORT', '3306')),
        database=os.getenv('DB_NAME', 'ubion'),
    )
)

In [None]:
# DataFrame.to_sql() arguments:
#   name      : name of the target table
#   con       : database connection (the SQLAlchemy engine)
#   index     : whether to write the DataFrame index as a column (default: True)
#   if_exists : 'replace' -> drop the existing table and recreate it
#               'append'  -> add the rows to the existing table
#               'fail'    -> default; raise an error and leave the existing table untouched
# 'replace' keeps this cell re-runnable (the default 'fail' raises on the
# second run), and index=False matches the CSV export by not storing the
# meaningless 0..n-1 index as an extra column.
df.to_sql(
    name='test_table',
    con=engine,
    if_exists='replace',
    index=False,
)

30