## SQL에서 DataFrame으로

- sqlite3 드라이버를 사용해서 SQLite 데이터베이스를 이용할 수 있다.

### Sqlite3 Driver

In [1]:
import sqlite3

In [64]:
# Schema for the demo table. IF NOT EXISTS keeps this cell re-runnable:
# without it, a second run against the same database file raises
# sqlite3.OperationalError because the table already exists.
query = """
    CREATE TABLE IF NOT EXISTS BestSellers
    (title TEXT,
    author TEXT,
    price REAL,
    edition INTEGER);
"""

# Connect to the SQLite database file, apply the schema, and commit it.
con = sqlite3.connect("/Users/grace/workspace/SQLite/databases/test.db")
con.execute(query)
con.commit()

<sqlite3.Cursor at 0x10f817960>

In [65]:
# Insert the sample rows.
# Each row is written as a tuple; its fields are bound positionally to the
# four "?" placeholders of the INSERT statement below.
best_selling_amazon_books = [
    ('Fire and Fury', 'Michael Wolff', 14.99, 1),
    (
        '12 Rules for Life: An antidote to Chaos',
        'Jordan B. Peterson',
        15.54,
        1,
    ),
    (
        'A Higher Loyalty: Truth, Lies, and Leadership',
        'James Comey',
        8.47,
        1,
    ),
]

insert_query = """
    INSERT INTO BestSellers
    VALUES(?, ?, ?, ?)
"""

# Run the parameterized statement once per tuple, then commit the inserts.
con.executemany(insert_query, best_selling_amazon_books)
con.commit()

<sqlite3.Cursor at 0x10fb36180>

In [66]:
# Read the data back.
# Rows are returned as tuples, the same form in which they were inserted.
select_query = "SELECT * from bestSellers"

cursor = con.execute(select_query)
rows = cursor.fetchall()
rows

[('Fire and Fury', 'Michael Wolff', 14.99, 1),
 ('12 Rules for Life: An antidote to Chaos', 'Jordan B. Peterson', 15.54, 1),
 ('A Higher Loyalty: Truth, Lies, and Leadership', 'James Comey', 8.47, 1)]

In [67]:
# The cursor exposes the column names of the query it just ran.
# Each entry is a 7-item tuple; only the first item (the column name) is
# populated here, the remaining items are None.
cursor.description

(('title', None, None, None, None, None, None),
 ('author', None, None, None, None, None, None),
 ('price', None, None, None, None, None, None),
 ('edition', None, None, None, None, None, None))

### DataFrame으로 변환

In [68]:
import pandas as pd

In [69]:
# Each entry of cursor.description carries the column name in slot 0, so
# collect those names to label the DataFrame columns. The comprehension is
# easier to read than transposing with zip(*...) and does not raise
# IndexError when the description is empty.
pd.DataFrame(rows, columns=[column[0] for column in cursor.description])

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,1
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,1
2,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,8.47,1


### read_sql_query()

- 위의 단계를 간편하게 처리할 수 있도록 pandas는 read_sql_query 함수를 제공한다. 쿼리문과 데이터베이스 커넥션을 인자로 넘기면 조회 결과를 바로 DataFrame으로 돌려준다.
- pandas.io.sql

In [70]:
import pandas.io.sql as sql

In [71]:
# Run the SELECT on the open connection and get the result as a DataFrame.
sql.read_sql_query(select_query, con)

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,1
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,1
2,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,8.47,1


In [72]:
# The query text is passed through as-is, so a WHERE clause can be used to
# filter rows (here: books priced above 10).
sql.read_sql_query('select * from BestSellers where price > 10;', con)

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,1
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,1


### execute()

In [74]:
# Run a statement that is not a SELECT: set edition to 2 on every row.
# NOTE(review): no explicit commit follows this call — confirm whether the
# change is meant to be persisted to the database file.
sql.execute('update BestSellers set edition=2', con)

<sqlite3.Cursor at 0x10fb360a0>

In [75]:
# Check that the data was updated as expected
sql.read_sql_query(select_query, con)

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,2
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,2
2,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,8.47,2


### 대형 DataFrame 만들기

- SQLite에서는 그럴 경우가 매우 드물지만, 다른 RDBMS를 활용하는 경우 데이터의 양이 많아지면 메모리 부족으로 DataFrame 생성이 안 될 수 있다. 이럴 때는 chunksize 인자를 지정해 결과를 정해진 행 수만큼씩 나누어 읽을 수 있다.

In [73]:
# With chunksize set, read_sql_query hands back an iterator of DataFrames
# (one row each here) instead of a single DataFrame.
chunks = sql.read_sql_query(select_query, con, chunksize=1)
for chunk in chunks:
    print(chunk)

           title         author  price  edition
0  Fire and Fury  Michael Wolff  14.99        1
                                     title              author  price  edition
0  12 Rules for Life: An antidote to Chaos  Jordan B. Peterson  15.54        1
                                           title       author  price  edition
0  A Higher Loyalty: Truth, Lies, and Leadership  James Comey   8.47        1
