## SQL에서 Dataframe으로

- sqlite3 드라이버를 사용해서 SQLite 데이터베이스를 이용할 수 있다.

### Sqlite3 Driver

In [48]:
import sqlite3

In [49]:
# Connect to the SQLite database file used throughout this walkthrough.
con = sqlite3.connect("/Users/grace/workspace/SQLite/databases/books.db")

# DDL for the demo table; IF NOT EXISTS lets this cell be re-run safely.
query = """
    CREATE Table IF NOT EXISTS BestSellers
    (title TEXT,
    author TEXT,
    price REAL,
    edition INTEGER);
"""

# Create the table, then commit on the connection.
con.execute(query)
con.commit()

<sqlite3.Cursor at 0x10bc6b880>

In [50]:
# Insert data.
# Each record is written as a tuple: (title, author, price, edition).
best_selling_amazon_books = [
    ('Fire and Fury', 'Michael Wolff', 14.99, 1),
    ('12 Rules for Life: An antidote to Chaos', 'Jordan B. Peterson', 15.54, 1),
    ('A Higher Loyalty: Truth, Lies, and Leadership', 'James Comey', 8.47, 1),
]

# One "?" placeholder per value in each tuple.
insert_query = """
    INSERT INTO BestSellers
    VALUES(?, ?, ?, ?)
"""

# executemany runs the statement once per tuple; commit afterwards.
con.executemany(insert_query, best_selling_amazon_books)
con.commit()

<sqlite3.Cursor at 0x10bc6bf80>

In [51]:
# Extract data.
# Single-row case: fetchone() returns just the first matching row as a tuple.
select_one_query = "SELECT * from BestSellers where title like '%Fire and Fury%';"

cursor = con.execute(select_one_query)
cursor.fetchone()

('Fire and Fury', 'Michael Wolff', 14.99, 1)

In [52]:
# Extract data.
# Rows come back as tuples, the same shape that was used for the insert.
select_query = """SELECT * from BestSellers"""

cursor = con.execute(select_query)

# fetchall() collects every result row into a list.
rows = cursor.fetchall()
rows

[('Fire and Fury', 'Michael Wolff', 14.99, 1),
 ('12 Rules for Life: An antidote to Chaos', 'Jordan B. Peterson', 15.54, 1),
 ('A Higher Loyalty: Truth, Lies, and Leadership', 'James Comey', 8.47, 1)]

In [53]:
# The column names of the last query are available on the cursor:
# each description entry is a 7-item tuple whose first item is the name.
cursor.description

(('title', None, None, None, None, None, None),
 ('author', None, None, None, None, None, None),
 ('price', None, None, None, None, None, None),
 ('edition', None, None, None, None, None, None))

### DataFrame으로 변환

In [54]:
import pandas as pd

In [55]:
# Build the DataFrame from the fetched rows; the column labels are the first
# item of each cursor.description entry.
pd.DataFrame(
    rows,
    columns=[column[0] for column in cursor.description],
)

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,1
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,1
2,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,8.47,1


### read_sql_query()

- 위의 단계를 간편하게 처리할 수 있도록 pandas는 read_sql_query 함수를 제공한다. 쿼리문과 데이터베이스 커넥션을 인자로 넘기면 결과가 바로 DataFrame으로 반환된다.
- pandas.io.sql

In [56]:
import pandas.io.sql as sql

In [57]:
# Run the query on the open connection and get the result back as a DataFrame.
sql.read_sql_query(select_query, con)

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,1
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,1
2,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,8.47,1


In [58]:
# Any SELECT statement can be passed, e.g. one that filters rows with WHERE.
sql.read_sql_query('SELECT * FROM BestSellers WHERE price > 10;', con)

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,1
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,1


### execute()

In [59]:
# Set every book to edition 2.
# The statement is run on the sqlite3 connection and committed explicitly,
# like the other write cells in this file, so the UPDATE is persisted instead
# of being left in an open transaction.
# NOTE(review): the pandas.io.sql.execute helper used previously appears to be
# deprecated in recent pandas releases - confirm against the pandas changelog.
update_cursor = con.execute('update BestSellers set edition=2')
con.commit()
# Keep the cursor as the cell's displayed value.
update_cursor

<sqlite3.Cursor at 0x10bcee110>

In [60]:
# Check that the update was applied correctly.
update_data = sql.read_sql_query(select_query, con)
update_data

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,2
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,2
2,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,8.47,2


In [61]:
# Add a new row: assigning to index label 3, which does not exist yet,
# appends the record to the DataFrame.
update_data.loc[3] = ['new book', 'new author', 20.55, 1]
update_data

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,2
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,2
2,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,8.47,2
3,new book,new author,20.55,1


### 데이터 테이블에 저장하기

In [66]:
# if_exists selects what happens when the target table already exists:
#   'fail'    - raise a ValueError (this is the default)
#   'replace' - drop the existing table, then insert the new data
#   'append'  - add the new rows to the existing table
# index=False keeps the DataFrame index out of the written table.
update_data.to_sql(
    name='BestSellers',
    con=con,
    if_exists='replace',
    index=False,
)

In [67]:
# Read the table back to confirm its current contents.
sql.read_sql_query(select_query, con)

Unnamed: 0,title,author,price,edition
0,Fire and Fury,Michael Wolff,14.99,2
1,12 Rules for Life: An antidote to Chaos,Jordan B. Peterson,15.54,2
2,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,8.47,2
3,new book,new author,20.55,1


### 대형 DataFrame을 만들기

- SQLite의 경우 그럴 일이 매우 드물지만, 다른 RDBMS를 활용하는 경우 데이터의 양이 많아지면 메모리 부족으로 DataFrame 생성이 안 될 수 있다. 이럴 때는 chunksize 인자를 지정해 결과를 여러 개의 작은 DataFrame으로 나누어 읽을 수 있다.

In [73]:
# With chunksize set, read_sql_query yields DataFrames of at most that many
# rows (one row each here) instead of a single DataFrame.
chunk_iter = sql.read_sql_query(select_query, con, chunksize=1)
for chunk in chunk_iter:
    print(chunk)

           title         author  price  edition
0  Fire and Fury  Michael Wolff  14.99        1
                                     title              author  price  edition
0  12 Rules for Life: An antidote to Chaos  Jordan B. Peterson  15.54        1
                                           title       author  price  edition
0  A Higher Loyalty: Truth, Lies, and Leadership  James Comey   8.47        1
