In [6]:
import pandas as pd
import sqlite3
import os
from numpy import random

from pandas.io import sql

def test_sql_write(df):
    if os.path.exists('tmp/test.sql'):
        os.remove('tmp/test.sql')
    sql_db = sqlite3.connect('tmp/test.sql')
    df.to_sql(name='test_table', con=sql_db)
    sql_db.close()

def test_sql_read():
    sql_db = sqlite3.connect('tmp/test.sql')
    pd.read_sql_query("select * from test_table", sql_db)
    sql_db.close()

def test_hdf_fixed_write(df):
    df.to_hdf('tmp/test_fixed.hdf','test',mode='w')

def test_hdf_fixed_read():
    pd.read_hdf('tmp/test_fixed.hdf','test')

def test_hdf_fixed_write_compress(df):
    df.to_hdf('tmp/test_fixed_compress.hdf','test',mode='w',complib='blosc')

def test_hdf_fixed_read_compress():
    pd.read_hdf('tmp/test_fixed_compress.hdf','test')

def test_hdf_table_write(df):
    df.to_hdf('tmp/test_table.hdf','test',mode='w',format='table')
    
def test_hdf_table_read():
    pd.read_hdf('tmp/test_table.hdf','test')

def test_hdf_table_write_compress(df):
    df.to_hdf('tmp/test_table_compress.hdf','test',mode='w',complib='blosc',format='table')

def test_hdf_table_read_compress():
    pd.read_hdf('tmp/test_table_compress.hdf','test')

def test_csv_write(df):
    df.to_csv('tmp/test.csv',mode='w')

def test_csv_read():
    pd.read_csv('tmp/test.csv',index_col=0)

In [16]:
# Seed NumPy's global generator so the benchmark frame holds the same values
# on every Restart & Run All (`random` here is numpy.random).
random.seed(0)
# Benchmark input: 1,000,000 rows x 5 columns of standard-normal draws, labelled A-E.
df = pd.DataFrame(random.randn(1000000,5),columns=list('ABCDE'))
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 5 columns):
A    1000000 non-null float64
B    1000000 non-null float64
C    1000000 non-null float64
D    1000000 non-null float64
E    1000000 non-null float64
dtypes: float64(5)
memory usage: 38.1 MB


In [17]:
# Time writing df to tmp/test.csv via test_csv_write.
%timeit test_csv_write(df)

1 loop, best of 3: 9.68 s per loop


In [18]:
# Time writing df to the SQLite file tmp/test.sql via test_sql_write
# (each call deletes the existing file and reconnects, so that is timed too).
%timeit test_sql_write(df)

1 loop, best of 3: 5.49 s per loop


In [19]:
# Time writing df to tmp/test_fixed.hdf via test_hdf_fixed_write.
%timeit test_hdf_fixed_write(df)

10 loops, best of 3: 35.7 ms per loop


In [20]:
# Time writing df to tmp/test_table.hdf with format='table' via test_hdf_table_write.
%timeit test_hdf_table_write(df)

1 loop, best of 3: 465 ms per loop


In [21]:
# Time reading tmp/test.csv back; the file is the one test_csv_write produces,
# so the CSV write benchmark must have run first.
%timeit test_csv_read()

1 loop, best of 3: 1.61 s per loop


In [22]:
# Time reading test_table back from tmp/test.sql; the table is the one
# test_sql_write produces, so the SQL write benchmark must have run first.
%timeit test_sql_read()

1 loop, best of 3: 2.66 s per loop


In [23]:
# Time reading tmp/test_fixed.hdf back; the file is the one test_hdf_fixed_write
# produces, so that write benchmark must have run first.
%timeit test_hdf_fixed_read()

10 loops, best of 3: 18.6 ms per loop


In [24]:
# Time reading tmp/test_table.hdf back; the file is the one test_hdf_table_write
# produces, so that write benchmark must have run first.
%timeit test_hdf_table_read()

10 loops, best of 3: 30.2 ms per loop
