# Bulk Loading

In [None]:
import sqlalchemy as sa
import pandas as pd
# Create an engine for the SQLite database file flight.db and open the single
# connection that every cell below reuses.
engine = sa.create_engine('sqlite:///flight.db')
connection = engine.connect()

In [None]:
# Read the flight readings from CSV. The load_data variants below read the
# timestamp, temp_h, pressure, humidity and accel_x/accel_y/accel_z columns.
flight_data = pd.read_csv('flight-data.csv')

In [8]:
def load_data(connection, data):
    """Insert every reading in ``data`` into ``readings``, one call per row.

    The flight column is always written as ``'hab1'`` and humidity is clamped
    to the range 0-100 before it is sent.

    Args:
        connection: Object providing ``execute(statement, params)``.
        data: Object whose ``itertuples()`` yields rows with ``timestamp``,
            ``temp_h``, ``pressure``, ``humidity``, ``accel_x``, ``accel_y``
            and ``accel_z`` attributes.
    """
    statement = """
        INSERT INTO readings
            (flight, ts, temp, pressure, humidity,
            accel_x, accel_y, accel_z)
        VALUES
            ('hab1', ?, ?, ?, ?, ?, ?, ?)
    """

    def to_params(reading):
        # Values are listed in the same order as the placeholders above.
        return (
            reading.timestamp, reading.temp_h, reading.pressure,
            min(100, max(0, reading.humidity)),
            reading.accel_x, reading.accel_y, reading.accel_z,
        )

    # No transaction is opened here: each row goes out in its own execute call.
    # NOTE(review): presumably every statement is then committed individually,
    # which would explain the slow timing - confirm for the SQLAlchemy version.
    for reading in data.itertuples():
        connection.execute(statement, to_params(reading))

# Empty the readings table first, then time load_data once
# (-n1 -r1: a single loop, a single run).
connection.execute("DELETE FROM readings")     
%timeit -n1 -r1 load_data(connection, flight_data)

5min 26s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [9]:
def load_data(connection, data):
    """Insert every reading in ``data`` row by row inside one ``begin()`` block.

    The flight column is always written as ``'hab1'`` and humidity is clamped
    to the range 0-100 before it is sent.

    Args:
        connection: Object providing ``begin()`` (used as a context manager)
            and ``execute(statement, params)``.
        data: Object whose ``itertuples()`` yields rows with ``timestamp``,
            ``temp_h``, ``pressure``, ``humidity``, ``accel_x``, ``accel_y``
            and ``accel_z`` attributes.
    """
    statement = """
        INSERT INTO readings
            (flight, ts, temp, pressure, humidity,
            accel_x, accel_y, accel_z)
        VALUES
            ('hab1', ?, ?, ?, ?, ?, ?, ?)
    """

    def clamp_humidity(value):
        # Keep the stored humidity within 0..100.
        return min(100, max(0, value))

    # All per-row execute calls happen inside a single begin() block.
    with connection.begin():
        for reading in data.itertuples():
            params = (
                reading.timestamp, reading.temp_h, reading.pressure,
                clamp_humidity(reading.humidity),
                reading.accel_x, reading.accel_y, reading.accel_z,
            )
            connection.execute(statement, params)
            
# Empty the readings table first, then time load_data once
# (-n1 -r1: a single loop, a single run).
connection.execute("DELETE FROM readings")     
%timeit -n1 -r1 load_data(connection, flight_data)

12 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [15]:
def load_data(connection, data):
    """Insert every reading in ``data`` with a single ``execute`` call.

    All parameter tuples are built up front and handed to ``execute`` as one
    list inside a ``begin()`` block. The flight column is always written as
    ``'hab1'`` and humidity is clamped to the range 0-100.

    Args:
        connection: Object providing ``begin()`` (used as a context manager)
            and ``execute(statement, params)``.
        data: Object whose ``itertuples()`` yields rows with ``timestamp``,
            ``temp_h``, ``pressure``, ``humidity``, ``accel_x``, ``accel_y``
            and ``accel_z`` attributes.
    """
    statement = """
        INSERT INTO readings
            (flight, ts, temp, pressure, humidity,
            accel_x, accel_y, accel_z)
        VALUES
            ('hab1', ?, ?, ?, ?, ?, ?, ?)
    """

    def to_params(reading):
        # Values are listed in the same order as the placeholders above.
        return (
            reading.timestamp, reading.temp_h, reading.pressure,
            min(100, max(0, reading.humidity)),
            reading.accel_x, reading.accel_y, reading.accel_z,
        )

    # Materialise the whole batch before the begin() block is entered.
    batch = [to_params(reading) for reading in data.itertuples()]

    # NOTE(review): a list of tuples is presumably run by SQLAlchemy as an
    # executemany - confirm for the installed version.
    with connection.begin():
        connection.execute(statement, batch)
        
# Empty the readings table first, then time load_data once
# (-n1 -r1: a single loop, a single run).
connection.execute("DELETE FROM readings")     
%timeit -n1 -r1 load_data(connection, flight_data)

3.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
