## Download the zip file in memory, then extract

In [1]:
from io import BytesIO
from zipfile import ZipFile
from urllib import request

url = request.urlopen('http://www-odi.nhtsa.dot.gov/downloads/folders/Complaints/FLAT_CMPL.zip')
zipfile_in_memory = ZipFile(BytesIO(url.read()))
zipfile_in_memory.extractall('/home/pybokeh/temp/')
zipfile_in_memory.close()
print("zip download and extraction complete")

zip download and extraction complete


## Normal imports

In [59]:
import sqlite3
import pandas as pd
import datetime as dt
import numpy as np
pd.set_option("display.max_rows",1000)
pd.set_option("display.max_columns",50)
pd.set_option('max_colwidth',40)

## Since the flat file doesn't contain column headers, had to create one

In [11]:
columns = [
    'CMPLID',
    'ODINO',
    'MFR_NAME',
    'MAKETXT',
    'MODELTXT',
    'YEARTXT',
    'CRASH',
    'FAILDATE',
    'FIRE',
    'INJURED',
    'DEATHS',
    'COMPDESC',
    'CITY',
    'STATE',
    'VIN',
    'DATEA',
    'LDATE',
    'MILES',
    'OCCURENCES',
    'CDESCR',
    'CMPL_TYPE',
    'POLICE_RPT_YN',
    'PURCH_DT',
    'ORIG_OWNER_YN',
    'ANTI_BRAKES_YN',
    'CRUISE_CONT_YN',
    'NUM_CYLS',
    'DRIVE_TRAIN',
    'FUEL_SYS',
    'FUEL_TYPE',
    'TRANS_TYPE',
    'VEH_SPEED',
    'DOT',
    'TIRE_SIZE',
    'LOC_OF_TIRE',
    'TIRE_FAIL_TYPE',
    'ORIG_EQUIP_YN',
    'MANUF_DT',
    'SEAT_TYPE',
    'RESTRAINT_TYPE',
    'DEALER_NAME',
    'DEALER_TEL',
    'DEALER_CITY',
    'DEALER_STATE',
    'DEALER_ZIP',
    'PROD_TYPE',
    'REPAIRED_YN',
    'MEDICAL_ATTN',
    'VEHICLES_TOWED_YN'
]

## Connect to the sqlite3 database and read in the flat file in chunks

In [62]:
conn = sqlite3.connect('/home/pybokeh/temp/nhtsa.db')

start = dt.datetime.now()
chunksize = 20000
j = 0

for df in pd.read_csv('/home/pybokeh/temp/FLAT_CMPL.txt', names=columns, dtype=object, chunksize=chunksize, 
                      delimiter='\t', iterator=True, encoding='ISO-8859-1', error_bad_lines=False):    
    j+=1
    print('\r'+'{} seconds: completed {} rows'.format((dt.datetime.now() - start).seconds, j*chunksize),end='')

    df.to_sql('complaints', conn, flavor='sqlite', if_exists='append', index=False)

395 seconds: completed 1280000 rows

In [63]:
sample = pd.read_sql_query('SELECT MFR_NAME, MAKETXT, MODELTXT, YEARTXT, CRASH, FIRE, INJURED, DEATHS, '
                           'COMPDESC, MILES, LDATE, OCCURENCES, CDESCR '
                           
                           'FROM complaints '
                           
                           'WHERE '
                           "LDATE like '2016%' "
                           "AND MAKETXT IN('HONDA','ACURA') "
                           "AND (CRASH = 'Y' "
                           "OR FIRE = 'Y' "
                           "OR INJURED = 'Y' "
                           "OR DEATHS = 'Y' "
                           "OR MEDICAL_ATTN = 'Y' "
                           "OR VEHICLES_TOWED_YN = 'Y') "
                           'limit 5', conn)

In [64]:
sample

Unnamed: 0,MFR_NAME,MAKETXT,MODELTXT,YEARTXT,CRASH,FIRE,INJURED,DEATHS,COMPDESC,MILES,LDATE,OCCURENCES,CDESCR
0,Honda (American Honda Motor Co.),HONDA,CIVIC,2008,Y,N,0.0,0.0,WHEELS,109000,20160106,,TL* THE CONTACT OWNED A 2008 HONDA C...
1,Honda (American Honda Motor Co.),HONDA,CIVIC,2008,Y,N,0.0,0.0,AIR BAGS,109000,20160106,,TL* THE CONTACT OWNED A 2008 HONDA C...
2,Honda (American Honda Motor Co.),HONDA,CIVIC,2008,Y,N,0.0,0.0,STEERING,109000,20160106,,TL* THE CONTACT OWNED A 2008 HONDA C...
3,Honda (American Honda Motor Co.),ACURA,MDX,2003,N,Y,,,ELECTRICAL SYSTEM,136000,20160107,,THE HEATED SEAT FUNCTION IN OUR ACUR...
4,Honda (American Honda Motor Co.),HONDA,CR-V,2007,Y,N,0.0,0.0,STEERING,107,20160108,,POWER STEERING HORSE CRACK AND LEAKA...


In [65]:
for year, make, model, cdescr in sample[['YEARTXT','MAKETXT','MODELTXT','CDESCR']].values:
    print(year + ' ' + make + ' ' + model + ' ' + cdescr)
    print("****************************************************************")

2008 HONDA CIVIC TL* THE CONTACT OWNED A 2008 HONDA CIVIC. WHILE DRIVING 30 MPH IN INCLEMENT WEATHER, THE WHEELS AND THE STEERING COLUMN SEIZED. AS A RESULT, THE CONTACT CRASHED INTO A GUARDRAIL. THE AIR BAGS FAILED TO DEPLOY. THE VEHICLE WAS DESTROYED AND TOWED TO A SALVAGE YARD. A POLICE REPORT WAS FILED AND THERE WERE NO INJURIES. THE MANUFACTURER WAS NOTIFIED. THE FAILURE MILEAGE WAS APPROXIMATELY 109,000.
****************************************************************
2008 HONDA CIVIC TL* THE CONTACT OWNED A 2008 HONDA CIVIC. WHILE DRIVING 30 MPH IN INCLEMENT WEATHER, THE WHEELS AND THE STEERING COLUMN SEIZED. AS A RESULT, THE CONTACT CRASHED INTO A GUARDRAIL. THE AIR BAGS FAILED TO DEPLOY. THE VEHICLE WAS DESTROYED AND TOWED TO A SALVAGE YARD. A POLICE REPORT WAS FILED AND THERE WERE NO INJURIES. THE MANUFACTURER WAS NOTIFIED. THE FAILURE MILEAGE WAS APPROXIMATELY 109,000.
****************************************************************
2008 HONDA CIVIC TL* THE CONTACT OWNED A 2