# Creating SQLite database from csv files

Reference:

https://discussions.udacity.com/t/creating-db-file-from-csv-files-with-non-ascii-unicode-characters/174958/7

Import modules needed:

In [1]:
import sqlite3
import csv
from pprint import pprint

Connect to the database (if it doesn't exist, it will be created in the folder that your notebook is in):

In [2]:
# Relative path of the SQLite database file (resolved against the current
# working directory).
sqlite_file = "osm_atx.db"

In [3]:
# Open a connection to the database file; SQLite creates the file if it does
# not exist yet.
conn = sqlite3.connect(sqlite_file)

Create the cursor object

In [4]:
# Cursor used to execute SQL statements on this connection.
cur = conn.cursor()

Create the table from nodes.csv:

In [5]:
# Create the nodes table. IF NOT EXISTS keeps this cell re-runnable: without
# it, running the notebook again against an existing database file fails with
# "OperationalError: table nodes already exists".
# Note that version is declared as TEXT, so it is stored and returned as a
# string.
cur.execute('''CREATE TABLE IF NOT EXISTS nodes(
                   id INTEGER PRIMARY KEY,
                   lat REAL,
                   lon REAL,
                   user TEXT,
                   uid INTEGER,
                   version TEXT,
                   changeset INTEGER,
                   timestamp TEXT)''')

<sqlite3.Cursor at 0x104194500>

Commit the changes

In [6]:
# Commit the current transaction on the connection.
conn.commit()

Read in the csv file as dictionaries (one per row) and format the data as a list of tuples:

In [7]:
# Build the list of row tuples to insert, in the column order of the nodes
# table. The text columns (user, version, timestamp) are decoded from UTF-8
# because otherwise a ProgrammingError will sometimes occur during insertion.
# https://discussions.udacity.com/t/case-study-quiz-preparing-for-database-help/169697/19
with open('nodes.csv', 'rb') as csv_file:
    reader = csv.DictReader(csv_file)  # comma is the default delimiter
    to_db = []
    for row in reader:
        to_db.append((
            row['id'],
            row['lat'],
            row['lon'],
            row['user'].decode('utf-8'),
            row['uid'],
            row['version'].decode('utf-8'),
            row['changeset'],
            row['timestamp'].decode('utf-8'),
        ))

Insert data

In [8]:
# Bulk-insert every tuple in to_db; the ? placeholders are bound positionally,
# one per column.
cur.executemany(
    "INSERT INTO nodes(id, lat, lon, user, uid, version, changeset, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?);",
    to_db
)

<sqlite3.Cursor at 0x104194500>

Commit the changes

In [9]:
# Commit the current transaction so the pending changes are written to the
# database file.
conn.commit()

Check that the data was imported correctly. (Since I don't want to make my notebook too large, I only look at the first 20 rows here; any further checking I will do in sqlite, on the command line.)

In [10]:
# Query at most 20 rows from nodes as a spot check. There is no ORDER BY, so
# the order of the returned rows is not guaranteed.
cur.execute('SELECT * FROM nodes limit 20')

<sqlite3.Cursor at 0x104194500>

In [11]:
# Fetch all rows of the query just executed on this cursor as a list of tuples.
first20 = cur.fetchall()

In [14]:
# Show the sampled rows. A single parenthesised string prints the same text
# under the Python 2 print statement and the print() function.
print('first 20:')
pprint(first20)

first 20:
[(26546004,
  30.4695355,
  -97.7972587,
  u'Tylan',
  388279,
  u'15',
  8497118,
  u'2011-06-20T18:36:15Z'),
 (26546005,
  30.4713386,
  -97.7975919,
  u'APD',
  105002,
  u'19',
  767484,
  u'2009-03-09T09:03:33Z'),
 (26546006,
  30.4711721,
  -97.798579,
  u'HJD',
  75480,
  u'24',
  533807,
  u'2009-02-18T16:44:40Z'),
 (26546008,
  30.469115,
  -97.7966751,
  u'claysmalley',
  119881,
  u'28',
  13420621,
  u'2012-10-09T01:08:42Z'),
 (26546009,
  30.4688175,
  -97.7976688,
  u'Tylan',
  388279,
  u'38',
  8497118,
  u'2011-06-20T18:22:32Z'),
 (26546010,
  30.469413,
  -97.797558,
  u'Tylan',
  388279,
  u'17',
  8497118,
  u'2011-06-20T18:36:15Z'),
 (26546011,
  30.4714758,
  -97.7980443,
  u'Tylan',
  388279,
  u'4',
  8497118,
  u'2011-06-20T18:36:15Z'),
 (26546012,
  30.4714208,
  -97.798367,
  u'Tylan',
  388279,
  u'14',
  8497118,
  u'2011-06-20T18:36:15Z'),
 (26546025,
  30.4751578,
  -97.799145,
  u'richlv',
  47892,
  u'24',
  18948024,
  u'2013-11-17T08:34:54Z'