# SQLite database from csv files

The SQLite database is built from the CSV files that were extracted from the Austin OSM file. The tables follow the schema defined here:

https://gist.github.com/swwelch/f1144229848b407e0a5d13fcb7fbbd6f

References:

https://discussions.udacity.com/t/creating-db-file-from-csv-files-with-non-ascii-unicode-characters/174958/7

http://stackoverflow.com/questions/19877344/near-syntax-error-when-trying-to-create-a-table-with-a-foreign-key-in-sqlit


In [1]:
import sqlite3
import csv
from pprint import pprint

In [2]:
# File name of the SQLite database that the connection below is opened on.
sqlite_file = "atx_osm.db"

In [3]:
# Open the connection to the database file named by sqlite_file.
conn = sqlite3.connect(sqlite_file)

In [4]:
# Cursor used by the following cells to run SQL statements and fetch results.
cur = conn.cursor()

### Creation of all tables:

In [5]:
# Drop any tables left over from an earlier run so this cell (and the insert
# cells after it) can be re-executed against an existing database file.
# Without this, a second run fails with "table ... already exists".
# Tables holding foreign keys are dropped before the tables they reference.
for table_name in ('ways_nodes', 'ways_tags', 'nodes_tags', 'ways', 'nodes'):
    cur.execute('DROP TABLE IF EXISTS ' + table_name)

# Create the five tables; column layout follows the schema linked at the top of the notebook.
# nodes and ways are the parent tables; the *_tags tables and ways_nodes reference them by id.
cur.execute('''CREATE TABLE nodes(id INTEGER PRIMARY KEY NOT NULL, lat REAL, lon REAL, user TEXT, uid INTEGER, version INTEGER, changeset INTEGER, timestamp TEXT)''')
cur.execute('''CREATE TABLE nodes_tags(id INTEGER, key TEXT, value TEXT, type TEXT, FOREIGN KEY (id) REFERENCES nodes (id))''')
cur.execute('''CREATE TABLE ways(id INTEGER PRIMARY KEY NOT NULL, user TEXT, uid INTEGER, version TEXT, changeset INTEGER, timestamp TEXT)''')
cur.execute('''CREATE TABLE ways_tags(id INTEGER NOT NULL, key TEXT NOT NULL, value TEXT NOT NULL, type TEXT, FOREIGN KEY (id) REFERENCES ways(id))''')
cur.execute('''CREATE TABLE ways_nodes(id INTEGER NOT NULL, node_id INTEGER NOT NULL, position INTEGER NOT NULL, FOREIGN KEY (id) REFERENCES ways (id), FOREIGN KEY (node_id) REFERENCES nodes (id))''')

<sqlite3.Cursor at 0x1040539d0>

In [6]:
# Commit so the newly created tables are saved before any data is loaded.
conn.commit()

### Inserting the nodes data:

In [7]:
# Read nodes.csv into a list of tuples ordered like the columns of the nodes table.
# The user, version and timestamp fields are decoded from UTF-8; the remaining
# fields are passed through exactly as read from the file.
with open('nodes.csv', 'rb') as nodes_csv:
    to_nodes = []
    for row in csv.DictReader(nodes_csv):
        to_nodes.append((
            row['id'],
            row['lat'],
            row['lon'],
            row['user'].decode('utf-8'),
            row['uid'],
            row['version'].decode('utf-8'),
            row['changeset'],
            row['timestamp'].decode('utf-8'),
        ))

In [8]:
# Insert every tuple in to_nodes with one call; each "?" placeholder is bound
# to the tuple field at the same position.
cur.executemany("INSERT INTO nodes(id, lat, lon, user, uid, version, changeset, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?);", to_nodes)

<sqlite3.Cursor at 0x1040539d0>

In [9]:
# Commit the inserted nodes rows.
conn.commit()

Checking for data insertion:

In [11]:
# Sanity check: display up to ten rows from the nodes table.
# execute() returns the cursor, so the fetch can be chained onto it.
cur.execute('SELECT * FROM nodes limit 10').fetchall()

[(26546004,
  30.4695355,
  -97.7972587,
  u'Tylan',
  388279,
  15,
  8497118,
  u'2011-06-20T18:36:15Z'),
 (26546005,
  30.4713386,
  -97.7975919,
  u'APD',
  105002,
  19,
  767484,
  u'2009-03-09T09:03:33Z'),
 (26546006,
  30.4711721,
  -97.798579,
  u'HJD',
  75480,
  24,
  533807,
  u'2009-02-18T16:44:40Z'),
 (26546008,
  30.469115,
  -97.7966751,
  u'claysmalley',
  119881,
  28,
  13420621,
  u'2012-10-09T01:08:42Z'),
 (26546009,
  30.4688175,
  -97.7976688,
  u'Tylan',
  388279,
  38,
  8497118,
  u'2011-06-20T18:22:32Z'),
 (26546010,
  30.469413,
  -97.797558,
  u'Tylan',
  388279,
  17,
  8497118,
  u'2011-06-20T18:36:15Z'),
 (26546011,
  30.4714758,
  -97.7980443,
  u'Tylan',
  388279,
  4,
  8497118,
  u'2011-06-20T18:36:15Z'),
 (26546012,
  30.4714208,
  -97.798367,
  u'Tylan',
  388279,
  14,
  8497118,
  u'2011-06-20T18:36:15Z'),
 (26546025,
  30.4751578,
  -97.799145,
  u'richlv',
  47892,
  24,
  18948024,
  u'2013-11-17T08:34:54Z'),
 (26546026,
  30.4727626,
  -97.79

### Inserting the nodes_tags data:

In [12]:
# Read nodes_tags.csv into a list of (id, key, value, type) tuples.
# key, value and type are decoded from UTF-8; id is passed through as read.
with open('nodes_tags.csv', 'rb') as nodes_tags_csv:
    to_nodes_tags = []
    for row in csv.DictReader(nodes_tags_csv):
        to_nodes_tags.append((
            row['id'],
            row['key'].decode('utf-8'),
            row['value'].decode('utf-8'),
            row['type'].decode('utf-8'),
        ))

In [13]:
# Insert every tuple in to_nodes_tags; the four "?" placeholders map to id, key, value, type.
cur.executemany("INSERT INTO nodes_tags(id, key, value, type) VALUES (?, ?, ?, ?);", to_nodes_tags)

<sqlite3.Cursor at 0x1040539d0>

In [14]:
# Commit the inserted nodes_tags rows.
conn.commit()

Checking for data insertion:

In [15]:
# Sanity check: display up to ten rows from the nodes_tags table.
cur.execute("SELECT * FROM nodes_tags LIMIT 10").fetchall()

[(26546008, u'highway', u'traffic_signals', u'regular'),
 (26546009, u'highway', u'traffic_signals', u'regular'),
 (26546041, u'highway', u'traffic_signals', u'regular'),
 (26546043, u'highway', u'traffic_signals', u'regular'),
 (26546065, u'highway', u'traffic_signals', u'regular'),
 (26546067, u'highway', u'traffic_signals', u'regular'),
 (26546082, u'noref', u'yes', u'regular'),
 (26546082, u'exit_to', u'Avery Ranch Boulevard', u'regular'),
 (26546082, u'highway', u'motorway_junction', u'regular'),
 (26546091, u'noref', u'yes', u'regular')]

### Inserting the ways data

In [16]:
# Read ways.csv into a list of tuples ordered like the columns of the ways table.
# user, version and timestamp are decoded from UTF-8; the other fields are
# passed through exactly as read from the file.
with open('ways.csv', 'rb') as ways_csv:
    to_ways = []
    for row in csv.DictReader(ways_csv):
        to_ways.append((
            row['id'],
            row['user'].decode('utf-8'),
            row['uid'],
            row['version'].decode('utf-8'),
            row['changeset'],
            row['timestamp'].decode('utf-8'),
        ))

In [17]:
# Insert every tuple in to_ways; the six "?" placeholders follow the column list.
# NOTE: unlike the other insert steps, no conn.commit() directly follows this one;
# these rows are committed by the next conn.commit() call, after the ways_tags insert.
cur.executemany("INSERT INTO ways(id, user, uid, version, changeset, timestamp) VALUES (?, ?, ?, ?, ?, ?);", to_ways)

<sqlite3.Cursor at 0x1040539d0>

Checking for data insertion

In [18]:
# Sanity check: display up to ten rows from the ways table.
cur.execute("SELECT * FROM ways LIMIT 10").fetchall()

[(4358672, u'claysmalley', 119881, u'19', 10974544, u'2012-03-14T05:28:59Z'),
 (4358673, u'NE2', 207745, u'8', 10343099, u'2012-01-09T16:14:46Z'),
 (4358675, u'claysmalley', 119881, u'8', 10212599, u'2011-12-27T02:55:41Z'),
 (4358677, u'tompkinsjs', 517514, u'12', 15084279, u'2013-02-19T01:43:36Z'),
 (4531212, u'claysmalley', 119881, u'10', 10974544, u'2012-03-14T05:28:59Z'),
 (4531214, u'ChrisZontine', 1376118, u'6', 25101150, u'2014-08-29T16:39:04Z'),
 (4531215, u'ChrisZontine', 1376118, u'5', 16556381, u'2013-06-14T21:27:39Z'),
 (4531217, u'Cam4rd98', 1132286, u'14', 15537280, u'2013-03-29T12:05:08Z'),
 (4531218, u'nammala', 3479270, u'13', 40336781, u'2016-06-28T07:21:55Z'),
 (4531219, u'Iowa Kid', 703517, u'8', 17120434, u'2013-07-28T03:13:58Z')]

### Inserting the ways_tags data:

In [19]:
# Read ways_tags.csv into a list of (id, key, value, type) tuples.
# key, value and type are decoded from UTF-8; id is passed through as read.
with open('ways_tags.csv', 'rb') as ways_tags_csv:
    to_ways_tags = []
    for row in csv.DictReader(ways_tags_csv):
        to_ways_tags.append((
            row['id'],
            row['key'].decode('utf-8'),
            row['value'].decode('utf-8'),
            row['type'].decode('utf-8'),
        ))

In [21]:
# Insert every tuple in to_ways_tags; the four "?" placeholders map to id, key, value, type.
cur.executemany("INSERT INTO ways_tags(id, key, value, type) VALUES (?, ?, ?, ?);", to_ways_tags)

<sqlite3.Cursor at 0x1040539d0>

In [22]:
# Commit the ways_tags rows. This also commits the ways rows inserted earlier,
# which had no commit of their own.
conn.commit()

Checking for data insertion:

In [23]:
# Sanity check: display up to ten rows from the ways_tags table.
cur.execute("SELECT * FROM ways_tags LIMIT 10").fetchall()

[(4358672, u'NHS', u'yes', u'regular'),
 (4358672, u'ref', u'US 183', u'regular'),
 (4358672, u'toll', u'no', u'regular'),
 (4358672, u'lanes', u'3', u'regular'),
 (4358672, u'layer', u'1', u'regular'),
 (4358672, u'bridge', u'yes', u'regular'),
 (4358672, u'oneway', u'yes', u'regular'),
 (4358672, u'highway', u'motorway', u'regular'),
 (4358672, u'old_ref', u'SH 29', u'regular'),
 (4358673, u'NHS', u'yes', u'regular')]

### Inserting the ways_nodes data:

In [24]:
# Read ways_nodes.csv into a list of (id, node_id, position) tuples.
# No field is decoded here; all three values are passed through as read.
with open("ways_nodes.csv", "rb") as ways_nodes_csv:
    to_ways_nodes = []
    for row in csv.DictReader(ways_nodes_csv):
        to_ways_nodes.append((row['id'], row['node_id'], row['position']))

In [25]:
# Insert every tuple in to_ways_nodes; the three "?" placeholders map to id, node_id, position.
cur.executemany("INSERT INTO ways_nodes(id, node_id, position) VALUES (?, ?, ?);", to_ways_nodes)

<sqlite3.Cursor at 0x1040539d0>

Checking for data insertion

In [26]:
# Commit the ways_nodes rows, then display up to ten of them as a sanity check.
conn.commit()
cur.execute("SELECT * FROM ways_nodes LIMIT 10").fetchall()

[(4358672, 26546039, 0),
 (4358672, 1674200199, 1),
 (4358673, 26546203, 0),
 (4358673, 1332583222, 1),
 (4358675, 1332583222, 0),
 (4358675, 26546004, 1),
 (4358677, 26546038, 0),
 (4358677, 26546046, 1),
 (4358677, 26546048, 2),
 (4531212, 1619623774, 0)]

In [27]:
# All tables are loaded and committed above; close the database connection.
conn.close()