# Day 9
---

## DataStore

- RDBMS (SQLite, MySQL, PostgreSQL)
- Text Format (CSV, JSON)
- NoSQL (MongoDB, CouchDB)
- Fast Binary (HDF5)

Iterator

### CSV

In [1]:
import csv

In [5]:
# newline='' is how the csv module expects files to be opened: the reader then
# handles line endings itself, including newlines inside quoted fields.
with open('names.csv', newline='') as csvfile:
    # No fieldnames are passed, so the first row of the file supplies the keys.
    reader = csv.DictReader(csvfile)
    # Materialise every row so the whole file is printed at once.
    print(list(reader))

[OrderedDict([('id', '1'), (' firstname', ' harry'), (' lastname', ' potter'), (' age', ' 17')]), OrderedDict([('id', '2'), (' firstname', ' Hermaine'), (' lastname', ' Granger'), (' age', ' 17')]), OrderedDict([('id', '3'), (' firstname', ' Ron'), (' lastname', ' Weasly'), (' age', ' 17')]), OrderedDict([('id', '4'), (' firstname', ' Neville'), (' lastname', ' Longbottom'), (' age', ' 17')])]


In [6]:
from collections import OrderedDict

In [8]:
from collections import namedtuple

In [11]:
# Tuple subclass for an RGB triple whose items can also be read by field name.
Color = namedtuple('Color', 'red green blue')

In [12]:
# Build one Color record; the keywords spell out which channel each value is.
c = Color(red=123, green=189, blue=65)

In [13]:
c.red

123

In [35]:
# newline='' lets the csv reader handle line endings itself.
with open('names.csv', newline='') as csvfile:
    # Explicit fieldnames give predictable keys, but DictReader then treats the
    # file's own header line as an ordinary data row, so it is printed as well.
    reader = csv.DictReader(csvfile, fieldnames=['id', 'firstname', 'lastname', 'age'])
    for line in reader:
        print(line['firstname'])

 firstname
 harry
 Hermaine
 Ron
 Neville


In [36]:
# newline='' lets the csv reader handle line endings itself.
with open('names.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile, fieldnames=['id', 'firstname', 'lastname', 'age'])
    # Because fieldnames are given explicitly, the header line would come back
    # as data; discard it. The default keeps an empty file from raising
    # StopIteration here.
    next(reader, None)
    for line in reader:
        print(line['firstname'])

 harry
 Hermaine
 Ron
 Neville


In [28]:
# Wrap a list in an iterator so its items can be pulled one at a time with next().
lst = iter([1, 3, 4])

In [29]:
lst

<list_iterator at 0x7f0fe6dd13c8>

In [30]:
next(lst)

1

In [31]:
next(lst)

3

In [32]:
next(lst)

4

In [33]:
# Calling next() on an exhausted iterator raises StopIteration. Catch it and
# print it so the demonstration stays visible without aborting "Run All".
try:
    next(lst)
except StopIteration as exc:
    print('StopIteration:', exc)

StopIteration: 

In [38]:
# Append one record to names.csv. newline='' keeps text-mode newline
# translation from altering the '\r\n' row terminator the csv writer emits
# (otherwise rows end in '\r\r\n' on Windows).
# Note: mode 'a' means every run of this cell adds the row again.
with open('names.csv', 'a', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['id', 'firstname', 'lastname', 'age'])
    writer.writerow({'id': 5, 'firstname': 'Luna', 'lastname': 'Lovegood', 'age': 17})

In [39]:
# Append several records in one call. newline='' keeps text-mode newline
# translation from altering the '\r\n' row terminator the csv writer emits
# (otherwise rows end in '\r\r\n' on Windows).
# Note: mode 'a' means every run of this cell adds the rows again.
with open('names.csv', 'a', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['id', 'firstname', 'lastname', 'age'])
    writer.writerows([
        {'id': 6, 'firstname': 'Tom', 'lastname': 'Weasly', 'age': 19},
        {'id': 7, 'firstname': 'Tom', 'lastname': 'Weasly', 'age': 19},
    ])

### SQLite

In [41]:
import sqlite3

In [42]:
# Connect to the on-disk SQLite database file 'data.sqlite3'.
# NOTE(review): no conn.close() is visible in this part of the notebook —
# confirm the connection is closed once the work is done.
conn = sqlite3.connect('data.sqlite3')

In [43]:
cur = conn.cursor()

In [44]:
# IF NOT EXISTS keeps this cell re-runnable: the table lives in an on-disk
# database, so a plain CREATE TABLE fails once the table has been created.
cur.execute('CREATE TABLE IF NOT EXISTS characters(id integer, firstname text, lastname text, age integer)')

<sqlite3.Cursor at 0x7f0fe6d84b90>

In [45]:
# Bind the values through ? placeholders instead of embedding them in the SQL.
# The original wrapped the strings in double quotes, which standard SQL reads
# as identifiers; SQLite only accepts them as string literals as a fallback.
cur.execute(
    'INSERT INTO characters(id, firstname, lastname, age) VALUES(?, ?, ?, ?)',
    (11, 'Harry', 'Potter', 17),
)

<sqlite3.Cursor at 0x7f0fe6d84b90>

In [46]:
conn.commit()

"INSERT INTO characters(id) VALUES(" + line['id'] + ")"

In [48]:
# Load every data row of names.csv into the characters table.
sql = """INSERT INTO characters(id, firstname, lastname, age) VALUES(?, ?, ?, ?)"""
with open('names.csv', newline='') as csvfile:
    # names.csv has a blank after each comma, so without skipinitialspace the
    # values reach the database with a leading space (' harry', ' 17').
    reader = csv.DictReader(
        csvfile,
        fieldnames=['id', 'firstname', 'lastname', 'age'],
        skipinitialspace=True,
    )
    # Fieldnames are explicit, so the header line must be skipped by hand;
    # the default keeps an empty file from raising StopIteration.
    next(reader, None)
    # One executemany call with bound parameters replaces the per-row execute.
    cur.executemany(
        sql,
        ((line['id'], line['firstname'], line['lastname'], line['age']) for line in reader),
    )

In [49]:
conn.commit()

In [50]:
# Rename every 'Tom' to 'Bob'. The values are bound as parameters rather than
# written as double-quoted literals, which standard SQL treats as identifiers.
cur.execute('UPDATE characters SET firstname=? WHERE firstname=?', ('Bob', 'Tom'))

<sqlite3.Cursor at 0x7f0fe6d84b90>

In [51]:
conn.commit()

### HDF5

In [52]:
import h5py

In [53]:
# Open 'datasets.h5' for writing. Mode 'w' creates the file and truncates any
# existing one, so re-running this cell starts from an empty file.
# NOTE(review): no hf.close() is visible in this part of the notebook —
# confirm the file is closed (or opened in a with-block) so data is flushed.
hf = h5py.File('datasets.h5', 'w')

In [54]:
g1 = hf.create_group('ktm')

In [55]:
hf

<HDF5 file "datasets.h5" (mode r+)>

In [56]:
g1

<HDF5 group "/ktm" (0 members)>

In [57]:
g1.create_group('substation1')

<HDF5 group "/ktm/substation1" (0 members)>

In [58]:
sg2 = g1.create_group('substation2')

In [59]:
# Create a one-dimensional dataset of 100 elements with dtype 'i' inside sg2.
# NOTE(review): the name 'statellite_image' looks like a typo for
# 'satellite_image'; it is the stored dataset name, so it is left unchanged here.
dt = sg2.create_dataset('statellite_image', (100,), dtype='i')

In [60]:
dt

<HDF5 dataset "statellite_image": shape (100,), type "<i4">

In [61]:
dt?

In [62]:
help(dt)

Help on Dataset in module h5py._hl.dataset object:

class Dataset(h5py._hl.base.HLObject)
 |  Represents an HDF5 dataset
 |  
 |  Method resolution order:
 |      Dataset
 |      h5py._hl.base.HLObject
 |      h5py._hl.base.CommonStateObject
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __array__(self, dtype=None)
 |      Create a Numpy array containing the whole dataset.  DON'T THINK
 |      THIS MEANS DATASETS ARE INTERCHANGABLE WITH ARRAYS.  For one thing,
 |      you have to read the whole dataset everytime this method is called.
 |  
 |  __getitem__(self, args)
 |      Read a slice from the HDF5 dataset.
 |      
 |      Takes slices and recarray-style field names (more than one is
 |      allowed!) in any order.  Obeys basic NumPy rules, including
 |      broadcasting.
 |      
 |      Also supports:
 |      
 |      * Boolean "mask" array indexing
 |  
 |  __init__(self, bind)
 |      Create a new Dataset object by binding to a low-level DatasetID.
 |  
 |  __i

In [63]:
dt.ndim

1

In [64]:
dt.shape

(100,)

In [66]:
# Read the whole dataset into a NumPy array. Indexing with an empty tuple
# replaces the deprecated Dataset.value property, which h5py 3 removed.
dt[()]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [67]:
import numpy as np

In [68]:
# Store the integers 0..11, arranged as 2 rows by 6 columns, as the dataset
# 'random'; shape and dtype are taken from the array itself.
dt2 = sg2.create_dataset('random', data=np.arange(12).reshape((2, 6)))

In [69]:
dt2

<HDF5 dataset "random": shape (2, 6), type "<i8">

In [70]:
# Read the whole dataset into a NumPy array. Indexing with an empty tuple
# replaces the deprecated Dataset.value property, which h5py 3 removed.
dt2[()]

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])