### Read in raw data

In [1]:
import pandas

dfa = pandas.read_csv('../data/animals.csv')
dfa.head()

Unnamed: 0,animal_ID,weight_lbs,zoo_ID
0,9992,1040,12
1,99929,1090,12
2,12993,2190,11
3,9821,750,11
4,96673,580,11


In [2]:
dfz = pandas.read_csv('../data/zoos.csv')
dfz.head()

Unnamed: 0,zoo_ID,zoo_name
0,12,Detroit
1,11,LA
2,9,NY


### Pydantic models

The Pydantic models are in `utils/schema.py`.  I am using Pydantic here for two things: 

 1. rename the columns by using a field alias (`id` field can also be `animal_ID`)
 2. Data transformation/validation (cast fields to int or str)

In [3]:
import sys
sys.path.insert(0, "..")
from utils import schemas
schemas

<module 'utils.schemas' from '../utils/schemas.py'>

In [4]:
pyanimals = [schemas.Animal(**row) for row in dfa.to_dict(orient="records")]
pyanimals

[Animal(id=9992, weight=1040, zoo_id=12),
 Animal(id=99929, weight=1090, zoo_id=12),
 Animal(id=12993, weight=2190, zoo_id=11),
 Animal(id=9821, weight=750, zoo_id=11),
 Animal(id=96673, weight=580, zoo_id=11),
 Animal(id=1411, weight=690, zoo_id=12),
 Animal(id=1415, weight=695, zoo_id=9),
 Animal(id=1410, weight=690, zoo_id=9),
 Animal(id=1117, weight=1000, zoo_id=9)]

In [5]:
pyzoos = [schemas.Zoo(**row) for row in dfz.to_dict(orient="records")]
pyzoos

[Zoo(id=12, name='Detroit'), Zoo(id=11, name='LA'), Zoo(id=9, name='NY')]

### SQLAlchemy models

SQLAlchemy models are in `utils/models.py`.  

In [6]:
from utils import models
models

<module 'utils.models' from '../utils/models.py'>

In [7]:
animals = [models.Animal(**item.dict()) for item in pyanimals]
animals

[Animal(id=9992, weight=1040, zoo_id=12),
 Animal(id=99929, weight=1090, zoo_id=12),
 Animal(id=12993, weight=2190, zoo_id=11),
 Animal(id=9821, weight=750, zoo_id=11),
 Animal(id=96673, weight=580, zoo_id=11),
 Animal(id=1411, weight=690, zoo_id=12),
 Animal(id=1415, weight=695, zoo_id=9),
 Animal(id=1410, weight=690, zoo_id=9),
 Animal(id=1117, weight=1000, zoo_id=9)]

In [8]:
animals[0].__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x7fcff2788e20>,
 'id': 9992,
 'weight': 1040,
 'zoo_id': 12}

In [9]:
zoos = [models.Zoo(**item.dict()) for item in pyzoos]
zoos

[Zoo(id=12, name='Detroit'), Zoo(id=11, name='LA'), Zoo(id=9, name='NY')]

In [10]:
zoos[0].__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x7fcff2d26190>,
 'id': 12,
 'name': 'Detroit'}

### Write to db

In [11]:
import sqlalchemy as sa
import sqlalchemy.orm

engine = sa.create_engine("sqlite:///../zoo_animals.db", echo=True)
engine

Engine(sqlite:///../zoo_animals.db)

In [12]:
# Create tables
models.Base.metadata.create_all(engine)

2021-04-30 21:11:42,798 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2021-04-30 21:11:42,800 INFO sqlalchemy.engine.base.Engine ()
2021-04-30 21:11:42,801 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2021-04-30 21:11:42,801 INFO sqlalchemy.engine.base.Engine ()
2021-04-30 21:11:42,804 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("zoos")
2021-04-30 21:11:42,805 INFO sqlalchemy.engine.base.Engine ()
2021-04-30 21:11:42,806 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("zoos")
2021-04-30 21:11:42,807 INFO sqlalchemy.engine.base.Engine ()
2021-04-30 21:11:42,808 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("animals")
2021-04-30 21:11:42,809 INFO sqlalchemy.engine.base.Engine ()
2021-04-30 21:11:42,810 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("animals")
2021-04-30 21:11:42,811 INFO sqlalchemy.engine.base.Engine ()
2021-04-30 21:11:42,813 

In [13]:
LocalSession = sa.orm.sessionmaker(bind=engine)
db = LocalSession()

In [14]:
db.add_all(zoos)
db.commit()

2021-04-30 21:11:44,257 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2021-04-30 21:11:44,259 INFO sqlalchemy.engine.base.Engine INSERT INTO zoos (id, name) VALUES (?, ?)
2021-04-30 21:11:44,259 INFO sqlalchemy.engine.base.Engine ((12, 'Detroit'), (11, 'LA'), (9, 'NY'))
2021-04-30 21:11:44,261 INFO sqlalchemy.engine.base.Engine COMMIT


In [15]:
db.add_all(animals)
db.commit()

2021-04-30 21:11:44,630 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2021-04-30 21:11:44,636 INFO sqlalchemy.engine.base.Engine INSERT INTO animals (id, weight, zoo_id) VALUES (?, ?, ?)
2021-04-30 21:11:44,639 INFO sqlalchemy.engine.base.Engine ((9992, 1040, 12), (99929, 1090, 12), (12993, 2190, 11), (9821, 750, 11), (96673, 580, 11), (1411, 690, 12), (1415, 695, 9), (1410, 690, 9), (1117, 1000, 9))
2021-04-30 21:11:44,645 INFO sqlalchemy.engine.base.Engine COMMIT


In [16]:
db.close()

### Sanity check that the data is in there

In [17]:
db = LocalSession()
item = db.query(models.Animal).first()
item

2021-04-30 21:11:45,223 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2021-04-30 21:11:45,225 INFO sqlalchemy.engine.base.Engine SELECT animals.id AS animals_id, animals.weight AS animals_weight, animals.zoo_id AS animals_zoo_id 
FROM animals
 LIMIT ? OFFSET ?
2021-04-30 21:11:45,226 INFO sqlalchemy.engine.base.Engine (1, 0)


Animal(id=1117, weight=1000, zoo_id=9)

In [18]:
item.__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x7fcff17f4c40>,
 'zoo_id': 9,
 'weight': 1000,
 'id': 1117}

In [19]:
item.zoo.__dict__

2021-04-30 21:11:45,540 INFO sqlalchemy.engine.base.Engine SELECT zoos.id AS zoos_id, zoos.name AS zoos_name 
FROM zoos 
WHERE zoos.id = ?
2021-04-30 21:11:45,541 INFO sqlalchemy.engine.base.Engine (9,)


{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x7fcff1802760>,
 'id': 9,
 'name': 'NY'}

In [20]:
db.query(models.Animal).count()

2021-04-30 21:11:45,691 INFO sqlalchemy.engine.base.Engine SELECT count(*) AS count_1 
FROM (SELECT animals.id AS animals_id, animals.weight AS animals_weight, animals.zoo_id AS animals_zoo_id 
FROM animals) AS anon_1
2021-04-30 21:11:45,692 INFO sqlalchemy.engine.base.Engine ()


9

In [21]:
db.query(models.Zoo).count()

2021-04-30 21:11:45,850 INFO sqlalchemy.engine.base.Engine SELECT count(*) AS count_1 
FROM (SELECT zoos.id AS zoos_id, zoos.name AS zoos_name 
FROM zoos) AS anon_1
2021-04-30 21:11:45,851 INFO sqlalchemy.engine.base.Engine ()


3

In [22]:
df = pandas.read_sql("select * from animals", engine)
df

2021-04-30 21:11:45,991 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("select * from animals")
2021-04-30 21:11:45,992 INFO sqlalchemy.engine.base.Engine ()
2021-04-30 21:11:45,994 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("select * from animals")
2021-04-30 21:11:45,994 INFO sqlalchemy.engine.base.Engine ()
2021-04-30 21:11:45,996 INFO sqlalchemy.engine.base.OptionEngine select * from animals
2021-04-30 21:11:45,997 INFO sqlalchemy.engine.base.OptionEngine ()


Unnamed: 0,id,weight,zoo_id
0,1117,1000,9
1,1410,690,9
2,1411,690,12
3,1415,695,9
4,9821,750,11
5,9992,1040,12
6,12993,2190,11
7,96673,580,11
8,99929,1090,12
