# SQLAlchemy ORM

In [1]:
import os
from datetime import date, datetime
from urllib.parse import quote_plus

from sqlalchemy import create_engine, ForeignKey, Table, Column
from sqlalchemy import Integer, String, Numeric, Date, DateTime
from sqlalchemy import text
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy.ext.declarative import declarative_base

In [2]:
# Declarative base class: every model class in this notebook inherits from it,
# and Base.metadata is what create_all() / drop_all() operate on.
Base = declarative_base()

## Definition of Data Model

In [3]:
class Author(Base):
    """An author, stored in the ``authors`` table.

    The books of an author are reachable through ``books``, the backref
    declared on ``Book.author``.
    """

    __tablename__ = 'authors'

    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True)  # no two authors may share a name
    birthday = Column(Date)

    def __init__(self, name, birthday):
        """Create an author from a name and a birthday."""
        # Delegate to the keyword constructor supplied by the declarative base,
        # which assigns each keyword to the mapped attribute of the same name.
        super().__init__(name=name, birthday=birthday)

In [4]:
class Book(Base):
    """A book, stored in the ``books`` table; each book has one author."""

    __tablename__ = 'books'

    id = Column(Integer, primary_key=True)
    title = Column(String, unique=True)  # no two books may share a title
    published_datetime = Column(DateTime)
    author_id = Column(Integer, ForeignKey('authors.id')) # 1:many relationship
    # ORM-level link to the author; the backref adds ``Author.books``.
    author = relationship("Author", backref='books')

    def __init__(self, title, author, published_datetime):
        """Create a book from its title, its ``Author`` and its publication timestamp."""
        # Delegate to the keyword constructor supplied by the declarative base,
        # which assigns each keyword to the mapped attribute of the same name.
        super().__init__(title=title, author=author,
                         published_datetime=published_datetime)

In [5]:
# Association table for the many-to-many relationship between books and stores:
# each row links one book (book_id) to one store (store_id).
book_store_mapping = Table('books_stores', Base.metadata,
                           Column('book_id', Integer, ForeignKey('books.id')),
                           Column('store_id', Integer, ForeignKey('stores.id')))

class Store(Base):
    """A book store, stored in the ``stores`` table.

    ``books`` lists the books the store carries; the links are kept in the
    ``books_stores`` association table.
    """

    __tablename__ = 'stores'

    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True)  # no two stores may share a name
    books = relationship("Book", secondary=book_store_mapping)

    def __init__(self, name):
        """Create a store with the given name."""
        # Delegate to the keyword constructor supplied by the declarative base.
        super().__init__(name=name)

## Adding Data

In [6]:
# Two authors; nothing is written to a database until they are added to a session.
alan_smithee = Author(name='Alan Smithee', birthday=date(1978, 4, 17))
jane_doe = Author(name='Jane Doe', birthday=date(1984, 2, 28))

In [7]:
# Three books: one by Alan Smithee (published "now", UTC) and two by Jane Doe.
python_tutorial = Book(title='Python Tutorial', author=alan_smithee,
                       published_datetime=datetime.utcnow())
guide = Book(title="Hitchhiker's Guide to Python", author=jane_doe,
             published_datetime=datetime.fromisoformat('2019-01-23T14:34:21'))
almanach = Book(title='Almanach of Python Wisdom', author=jane_doe,
                published_datetime=datetime(2019, 1, 23, 14, 14, 21))

In [8]:
# Two stores that will carry the books created above.
inn = Store(name="Bookkeeper's Inn")
library = Store(name='Great Library')

In [9]:
# Assign the books each store carries (many-to-many via the association table).
inn.books = [python_tutorial, almanach]
library.books = [guide, python_tutorial, almanach]

## Storing Data in DB

In [10]:
# Engine for an in-memory SQLite database, plus a session factory bound to it.
con_str = 'sqlite://' # SQLite database in memory
engine = create_engine(con_str)
Session = sessionmaker(bind=engine)

In [11]:
# Create the tables for everything registered on Base.metadata.
Base.metadata.create_all(engine)

In [12]:
# Open a session bound to the SQLite engine.
session = Session()

In [13]:
# Register the books and both stores with the session in one call.
session.add_all([python_tutorial, guide, almanach, inn, library])

In [14]:
# Write the pending objects to the database and end the transaction.
session.commit()

## Retrieving Data

### Raw SQL

In [15]:
sql = "select * from books"
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so run
# the textual SQL on an explicit, automatically closed connection instead.
with engine.connect() as connection:
    book_rows = connection.execute(text(sql)).fetchall()
book_rows

[(1, 'Python Tutorial', '2019-08-14 19:12:44.811844', 1),
 (2, "Hitchhiker's Guide to Python", '2019-01-23 14:34:21.000000', 2),
 (3, 'Almanach of Python Wisdom', '2019-01-23 14:14:21.000000', 2)]

In [16]:
sql = """select * from books 
inner join authors on books.author_id = authors.id"""
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so run
# the textual SQL on an explicit, automatically closed connection instead.
with engine.connect() as connection:
    book_author_rows = connection.execute(text(sql)).fetchall()
book_author_rows

[(1, 'Python Tutorial', '2019-08-14 19:12:44.811844', 1, 1, 'Alan Smithee', '1978-04-17'),
 (2, "Hitchhiker's Guide to Python", '2019-01-23 14:34:21.000000', 2, 2, 'Jane Doe', '1984-02-28'),
 (3, 'Almanach of Python Wisdom', '2019-01-23 14:14:21.000000', 2, 2, 'Jane Doe', '1984-02-28')]

### ORM

In [17]:
# Load every author through the ORM and print one summary line per author.
all_authors = session.query(Author).all()
for author in all_authors:
    print(f'Author name: {author.name}, birthday: {author.birthday}')

Author name: Alan Smithee, birthday: 1978-04-17
Author name: Jane Doe, birthday: 1984-02-28


## Cleanup

In [18]:
# Close the session before the tables are dropped.
session.close()

In [19]:
# Drop every table registered on Base.metadata from the SQLite database.
Base.metadata.drop_all(engine)

## Import from Pandas

The following section shows how to import data from Pandas into a database using SQLAlchemy.

In [20]:
import pandas as pd
import numpy as np

### Set up Data Model

In [21]:
class MeasurementTypes(Base):
    """Lookup table of measurement types (``measurement_types``)."""
    __tablename__ = 'measurement_types'
    id = Column(Integer, primary_key=True)  # single-column primary key
    name = Column(String, unique=True)  # unique column

class Measurements(Base):
    """Measured values (``measurements``), keyed by time tick and measurement type."""
    __tablename__ = 'measurements'
    # time_tick and type_id together form the composite primary key.
    time_tick = Column(Integer, primary_key=True)
    # type_id must reference an existing row in measurement_types.
    type_id = Column(Integer, ForeignKey('measurement_types.id'), primary_key=True)
    val1 = Column(Numeric, nullable=False)  # the only value column that may not be NULL
    val2 = Column(Numeric)
    val3 = Column(Integer)

Note that the data model contains several features / constraints:

* a single primary key
* a composite primary key
* a foreign key
* a non-nullable column
* a unique column

### Define Test Data

In [22]:
# One row per measurement type: ids 1..100 plus a name derived from the id.
type_ids = np.arange(1, 101)
type_df = pd.DataFrame({'id': type_ids})
type_df = type_df.assign(name='measurement_type_' + type_df['id'].astype(str))

In [23]:
# Preview the first rows of the measurement-type data.
type_df.head()

Unnamed: 0,id,name
0,1,measurement_type_1
1,2,measurement_type_2
2,3,measurement_type_3
3,4,measurement_type_4
4,5,measurement_type_5


In [24]:
# Preview the last rows of the measurement-type data.
type_df.tail()

Unnamed: 0,id,name
95,96,measurement_type_96
96,97,measurement_type_97
97,98,measurement_type_98
98,99,measurement_type_99
99,100,measurement_type_100


In [25]:
rows = 10000
# Seeded generator so that Restart & Run All reproduces exactly the same test data.
rng = np.random.RandomState(42)
val_df = pd.DataFrame(rng.randn(rows, 3), columns=['val1', 'val2', 'val3'])
# val3 is an Integer column in the model, so scale and truncate it to int64.
val_df['val3'] = (val_df['val3'] * 10).astype(np.int64)
# Row i (1-based) gets time_tick = i // 100 and type_id = i % 100 + 1, so each
# (time_tick, type_id) pair is unique and type_id stays within 1..100.
val_df['time_tick'] = np.arange(1, rows+1) // 100
val_df['type_id'] = np.arange(1, rows+1) % 100 + 1

In [26]:
# Preview the first rows of the generated measurement data.
val_df.head()

Unnamed: 0,val1,val2,val3,time_tick,type_id
0,0.555223,0.813219,1,0,2
1,2.24752,-1.611584,0,0,3
2,0.312003,-0.601715,11,0,4
3,0.49534,-1.259773,12,0,5
4,-1.408515,-1.040054,8,0,6


The DataFrames above are constructed so that they comply with the defined model.

In [27]:
# Independent copy of the first five measurement rows, used to build invalid data.
val_invalid = val_df.iloc[:5].copy()

In [28]:
# Build the invalid rows directly from val_df rather than mutating val_invalid,
# so that re-running this cell always yields the same result.
val_invalid = val_df.head().assign(
    val3=lambda df: df.val3 / 10,          # non-integer values for an Integer column
    type_id=lambda df: df.type_id + 1000,  # ids that do not exist in type_df
)

In [29]:
# Show the modified rows.
val_invalid.head()

Unnamed: 0,val1,val2,val3,time_tick,type_id
0,0.555223,0.813219,0.1,0,1002
1,2.24752,-1.611584,0.0,0,1003
2,0.312003,-0.601715,1.1,0,1004
3,0.49534,-1.259773,1.2,0,1005
4,-1.408515,-1.040054,0.8,0,1006


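This DataFrame does not comply with the model: its `type_id` values (1002–1006) do not exist in the `measurement_types` table, so the foreign key constraint is violated. In addition, `val3` now holds non-integer values although the column is declared as `Integer`.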

### Setup Postgres Engine

In [30]:
import psycopg2

In [31]:
# Connection settings are read from the environment so that no credentials are
# stored in the notebook; the defaults target the Docker alias used by this tutorial.
username = os.environ.get('POSTGRES_USER', 'postgres')
password = os.environ.get('POSTGRES_PASSWORD', '')  # set POSTGRES_PASSWORD before running
db_url = os.environ.get('POSTGRES_HOST', 'postgres_db:5432') # using Docker url alias
db_schema = os.environ.get('POSTGRES_DB', 'postgres')
# 'postgresql' is the dialect name SQLAlchemy expects ('postgres' is rejected
# since 1.4); credentials are URL-quoted so special characters cannot break the URL.
con_str = f'postgresql://{quote_plus(username)}:{quote_plus(password)}@{db_url}/{db_schema}'

In [32]:
# Rebind engine and Session to the Postgres database; from here on they no
# longer refer to the in-memory SQLite database used above.
engine = create_engine(con_str)
Session = sessionmaker(bind=engine)

### Fill in Data using Pandas

Here, the Pandas built-in *to_sql()* method is used.

In [57]:
# Create the tables of all models registered on Base in the Postgres database.
Base.metadata.create_all(engine)

Generate all tables defined in SQLAlchemy model

In [58]:
# Insert method handed to DataFrame.to_sql() in the timed cell below.
to_sql_method = 'multi'

In [59]:
%%time
# Options shared by both inserts; if_exists='append' keeps the tables (and
# their constraints) created from the SQLAlchemy model.
to_sql_kwargs = dict(con=engine, index=False, if_exists='append',
                     method=to_sql_method)
# The types go first because measurements reference them through a foreign key.
type_df.to_sql(MeasurementTypes.__tablename__, **to_sql_kwargs)
val_df.to_sql(Measurements.__tablename__, **to_sql_kwargs)

CPU times: user 2.07 s, sys: 43 ms, total: 2.11 s
Wall time: 2.72 s


Very important: always set

    if_exists='append'
    
If this parameter is not set, Pandas will raise an exception because the table already exists.
If the parameter is set to *'replace'*, the existing table is dropped and re-created by Pandas, losing all constraints etc. defined in the SQLAlchemy model.

In [60]:
# Demonstration: inserting rows whose type_id is missing from measurement_types
# is expected to fail; the error is caught and printed so the notebook keeps running.
try:
    val_invalid.to_sql(Measurements.__tablename__, engine, index=False, if_exists='append')
except Exception as e:
    print(e)

(psycopg2.errors.ForeignKeyViolation) insert or update on table "measurements" violates foreign key constraint "measurements_type_id_fkey"
DETAIL:  Key (type_id)=(1002) is not present in table "measurement_types".

[SQL: INSERT INTO measurements (val1, val2, val3, time_tick, type_id) VALUES (%(val1)s, %(val2)s, %(val3)s, %(time_tick)s, %(type_id)s)]
[parameters: ({'val1': 0.5552233320367923, 'val2': 0.813218959052893, 'val3': 0.1, 'time_tick': 0, 'type_id': 1002}, {'val1': 2.247520146611636, 'val2': -1.6115842357229266, 'val3': 0.0, 'time_tick': 0, 'type_id': 1003}, {'val1': 0.3120030629218393, 'val2': -0.6017151932966848, 'val3': 1.1, 'time_tick': 0, 'type_id': 1004}, {'val1': 0.4953399337553277, 'val2': -1.2597730320723395, 'val3': 1.2, 'time_tick': 0, 'type_id': 1005}, {'val1': -1.4085153828191306, 'val2': -1.0400536168291372, 'val3': 0.8, 'time_tick': 0, 'type_id': 1006})]
(Background on this error at: http://sqlalche.me/e/gkpj)


Data with invalid foreign key cannot be inserted.

In [61]:
# Read the measurements table back from the database and preview its first rows.
df_from_db = pd.read_sql_table(Measurements.__tablename__, engine)
df_from_db.head()

Unnamed: 0,time_tick,type_id,val1,val2,val3
0,0,2,0.555223,0.813219,1
1,0,3,2.24752,-1.611584,0
2,0,4,0.312003,-0.601715,11
3,0,5,0.49534,-1.259773,12
4,0,6,-1.408515,-1.040054,8


In [62]:
# Preview the last rows that were read back from the database.
df_from_db.tail()

Unnamed: 0,time_tick,type_id,val1,val2,val3
9995,99,97,-0.533778,-0.132487,-1
9996,99,98,0.031455,-1.285644,-12
9997,99,99,-1.269113,0.434566,-1
9998,99,100,-0.758415,-0.507833,5
9999,100,1,0.085585,-3.018355,-2


In [63]:
# Drop all model tables so that the next section starts from an empty schema.
Base.metadata.drop_all(engine)

Delete all tables of the SQLAlchemy model.

### Fill using SQLAlchemy Bulk Insert

Here, the SQLAlchemy Bulk Insert method is used to insert a list of dictionaries created from the Pandas DataFrames.

In [48]:
# Re-create the model tables for the bulk-insert run.
Base.metadata.create_all(engine)

In [49]:
# Read the measurement_types table before anything is inserted in this section.
df_from_db = pd.read_sql_table(MeasurementTypes.__tablename__, engine)
df_from_db.head()

Unnamed: 0,id,name


In [50]:
# Open a session on the Postgres engine for the bulk inserts.
session = Session()

In [51]:
%%time
# Turn each DataFrame into a list of row dictionaries; the type ids are dropped
# so that the database assigns them.
type_records = type_df.drop(columns=['id']).to_dict(orient='records')
val_records = val_df.to_dict(orient='records')
# The types go first because measurements reference them through a foreign key.
session.bulk_insert_mappings(MeasurementTypes, type_records)
session.bulk_insert_mappings(Measurements, val_records)
session.commit()

CPU times: user 1.22 s, sys: 434 ms, total: 1.66 s
Wall time: 4.11 s


In [52]:
# Read the measurements table back from the database and preview its first rows.
df_from_db = pd.read_sql_table(Measurements.__tablename__, engine)
df_from_db.head()

Unnamed: 0,time_tick,type_id,val1,val2,val3
0,0,2,0.555223,0.813219,1
1,0,3,2.24752,-1.611584,0
2,0,4,0.312003,-0.601715,11
3,0,5,0.49534,-1.259773,12
4,0,6,-1.408515,-1.040054,8


In [53]:
# Demonstration: inserting rows whose type_id is missing from measurement_types
# is expected to fail; the error is caught and printed so the notebook keeps running.
try:
    session.bulk_insert_mappings(Measurements, val_invalid.to_dict(orient='records'))
except Exception as e:
    # A failed statement leaves the session's transaction unusable; roll it
    # back so the session can be used again afterwards.
    session.rollback()
    print(e)

(psycopg2.errors.ForeignKeyViolation) insert or update on table "measurements" violates foreign key constraint "measurements_type_id_fkey"
DETAIL:  Key (type_id)=(1002) is not present in table "measurement_types".

[SQL: INSERT INTO measurements (time_tick, type_id, val1, val2, val3) VALUES (%(time_tick)s, %(type_id)s, %(val1)s, %(val2)s, %(val3)s)]
[parameters: ({'time_tick': 0, 'type_id': 1002, 'val1': 0.5552233320367923, 'val2': 0.813218959052893, 'val3': 0.1}, {'time_tick': 0, 'type_id': 1003, 'val1': 2.247520146611636, 'val2': -1.6115842357229266, 'val3': 0.0}, {'time_tick': 0, 'type_id': 1004, 'val1': 0.3120030629218393, 'val2': -0.6017151932966848, 'val3': 1.1}, {'time_tick': 0, 'type_id': 1005, 'val1': 0.4953399337553277, 'val2': -1.2597730320723395, 'val3': 1.2}, {'time_tick': 0, 'type_id': 1006, 'val1': -1.4085153828191306, 'val2': -1.0400536168291372, 'val3': 0.8})]
(Background on this error at: http://sqlalche.me/e/gkpj)


In [54]:
# Read the measurements table back and preview its last rows.
df_from_db = pd.read_sql_table(Measurements.__tablename__, engine)
df_from_db.tail()

Unnamed: 0,time_tick,type_id,val1,val2,val3
9995,99,97,-0.533778,-0.132487,-1
9996,99,98,0.031455,-1.285644,-12
9997,99,99,-1.269113,0.434566,-1
9998,99,100,-0.758415,-0.507833,5
9999,100,1,0.085585,-3.018355,-2


In [55]:
# Close the session first: it releases its connection and ends any open
# transaction, which could otherwise hold locks that block dropping the tables.
session.close()
Base.metadata.drop_all(engine)

### Conclusion

Both Pandas *to_sql()* and SQLAlchemy *bulk_insert_mappings()* can be used to insert data from DataFrames into tables created by an SQLAlchemy model.
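In the runs shown above, Pandas *to_sql()* (with `method='multi'`) finished faster in wall time than *bulk_insert_mappings()* (2.72 s vs. 4.11 s); note that this is a single measurement, not a benchmark.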