# SQLAlchemy: using UNION with ORDER BY and relationships




In [7]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from testcontainers.mysql import MySqlContainer

MYSQL_VERSION = '5.7.17'


def run_db():
    """Generator that starts a MySQL container and yields its testcontainers handle.

    Nothing runs until the first ``next()`` on the returned generator. The
    container context manager then stays open for as long as the generator
    remains suspended at the ``yield``. The yielded object is what the rest
    of the notebook uses to reach the database.
    """
    image = f'mysql:{MYSQL_VERSION}'
    with MySqlContainer(image) as container:
        yield container

# Mysql Test container
# run_db() is a generator function, so this call only creates the generator;
# the container is started by the next() call below.
mysql = run_db()
# Connection string
# next() runs run_db() up to its `yield`, i.e. inside the `with` block, and
# returns the container object. The generator stays suspended there, so keep
# `mysql` referenced: closing or garbage-collecting it would exit the `with`
# block (presumably stopping the container; confirm against testcontainers).
conn_string = next(mysql).get_connection_url()
# Engine object
engine = create_engine(conn_string)
# Session object
# `Session` is the sessionmaker factory bound to `engine`; individual sessions
# are created by calling `Session()`.
Session = sessionmaker(bind=engine)
print(f"DB ready for connection at URL : {conn_string}")

Pulling image mysql:5.7.17
Container started: e17d6a7465
Waiting to be ready...


DB ready for connection at URL : mysql+pymysql://test:test@localhost:49153/test


## Declare all needed tables

In [10]:

import time
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column
from sqlalchemy import Integer
from sqlalchemy import Text
from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship
# Declarative base object: the common base class that the ORM models below
# (Kv, Kvi) inherit from; Base.metadata is later used to create their tables.
Base = declarative_base()


def get_epoch_time_milliseconds():
    """Return the current Unix time as a whole number of milliseconds."""
    seconds_since_epoch = time.time()
    return int(seconds_since_epoch * 1000)


class Kv(Base):
    """ORM model mapped to the ``kv`` table; parent side of the Kv/Kvi relationship."""
    __tablename__ = 'kv'
    # Auto-incremented integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Organization identifier; the queries in this notebook filter on it.
    org_id = Column(Integer)
    # Integer position; the queries in this notebook compare and sort on it.
    position = Column(Integer)
    # Kvi objects whose `vid` foreign key points at this row; mirrored by Kvi.kv.
    kvis = relationship("Kvi", back_populates='kv')

class Kvi(Base):
    """ORM model mapped to the ``kvi`` table; child side of the Kv/Kvi relationship."""
    __tablename__ = 'kvi'
    # Auto-incremented integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Foreign key to kv.id (the owning Kv row).
    vid = Column(Integer, ForeignKey('kv.id'))
    # Text column declared with a length of 65535.
    interpretation = Column(Text(65535))
    # Parent Kv object; mirrored by Kv.kvis.
    kv = relationship("Kv", back_populates='kvis')

    
# Create the tables of the models declared on Base (here `kv` and `kvi`)
# in the database reached through `engine`.
Base.metadata.create_all(engine)


In [13]:
import random
import string

def random_string():
    """Return a string made of 10 randomly chosen ASCII letters."""
    letters = [random.choice(string.ascii_letters) for _ in range(10)]
    return ''.join(letters)

ORG_IDS = [1, 2, 3]

with Session() as session:
    # Bulk data: 1000 Kv rows, each with a random organization and position
    # and exactly one Kvi child carrying a random interpretation.
    for _ in range(1000):
        random_kv = Kv(
            org_id=random.choice(ORG_IDS),
            position=random.randint(15000, 50000),
        )
        random_kvi = Kvi(kv=random_kv, interpretation=random_string())
        session.add_all([random_kv, random_kvi])

    # Add some rows sharing the same position across different organizations.
    # NOTE(review): the second and third rows both use org_id=2; confirm
    # whether org_id=3 was intended for the third one.
    SAME_POSITION = 1000
    random_kv_1 = Kv(org_id=1, position=SAME_POSITION)
    random_kv_2 = Kv(org_id=2, position=SAME_POSITION)
    random_kv_3 = Kv(org_id=2, position=SAME_POSITION)
    random_kvi_1 = Kvi(kv=random_kv_1, interpretation=random_string())
    random_kvi_2 = Kvi(kv=random_kv_2, interpretation=random_string())
    random_kvi_3 = Kvi(kv=random_kv_3, interpretation=random_string())

    # Same registration order as before: the three parents, then the children.
    session.add_all([
        random_kv_1,
        random_kv_2,
        random_kv_3,
        random_kvi_1,
        random_kvi_2,
        random_kvi_3,
    ])

    session.commit()

## Session Query syntax

In [21]:
from sqlalchemy.orm import contains_eager
from sqlalchemy.orm import aliased
from sqlalchemy import and_


# Legacy Query API: Kv rows of organization 2 joined to their Kvi rows;
# contains_eager() populates Kv.kvis from the columns of that same JOIN.
# NOTE(review): `session` is not created in this cell; it is whatever an
# earlier cell left bound to that name, and the Query stays attached to it.
specific_query = session.query(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(Kv.org_id ==2)

# The `with` rebinds the name `session`, but `specific_query` was built above
# from the previous session object.
with Session() as session :
    result = specific_query.all()
    print(f"Type of result without union : {type(result[0])}")

# Two aliases of kv for the correlated NOT EXISTS below.
v1 = aliased(Kv)
v2 = aliased(Kv)
# EXISTS(some org-2 row v2 with the same position as v1).
sub_exists = session.query(1).filter(and_(v2.position == v1.position, v2.org_id == 2)).exists()
# Rows for organizations 1 and 3 whose position is not used by organization 2.
# NOTE(review): the filters are written against alias `v1`, while the query
# selects and joins the un-aliased Kv. The SQL printed for this same construct
# later in the notebook shows `FROM kv AS kv_1, kv INNER JOIN kvi ...`, i.e.
# kv_1 is not joined to kv. Confirm this is intended.
exclude_query = session.query(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(~sub_exists, v1.org_id.in_([1,3]))
with Session() as session :
    # Query.union() still yields Kv entities (see the recorded output).
    my_query = exclude_query.union(specific_query)
    result = my_query.all()
    print(f"Type of result with union : {type(result[0])}")


Type of result without union : <class '__main__.Kv'>
Type of result with union : <class '__main__.Kv'>


### Both queries (simple and union) return `Kv` instances. We can access each attribute, as well as the relationship attribute (`kvis`).

## 2.0 Syntax

In [31]:
from sqlalchemy.orm import contains_eager
from sqlalchemy.orm import aliased
from sqlalchemy import and_
from sqlalchemy import select


# 2.0-style select(): Kv rows of organization 2 joined to their Kvi rows;
# contains_eager() populates Kv.kvis from the columns of that same JOIN.
specific_query = select(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(Kv.org_id ==2)

with Session() as session :
    # unique() is applied before scalars() because the JOIN repeats a Kv row
    # once per matching Kvi row.
    result = session.execute(specific_query).unique().scalars().all()
    print(f"Type of result without union : {type(result[0])}")

# Two aliases of kv for the correlated NOT EXISTS below.
v1 = aliased(Kv)
v2 = aliased(Kv)
# EXISTS(some org-2 row v2 with the same position as v1).
sub_exists = select(1).filter(and_(v2.position == v1.position, v2.org_id == 2)).exists()
# NOTE(review): the filters are written against alias `v1`, while the statement
# selects and joins the un-aliased Kv. The SQL printed for this same construct
# later in the notebook shows `FROM kv AS kv_1, kv JOIN kvi ...`, i.e. kv_1 is
# not joined to kv. Confirm this is intended.
exclude_query = select(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(~sub_exists, v1.org_id.in_([1,3]))
with Session() as session :
    # Here the operands are in the opposite order to the other union cells
    # (specific first, then exclude).
    my_query = specific_query.union(exclude_query)
    # scalars() keeps only the first column of each row; the recorded output
    # shows `int`, not Kv.
    # NOTE(review): SQLAlchemy documents select(Kv).from_statement(<union>) and
    # aliased(Kv, <union>.subquery()) for loading entities from a UNION; not
    # tried in this notebook.
    result =session.execute(my_query).unique().scalars().all()
    print(f"Type of result with union : {type(result[0])}")

Type of result without union : <class '__main__.Kv'>


  result =session.execute(my_query).unique().scalars().all()


Type of result with union : <class 'int'>


### So we can see that with the 2.0 syntax, the union query returns `int` values and not `Kv` instances, so we can access neither the instance attributes nor the `kvis` relationship.
### Let's try to get Rows

In [25]:
from sqlalchemy.orm import contains_eager
from sqlalchemy.orm import aliased
from sqlalchemy import and_
from sqlalchemy import select


# 2.0-style select(): Kv rows of organization 2 joined to their Kvi rows;
# contains_eager() populates Kv.kvis from the columns of that same JOIN.
specific_query = select(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(Kv.org_id ==2)

with Session() as session :
    # unique() is applied before scalars() because the JOIN repeats a Kv row
    # once per matching Kvi row.
    result = session.execute(specific_query).unique().scalars().all()
    print(f"Type of result without union : {type(result[0])}")

# Two aliases of kv for the correlated NOT EXISTS below.
v1 = aliased(Kv)
v2 = aliased(Kv)
# EXISTS(some org-2 row v2 with the same position as v1).
sub_exists = select(1).filter(and_(v2.position == v1.position, v2.org_id == 2)).exists()
# NOTE(review): the filters are written against alias `v1`, while the statement
# selects and joins the un-aliased Kv. The SQL printed for this same construct
# later in the notebook shows `FROM kv AS kv_1, kv JOIN kvi ...`, i.e. kv_1 is
# not joined to kv. Confirm this is intended.
exclude_query = select(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(~sub_exists, v1.org_id.in_([1,3]))
with Session() as session :
    my_query = exclude_query.union(specific_query)
    # No scalars()/unique() this time: all() returns the raw result rows
    # (the recorded output shows sqlalchemy.engine.row.Row).
    result =session.execute(my_query).all()
    print(f"Type of result with union : {type(result[0])}")

Type of result without union : <class '__main__.Kv'>
Type of result with union : <class 'sqlalchemy.engine.row.Row'>


### Calling `all()` directly instead of `scalars()`, we get a list of `Row` instances. But a `Row` instance does not have the relationship field (here `kvis`).

## Order by : Session Query syntax

In [29]:
from sqlalchemy.orm import contains_eager
from sqlalchemy.orm import aliased
from sqlalchemy import and_


# Legacy Query API: Kv rows of organization 2 joined to their Kvi rows.
# NOTE(review): `session` is not created in this cell; it is whatever an
# earlier cell left bound to that name.
specific_query = session.query(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(Kv.org_id ==2)

# Nothing is executed in this cell: the queries are only printed so that the
# generated SQL can be inspected.
with Session() as session :
    my_query = specific_query.order_by(Kv.position)
    print(my_query)
    print()

# Two aliases of kv for the correlated NOT EXISTS below.
v1 = aliased(Kv)
v2 = aliased(Kv)
# EXISTS(some org-2 row v2 with the same position as v1).
sub_exists = session.query(1).filter(and_(v2.position == v1.position, v2.org_id == 2)).exists()
# NOTE(review): the filters are written against alias `v1`, while the query
# selects and joins the un-aliased Kv; the SQL printed below shows
# `FROM kv AS kv_1, kv INNER JOIN kvi ...`, i.e. kv_1 is not joined to kv.
exclude_query = session.query(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(~sub_exists, v1.org_id.in_([1,3]))
with Session() as session :
    # Query.union() wraps the UNION in a subquery (anon_1 in the printed SQL),
    # and the ORDER BY is rendered against it as anon_1.kv_position.
    my_query = exclude_query.union(specific_query).order_by(Kv.position)
    print(my_query)

SELECT kvi.id AS kvi_id, kvi.vid AS kvi_vid, kvi.interpretation AS kvi_interpretation, kv.id AS kv_id, kv.org_id AS kv_org_id, kv.position AS kv_position 
FROM kv INNER JOIN kvi ON kv.id = kvi.vid 
WHERE kv.org_id = %(org_id_1)s ORDER BY kv.position

SELECT kvi.id AS kvi_id, kvi.vid AS kvi_vid, kvi.interpretation AS kvi_interpretation, anon_1.kv_id AS anon_1_kv_id, anon_1.kv_org_id AS anon_1_kv_org_id, anon_1.kv_position AS anon_1_kv_position 
FROM kvi, (SELECT kv.id AS kv_id, kv.org_id AS kv_org_id, kv.position AS kv_position 
FROM kv AS kv_1, kv INNER JOIN kvi ON kv.id = kvi.vid 
WHERE NOT (EXISTS (SELECT 1 
FROM kv AS kv_2 
WHERE kv_2.position = kv_1.position AND kv_2.org_id = %(org_id_1)s)) AND kv_1.org_id IN (__[POSTCOMPILE_org_id_2]) UNION SELECT kv.id AS kv_id, kv.org_id AS kv_org_id, kv.position AS kv_position 
FROM kv INNER JOIN kvi ON kv.id = kvi.vid 
WHERE kv.org_id = %(org_id_3)s) AS anon_1 ORDER BY anon_1.kv_position


### We can notice that the ORDER BY keeps an explicit, table-qualified reference to the column: `kv.position` in the simple query and `anon_1.kv_position` in the union query.

In [30]:
from sqlalchemy.orm import contains_eager
from sqlalchemy.orm import aliased
from sqlalchemy import and_


# 2.0-style select(): Kv rows of organization 2 joined to their Kvi rows.
# NOTE(review): `select` is not imported in this cell; it is available only
# because an earlier cell imported it.
specific_query = select(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(Kv.org_id ==2)

# Nothing is executed in this cell: the statements are only printed so that
# the generated SQL can be inspected (the sessions opened here are unused).
with Session() as session :
    my_query = specific_query.order_by(Kv.position)
    print(my_query)
    print()

# Two aliases of kv for the correlated NOT EXISTS below.
v1 = aliased(Kv)
v2 = aliased(Kv)
# EXISTS(some org-2 row v2 with the same position as v1).
sub_exists = select(1).filter(and_(v2.position == v1.position, v2.org_id == 2)).exists()
# NOTE(review): the filters are written against alias `v1`, while the statement
# selects and joins the un-aliased Kv; the SQL printed below shows
# `FROM kv AS kv_1, kv JOIN kvi ...`, i.e. kv_1 is not joined to kv.
exclude_query = select(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(~sub_exists, v1.org_id.in_([1,3]))
with Session() as session :
    # The ORDER BY is applied to the UNION as a whole and is rendered without
    # a table qualifier (`ORDER BY position` in the printed SQL).
    my_query = exclude_query.union(specific_query).order_by(Kv.position)
    print(my_query)

SELECT kvi.id, kvi.vid, kvi.interpretation, kv.id AS id_1, kv.org_id, kv.position 
FROM kv JOIN kvi ON kv.id = kvi.vid 
WHERE kv.org_id = :org_id_1 ORDER BY kv.position

SELECT kv.id, kv.org_id, kv.position 
FROM kv AS kv_1, kv JOIN kvi ON kv.id = kvi.vid 
WHERE NOT (EXISTS (SELECT 1 
FROM kv AS kv_2 
WHERE kv_2.position = kv_1.position AND kv_2.org_id = :org_id_1)) AND kv_1.org_id IN (__[POSTCOMPILE_org_id_2]) UNION SELECT kv.id, kv.org_id, kv.position 
FROM kv JOIN kvi ON kv.id = kvi.vid 
WHERE kv.org_id = :org_id_3 ORDER BY position


### With the 2.0 syntax, the union query is much cleaner SQL. However, in the union query the ORDER BY clause loses the explicit table qualifier: `kv.position` becomes `position`, so the engine is not told which table the column belongs to. Here we order by a column of the `kv` table, which may be fine because every `Kv` column is in the SELECT list. But what can we do if we want to order by a column of the relationship?

In [32]:
from sqlalchemy.orm import contains_eager
from sqlalchemy.orm import aliased
from sqlalchemy import and_


# 2.0-style select(): Kv rows of organization 2 joined to their Kvi rows.
# NOTE(review): `select` is not imported in this cell; it is available only
# because an earlier cell imported it.
specific_query = select(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(Kv.org_id ==2)

# Nothing is executed in this cell: the statements are only printed so that
# the generated SQL can be inspected (the sessions opened here are unused).
with Session() as session :
    my_query = specific_query.order_by(Kv.position)
    print(my_query)
    print()

# Two aliases of kv for the correlated NOT EXISTS below.
v1 = aliased(Kv)
v2 = aliased(Kv)
# EXISTS(some org-2 row v2 with the same position as v1).
sub_exists = select(1).filter(and_(v2.position == v1.position, v2.org_id == 2)).exists()
# NOTE(review): the filters are written against alias `v1`, while the statement
# selects and joins the un-aliased Kv; the SQL printed below shows
# `FROM kv AS kv_1, kv JOIN kvi ...`, i.e. kv_1 is not joined to kv.
exclude_query = select(Kv).join(Kvi).options(contains_eager(Kv.kvis)).filter(~sub_exists, v1.org_id.in_([1,3]))
with Session() as session :
    # This time the UNION is ordered by a column of the related table. The
    # printed SQL ends with `ORDER BY interpretation`, yet the SELECT lists of
    # the UNION only contain kv.id, kv.org_id and kv.position.
    my_query = exclude_query.union(specific_query).order_by(Kvi.interpretation)
    print(my_query)

SELECT kvi.id, kvi.vid, kvi.interpretation, kv.id AS id_1, kv.org_id, kv.position 
FROM kv JOIN kvi ON kv.id = kvi.vid 
WHERE kv.org_id = :org_id_1 ORDER BY kv.position

SELECT kv.id, kv.org_id, kv.position 
FROM kv AS kv_1, kv JOIN kvi ON kv.id = kvi.vid 
WHERE NOT (EXISTS (SELECT 1 
FROM kv AS kv_2 
WHERE kv_2.position = kv_1.position AND kv_2.org_id = :org_id_1)) AND kv_1.org_id IN (__[POSTCOMPILE_org_id_2]) UNION SELECT kv.id, kv.org_id, kv.position 
FROM kv JOIN kvi ON kv.id = kvi.vid 
WHERE kv.org_id = :org_id_3 ORDER BY interpretation


### As we can see, the explicit table qualifier was removed (`interpretation` instead of `kvi.interpretation`): the engine will not know what to do with it, because the column `interpretation` is not present in the SELECT list of the union.