# imports

In [None]:
from itertools import islice
import warnings

import pandas as pd

from sqlalchemy import MetaData
from sqlalchemy import Table

from sqlalchemy.orm import Query

from src.connect import create_wos_engine
from src.connect import create_wos_session

# setup

In [None]:
# Render floats without decimal places: as soon as a pk_items value is NULL somewhere
# in a query result, pandas shows the column as float (to be able to display NaN),
# and the keys would otherwise be printed with decimals.
pd.set_option("display.float_format", "{:.0f}".format)

In [None]:
# TODO: explain SQLAlchemy's create_engine conceptually

In [None]:
# Create the database engine via the project helper imported from src.connect; the
# resulting `engine` is what pd.read_sql and MetaData use in the cells below.
# NOTE(review): connection target and credentials are resolved inside
# create_wos_engine -- not visible from this notebook.
engine = create_wos_engine()

# Was / Warum ist SQLAlchemy?

SQLAlchemy ist ein Python-Package, das es ermöglicht, Abfragen an relationale Datenbanken zu stellen und dabei

1. unabhängig vom SQL-'Dialekt' der Datenbank zu sein. Aktuell unterstützt SQLAlchemy unter anderem folgende Datenbank-Backends: SQLite, PostgreSQL, MySQL/MariaDB, Oracle, MS-SQL

2. die Abfragen und Ergebnisse in Python-Objekten zu verwalten

## 3 wichtige Grundbegriffe

### ORM

SQLAlchemy ist ein **O**bject **R**elational **M**apper. 
Ziel aller ORMs ist, eine Abstraktion zum Datenbank-Layer zu schaffen, um Python-Code statt SQL zu schreiben. 
Andere ORMs in Python wären z.B. Django ORM, Peewee, Pony, Tortoise.

SQLAlchemy verfolgt dieses Ziel durch zwei verschiedene Konzepte, was das Package beim ersten Betrachten sehr unübersichtlich macht (aber größtmöglichen Freiraum ermöglicht).
Es existieren daher zwei verschiedene APIs in SQLAlchemy: 1. SQLAlchemy Core, 2. SQLAlchemy ORM

### Engine
### Session

# Beispiel: Eine Query aus pandas

In [None]:
# Raw SQL: six columns from the items table, limited to the first ten rows.
# NOTE(review): FETCH FIRST ... ROWS ONLY is dialect-dependent row limiting --
# confirm the target database accepts it.
query = """
SELECT pk_items
       , fk_sources
       , ut_eid
       , article_title
       , doctype
       , d_author_cnt
FROM WOS_B_2020.items
FETCH FIRST 10 ROWS ONLY
"""

# pandas sends the SQL string through the engine; as the last expression of the
# cell, the resulting DataFrame is displayed.
pd.read_sql(query, engine)

Unnamed: 0,pk_items,fk_sources,ut_eid,article_title,doctype,d_author_cnt
0,12868002,139222,000075342700002,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,Article,5
1,12868122,145724,A1990DK73800001,TRANSESOPHAGEAL ECHOCARDIOGRAPHY,Review,7
2,12868325,103331,A1995QE40800018,"EXPRESSION OF LACZ FROM THE HTRA, NIRB AND GROE PROMOTERS IN A SALMONELLA VACCINE STRAIN - INFLUENCE OF GROWTH IN MAMMALIAN-...",Article,6
3,12868421,102737,A1993KM96600011,MAO-A AND MAO-B INHIBITORS SELECTIVELY ALTER XENOPUS MUCUS-INDUCED BEHAVIORS OF SNAKES,Article,3
4,12868536,89029,A1991FL10600004,RECURRENT AND DENOVO RENAL-DISEASE AFTER KIDNEY-TRANSPLANTATION WITH OR WITHOUT CYCLOSPORINE-A,Article,4
5,12868673,81306,000406038400005,The ADRON-RM Instrument Onboard the ExoMars Rover,Article,18
6,12868831,89029,000233933400001,Use of EPO in critically ill patients with acute renal failure requiring renal replacement therapy,Article,3
7,12868962,41904,A1987G182200046,REPETITIVE REGION OF CALPASTATIN IS A FUNCTIONAL UNIT OF THE PROTEINASE-INHIBITOR,Article,6
8,12869037,70462,A1991FF88400007,"EFFECTS OF NA0344, A NEW SMOOTH-MUSCLE RELAXANT, ON THE ACTIN MYOSIN ATP INTERACTION AND MYOSIN LIGHT CHAIN PHOSPHORYLATION ...",Article,6
9,12869166,40174,000447150200001,Antidepressant-Like Effects of Low- and High-Molecular Weight FGF-2 on Chronic Unpredictable Mild Stress Mice,Article,6


# Einführung: Queries mit SQLAlchemy

In [None]:
# The MetaData object collects the table definitions of one database user (schema);
# here it is tied to `engine` and to the schema "wos_b_2020".
# NOTE(review): the `bind=` argument of MetaData is deprecated as of SQLAlchemy 1.4
# and no longer accepted in 2.0 -- confirm against the installed version.

meta = MetaData(bind=engine, schema="wos_b_2020")

In [None]:
# Reflect the three tables used in this notebook: their column definitions are read
# from the database instead of being declared by hand.
# `autoload_with=engine` names the connection used for reflection explicitly. The
# older `autoload=True` only works with a MetaData that is bound to an engine and is
# deprecated since SQLAlchemy 1.4 (removed in 2.0).
table_authors = Table("authors", meta, autoload_with=engine)
table_items = Table("items", meta, autoload_with=engine)
table_itauinst = Table("items_authors_institutions", meta, autoload_with=engine)

In [None]:
# TODO: run the same query as above once more with SQLAlchemy

In [None]:
# Join items with the items/authors/institutions link table and show the article
# title next to the author key for the first ten rows.
# The reflected tables are called table_items / table_itauinst in this notebook (see
# the Table(...) cell); the previously used t_items / t_itauinst are not defined in
# this notebook and raise a NameError on a fresh kernel.
d = (
    Query(table_items)
    # Explicit ON clause (same condition as in the later join example), so the join
    # does not depend on a foreign key having been reflected.
    .join(table_itauinst, table_itauinst.c.fk_items == table_items.c.pk_items)
    .with_entities(table_items.c.article_title, table_itauinst.c.fk_authors)
    .limit(10)
)

# .statement hands the SELECT built by the Query to pandas, which runs it on the engine.
pd.read_sql(d.statement, engine)

Unnamed: 0,article_title,fk_authors
0,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,14541115.0
1,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,21394516.0
2,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,28637542.0
3,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,29002553.0
4,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,29002553.0
5,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,31753127.0
6,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,
7,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,
8,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,
9,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,


In [None]:
# SQLAlchemy version of the raw-SQL example: the same six columns of the items
# table, limited to ten rows.
selected_columns = [
    table_items.c[column_name]
    for column_name in (
        "pk_items",
        "fk_sources",
        "ut_eid",
        "article_title",
        "doctype",
        "d_author_cnt",
    )
]
query = Query(table_items).with_entities(*selected_columns).limit(10)

# .statement hands the SELECT built by the Query to pandas, which runs it on the engine.
pd.read_sql(query.statement, engine)

Unnamed: 0,pk_items,fk_sources,ut_eid,article_title,doctype,d_author_cnt
0,12868002,139222,000075342700002,Influence of residential fungal contamination on peripheral blood lymphocyte populations in children,Article,5
1,12868122,145724,A1990DK73800001,TRANSESOPHAGEAL ECHOCARDIOGRAPHY,Review,7
2,12868325,103331,A1995QE40800018,"EXPRESSION OF LACZ FROM THE HTRA, NIRB AND GROE PROMOTERS IN A SALMONELLA VACCINE STRAIN - INFLUENCE OF GROWTH IN MAMMALIAN-...",Article,6
3,12868421,102737,A1993KM96600011,MAO-A AND MAO-B INHIBITORS SELECTIVELY ALTER XENOPUS MUCUS-INDUCED BEHAVIORS OF SNAKES,Article,3
4,12868536,89029,A1991FL10600004,RECURRENT AND DENOVO RENAL-DISEASE AFTER KIDNEY-TRANSPLANTATION WITH OR WITHOUT CYCLOSPORINE-A,Article,4
5,12868673,81306,000406038400005,The ADRON-RM Instrument Onboard the ExoMars Rover,Article,18
6,12868831,89029,000233933400001,Use of EPO in critically ill patients with acute renal failure requiring renal replacement therapy,Article,3
7,12868962,41904,A1987G182200046,REPETITIVE REGION OF CALPASTATIN IS A FUNCTIONAL UNIT OF THE PROTEINASE-INHIBITOR,Article,6
8,12869037,70462,A1991FF88400007,"EFFECTS OF NA0344, A NEW SMOOTH-MUSCLE RELAXANT, ON THE ACTIN MYOSIN ATP INTERACTION AND MYOSIN LIGHT CHAIN PHOSPHORYLATION ...",Article,6
9,12869166,40174,000447150200001,Antidepressant-Like Effects of Low- and High-Molecular Weight FGF-2 on Chronic Unpredictable Mild Stress Mice,Article,6


## Eine Query auf Core-Tabellen (`Table`) mit `sqlalchemy.orm.Query`

In [None]:
# Select all author rows whose first and last name match; both conditions are
# passed to a single .filter() call and therefore have to hold together.
firstname_matches = table_authors.c.firstname == "Niklas"
lastname_matches = table_authors.c.lastname == "Luhmann"
query = Query(table_authors).filter(firstname_matches, lastname_matches)

In [None]:
# Run the SELECT behind `query` on the engine and display the result as a DataFrame.
pd.read_sql(query.statement, engine)

Unnamed: 0,pk_authors,author_id,fullname,lastname,firstname,middlename,author_group,role,orcid_id,orcid_id_tr,r_id,r_id_tr
0,8495791,,"Luhmann, Niklas",Luhmann,Niklas,,,researcher_id,0000-0003-1108-058X,,,
1,31991735,,"Luhmann, N",Luhmann,Niklas,,,author,,,,
2,27758888,,"Luhmann, Niklas",Luhmann,Niklas,,,researcher_id,0000-0002-3912-0769,,,


## Queries können erweitert werden

In [None]:
# Build on the existing query: .filter() adds a further condition and the result is
# kept under a new name, so `query` stays available as it was.
role_is_author = table_authors.c.role == "author"
extended_query = query.filter(role_is_author)

In [None]:
# Run the narrowed-down query (additionally filtered on role == "author") and display the result.
pd.read_sql(extended_query.statement, engine)

Unnamed: 0,pk_authors,author_id,fullname,lastname,firstname,middlename,author_group,role,orcid_id,orcid_id_tr,r_id,r_id_tr
0,31991735,,"Luhmann, N",Luhmann,Niklas,,,author,,,,


## Ein Join mit SQLAlchemy

In [None]:
# Walk from the filtered authors over the link table to their items.
authors_to_links = table_authors.c.pk_authors == table_itauinst.c.fk_authors
links_to_items = table_itauinst.c.fk_items == table_items.c.pk_items

authors_with_items = extended_query.join(table_itauinst, authors_to_links).join(
    table_items, links_to_items
)

# Keep only item columns, drop duplicate rows and sort by publication year (ascending).
items = (
    authors_with_items.with_entities(
        table_items.c.pk_items,
        table_items.c.pubyear,
        table_items.c.doi,
        table_items.c.doctype,
        table_items.c.article_title,
    )
    .distinct()
    .order_by(table_items.c.pubyear.asc())
)

In [None]:
# Run the joined query on the engine and display the matching items as a DataFrame.
pd.read_sql(items.statement, engine)

Unnamed: 0,pk_items,pubyear,doi,doctype,article_title
0,186493011,2013,10.1016/j.drugpo.2012.08.005,Article,An urgent need to scale-up injecting drug harm reduction services in Tanzania: Prevalence of blood-borne viruses among drug ...
1,5231207373,2014,10.1016/j.drugpo.2014.01.007,Article,"Hepatitis C among people who inject drugs in Tbilisi, Georgia: An urgent need for prevention and treatment"
2,340623368,2015,10.1016/j.drugpo.2015.07.016,Article,Access to hepatitis C treatment for people who inject drugs in low and middle income settings: Evidence from 5 countries in ...
3,326657354,2016,10.1016/j.drugpo.2016.02.010,Article,"Prevalence and risk factors associated with HIV and tuberculosis in people who use drugs in Abidjan, Ivory Coast"
4,241868001,2017,10.1186/s12879-017-2767-0,Article,Survey of programmatic experiences and challenges in delivery of hepatitis B and C testing in low- and middle-income countries
5,322384438,2017,10.1063/1.4989775,Article,Effect of oxygen plasma on nanomechanical silicon nitride resonators
6,15481434188,2017,10.1007/s11577-017-0430-9,Editorial Material,Action Theory and System Theory
7,15545487522,2017,10.5771/0038-6073-2017-1-5,Article,The inner differentiation of society: stratification and functional differentiation
8,94881331,2018,,Meeting Abstract,Modelling the Impact of Prevention and Treatment Interventions on HIV and Hepatitis C Virus Transmission Among PWID in Nairobi
9,248403551,2018,10.1016/j.drugpo.2017.11.014,Article,Harm reduction-based and peer-supported hepatitis C treatment for people who inject drugs in Georgia
