# imports

In [None]:
import pandas as pd
from sqlalchemy.orm import Query
from sqlalchemy import func
from sqlalchemy import and_

from src.connect import create_wos_session
from src.models import Author, ItemAuthorInstitution, Item, Source

# setup

In [None]:
# Unpack the two values returned by create_wos_session(): `engine` (disposed
# once the counts below are done) and `s`, the object the queries are built
# on via s.query(...).
engine, s = create_wos_session()

In [None]:
# Journal titles used to restrict the query. They are matched exactly against
# func.lower(Source.sourcetitle), so every entry must be lower case and spelled
# exactly as the title is stored in the database.
# NOTE(review): the recorded run returned 23 distinct sources for these 30
# titles, so several entries probably do not match the stored spelling
# (punctuation such as the comma in "astrophysical journal, supplement series"
# is a likely candidate) — verify against the SOURCES table.
sources = [
    "astronomy and astrophysics review",
    "living reviews in solar physics",
    "astrophysical journal letters",
    "astrophysical journal, supplement series",
    "nature astronomy",
    "astronomical journal",
    "space science reviews",
    "astrophysical journal",
    "publications of the astronomical society of the pacific",
    "astronomy and astrophysics",
    "monthly notices of the royal astronomical society: letters",
    "monthly notices of the royal astronomical society",
    # Fixed: the journal is "Publications of ..." (plural); the singular
    # spelling could never match the journal's real title.
    "publications of the astronomical society of japan",
    "new astronomy reviews",
    "icarus",
    "physics of the dark universe",
    "astrodynamics",
    "publications of the astronomical society of australia",
    "chinese physics c",
    "acta astronomica",
    "frontiers in astronomy and space sciences",
    "astroparticle physics",
    "revista mexicana de astronomia y astrofisica",
    "physics of the earth and planetary interiors",
    "journal of high energy astrophysics",
    "molecular astrophysics",
    "solar physics",
    "journal of cosmology and astroparticle physics",
    "international journal of modern physics d",
    "experimental astronomy",
]

In [None]:
# Display how many journal titles are in the filter list.
len(sources)

30

In [None]:
# Shared starting point for the counts below: sources LEFT OUTER JOINed to
# their items (Source.items) and on through Item.authors, restricted to
#   * items whose pubyear lies in 2019..2020 (BETWEEN is inclusive), and
#   * sources whose lower-cased title is one of `sources`.
# filter() ANDs its positional criteria. Because the pubyear predicate sits in
# WHERE, rows for sources without any matching item are discarded despite the
# outer join.
base_query = (
    s.query(Source)
    .outerjoin(Source.items)
    .outerjoin(Item.authors)
    .filter(
        Item.pubyear.between(2019, 2020),
        func.lower(Source.sourcetitle).in_(sources),
    )
)

In [None]:
%%time
# Number of rows in the DISTINCT projection of Author.pk_authors over
# base_query.
(
    base_query
    .with_entities(Author.pk_authors)
    .distinct()
    .count()
)

CPU times: user 79.3 ms, sys: 19 ms, total: 98.3 ms
Wall time: 5min 44s


66449

In [None]:
%%time
# Number of rows in the DISTINCT projection of Item.pk_items over base_query.
(
    base_query
    .with_entities(Item.pk_items)
    .distinct()
    .count()
)

CPU times: user 9.02 ms, sys: 4.74 ms, total: 13.8 ms
Wall time: 3min 8s


13895

In [None]:
%%time
# Number of rows in the DISTINCT projection of Source.pk_sources over
# base_query, i.e. how many sources actually satisfied the filter.
(
    base_query
    .with_entities(Source.pk_sources)
    .distinct()
    .count()
)

CPU times: user 11.1 ms, sys: 4.57 ms, total: 15.6 ms
Wall time: 3min 14s


23

In [None]:
# Release the engine now that the counts are done. NOTE(review): presumably a
# SQLAlchemy Engine, whose dispose() closes its connection pool — confirm in
# src.connect.
engine.dispose()

In [None]:
import pandas as pd
from sqlalchemy.orm import Query

from src.connect import create_wos_session
from src.models import Author, ItemAuthorInstitution, Item

In [None]:
# Raw-SQL counterpart of the ORM query: one row per (source, item, author)
# combination, carrying author name fields, item title/year and source title.
# SOURCES is LEFT JOINed to ITEMS, then to ITEMS_AUTHORS_INSTITUTIONS and
# AUTHORS. Because the PUBYEAR predicate sits in WHERE, sources without an
# item in 2019..2020 (inclusive) are dropped despite the LEFT JOIN.
# Titles are compared after lower(), so every literal must be lower case and
# spelled exactly as stored. Fixed here: "publication of the astronomical
# society of japan" -> "publications ..." (the journal's actual title).
query = """
SELECT
    a.PK_AUTHORS, a.FULLNAME, a.LASTNAME, a.FIRSTNAME,
    i.PK_ITEMS, i.ARTICLE_TITLE, i.PUBYEAR,
    s.SOURCETITLE
    FROM
        wos_b_2020.SOURCES s
    LEFT JOIN
        wos_b_2020.ITEMS i
        ON
            s.PK_SOURCES = i.FK_SOURCES
    LEFT JOIN
        wos_b_2020.ITEMS_AUTHORS_INSTITUTIONS iai
        ON
            i.PK_ITEMS = iai.FK_ITEMS
    LEFT JOIN
        wos_b_2020.AUTHORS a
        ON
            iai.FK_AUTHORS = a.PK_AUTHORS
    WHERE
        lower(s.SOURCETITLE) in (
            'astronomy and astrophysics review',
            'living reviews in solar physics',
            'astrophysical journal letters',
            'astrophysical journal, supplement series',
            'nature astronomy',
            'astronomical journal',
            'space science reviews',
            'astrophysical journal',
            'publications of the astronomical society of the pacific',
            'astronomy and astrophysics',
            'monthly notices of the royal astronomical society: letters',
            'monthly notices of the royal astronomical society',
            'publications of the astronomical society of japan',
            'new astronomy reviews',
            'icarus',
            'physics of the dark universe',
            'astrodynamics',
            'publications of the astronomical society of australia',
            'chinese physics c',
            'acta astronomica',
            'frontiers in astronomy and space sciences',
            'astroparticle physics',
            'revista mexicana de astronomia y astrofisica',
            'physics of the earth and planetary interiors',
            'journal of high energy astrophysics',
            'molecular astrophysics',
            'solar physics',
            'journal of cosmology and astroparticle physics',
            'international journal of modern physics d',
            'experimental astronomy'
            )
    AND
        i.PUBYEAR BETWEEN 2019 AND 2020
"""