# Add mapping of ISSN to ISSN-L to database

Target table: `mid.journal_issn_to_issnl`

In [1]:
import sys

In [2]:
# Put the directory two levels up on the import path; presumably that is where
# the project's `app` and `models` modules live — TODO confirm.
sys.path.append('../..')

In [3]:
from datetime import datetime, timezone
import json
from tqdm import tqdm
from pathlib import Path
import gzip

In [4]:
import pandas as pd
import numpy as np

In [5]:
from app import db
from models import Source, ISSNtoISSNL
from sqlalchemy import text
from sqlalchemy.orm import Load
from sqlalchemy.exc import MultipleResultsFound

139626634576256: loading merged_into_institutions_dict
139626634576256: loading valid concept IDs
139626634576256: loading valid topic IDs


In [6]:
# Close the session obtained from `app` before running any of the queries below.
# NOTE(review): presumably this clears state left over from importing the app — confirm.
db.session.close()

In [7]:
%%time
# Pull the whole ISSN datafile snapshot table into memory in one query.
sq = """select * from issn_audit_20240321.issn_ic_datafile_202402"""
df_datafile = pd.read_sql_query(sql=sq, con=db.engine)
# Row count of the raw snapshot (displayed as the cell output).
len(df_datafile)

CPU times: user 569 ms, sys: 170 ms, total: 739 ms
Wall time: 1.57 s


214606

In [8]:
# Keep only the columns used for the mapping, and only rows where both
# identifiers are present.
wanted_columns = ['issn', 'issnl', 'category', 'comment']
df_issns = df_datafile[wanted_columns].dropna(subset=['issn', 'issnl'])
print(len(df_issns))
# ISSN-L values that must not end up in the mapping.
ignore = ['0000-0000']
has_ignored_issnl = df_issns['issnl'].isin(ignore)
df_issns = df_issns[~has_ignored_issnl]
print(len(df_issns))

213508
213489


In [9]:
# Index by ISSN; verify_integrity makes pandas raise if any ISSN appears twice.
df_issns = df_issns.set_index('issn', verify_integrity=True)

In [10]:
# Summarize the ISSN-indexed frame: row count, columns, non-null counts, dtypes.
df_issns.info()

<class 'pandas.core.frame.DataFrame'>
Index: 213489 entries, 2221-9781 to 8756-758X
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   issnl     213489 non-null  object
 1   category  213489 non-null  object
 2   comment   213489 non-null  object
dtypes: object(3)
memory usage: 6.5+ MB


In [11]:
# Insert one ISSNtoISSNL object per ISSN and commit them all in a single transaction.
try:
    # One timestamp computed up front so every row from this run shares it.
    updated_date = datetime.now(timezone.utc).isoformat()
    records = zip(df_issns.index, df_issns['issnl'], df_issns['comment'])
    for issn, issnl, comment in tqdm(records, total=len(df_issns)):
        db.session.add(
            ISSNtoISSNL(
                issn=issn,
                issnl=issnl,
                # Falsy comments (e.g. empty strings) are passed as None.
                note=comment or None,
                updated_date=updated_date,
            )
        )
    # Commit only once, after every object has been added to the session.
    db.session.commit()
finally:
    # Always close the session, whether or not the commit succeeded.
    db.session.close()

100%|██████████| 213489/213489 [00:13<00:00, 15882.40it/s]


In [12]:
# Build a dict mapping each ISSN-L to the list of ISSNs recorded under it.
# Only rows in these datafile categories are used.
categories = ['Register', 'Work', 'Free', 'Validation Request']
# ISSN-L values that must not appear as keys.
ignore = [
    '0000-0000',
]
keep = (
    df_datafile['issnl'].notna()
    & df_datafile['category'].isin(categories)
    & ~df_datafile['issnl'].isin(ignore)
)
x = df_datafile.loc[keep, ['issn', 'issnl']]
# Ignored ISSN-L rows were removed by the mask above, so no group can carry an
# ignored key and no per-group check is needed.
issnl_to_issn = {
    issnl: group['issn'].tolist()
    for issnl, group in x.groupby('issnl')
}
# Number of distinct ISSN-L keys (displayed as the cell output).
len(issnl_to_issn)

140984