# Add Articles to DB
    1. Get the list of new articles from CSV file
    2. Get article data from crossref
    3. Add articles to DB

In [44]:
# Libraries
# library containign functions that read and write to csv files
import lib.handle_csv as csvh
# library for getting data from crossref
import lib.crossref_api as cr_api
# library for mapping json data
import lib.handle_json as hjson
# library for connecting to the db
import lib.handle_db as dbh

db_conn = dbh.DataBaseAdapter('ukch_articles.sqlite')

input_file = "processed_csv/AddNewArticles202002.csv"

csv_articles, _ = csvh.get_csv_data(input_file,'Num')


cr_articles = {} # Num (from CHUK), DOI, type, and other fiedls from CR.
article_columns=["Num"]
# list of article authors from cross ref
cr_authors = {} # AuthorNum, Firstname, Middle name, Last Name
author_columns = ["AuthorNum", "FirstName", "MiddleName", "LastName"]
# list of article-author links
cr_article_authour_link = {} # AuthorNum, DOI
article_authour_columns = ["DOI","AuthorNum"]

# get institutions list from affiliations table
institutions_list = db_conn.get_value_list("Affiliations", "institution")
# get coutries from affiliations table
countries_list = db_conn.get_value_list("Affiliations","country")
# get department list from affiliations table
department_list = db_conn.get_value_list("Affiliations","department")
# get faculty list from affiliations table
faculty_list = db_conn.get_value_list("Affiliations","faculty")
# get research group list from affiliations table
group_list = db_conn.get_value_list("Affiliations", "work_group")


for art_num in csv_articles:
    article_title = csv_articles[art_num]['Title']
    doi_text = csv_articles[art_num]['DOI']
    # get article data from CrossRef
    while True:
        try_n = 1
        article_data = cr_api.getBibData(doi_text)
        if article_data != {}:
            break
        else:  
            print("****************************************************")
            print("retry", try_n, article_data)
            try_n += 1
    # names for article data columns include/ignore
    data_keys = list(article_data.keys())
    for key in data_keys:
        if not (key in article_columns) and \
           key not in ['author', 'assertion', 'indexed', 'funder',
                       'content-domain','created','update-policy', 'source',
                       'is-referenced-by-count','prefix','member',
                       'reference','original-title','language','deposited',
                       'score', 'subtitle', 'short-title', 'issued',
                       'alternative-id','relation','ISSN','container-title-short']:
            article_columns.append(key)
    new_row = {}
    # map article data
    for key in article_columns:
        if key == 'Num':
            new_row['NumUKCH'] = art_num
        else:
            if key in article_data.keys():
                new_row[key] = hjson.jsonToPlainText(article_data[key])
    #print("*******************ARTICLE DATA*********************")
    #print(new_row)
    # map author data
    cr_articles[art_num] = new_row
        #print(cr_articles)
    for author in article_data['author']:
        new_author={}
        aut_num = len(cr_authors) + 1
        new_author["AuthorNum"] = aut_num
        new_author['LastName'] = author['family']
        if 'given' in author.keys():
            new_author['Name'] = author['given']
        if 'given' in author.keys():
            new_author['sequence'] = author['sequence']
        if 'ORCID' in author.keys():
            new_author['ORCID'] = author['ORCID']
        else:
            new_author['ORCID'] = "None"
        # parse affiliations and create affiliation links
        if 'affiliation'in author.keys():
            affiliations=""
            for affl in author['affiliation']:
                affiliations += affl['name'] + "|"
            new_author['affiliations'] = affiliations
            inspected = False
            while not inspected:
                #new_title = working_file[art_num]['Title']
                print('Affiliation:', affiliations)
                print('***************************************************************')
                print("Options:\n\ta) single\n\tb) multiple")
                print("selection:")
                usr_select = input()
                if usr_select == 'b':
                    #working_file[art_num]['ignore']=3 # visual inspection
                    inspected = True
                    print("parse multiple")
                elif usr_select == 'a':
                    inspected = True
                    prin("parse single")
            
            
            
            
        cr_authors[aut_num] = new_author
        art_auth_link = len(cr_article_authour_link)+1
        new_art_auth_link={}
        new_art_auth_link['DOI'] = doi_text
        new_art_auth_link['AuthorNum'] = aut_num
        cr_article_authour_link[art_auth_link] = new_art_auth_link
    #print("********************AUTHOR DATA*********************")
    #print(article_data['author'])

Affiliation: UK Catalysis Hub|Research Complex at Harwell|Rutherford Appleton Laboratory|Didcot OX11 0FA|UK|
***************************************************************
Options:
	a) single
	b) multiple
selection:
a


NameError: name 'prin' is not defined

In [45]:
    country_synonyms = {'UK':'United Kingdom','U.K.':'United Kingdom','G.B.':'United Kingdom'}
    print (author['affiliation'], len(author['affiliation']))
    inst_str = dept_str = faculty_str = group_str = ctry_str = ""
    qry_where_str = ""
    addr_list=[]
    for indx in range(0, len(author['affiliation'])):
        #print (author['affiliation'][indx]['name'])
        if author['affiliation'][indx]['name'] in institutions_list:
            inst_str = author['affiliation'][indx]['name']
        elif author['affiliation'][indx]['name'] in department_list:
            dept_str = author['affiliation'][indx]['name']
        elif author['affiliation'][indx]['name'] in faculty_list:
            faculty_str = author['affiliation'][indx]['name']
        elif author['affiliation'][indx]['name'] in group_list: 
            group_str = author['affiliation'][indx]['name']
        elif author['affiliation'][indx]['name'] in countries_list:
            ctry_str = author['affiliation'][indx]['name']
        elif author['affiliation'][indx]['name'] in country_synonyms:
            ctry_str = country_synonyms[author['affiliation'][indx]['name']]
        else:
            addr_list.append(author['affiliation'][indx]['name'])
    
    qry_where_str += "institution = '" + inst_str + "'"        
    if dept_str != "":
        qry_where_str += " AND department = '" + dept_str + "'"
    if faculty_str != "":
        qry_where_str += " AND faculty = '" + faculty_str + "'"
    if group_str != "":
        qry_where_str += " AND group = '" + group_str + "'"
    if ctry_str != "":
        qry_where_str += " AND country = '" + ctry_str + "'"
    print ('Institution:', inst_str)
    print ('Department:', dept_str)
    print ('Faculty:', faculty_str)
    print ('Group:', group_str)
    print ('Country:', ctry_str) 
    print ('Address',  addr_list)
    print (qry_where_str)
    db_conn.get_values("affiliations","id",qry_where_str)

[{'name': 'UK Catalysis Hub'}, {'name': 'Research Complex at Harwell'}, {'name': 'Rutherford Appleton Laboratory'}, {'name': 'Didcot OX11 0FA'}, {'name': 'UK'}] 5
Institution: Research Complex at Harwell
Department: UK Catalysis Hub
Faculty: 
Group: 
Country: United Kingdom
Address ['Rutherford Appleton Laboratory', 'Didcot OX11 0FA']
institution = 'Research Complex at Harwell' AND department = 'UK Catalysis Hub' AND country = 'United Kingdom'


AttributeError: 'DataBaseAdapter' object has no attribute 'get_values'