# Check fo duplicate articles and data objects
A list of publications is obtainded from the app database. This list will contain a titles, IDs and DOIs which need to be explored to look for duplicates. 
The steps of the process are: 
 1. get a Title, DOI, and URL for each publication
 2. revise each element of the list for duplicates

In [1]:
# Libraries
# library containign functions that read and write to csv files
import lib.handle_csv as csvh
# library for connecting to the db
import lib.handle_db as dbh
# library for handling text matchings
import lib.text_comp as txtc
# library for getting data from crossref
import lib.crossref_api as cr_api
# library for handling url searchs
import lib.handle_urls as urlh
# managing files and file paths
from pathlib import Path
# add aprogress bar
from tqdm import notebook
#library for handling json files
import json
# library for using regular expressions
import re
# library for handling http requests
import requests
# import custom functions (common to various notebooks)
import processing_functions as pr_fns

# datetime parsing
from datetime import datetime

current_step = 1

## Verify if there are duplicates in articles

1. Open the current publication list from the appdb
2. read each entry and check if there are duplicates in doi, url or title



In [2]:
# deal with no pdf_file column
def get_pubs_list(db_path):
    pubs_list = None
    try:
        pubs_list = pr_fns.get_pub_data(db_path)
    except Exception as inst:
        if 'pdf_file' in inst.args[0]:
            print('problem articles table does not have pdf_file column')
            pass
    try:
        if pubs_list == None:
            pubs_list = pr_fns.get_pub_app_data(db_path)
    except Exception as inst:
        print(type(inst))
        print(inst.args)
        print(inst)
        print('another problem')    
    return pubs_list

# 1 current app DB
db_name = 'production'
#ukchapp_db = "../mcc_data/"+db_name+".sqlite3"
ukchapp_db = "./db_files/"+db_name+".sqlite3"
while not Path(ukchapp_db).is_file():
    print('Please enter the name of app db file:')
    ukchapp_db = input()
    
#  get publication data from the ukch app
app_pubs = get_pubs_list(ukchapp_db)

In [3]:
    
# 2 read each entry and check if there are duplicates in doi, url or title
dup_list={}
dup_count = 0
if current_step == 1 and app_pubs != None:  
    dups = []
    for idx, a_pub in enumerate(notebook.tqdm(app_pubs)):
        pub_id = a_pub[0]
        pub_title = a_pub[1]
        pub_doi = a_pub[2]
        pub_link = a_pub[3]
        # verfy if dois are duplicated
        if pub_doi != None and pub_doi != "":
            for i_indx in range(idx+1, len(app_pubs)):
                #print(pub_doi, app_pubs[i_indx][2])
                if app_pubs[i_indx][2]!=None and pub_doi.strip().lower() ==  app_pubs[i_indx][2].strip().lower():
                    print("\nDOI", pub_doi, "duplicated at:", i_indx, app_pubs[i_indx], app_pubs[idx] )
                    a_dup = {'pub_comp': app_pubs[idx], 'pub_dup': app_pubs[i_indx],"dup_at":'DOI'}
                    dup_count+=1
                    dup_list[dup_count] = a_dup
        # verify if urls are all unique
        if pub_link != None:
            for i_indx in range(idx+1, len(app_pubs)):
                if app_pubs[i_indx][3]!=None and pub_link.strip().lower() ==  app_pubs[i_indx][3].strip().lower():
                    print("\nLink", pub_link, "duplicated at:", i_indx, app_pubs[i_indx], app_pubs[idx] )    
                    a_dup = {'pub_comp': app_pubs[idx], 'pub_dup': app_pubs[i_indx],"dup_at":'URL'}
                    dup_count+=1
                    dup_list[dup_count] = a_dup
        # verify if titles are all unique
        if pub_title != None:
            for i_indx in range(idx+1, len(app_pubs)):
                similarity = txtc.similar(pub_title.strip().lower(), app_pubs[i_indx][1].strip().lower())
                #print(similarity)
                if app_pubs[i_indx][1]!=None and pub_title.strip().lower() ==  app_pubs[i_indx][1].strip().lower():
                    print("\nTitle", pub_title, "duplicated at:", i_indx,app_pubs[i_indx][1], app_pubs[idx][1])     
                    print("Similarity:", similarity)
                    a_dup = {'pub_comp': app_pubs[idx], 'pub_dup': app_pubs[i_indx],"dup_at":'Title'}
                    dup_count+=1
                    dup_list[dup_count] = a_dup
                elif similarity > 0.8:
                    print("Title", similarity, ":\n\t", pub_id, pub_title, "\nSimilar to:\n\t ", i_indx,app_pubs[i_indx][1]) 
if len(dup_list) > 0:
        csvh.write_csv_data(dup_list, 'dup_'+db_name+'.csv')
else:
    print ("No duplicate articles in DB")

  0%|          | 0/750 [00:00<?, ?it/s]

Title 0.8016194331983806 :
	 26 Comparing ammonia diffusion in NH3-SCR zeolite catalysts: a quasielastic neutron scattering and molecular dynamics simulation study 
Similar to:
	  95 Methanol diffusion in zeolite HY: a combined quasielastic neutron scattering and molecular dynamics simulation study
Title 0.8110236220472441 :
	 111 Methanol diffusion in zeolite HY: a combined quasielastic neutron scattering and molecular dynamics simulation study 
Similar to:
	  649 Methanol diffusion and dynamics in zeolite H-ZSM-5 probed by quasi-elastic neutron scattering and classical molecular dynamics simulations
Title 0.8016877637130801 :
	 215 Dinuclear Zinc Salen Catalysts for the Ring Opening Copolymerization of Epoxides and Carbon Dioxide or Anhydrides 
Similar to:
	  549 Synergic Heterodinuclear Catalysts for the Ring-Opening Copolymerization (ROCOP) of Epoxides, Carbon Dioxide, and Anhydrides
Title 0.8865979381443299 :
	 582 Understanding the mechanochemical synthesis of the perovskite LaMn

## Verify if there are duplicates in data objects

1. get the current data objects list from the appdb
2. read each entry and check if there are duplicates in doi, url or title



In [4]:
# 1 current app DB
dup_list={}
#db_name = 'development'
#ukchapp_db = "db_files/"+db_name+".sqlite3"
#ukchapp_db = "../mcc_data/"+db_name+".sqlite3"
while not Path(ukchapp_db).is_file():
    print('Please enter the name of app db file:')
    ukchapp_db = input()
    
#  get datasets list from the ukch app
app_datasetes = pr_fns.get_dataset_data(ukchapp_db)

# 2 read each entry and check if there are duplicates in doi, url or title
if current_step == 1:  
    dup_list={}
    dup_count = 0
    for idx, a_ds in enumerate(notebook.tqdm(app_datasetes)):
        ds_id = a_ds[0]
        ds_doi = a_ds[1]
        ds_url = a_ds[2]
        ds_name = a_ds[3]
        #print (ds_id, ds_doi, ds_url, ds_name)
        # verfy if dois are duplicated
        if ds_doi != None and ds_doi != '':
            for i_indx in range(idx+1, len(app_datasetes)):
                #print(pub_doi, app_pubs[i_indx][1])
                if app_datasetes[i_indx][1]!=None and ds_doi.strip().lower() ==  app_datasetes[i_indx][1].strip().lower():
                    print("\nDuplicate DOI found",
                          "\nDO ID:", ds_id, "Title:", ds_name, "\nDOI:", ds_doi,
                          "\nDO ID:", app_datasetes[i_indx][0], "Title:",
                          app_datasetes[i_indx][3], "\nDOI:", app_datasetes[i_indx][1])
                    a_dup = {'pub_comp': app_datasetes[idx], 'pub_dup': app_datasetes[i_indx],"dup_at":'DOI'}
                    dup_count+=1
                    dup_list[dup_count] = a_dup
        # verify if urls are all unique, if doi not equal it is OK, in some cases
        if ds_url != None:
            for i_indx in range(idx+1, len(app_datasetes)):
                #print(app_datasetes[i_indx])
                if app_datasetes[i_indx][2]!=None and ds_url.strip().lower() ==  app_datasetes[i_indx][2].strip().lower():
                    print("\nDuplicate found URL:",
                          "\nDO ID:", ds_id, "Title:", ds_name, "\nURL:", ds_url,
                          "\nDO ID:", app_datasetes[i_indx][0], "Title:", 
                          app_datasetes[i_indx][3], "\nURL:", app_datasetes[i_indx][2]) 
                    a_dup = {'pub_comp': app_datasetes[idx], 'pub_dup': app_datasetes[i_indx],"dup_at":'URL'}
                    dup_count+=1
                    dup_list[dup_count] = a_dup
        # verify if titles are all unique, if doi not equal it is OK in some cases
        if ds_name != None:
            for i_indx in range(idx+1, len(app_datasetes)):
                similarity = txtc.similar(ds_name.strip().lower(), app_datasetes[i_indx][3].strip().lower())
                #print(similarity)
                if app_datasetes[i_indx][1]!=None and ds_name.strip().lower() ==  app_datasetes[i_indx][3].strip().lower():
                    if app_datasetes[i_indx][1] != None and app_datasetes[idx][1] != None and \
                    app_datasetes[i_indx][1] == app_datasetes[idx][1]:
                        print("\nDuplicate found Similarity:", similarity,
                              "\nDO ID:", ds_id, "Title:", ds_name,"\nDO ID:", 
                              app_datasetes[i_indx][0], "Title:", app_datasetes[i_indx][3])
                        a_dup = {'pub_comp': app_datasetes[idx], 'pub_dup': app_datasetes[i_indx],"dup_at":'Title'}
                        dup_count+=1
                        dup_list[dup_count] = a_dup
                #elif similarity > 0.8:
                #    print(similarity, "Title:\n\t", ds_name, "\n\t- similar at:\n\t", i_indx,app_datasetes[i_indx][3]) 
                #    break
if len(dup_list) > 0:
    csvh.write_csv_data(dup_list, 'dup_do_'+db_name+'.csv')
    print("duplicates saved to",'dup_do_'+db_name+'.csv')
else:
    print ("No duplicate DOs in DB")

  0%|          | 0/1108 [00:00<?, ?it/s]


Duplicate found Similarity: 1.0 
DO ID: 907 Title: SI for: Teaching Enzyme Catalysis Using Interactive Molecular Dynamics in Virtual Reality 
DO ID: 908 Title: SI for: Teaching Enzyme Catalysis Using Interactive Molecular Dynamics in Virtual Reality
duplicates saved to dup_do_production.csv


# Verify authors

1. verify that there are no authors with no articles in the DB
2. verify that authors are unique remove close matches (need to check spellings)

In [5]:
def make_like_str(a_string):
    like_str = "%" + re.sub(r'[^a-zA-Z\s:]', '%', a_string) + "%"
    return like_str

def get_all_authors():
    db_conn = dbh.DataBaseAdapter(ukchapp_db)
    s_table = 'authors'
    s_fields = 'id, given_name, last_name, orcid'
    s_where = 'isap IS NULL' # 1 displayed authors - 0/NULL the rest
    authors_list = db_conn.get_values(s_table, s_fields, s_where)
    return authors_list

def get_null_authors():
    db_conn = dbh.DataBaseAdapter(ukchapp_db)
    s_table = 'authors'
    s_fields = 'id, given_name, last_name, orcid'
    s_where = 'id NOT IN (SELECT article_authors.author_id FROM article_authors)'
    authors_list = db_conn.get_values(s_table, s_fields, s_where)
    return authors_list

def get_similar_authors(name,surname,orcid):
    db_conn = dbh.DataBaseAdapter(ukchapp_db)
    s_table = 'authors'
    s_fields = 'id, given_name, last_name, orcid'
    like_surname = make_like_str(surname)
    surname.replace("'","''")
    s_where = "(orcid = '%s' AND last_name = '%s')"%(orcid, surname)
    s_where += "OR(given_name = '%s' AND last_name = '%s')"%(name, surname)
    s_where += "OR(last_name = '%s')"%(surname)
    s_where += "OR(last_name LIKE '%s')"%(like_surname)
    authors_list = db_conn.get_values(s_table, s_fields, s_where)
    return authors_list

def count_linked(author_id):
    db_conn = dbh.DataBaseAdapter(ukchapp_db)
    s_table = 'article_authors'
    s_fields = 'id, author_id'
    s_where = "(author_id = %s)"%(author_id)
    aa_list = db_conn.get_values(s_table, s_fields, s_where)
    return len(aa_list)

In [6]:
from IPython.display import clear_output

def save_ok_list(values_list, file_name):
    with open(file_name, 'w') as f:
        for an_id in values_list:
            f.write(str(an_id)+'\n')

def open_ok_list(file_name):
    with open(file_name) as f:
        lines = f.readlines()
    from_file = []
    for a_line in lines:
        from_file.append(int(a_line.replace('\n','')))
    return from_file

def add_to_ok_list(a_value, file_name):
    with open(file_name, 'a') as f:
        f.write(str(a_value)+'\n')

In [7]:
def is_single_word(a_word, another_word):
    single_word = False
    in_word = another_word.lower().find(a_word.lower())
    if in_word >= 0:
        single_word = True
        if in_word > 0 and  another_word[in_word-1].isalpha():
            sinlge_word = False
        if in_word + len(a_word) < len(another_word)-1 and another_word[in_word + len(a_word)].isalpha():
            single_word = False
    return single_word

def prune_similar_surnames(the_similars, a_surname):
    pruned_list = []
    for a_simi in the_similars:
        if a_simi[2] == a_surname or is_single_word(a_surname,a_simi[2]) :
            pruned_list.append(a_simi)
    return pruned_list

def get_initials(given_names):
    initials_1 =  [a_letter for a_letter in given_names if a_letter.isupper() ] 
    names = given_names.split()
    initials_2 = [a_name[0] for a_name in given_names]
    ri_1 = ". ".join(initials_1)+"."
    ri_2 = " ".join(initials_1)
    return ri_1, ri_2
    
def prune_similar_names(the_similars, a_name):
    pruned_list = []
    dot_initials, initials = get_initials(a_name)
    for a_simi in the_similars:
        if a_simi[1] == a_name or is_single_word(a_name,a_simi[1]) :
            pruned_list.append(a_simi)
        elif initials == a_simi[1] or dot_initials == a_simi[1]:
            pruned_list.append(a_simi)    
    return pruned_list

In [8]:
def set_author_value(a_id, a_column, a_value):
    db_conn = dbh.DataBaseAdapter(ukchapp_db)
    db_conn.set_value_table('authors', a_id, a_column, a_value)


def update_author(old_author, a_id, a_name, a_surname, a_orcid):
    if a_id != old_author[0]:
        return # do not update different authors        
    if a_name != old_author[1]:
        set_author_value(a_id, 'given_name', a_name)
    if a_surname != old_author[2]:
        set_author_value(a_id, 'last_name', a_surname)
    if a_orcid != old_author[3]:
        set_author_value(a_id, 'given_name', a_orcid)

def update_article_authors(new_id, old_id):
    db_conn = dbh.DataBaseAdapter(ukchapp_db)
    s_where = "author_id = %s" % (old_id)
    aa_ids = db_conn.get_values('article_authors', 'id', s_where)
    print(aa_ids)
    for an_id in aa_ids:
        db_conn.set_value_table('article_authors', an_id[0], 'author_id', new_id)

def delete_author(a_id):
    db_conn = dbh.DataBaseAdapter(ukchapp_db)
    db_conn.connection.execute("DELETE FROM authors WHERE id = %s" % (a_id ))
    db_conn.connection.commit()
    
def merge_authors(an_author, auth_similars):
    auth_id = an_author[0]
    auth_name = an_author[1]
    auth_surname = an_author[2]
    auth_orcid = an_author[3]

    for a_result in auth_similars:
        if auth_id > a_result[0]:
            auth_id = a_result[0]
        if auth_name != a_result[1] and len(auth_name) < len(a_result[1]):
            print("Which name \n\t 1)", auth_name, "\n\t 2)", a_result[1])
            opt_name = input()
            if opt_name == '2': auth_name = a_result[1] 
        if auth_surname != a_result[2] and len(auth_surname) < len(a_result[2]):
            print("Which surname \n\t 1)", auth_surname, "\n\t 2)", a_result[2])
            opt_name = input()
            if opt_name == '2': auth_surname = a_result[2] 
        if auth_orcid != a_result[3]:
            print("Which ORCID \n\t 1)", auth_orcid, "\n\t 2)", a_result[3])
            opt_name = input()
            if opt_name == '2': auth_orcid = a_result[3]

    if auth_id != an_author[0] or auth_name != an_author[1] or auth_surname != an_author[2] or auth_orcid != an_author[3]:
        print ("Will update", an_author, "to", auth_id , auth_name , auth_surname,auth_orcid )

    for a_result in auth_similars:
        if auth_id != a_result[0]:
            print("will update all author articles from:", a_result[0], "to:", auth_id)
            print("will delete author:", a_result[0])
    print("Continue?\n\t 1) proceed \n\t 2) cancel")
    opt_go = input()
    if opt_go == '1':
        update_author(an_author, auth_id, auth_name , auth_surname,auth_orcid)
        for a_result in auth_similars:
            if auth_id != a_result[0]:
                update_article_authors(auth_id,a_result[0])
                delete_author(a_result[0])


## Check authors with no articles

In [9]:
# verify that there are no authors with no articles in the DB

no_artaut_authors = get_null_authors()

delete_these=[]
for an_author in notebook.tqdm(no_artaut_authors):
    dup_id = an_author[0]
    dup_name = an_author[1]
    dup_surname = an_author[2]
    dup_orcid = an_author[3] if an_author[3]!=None else "NULL"
    print(dup_id, dup_name, dup_surname, dup_orcid)
    similars = get_similar_authors(dup_name, dup_surname, dup_orcid)
    print('There are %s similar authors in DB'%(len(similars)))
    for idx, a_simil in enumerate(similars):
        art_count = count_linked(a_simil[0])
        if art_count == 0:
            print(idx, a_simil,"Links:", art_count, "DELETE")
            delete_these.append(a_simil[0])
        else:
            print(idx, a_simil,"Links:", art_count, "CHECK")

print(delete_these)

0it [00:00, ?it/s]

[]


## Review duplicate authors (by name and last name)

In [10]:

# Manually review probable duplicate authors 
all_authors = get_all_authors()
revise_these=[]

safe_list = open_ok_list('safe_list.txt')
pacer_idx = 0
for an_author in notebook.tqdm(all_authors):   
    dup_id = an_author[0]
    dup_name = an_author[1]
    dup_surname = an_author[2]
    dup_orcid = an_author[3] if an_author[3]!=None else "NULL"
    if not(int(dup_id) in safe_list):
        if "'" in dup_surname: dup_surname = dup_surname.replace("'","''")
        if ("’") in dup_surname: dup_surname = dup_surname.replace("’","''")
        all_similars = get_similar_authors(dup_name, dup_surname, dup_orcid)
        similars = prune_similar_surnames(all_similars, dup_surname)
        similars = prune_similar_names(similars, dup_name)
        if len(similars) > 1 and  len(dup_surname)>3 :
            print("*************************************************")
            print("Author:", dup_id, dup_name, dup_surname, dup_orcid)
            print('There are %s similar authors in DB'%(len(similars)))

            for idx, a_simil in enumerate(similars):
                art_count = count_linked(a_simil[0])
                print(idx, a_simil,"Links:", art_count)
            print ("Options:\n\t (1) Ignore \n\t (2) Merge\n\t (3) next")
            sel_action = input()
            if sel_action == '1':
                safe_list = sorted(list(set(safe_list + [sublist[0] for sublist in similars])))
            if sel_action == '2':
                merge_authors(an_author, similars)
            pacer_idx+=1
            clear_output()
            if pacer_idx == 10:
                break
print("OK:", len(safe_list))
print(safe_list)
save_ok_list(safe_list, 'safe_list.txt')



  0%|          | 0/2005 [00:00<?, ?it/s]

OK: 22
[37, 41, 256, 338, 371, 470, 1035, 1178, 1672, 1781, 2044, 2212, 2268, 2308, 2634, 2727, 2841, 2970, 3008, 3067, 3314, 3340]


### Verify affiliations and author affiliations agains crossref affiliations
1 get group of crossref affiliations
2 get assigned affiliation
3 verify if OK if not show and ask for action


In [61]:
import craffiparser

def get_parser(db_):
    cr_parse = craffiparser.crp(db_)
    cr_parse.start_lists()
    return cr_parse


def get_cr_affis_article_author_ids(db_name):
    db_conn = dbh.DataBaseAdapter(db_name)
    a_table = 'cr_affiliations'
    a_column = 'article_author_id'
    cr_affis_article_author_ids = db_conn.get_value_list(a_table, a_column)
    return cr_affis_article_author_ids

def get_cr_lines_for_article_author_ids(db_name, art_author_id):
    db_conn = dbh.DataBaseAdapter(db_name)
    s_table = 'cr_affiliations'
    s_fields = '*'
    s_where = "article_author_id = %s"%(art_author_id)
    authors_list = db_conn.get_values(s_table, s_fields, s_where)
    return authors_list

def get_affiliation_id(db_name, parsed_affi):
    db_conn = dbh.DataBaseAdapter(db_name)
    s_table = 'affiliations'
    s_field = 'id'
    for k,v in parsed_affi.items():
        if "'" in v :parsed_affi[k]=v.replace("'","''")
    list_where = [ k +" = '"+ v +"'" for k,v in parsed_affi.items() if k != 'address']
    s_where = " AND ".join(list_where) 
    s_where = s_where.replace("= ''", "IS NULL")
    print (s_where)
    affi_list = db_conn.get_values(s_table, s_field, s_where)
    affi_id = None
    if affi_list !=[]:
        affi_id = affi_list[0][0]
    return affi_id

# could correct the close affiliation to get all the ones with 
# same institution and compare closest match
def get_close_affiliation_id(db_name, parsed_affi):
    db_conn = dbh.DataBaseAdapter(db_name)
    s_table = 'affiliations'
    s_field = 'id'
    for k,v in parsed_affi.items():
        if "'" in v :parsed_affi[k]=v.replace("'","''")
    list_where = [ k +" = '"+ v +"'" for k,v in parsed_affi.items() if k != 'address']
    s_where = " AND ".join(list_where) 
    s_where = s_where.replace("= ''", "IS NULL")
    #print (s_where)
    affi_list = db_conn.get_values(s_table, s_field, s_where)
    affi_id = None
    if affi_list !=[]:
        affi_id = affi_list[0][0]
    return affi_id

#get the id of affiliation assigned to an author affiliation record
def get_auth_affi_affiliation_id(db_name, aut_affi_id):
    db_conn = dbh.DataBaseAdapter(db_name)
    s_table = 'author_affiliations'
    s_field = 'affiliation_id'
    s_where = " id = %i" %(aut_affi_id)
    #print (s_where)
    affi_list = db_conn.get_values(s_table, s_field, s_where)
    if affi_list !=[]:
        affi_list = list(set([an_id[0] for an_id in affi_list]))
    return affi_list

#get the ids the author affiliation records for a given author
def get_auth_affi_id_for_author(db_name, art_aut_id):
    db_conn = dbh.DataBaseAdapter(db_name)
    s_table = 'author_affiliations'
    s_field = 'id'
    s_where = " article_author_id = %i" %(art_aut_id)
    #print (s_where)
    affi_list = db_conn.get_values(s_table, s_field, s_where)
    if affi_list !=[]:
        affi_list = list(set([an_id[0] for an_id in affi_list]))
    return affi_list

def is_one_line_affi(cr_parser, str_affi):
    is_one_liner = False
    parsed_affi = cr_parser.split_single(str_affi)
    parsed_no_blanks = {k:v for k,v in parsed_affi.items() if v != ''}
    if len(parsed_no_blanks) > 1:
        is_one_liner = True
    return is_one_liner

def check_assigned_affi_ol(db_name, cr_parser, cr_affi):
    assigned_ok = False
    if cr_affi[3] == -1:
        assigned_ok = True
        print('non assigned line for', str(cr_affi))         
    elif cr_affi[3] != None:
        parsed_affi = cr_parser.split_single(cr_affi[1])
        parsed_no_blanks = {k:v for k,v in parsed_affi.items() if v != ''} 
        affi_id = get_affiliation_id(db_name, parsed_affi)
        if affi_id == None:
            affi_id = get_close_affiliation_id(db_name, parsed_no_blanks)
            
        ##############################################################################
        # if there is no close affiliation should ask if add, assign or ignore
        # in the case of orphan lines it is ignore
        print(cr_affi)
        assigned_affi_id = get_auth_affi_affiliation_id(db_name, cr_affi[3])[0]
        
        print('Assigned ID:', assigned_affi_id, "Recovered ID:", affi_id)
        
        if assigned_affi_id == affi_id:
            assigned_ok = True
    return assigned_ok

def check_assigned_affi_ml(db_name, cr_parser, cr_affi_lines, art_aut_id):
    assigned_ok = True
    just_affi_lines = [x[1] for x in cr_affi_lines]
    parsed_affis = cr_parser.parse_multiline(just_affi_lines)
    # all affiliations belong to same article author
    aut_affis = get_auth_affi_id_for_author(db_name, art_aut_id)
    assigned_affis = []
    for an_aut_affi_id in aut_affis:
        assigned_affis.append(get_auth_affi_affiliation_id(db_name, an_aut_affi_id)[0])
 
    for one_parsed in parsed_affis:
        affi_id = get_affiliation_id(db_name, one_parsed)
        if affi_id == None:
            parsed_no_blanks = {k:v for k,v in one_parsed.items() if v != ''}
            affi_id = get_close_affiliation_id(db_name, parsed_no_blanks)
        # if there is no close affiliation should ask if add, assign or ignore
        # in the case of orphan lines it is ignore
        
        if not affi_id in assigned_affis:
            print('Assigned ID:', affi_id, "not in recoverd IDs list:", assigned_affis)
            assigned_ok = False
        else:
            print('Assigned ID:', affi_id, "in recoverd IDs list:", assigned_affis)
    return assigned_ok

##############################################################################
# FIX AFFILIATION ISSUES
# Likely problems:
#   a) only one assigned to two affiliations
#      Fixes:
#        - add missing author affiliation
#        - correct exiting author affiliation 
#   b) Mismatch in assigned affiliation
#      Fixes:
#        - correct exiting author affiliation 
#   c) Affiliation not assigned
#      Fixes:
#        - try to assign from existing
#        - if no existing one, ask if new should be added

def correct_oneline(db_name, cr_parser, cr_affis):
    # get a list of parsed affis with the ids of the corresponding cr_records
    parsed_affis  =[]
    for a_cr_affi in cr_affis:
        parsed_affis += cr_parser.parse_and_map_single(a_cr_affi)
    print(parsed_affis)
    # all belong to same article author
    art_author_id = cr_affis[0][2]
    
    print ("verifying affiliations for article author", art_author_id)
    
    art_auth_affis = get_auth_affi_id_for_author(db_name, art_author_id)
    
    print ("Article author affiliations:", len(art_auth_affis), art_auth_affis )
    
    print ("Parsed article author affiliations:", len(parsed_affis) )

    for affi_idx, parsed_affi in enumerate(parsed_affis):
        print('processing', parsed_affi)
        affi_vals = parsed_affi[0]
        cr_affi_ids = parsed_affi[1]
        correct_this = 0
        if affi_idx < len(art_auth_affis):
            correct_this = art_auth_affis[affi_idx]#
        affi_id = get_affiliation_id(db_name, affi_vals)
        if affi_id == None:
            parsed_no_blanks = {k:v for k,v in affi_vals.items() if v != ''}
            affi_id = get_close_affiliation_id(db_name, parsed_no_blanks)
        if correct_this != 0:
            # the affiliation does not exist but something was assigned to author affi
            if affi_id == None:
                print('{0:*^80}'.format('Affi does not exist'))
                print(affi_vals)
                affi_id = add_new_affiliation(db_name, affi_vals)
            # if the affiliation exists    
            if affi_id != None:
                print('{0:*^80}'.format(' Update Author Affiliation '))
                print('Update ID:', correct_this, 'with values:', affi_vals )
                # verify that country is not empty
                update_author_affiliation(db_name, correct_this, affi_id, affi_vals)
                update_cr_aai(db_name, cr_affi_ids[0], correct_this)
                
                
        else:
            if affi_id != None :
                print("Add author affiliation for author: ", art_author_id, 'with affi:', affi_vals) 
                new_affi_id = add_author_affiliation(db_name, art_aut_id, affi_id, affi_vals)
                #update cr_affis (assign author_affi_id)
                for cr_id in cr_affi_ids:
                    update_cr_aai(db_name, cr_id, new_affi_id)

def correct_multiline(db_name, cr_parser, cr_affis):
    # get a list of parsed affis with the ids of the corresponding cr_records
    parsed_affis = cr_parser.parse_and_map_multiline(cr_affis)
    print(parsed_affis)
    # all belong to same article author
    art_author_id = cr_affis[0][2]
    
    print ("verifying affiliations for article author", art_author_id)
    
    art_auth_affis = get_auth_affi_id_for_author(db_name, art_author_id)
    
    print ("Article author affiliations:", len(art_auth_affis), art_auth_affis )
    
    print ("Parsed article author affiliations:", len(parsed_affis) )
    
    if len(parsed_affis) > len(art_auth_affis):
        missing_author_affi = True

    for affi_idx, parsed_affi in enumerate(parsed_affis):
        affi_vals = parsed_affi[0]
        cr_affi_ids = parsed_affi[1]
        correct_this = 0
        if affi_idx < len(art_auth_affis):
            correct_this = art_auth_affis[affi_idx]

        affi_id = get_affiliation_id(db_name, affi_vals)
        if affi_id == None:
            parsed_no_blanks = {k:v for k,v in affi_vals.items() if v != ''}
            affi_id = get_close_affiliation_id(db_name, parsed_no_blanks)
        if correct_this != 0:
            # if the affiliation exists
            if affi_id != None:
                print('Update author_affiliation:', correct_this, 'with affi:', affi_vals )
                # need to verify that country is not empty
                update_author_affiliation(db_name, correct_this, affi_id, affi_vals)
                for cr_id in cr_affi_ids:
                    update_cr_aai(db_name, cr_id, affi_id)
            else:
                print('Affi does not exist')
                print(affi_vals)
                
        else:
            if affi_id != None:
                print("Add author affiliation for author: ", art_author_id, 'with affi:', affi_vals) 
                new_affi_id = add_author_affiliation(db_name, art_aut_id, affi_id, affi_vals)
                #update cr_affis (assign author_affi_id)
                for cr_id in cr_affi_ids:
                    update_cr_aai(db_name, cr_id, new_affi_id)
                
def make_author_affiliation(art_aut_id, affi_values, addr_values):
    # get smallest unit
    smallest_unit = "" 
    #id Institution> Faculty > School > Department > Work_group + address + Country
    if affi_values[4] != None and  len(affi_values[4]) > 0: #'work_group'
        smallest_unit = affi_values[4]
    elif affi_values[2] != None and len(affi_values[2]) > 0 and smallest_unit == "": #'department'
        smallest_unit = affi_values[2]
    elif affi_values[9] != None and  len(affi_values[9]) > 0 and smallest_unit == "": #'school'
        smallest_unit = affi_values[9]
    elif affi_values[3] != None and len(affi_values[3]) > 0 and smallest_unit == "": #'faculty'
        smallest_unit = affi_values[3]
       
    ret_art_auth_affi = {}
    ret_art_auth_affi['article_author_id'] = art_aut_id
    if len(smallest_unit) > 0:
        ret_art_auth_affi['name'] = smallest_unit + ", "+  affi_values[1] #'institution'
    else:
        ret_art_auth_affi['name'] = affi_values[1]
    ret_art_auth_affi['short_name'] = affi_values[1]
    add_01 = ""
    if affi_values[3] != None and affi_values[3]  != "" and affi_values[3] != smallest_unit:
        add_01 = affi_values[3] 
    if affi_values[9] != None and affi_values[9] != "" and affi_values[9] != smallest_unit:
        if add_01 != "":
               add_01 += ", "+ affi_values[9]
        else:
               add_01 += affi_values[9]
    if affi_values[2] != None and affi_values[2] != "" and affi_values[2] != smallest_unit:
        if add_01 != "":
               add_01 += ", "+ affi_values[2]
        else:
               add_01 += affi_values[2]
    if add_01 != "":
        ret_art_auth_affi['add_01'] = add_01
        ret_art_auth_affi['add_02'] = addr_values[1] 
        ret_art_auth_affi['add_03'] = addr_values[2]
        ret_art_auth_affi['add_04'] = addr_values[3]
        ret_art_auth_affi['add_05'] = addr_values[4]
    else:
        ret_art_auth_affi['add_01'] = addr_values[1]
        ret_art_auth_affi['add_02'] = addr_values[2] 
        ret_art_auth_affi['add_03'] = addr_values[3]
        ret_art_auth_affi['add_04'] = addr_values[4]
    
    ret_art_auth_affi['country'] = addr_values[5]
    ret_art_auth_affi['affiliation_id'] = affi_values[0]
    ret_art_auth_affi['created_at'] = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    ret_art_auth_affi['updated_at'] = ret_art_auth_affi['created_at'] 
    return ret_art_auth_affi                

def build_address_row(affi, affi_vals):
    address_row = [0,None,None,None,None,None]
    if 'address' in affi_vals:
        address_row[1] = affi_vals['address']
    if 'country' in affi_vals:
        address_row[5] = affi_vals['country']
    else:
        address_row[5] = af
    return address_row

def add_author_affiliation(db_name, art_aut_id, affi_id, affi_values):
    print("Creating ", art_aut_id, affi_id, affi_values)
    db_conn = dbh.DataBaseAdapter(db_name)
    affiliation_row = list(db_conn.get_row("affiliations", affi_id))[0]
    address_row = build_address_row(affiliation_row, affi_values)
    print("Affiliation values", affiliation_row)
    print("Address values", address_row )
    new_auth_affi = make_author_affiliation(art_aut_id, affiliation_row, address_row)
    print('Adding:', new_auth_affi)
    new_aa_id = db_conn.put_values_table("author_affiliations", new_auth_affi.keys(), new_auth_affi.values())
    return new_aa_id

def is_affi_ok(an_affi):
    affi_ok = True
    print ("Cheking affi:", an_affi)
    # has institution and institution is not blank
    if an_affi['institution'] == '' or an_affi['institution'] == None:
        print('Affiliation Error: Missing institution')
        affi_ok = False
    if an_affi['country'] == '' or an_affi['country'] == None:
        print('Affiliation Error: country missing')
        affi_ok = False
        
    if an_affi['sector'] == '' or an_affi['sector'] == None:
        sector_sel = 0
        while not sector_sel in [1,2,3]:
            print('Affiliation Error: sector missing')
            print("\n1. Academia\n2. Industry\n3. Research Facility",
                  "\nSelect sector: ")
            sector_sel = int(input())
        an_affi['sector'] = sector_sel

    return affi_ok


def add_new_affiliation(db_name, affi_values):
    affi_values['sector']=''
    
    if not is_affi_ok(affi_values):
        return 0;
    db_conn = dbh.DataBaseAdapter(db_name)
    add_update_time = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    affiliation_new = affi_values
    del affiliation_new['address']
    if 'address' in affiliation_new.keys():
        del affiliation_new['address']
    if 'num' in affiliation_new.keys():
        del affiliation_new['num']
    affiliation_new['created_at'] = add_update_time
    affiliation_new['updated_at'] = add_update_time
    affiliation_id = db_conn.put_values_table("affiliations", affiliation_new.keys(), affiliation_new.values())
    return affiliation_id

# Update to correct diff with crossref record
def update_author_affiliation(db_name, aut_affi_id, affi_id, affi_values):
    print("Updating", aut_affi_id, affi_values)
    db_conn = dbh.DataBaseAdapter(db_name)
    affiliation_row = list(db_conn.get_row("affiliations", affi_id))[0]
    address_row = [0,None,None,None,None,None]
    if 'address' in affi_values:
        address_row[1] = affi_values['address']
    if 'country' in affi_values and affi_values['country'].strip() != '':
        address_row[5] = affi_values['country']
    else:
        address_row[5] = affiliation_row[5]
   
    auth_affi = make_author_affiliation(0, affiliation_row, address_row)
    update_time = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    
    print(auth_affi)
    for affi_col in auth_affi:
        if not affi_col in ["article_author_id", "created_at"]:
            new_value = auth_affi[affi_col]
            if isinstance(new_value, str):# new_value != None and not isinstance(new_value, int):
                if "'" in new_value: new_value = new_value.replace("'","''")
                if "’" in new_value: new_value = new_value.replace("’","''")
            print("updating aut_affi_id:", aut_affi_id, "column:", affi_col, "value:", new_value)
            db_conn.set_value_table('author_affiliations', aut_affi_id,  affi_col, new_value)
            

def update_cr_aai(db_name, cr_affi_id, auth_affi_id):
    db_conn = dbh.DataBaseAdapter(db_name)
    s_table = 'cr_affiliations'
    s_field = 'author_affiliation_id'
    db_conn = dbh.DataBaseAdapter(ukchapp_db)
    db_conn.set_value_table(s_table, cr_affi_id,  s_field , auth_affi_id)

In [62]:
       
#########################################################################
# VERIFY ARTICLE AUTHOR AFFILITIONS VS CR_AFFILIATIONS
# 0 Verify integrity of affiliations
# 1 Get list of article_author_ids from CR_affi
# 2 For each article_author_id:
#   1 Get CR_affi lines
#   2 verify CR_affi lines
#     1 check if 
#        a) one affiliation per cr_affi 
#           parse each single affiliation
#           check if assigned affiliation is OK (assigned ID matches calculated ID)
#        b) multiple lines form an affiliation (2+)
#           parse each multi-line affiliation
#           check if assigned affiliation is OK

affi_parser = get_parser(ukchapp_db)

list_art_aut_ids = get_cr_affis_article_author_ids(ukchapp_db)

already_ok = open_ok_list('ok_cr_affis.txt')

for art_aut_id in list_art_aut_ids:
    if not art_aut_id in already_ok:
        print ('Article Author: ', art_aut_id)
        cr_lines = get_cr_lines_for_article_author_ids(ukchapp_db, art_aut_id)
        print('{0:*^80}'.format('CR Affilitations found:'), "\n", cr_lines)
        all_one_liners = True
        print('{0:*^80}'.format('Check if CR lines are one liners:'))
        for a_cr_line in cr_lines:
            one_line_affi = is_one_line_affi(affi_parser, a_cr_line[1])
            print( a_cr_line[1], one_line_affi)
            if not one_line_affi:
                all_one_liners = False
        if all_one_liners:
            assigned_ok = False
            print('{0:*^80}'.format('verify one liners'))
            for a_cr_line in cr_lines:
                assigned_ok = check_assigned_affi_ol(ukchapp_db, affi_parser, a_cr_line)
                print(assigned_ok)
                if not assigned_ok:
                    print("Problems with ", a_cr_line[0])
                    correct_oneline(ukchapp_db, affi_parser, cr_lines)
                    
                elif not art_aut_id in already_ok:
                    already_ok.append(art_aut_id)
        else:
            print('verify multiline affi')
            assigned_ok = check_assigned_affi_ml(ukchapp_db, affi_parser, cr_lines, art_aut_id)
            if not assigned_ok:
                print("@"*80)
                print("Problems with:\n", cr_lines[0][2], art_aut_id)
                correct_multiline(ukchapp_db, affi_parser, cr_lines)
                #break
            else:
                already_ok.append(art_aut_id)

print("OK:", len(already_ok))
print(already_ok)
save_ok_list(already_ok, 'ok_affi_list.txt')                     

Refreshing lists
Article Author:  16
****************************CR Affilitations found:***************************** 
 [(192, 'Catalysis Institute and c*change (DST-NRF Centre of Excellence in Catalysis), Department of Chemical Engineering, University of Cape Town, Rondebosch 7701, South Africa', 16, 8, '2020-09-09 15:30:44.658678', '2020-10-14 11:25:11.632200')]
***********************Check if CR lines are one liners:************************
Catalysis Institute and c*change (DST-NRF Centre of Excellence in Catalysis), Department of Chemical Engineering, University of Cape Town, Rondebosch 7701, South Africa True
*******************************verify one liners********************************
institution = 'University of Cape Town' AND school IS NULL AND department = 'Department of Chemical Engineering' AND faculty IS NULL AND work_group = 'Catalysis Institute and c*change (DST-NRF Centre of Excellence in Catalysis)' AND country = 'South Africa'
(192, 'Catalysis Institute and c*change

****************************CR Affilitations found:***************************** 
 [(640, 'Department of ChemistryUniversity of Warwick Coventry CV4 7AL UK', 360, 121, '2020-09-23 10:35:30.003304', '2020-10-14 11:25:30.481166')]
***********************Check if CR lines are one liners:************************
Department of ChemistryUniversity of Warwick Coventry CV4 7AL UK True
*******************************verify one liners********************************
institution = 'University of Warwick' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(640, 'Department of ChemistryUniversity of Warwick Coventry CV4 7AL UK', 360, 121, '2020-09-23 10:35:30.003304', '2020-10-14 11:25:30.481166')
Assigned ID: 176 Recovered ID: 176
True
Article Author:  361
****************************CR Affilitations found:***************************** 
 [(641, 'Warwick Manufacturing GroupUniversity of Warwick Coventry CV4 7AL UK'

****************************CR Affilitations found:***************************** 
 [(730, 'The UK Catalysis Hub, Research Complex at Harwell, Harwell, Oxon OX11 0FA, United Kingdom', 440, 139, '2020-09-23 10:40:21.552479', '2020-10-14 11:25:32.779877')]
***********************Check if CR lines are one liners:************************
The UK Catalysis Hub, Research Complex at Harwell, Harwell, Oxon OX11 0FA, United Kingdom True
*******************************verify one liners********************************
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(730, 'The UK Catalysis Hub, Research Complex at Harwell, Harwell, Oxon OX11 0FA, United Kingdom', 440, 139, '2020-09-23 10:40:21.552479', '2020-10-14 11:25:32.779877')
Assigned ID: 67 Recovered ID: 67
True
Article Author:  441
****************************CR Affilitations found:***************************** 
 [(731, 'Cardiff Catalysis Ins

institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(764, 'Cardiff Catalysis Institute; School of Chemistry; Cardiff University; Main Building, Park Place Cardiff CF10 3AT United Kingdom', 451, 242, '2020-09-23 10:41:09.756884', '2020-10-14 11:29:05.892255')
Assigned ID: 8 Recovered ID: 8
True
Article Author:  452
****************************CR Affilitations found:***************************** 
 [(765, 'Cardiff Catalysis Institute; School of Chemistry; Cardiff University; Main Building, Park Place Cardiff CF10 3AT United Kingdom', 452, 149, '2020-09-23 10:41:09.796535', '2020-10-14 11:25:34.035599')]
***********************Check if CR lines are one liners:************************
Cardiff Catalysis Institute; School of Chemistry; Cardiff University; Main Building, Park Place Cardiff CF10 3AT United Kingdom True
*******************************verify o

****************************CR Affilitations found:***************************** 
 [(779, 'Sasol Technology U.K., Ltd., Purdie Building, North Haugh, St Andrews, Fife KY16 9ST, U.K.', 466, 158, '2020-09-23 10:41:39.507997', '2020-10-14 11:25:35.183853')]
***********************Check if CR lines are one liners:************************
Sasol Technology U.K., Ltd., Purdie Building, North Haugh, St Andrews, Fife KY16 9ST, U.K. True
*******************************verify one liners********************************
institution IS NULL AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(779, 'Sasol Technology U.K., Ltd., Purdie Building, North Haugh, St Andrews, Fife KY16 9ST, U.K.', 466, 158, '2020-09-23 10:41:39.507997', '2020-10-14 11:25:35.183853')
Assigned ID: 5 Recovered ID: 5
True
Article Author:  467
****************************CR Affilitations found:***************************** 
 [(780, 'Institute of Chemical Sciences, H

Assigned ID: 181 in recoverd IDs list: [181]
Article Author:  479
****************************CR Affilitations found:***************************** 
 [(811, 'Inorganic Chemistry and Catalysis', 479, 857, '2020-09-23 10:42:27.872830', '2020-10-15 15:04:17.823236'), (812, 'Debye Institute for Nanomaterials Science', 479, 857, '2020-09-23 10:42:27.891017', '2020-10-15 15:04:17.865494'), (813, 'Utrecht University', 479, 857, '2020-09-23 10:42:27.909256', '2020-10-15 15:04:17.907809'), (814, 'Utrecht', 479, 857, '2020-09-23 10:42:27.931829', '2020-10-15 15:04:17.942032'), (815, 'The Netherlands', 479, 857, '2020-09-23 10:42:27.952590', '2020-10-15 15:04:17.978794')]
***********************Check if CR lines are one liners:************************
Inorganic Chemistry and Catalysis False
Debye Institute for Nanomaterials Science False
Utrecht University False
Utrecht False
The Netherlands False
verify multiline affi
institution = 'Utrecht University' AND school IS NULL AND department = 'Debye I

Assigned ID: 72 in recoverd IDs list: [72, 67]
Article Author:  567
****************************CR Affilitations found:***************************** 
 [(913, 'Department of Chemical and Biological Engineering', 567, 647, '2020-09-23 10:46:21.838521', '2020-10-15 14:47:33.508145'), (914, 'University of Sheffield', 567, 647, '2020-09-23 10:46:21.862836', '2020-10-15 14:47:33.554759'), (915, 'Sheffield', 567, 647, '2020-09-23 10:46:21.892365', '2020-10-15 14:47:33.600695'), (916, 'UK', 567, 647, '2020-09-23 10:46:21.922181', '2020-10-15 14:47:33.636813')]
***********************Check if CR lines are one liners:************************
Department of Chemical and Biological Engineering False
University of Sheffield False
Sheffield False
UK False
verify multiline affi
institution = 'University of Sheffield' AND school IS NULL AND department = 'Department of Chemical and Biological Engineering' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 163 in recov

****************************CR Affilitations found:***************************** 
 [(938, 'UK Catalysis Hub; Research Complex at Harwell; Rutherford Appleton Laboratory; Harwell Oxon Didcot OX11 0FA UK', 575, 487, '2020-09-23 10:46:46.853242', '2020-10-15 13:05:48.874197'), (939, 'Cardiff Catalysis Institute; School of Chemistry; Cardiff University; Cardiff CF10 3AT UK', 575, 488, '2020-09-23 10:46:46.880318', '2020-10-15 13:05:48.943045')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub; Research Complex at Harwell; Rutherford Appleton Laboratory; Harwell Oxon Didcot OX11 0FA UK True
Cardiff Catalysis Institute; School of Chemistry; Cardiff University; Cardiff CF10 3AT UK True
*******************************verify one liners********************************
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(938, 'UK Catalysis Hub; Research

processing [{'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and Innovation Campus; Chilton, Didcot OX11 0DE'}, [944]]
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
************************** Update Author Affiliation ***************************
Update ID: 597 with values: {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and Innovation Campus; Chilton, Didcot OX11 0DE'}
Updating 597 {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and Innovation Campus; Chilton, Didcot OX11 0DE'}
{'article_author_id': 0, 'name': 'Diamond Light S

institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(956, 'Department of Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, United Kingdom', 586, 609, '2020-09-23 10:47:17.693214', '2020-10-15 13:21:21.393377')
Assigned ID: 114 Recovered ID: 114
True
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(957, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot OX11 0FA, United Kingdom', 586, 610, '2020-09-23 10:47:17.719099', '2020-10-15 13:21:21.461117')
Assigned ID: 67 Recovered ID: 67
True
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(958, 'Cardiff Cata

****************************CR Affilitations found:***************************** 
 [(967, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot OX11 0FA, United Kingdom', 594, 600, '2020-09-23 10:47:18.209366', '2020-10-15 13:18:42.411433'), (968, 'Diamond Light Source Ltd., Harwell Science and Innovation Campus, Chilton, Didcot OX11 0DE, United Kingdom', 594, 601, '2020-09-23 10:47:18.229490', '2020-10-15 13:18:42.491233'), (969, 'School of Chemistry, University of Southampton, University Road, Southampton SO17 1BJ, United Kingdom', 594, 599, '2020-09-23 10:47:18.264998', '2020-10-15 13:18:42.571622')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot OX11 0FA, United Kingdom True
Diamond Light Source Ltd., Harwell Science and Innovation Campus, Chilton, Didcot OX11 0DE, United Kingdom True
School of Chemistry

****************************CR Affilitations found:***************************** 
 [(970, 'UK Catalysis Hub', 595, 774, '2020-09-23 10:48:19.223375', '2020-10-15 15:03:53.779558'), (971, 'RCaH', 595, 774, '2020-09-23 10:48:19.243843', '2020-10-15 15:03:53.819912'), (972, 'Rutherford Appleton Laboratory', 595, 774, '2020-09-23 10:48:19.273748', '2020-10-15 15:03:53.867228'), (973, 'Didcot', 595, 774, '2020-09-23 10:48:19.310115', '2020-10-15 15:03:53.904723'), (974, 'UK', 595, 774, '2020-09-23 10:48:19.343798', '2020-10-15 15:03:53.939749')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub False
RCaH False
Rutherford Appleton Laboratory False
Didcot False
UK False
verify multiline affi
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 67 in recoverd IDs list: [67]
Article Author:  596
****************************CR Affilitations

institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1011, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot OX11 0FA, U.K.', 604, 612, '2020-09-23 10:48:34.432406', '2020-10-15 13:21:21.681256')
Assigned ID: 67 Recovered ID: 67
True
institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1012, 'Department of Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, U.K.', 604, 613, '2020-09-23 10:48:34.458362', '2020-10-15 13:21:21.761572')
Assigned ID: 114 Recovered ID: 114
True
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(1013, 'Cardiff Catalysis Institute, 

Assigned ID: 67 Recovered ID: 67
True
institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1029, 'Department of Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, U.K.', 613, 633, '2020-09-23 10:48:35.218882', '2020-10-15 14:47:27.333306')
Assigned ID: 114 Recovered ID: 114
True
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1030, 'Diamond Light Source, Harwell Science and Innovation Campus, Chilton, Didcot OX11 0DE, U.K.', 613, 630, '2020-09-23 10:48:35.241484', '2020-10-15 14:47:27.412004')
Assigned ID: 224 Recovered ID: 446
False
Problems with  1030
[[{'institution': 'UK Catalysis Hub', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Research Complex at Harwell, Rutherfo

institution = 'University of Southampton' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1031, 'School of Chemistry, University of Southampton, Highfield, Southampton SO17 1BJ, U.K.', 613, 631, '2020-09-23 10:48:35.266292', '2020-10-15 14:47:27.486551')
Assigned ID: 166 Recovered ID: 166
True
Article Author:  614
****************************CR Affilitations found:***************************** 
 [(1032, 'Department of Chemistry; Kathleen Lonsdale Materials Chemistry; University College London, 20 Gordon Street; London WC1H 0AJ UK', 614, 169, '2020-09-23 10:49:10.151624', '2020-10-14 11:25:36.580148')]
***********************Check if CR lines are one liners:************************
Department of Chemistry; Kathleen Lonsdale Materials Chemistry; University College London, 20 Gordon Street; London WC1H 0AJ UK True
*******************************verify one liners********************************
institution

processing [{'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Diamond House; Harwell Science and Innovation Campus; Didcot Oxfordshire OX11 0DE'}, [1037]]
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
************************** Update Author Affiliation ***************************
Update ID: 433 with values: {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Diamond House; Harwell Science and Innovation Campus; Didcot Oxfordshire OX11 0DE'}
Updating 433 {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Diamond House; Harwell Science and Innovation Campus; Didcot Oxfordshire OX11 

****************************CR Affilitations found:***************************** 
 [(1052, 'School of Chemistry, University of Southampton, University Road, Southampton SO17 1BJ, U.K.', 630, 278, '2020-09-23 10:49:48.754530', '2020-10-14 11:29:16.629207')]
***********************Check if CR lines are one liners:************************
School of Chemistry, University of Southampton, University Road, Southampton SO17 1BJ, U.K. True
*******************************verify one liners********************************
institution = 'University of Southampton' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1052, 'School of Chemistry, University of Southampton, University Road, Southampton SO17 1BJ, U.K.', 630, 278, '2020-09-23 10:49:48.754530', '2020-10-14 11:29:16.629207')
Assigned ID: 166 Recovered ID: 166
True
Article Author:  631
****************************CR Affilitations found:**************************

(1060, 'UK Catalysis Hub, Research Complex at Harwell, Science and Technology Facilities Council, Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Oxon OX11 0QX, U.K.', 635, 897, '2020-09-23 10:49:49.187343', '2020-10-15 19:45:17.803621')
Assigned ID: 67 Recovered ID: 67
True
institution = 'ISIS Neutron and Muon Source' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1061, 'ISIS Pulsed Neutron and Muon Facility, Science and Technology Facilities Council, Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Oxon OX11 0QX, U.K.', 635, 403, '2020-09-23 10:49:49.215470', '2020-10-15 13:04:06.384673')
Assigned ID: 71 Recovered ID: 71
True
Article Author:  636
****************************CR Affilitations found:***************************** 
 [(1062, 'ISIS Pulsed Neutron and Muon Facility, Science and Technology Facilities Council, Rutherford Appleton Laboratory, Harwell Science and In

Assigned ID: 8 Recovered ID: 8
True
Article Author:  641
****************************CR Affilitations found:***************************** 
 [(1070, 'School of Chemistry, University of Southampton, University Road, Southampton SO17 1BJ, U.K.', 641, 336, '2020-09-23 10:49:49.688359', '2020-10-15 13:00:02.977694'), (1071, 'UK Catalysis Hub, Research Complex at Harwell, Science and Technology Facilities Council, Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Oxon OX11 0QX, U.K.', 641, 337, '2020-09-23 10:49:49.708168', '2020-10-15 13:00:03.056859')]
***********************Check if CR lines are one liners:************************
School of Chemistry, University of Southampton, University Road, Southampton SO17 1BJ, U.K. True
UK Catalysis Hub, Research Complex at Harwell, Science and Technology Facilities Council, Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Oxon OX11 0QX, U.K. True
*******************************verify one liners************

institution = 'University of Southampton' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
************************** Update Author Affiliation ***************************
Update ID: 604 with values: {'institution': 'University of Southampton', 'school': 'School of Chemistry', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Southampton'}
Updating 604 {'institution': 'University of Southampton', 'school': 'School of Chemistry', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Southampton'}
{'article_author_id': 0, 'name': 'School of Chemistry, University of Southampton', 'short_name': 'University of Southampton', 'add_01': 'Southampton', 'add_02': None, 'add_03': None, 'add_04': None, 'country': 'United Kingdom', 'affiliation_id': 166, 'created_at': '2024-08-06 16:36:22', 'updated_at': '2024-08-06 16:36:22'}
updating au

institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 114 in recoverd IDs list: [114, 67]
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country IS NULL
Assigned ID: 67 in recoverd IDs list: [114, 67]
Article Author:  660
****************************CR Affilitations found:***************************** 
 [(1111, 'Department of Chemistry', 660, 750, '2020-09-23 10:50:41.510935', '2020-10-15 15:03:47.061290'), (1112, 'University College London', 660, 750, '2020-09-23 10:50:41.531515', '2020-10-15 15:03:47.103314'), (1113, 'London', 660, 750, '2020-09-23 10:50:41.556726', '2020-10-15 15:03:47.146427'), (1114, 'UK', 660, 750, '2020-09-23 10:50:41.576890', '2020-10-15 15:03:47.181332'), (1115, 'UK Catalysis Hub', 660, 751, '2020-09-23 10:50:41.595892', '2020-10-15 15:03:47.252856')]


Assigned ID: 166 Recovered ID: 166
True
Article Author:  776
****************************CR Affilitations found:***************************** 
 [(1476, 'UOP LLC, a Honeywell Company, Des Plaines, IL 60017, USA', 776, 205, '2020-09-23 11:42:54.195158', '2020-10-14 11:25:41.507757')]
***********************Check if CR lines are one liners:************************
UOP LLC, a Honeywell Company, Des Plaines, IL 60017, USA True
*******************************verify one liners********************************
institution = 'UOP LLC, A Honeywell Company' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United States of America'
(1476, 'UOP LLC, a Honeywell Company, Des Plaines, IL 60017, USA', 776, 205, '2020-09-23 11:42:54.195158', '2020-10-14 11:25:41.507757')
Assigned ID: 397 Recovered ID: 397
True
Article Author:  777
****************************CR Affilitations found:***************************** 
 [(1477, 'UOP LLC, a Honeywell Company, De

Assigned ID: 114 Recovered ID: 114
True
Article Author:  785
****************************CR Affilitations found:***************************** 
 [(1501, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot OX11 0FA, United Kingdom', 785, 344, '2020-09-23 11:43:52.395197', '2020-10-15 13:00:24.977973'), (1502, 'Department of Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, United Kingdom', 785, 345, '2020-09-23 11:43:52.416318', '2020-10-15 13:00:25.056067')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot OX11 0FA, United Kingdom True
Department of Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, United Kingdom True
*******************************verify one liners********************************
institution = 'UK Catalysis Hub' AND school IS NULL AND department 

institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1511, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot OX11 0FA, United Kingdom', 791, 555, '2020-09-23 11:43:52.771354', '2020-10-15 13:15:44.394870')
Assigned ID: 67 Recovered ID: 67
True
institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1512, 'Department of Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, United Kingdom', 791, 556, '2020-09-23 11:43:52.789872', '2020-10-15 13:15:44.485267')
Assigned ID: 114 Recovered ID: 114
True
institution = 'Sohag University' AND school IS NULL AND department = 'Chemistry Department' AND faculty = 'Faculty of Science' AND work_group IS NULL AND country = 'Egypt'
(1513, 'Chemistry Department, Facult

institution = 'Lehigh University' AND school IS NULL AND department = 'Department of Materials Science and Engineering' AND faculty IS NULL AND work_group IS NULL AND country = 'United States of America'
(1526, 'Department of Materials Science and Engineering Lehigh University, 5 East Packer Avenue, Bethlehem, PA 18015, USA', 800, 5, '2020-09-23 11:44:45.483922', '2020-10-14 11:25:10.969501')
Assigned ID: 42 Recovered ID: 42
True
Article Author:  801
****************************CR Affilitations found:***************************** 
 [(1527, 'UK Catalysis Hub, Research Complex at Harwell (RCaH), Rutherford Appleton Laboratory, Harwell, Oxon OX11 0FA, UK', 801, 366, '2020-09-23 11:44:45.545807', '2020-10-15 13:00:50.870281'), (1528, 'Department of Chemistry UCL, 20 Gordon St., London WC1H 0AJ, UK', 801, 367, '2020-09-23 11:44:45.567968', '2020-10-15 13:00:50.949771')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub, Research Complex at Har

Assigned ID: 217 Recovered ID: 217
True
institution = 'Beijing Forestry University' AND school = 'College of Environmental Science and Engineering' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Peoples Republic of China'
(1540, 'College of Environmental Science and Engineering, Beijing Forestry University, 35 Qinghua East Road, Haidian District, Beijing 100083, China', 808, 455, '2020-09-23 11:45:15.187286', '2020-10-15 13:04:15.595036')
Assigned ID: 6 Recovered ID: 6
True
Article Author:  809
****************************CR Affilitations found:***************************** 
 [(1541, 'Center for Catalysis, Department of Chemistry and Center of Excellence for Innovation in Chemistry, Faculty of Science, Mahidol University, 272 Rama VI Road, Thung Phayathai, Ratchathewi, Bangkok 10400, Thailand', 809, 209, '2020-09-23 11:45:15.233338', '2020-10-14 11:25:42.000546')]
***********************Check if CR lines are one liners:************************
Center f

Assigned ID: 8 Recovered ID: 8
True
Article Author:  826
****************************CR Affilitations found:***************************** 
 [(1554, 'UK Catalysis Hub; Research Complex at Harwell; Oxon OX11 0FA UK', 826, 380, '2020-09-23 11:47:21.856136', '2020-10-15 13:02:15.571614'), (1555, 'Department of Chemistry; University College London; Gordon Street London WC1H 0AJ UK', 826, 381, '2020-09-23 11:47:21.893920', '2020-10-15 13:02:15.663097')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub; Research Complex at Harwell; Oxon OX11 0FA UK True
Department of Chemistry; University College London; Gordon Street London WC1H 0AJ UK True
*******************************verify one liners********************************
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1554, 'UK Catalysis Hub; Research Complex at Harwell; Oxon OX11 0FA UK', 826,

****************************CR Affilitations found:***************************** 
 [(1568, 'Department of Materials Science and Engineering, Lehigh University, 5 East Packer Avenue, Bethlehem, Pennsylvania 18015-3195, United States', 842, 241, '2020-09-23 11:47:54.529485', '2020-10-14 11:29:05.656000')]
***********************Check if CR lines are one liners:************************
Department of Materials Science and Engineering, Lehigh University, 5 East Packer Avenue, Bethlehem, Pennsylvania 18015-3195, United States True
*******************************verify one liners********************************
institution = 'Lehigh University' AND school IS NULL AND department = 'Department of Materials Science and Engineering' AND faculty IS NULL AND work_group IS NULL AND country = 'United States of America'
(1568, 'Department of Materials Science and Engineering, Lehigh University, 5 East Packer Avenue, Bethlehem, Pennsylvania 18015-3195, United States', 842, 241, '2020-09-23 11:47:54.529

****************************CR Affilitations found:***************************** 
 [(1585, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot, OX11 0FA, United Kingdom', 853, 368, '2020-09-23 11:48:55.114742', '2020-10-15 13:00:51.414949'), (1586, 'Department of Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, United Kingdom', 853, 369, '2020-09-23 11:48:55.143470', '2020-10-15 13:00:51.489875')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot, OX11 0FA, United Kingdom True
Department of Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, United Kingdom True
*******************************verify one liners********************************
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND coun

institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1597, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell Oxon, Didcot, OX11 0FA, United Kingdom', 862, 354, '2020-09-23 11:48:55.844031', '2020-10-15 13:00:43.418968')
Assigned ID: 67 Recovered ID: 67
True
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(1598, 'Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff, CF10 3AT, United Kingdom', 862, 355, '2020-09-23 11:48:55.875456', '2020-10-15 13:00:43.497790')
Assigned ID: 8 Recovered ID: 8
True
Article Author:  863
****************************CR Affilitations found:***************************** 
 [(1599, 'Dipartimento di Chimica, Università degli Studi di Milano, via Golgi 19, 20133 Mila

institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
Assigned ID: 8 in recoverd IDs list: [8]
Article Author:  883
****************************CR Affilitations found:***************************** 
 [(1631, 'Cardiff Catalysis Institute', 883, 698, '2020-09-23 11:50:01.178490', '2020-10-15 14:47:51.699397'), (1632, 'School of Chemistry', 883, 698, '2020-09-23 11:50:01.212549', '2020-10-15 14:47:51.736528'), (1633, 'Cardiff University', 883, 698, '2020-09-23 11:50:01.255663', '2020-10-15 14:47:51.785005'), (1634, 'Cardiff, UK', 883, 698, '2020-09-23 11:50:01.294064', '2020-10-15 14:47:51.820885')]
***********************Check if CR lines are one liners:************************
Cardiff Catalysis Institute False
School of Chemistry False
Cardiff University False
Cardiff, UK True
verify multiline affi
institution = 'Cardiff University' AND school = 'School

****************************CR Affilitations found:***************************** 
 [(1654, 'Clarendon Laboratory, Department of Physics, University of Oxford, Oxford OX1 3PU, United Kingdom', 914, 224, '2020-09-23 11:53:18.331257', '2020-10-14 11:25:44.242888')]
***********************Check if CR lines are one liners:************************
Clarendon Laboratory, Department of Physics, University of Oxford, Oxford OX1 3PU, United Kingdom True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Physics' AND faculty IS NULL AND work_group = 'Clarendon Laboratory' AND country = 'United Kingdom'
(1654, 'Clarendon Laboratory, Department of Physics, University of Oxford, Oxford OX1 3PU, United Kingdom', 914, 224, '2020-09-23 11:53:18.331257', '2020-10-14 11:25:44.242888')
Assigned ID: 161 Recovered ID: 161
True
Article Author:  919
****************************CR Affilitations 

Assigned ID: None not in recoverd IDs list: [67]
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Problems with:
 924 924
[[{'institution': 'UK Catalysis Hub', 'school': '', 'department': '', 'faculty': '', 'work_group': 'Cardiff Catalysis Institute', 'country': 'United Kingdom', 'address': 'The, Research Complex at Harwell, Rutherford Appleton Laboratory'}, [1677, 1678, 1679, 1680, 1681]]]
verifying affiliations for article author 924
Article author affiliations: 1 [849]
Parsed article author affiliations: 1
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
Affi does not exist
{'institution': 'UK Catalysis Hub', 'school': '', 'department': '', 'faculty': '', 'work_group': 'Cardiff Catalysis Institute', 'country': 'United Kingdom', 'address': 'The, Research Complex at Harwell, Rutherford Appleton Laboratory'}
Article Author:  925
***

institution = 'Aarhus University' AND school = 'School of Chemical Engineering and Technology' AND department = 'Department of Physics and Astronomy' AND faculty IS NULL AND work_group = 'Interdisciplinary Nanoscience Center (iNANO)' AND country = 'Denmark'
Assigned ID: None not in recoverd IDs list: [3, 23]
institution = 'Harbin Institute of Technology' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country IS NULL
Assigned ID: 23 in recoverd IDs list: [3, 23]
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Problems with:
 934 934
[[{'institution': 'Aarhus University', 'school': 'School of Chemical Engineering and Technology', 'department': 'Department of Physics and Astronomy', 'faculty': '', 'work_group': 'Interdisciplinary Nanoscience Center (iNANO)', 'country': 'Denmark', 'address': 'and, DK-8000 Aarhus C'}, [1718, 1719, 1720, 1721]], [{'institution': 'Harbin Institute of Technology', 'school': '', 'departm

institution = 'Aarhus University' AND school IS NULL AND department = 'Department of Physics and Astronomy' AND faculty IS NULL AND work_group = 'Interdisciplinary Nanoscience Center (iNANO)' AND country = 'Denmark'
Assigned ID: 3 in recoverd IDs list: [3]
Article Author:  941
****************************CR Affilitations found:***************************** 
 [(1744, 'Cardiff Catalysis Institute', 941, 724, '2020-09-23 11:55:04.350453', '2020-10-15 15:03:40.779332'), (1745, 'School of Chemistry', 941, 724, '2020-09-23 11:55:04.387986', '2020-10-15 15:03:40.820773'), (1746, 'Cardiff University', 941, 724, '2020-09-23 11:55:04.418028', '2020-10-15 15:03:40.854119'), (1747, 'Cardiff, UK', 941, 724, '2020-09-23 11:55:04.456377', '2020-10-15 15:03:40.894535'), (1748, 'The UK Catalysis Hub', 941, 725, '2020-09-23 11:55:04.485870', '2020-10-15 15:03:40.983537')]
***********************Check if CR lines are one liners:************************
Cardiff Catalysis Institute False
School of Chemistr

Article Author:  949
****************************CR Affilitations found:***************************** 
 [(1761, 'Diamond Light Source, Harwell Science and innovation Campus, Didcot, Oxon, OX11 0DE, U.K.', 949, 225, '2020-09-23 11:55:29.756141', '2020-10-14 11:25:44.417960')]
***********************Check if CR lines are one liners:************************
Diamond Light Source, Harwell Science and innovation Campus, Didcot, Oxon, OX11 0DE, U.K. True
*******************************verify one liners********************************
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(1761, 'Diamond Light Source, Harwell Science and innovation Campus, Didcot, Oxon, OX11 0DE, U.K.', 949, 225, '2020-09-23 11:55:29.756141', '2020-10-14 11:25:44.417960')
Assigned ID: 224 Recovered ID: 446
False
Problems with  1761
[[{'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '',

processing [{'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and innovation Campus, Didcot, Oxon, OX11 0DE'}, [1764]]
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
************************** Update Author Affiliation ***************************
Update ID: 490 with values: {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and innovation Campus, Didcot, Oxon, OX11 0DE'}
Updating 490 {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and innovation Campus, Didcot, Oxon, OX11 0DE'}
{'article_author_id': 0, 'name': 'Diamond Light Source

institution = 'Queen''s University Belfast' AND school = 'School of Chemistry and Chemical Engineering' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(3141, 'School of Chemistry and Chemical Engineering; Queens University Belfast; David Keir Building Belfast BT9 5AG UK', 981, 1547, '2021-06-29 10:58:20.103195', '2021-06-29 12:03:28.582440')
Assigned ID: 399 Recovered ID: 399
True
institution = 'The University of Manchester' AND school = 'School of Chemical Engineering and Analytical Science' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(3142, 'School of Chemical Engineering and Analytical Science; The University of Manchester; The Mill Manchester M13 9PL UK', 981, 1548, '2021-06-29 10:58:20.115674', '2021-06-29 12:03:28.632523')
Assigned ID: 84 Recovered ID: 84
True
Article Author:  982
****************************CR Affilitations found:***************************** 
 [(3143, 'School 

****************************CR Affilitations found:***************************** 
 [(3154, 'Departamento\rde Química e Instituto de Materiales y Nanotecnología, Universidad de La Laguna, Avda. Astrofísico Francisco Sánchez\rs/n, 38206 La Laguna,\rTenerife, Spain', 1008, 1578, '2021-06-29 10:58:28.334905', '2021-06-29 12:09:36.877342')]
***********************Check if CR lines are one liners:************************
Tenerife, Spain True,o de Materiales y Nanotecnología, Universidad de La Laguna, Avda. Astrofísico Francisco Sánchez
*******************************verify one liners********************************
institution = 'Universidad de La Laguna' AND school IS NULL AND department = 'Departamento de Química e Instituto de Materiales y Nanotecnología' AND faculty IS NULL AND work_group IS NULL AND country = 'Spain'
(3154, 'Departamento\rde Química e Instituto de Materiales y Nanotecnología, Universidad de La Laguna, Avda. Astrofísico Francisco Sánchez\rs/n, 38206 La Laguna,\rTenerife,

institution = 'Rey Juan Carlos University' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Chemical and Environmental Engineering Group' AND country = 'Spain'
Assigned ID: 69 in recoverd IDs list: [69]
Article Author:  1163
****************************CR Affilitations found:***************************** 
 [(3466, 'Chemical and Environmental Engineering Group', 1163, 1699, '2021-06-29 10:58:53.619132', '2021-06-29 12:10:31.485051'), (3467, 'Rey Juan Carlos University', 1163, 1699, '2021-06-29 10:58:53.632394', '2021-06-29 12:10:31.507142'), (3468, 'Móstoles', 1163, 1699, '2021-06-29 10:58:53.648937', '2021-06-29 12:10:31.533180'), (3469, 'Spain', 1163, 1699, '2021-06-29 10:58:53.686147', '2021-06-29 12:10:31.554559'), (3470, 'IMDEA Energy Institute', 1163, 1700, '2021-06-29 10:58:53.705416', '2021-06-29 12:10:31.605347')]
***********************Check if CR lines are one liners:************************
Chemical and Environmental Engineering Group False
Rey

institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country IS NULL
Assigned ID: 8 in recoverd IDs list: [8]
Article Author:  1553
****************************CR Affilitations found:***************************** 
 [(4150, 'Cardiff Catalysis Institute', 1553, 1938, '2021-06-29 10:59:58.748108', '2021-06-29 12:15:49.579459'), (4151, 'School of Chemistry', 1553, 1938, '2021-06-29 10:59:58.771799', '2021-06-29 12:15:49.602955'), (4152, 'Cardiff University', 1553, 1938, '2021-06-29 10:59:58.791770', '2021-06-29 12:15:49.633861'), (4153, 'Main Building', 1553, 1938, '2021-06-29 10:59:58.808125', '2021-06-29 12:15:49.647799'), (4154, 'Park Place', 1553, 1938, '2021-06-29 10:59:58.828900', '2021-06-29 12:15:49.676359')]
***********************Check if CR lines are one liners:************************
Cardiff Catalysis Institute False
School of Chemistry False
Cardiff University False


institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
Assigned ID: 8 in recoverd IDs list: [8]
Article Author:  1562
****************************CR Affilitations found:***************************** 
 [(4195, 'Cardiff Catalysis Institute', 1562, 2003, '2021-06-29 11:00:01.184875', '2021-06-29 12:17:01.265341'), (4196, 'School of Chemistry', 1562, 2003, '2021-06-29 11:00:01.200278', '2021-06-29 12:17:01.289788'), (4197, 'Cardiff University', 1562, 2003, '2021-06-29 11:00:01.223769', '2021-06-29 12:17:01.308659'), (4198, 'Cardiff', 1562, 2003, '2021-06-29 11:00:01.244824', '2021-06-29 12:17:01.330965'), (4199, 'CF10 3AT UK', 1562, 2003, '2021-06-29 11:00:01.272220', '2021-06-29 12:17:01.360817')]
***********************Check if CR lines are one liners:************************
Cardiff Catalysis Institute False
School of Chemistry False
Cardiff University 

****************************CR Affilitations found:***************************** 
 [(4222, 'Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff, CF10 3AT, U.K.', 1584, 2049, '2021-06-29 11:00:02.757642', '2021-06-29 12:17:07.708865')]
***********************Check if CR lines are one liners:************************
Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff, CF10 3AT, U.K. True
*******************************verify one liners********************************
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(4222, 'Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff, CF10 3AT, U.K.', 1584, 2049, '2021-06-29 11:00:02.757642', '2021-06-29 12:17:07.708865')
Assigned ID: 8 Recovered ID: 8
True
Article Author:  1585
****************************CR Affilitations found:**

institution = 'Queen''s University Belfast' AND school = 'School of Biological Sciences' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 59 in recoverd IDs list: [59]
Article Author:  1600
****************************CR Affilitations found:***************************** 
 [(4243, 'Department of Chemical Engineering and Analytical Science, University of Manchester, The Mill, Sackville Street, Manchester M1 3BB, United Kingdom', 1600, 2067, '2021-06-29 11:00:20.962690', '2021-06-29 12:17:09.798385')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering and Analytical Science, University of Manchester, The Mill, Sackville Street, Manchester M1 3BB, United Kingdom True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school IS NULL AND department = 'Department of Chemical Engineering and 

****************************CR Affilitations found:***************************** 
 [(4262, 'Department of Chemical Engineering, University College London, Torrington Place, WC1E 7JE London, United Kingdom', 1615, 1642, '2021-06-29 11:01:08.439828', '2021-06-29 12:09:44.306809')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering, University College London, Torrington Place, WC1E 7JE London, United Kingdom True
*******************************verify one liners********************************
institution = 'University College London' AND school IS NULL AND department = 'Department of Chemical Engineering' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(4262, 'Department of Chemical Engineering, University College London, Torrington Place, WC1E 7JE London, United Kingdom', 1615, 1642, '2021-06-29 11:01:08.439828', '2021-06-29 12:09:44.306809')
Assigned ID: 111 Recovered ID: 111
True
Article Author: 

institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 67 in recoverd IDs list: [67]
Article Author:  1634
****************************CR Affilitations found:***************************** 
 [(4288, 'Kathleen Lonsdale Building', 1634, 1569, '2021-06-29 11:01:18.944600', '2021-06-29 11:01:18.944600'), (4289, 'Department of Chemistry', 1634, 1569, '2021-06-29 11:01:18.972699', '2021-06-29 12:07:35.350066'), (4290, 'University College London', 1634, 1569, '2021-06-29 11:01:18.986587', '2021-06-29 12:07:35.368380'), (4291, 'London', 1634, 1569, '2021-06-29 11:01:19.010869', '2021-06-29 12:07:35.390474'), (4292, 'UK', 1634, 1569, '2021-06-29 11:01:19.028891', '2021-06-29 12:07:35.410095')]
***********************Check if CR lines are one liners:************************
Kathleen Lonsdale Building False
Department of Chemistry False
University College London False
London False
UK False
ve

****************************CR Affilitations found:***************************** 
 [(4312, 'School of Chemistry', 1646, 2089, '2021-06-29 11:01:25.847868', '2021-06-29 12:17:14.663326'), (4313, 'University of Southampton', 1646, 2089, '2021-06-29 11:01:25.874255', '2021-06-29 12:17:14.683001'), (4314, 'Southampton', 1646, 2089, '2021-06-29 11:01:25.894105', '2021-06-29 12:17:14.706660'), (4315, 'UK', 1646, 2089, '2021-06-29 11:01:25.910169', '2021-06-29 12:17:14.739655')]
***********************Check if CR lines are one liners:************************
School of Chemistry False
University of Southampton False
Southampton False
UK False
verify multiline affi
institution = 'University of Southampton' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 166 in recoverd IDs list: [166]
Article Author:  1647
****************************CR Affilitations found:***************************** 
 [(4316, 'De

institution = 'University of Cape Town' AND school IS NULL AND department = 'Department of Chemical Engineering' AND faculty IS NULL AND work_group = 'Catalysis Institute' AND country = 'South Africa'
Assigned ID: 139 in recoverd IDs list: [139]
Article Author:  1662
****************************CR Affilitations found:***************************** 
 [(4348, 'Department of Chemistry', 1662, 2101, '2021-06-29 11:01:32.794008', '2021-06-29 12:17:16.777609'), (4349, 'University College London', 1662, 2101, '2021-06-29 11:01:32.831037', '2021-06-29 12:17:16.797647'), (4350, 'London', 1662, 2101, '2021-06-29 11:01:32.857007', '2021-06-29 12:17:16.836306'), (4351, 'UK', 1662, 2101, '2021-06-29 11:01:32.874278', '2021-06-29 12:17:16.853667')]
***********************Check if CR lines are one liners:************************
Department of Chemistry False
University College London False
London False
UK False
verify multiline affi
institution = 'University College London' AND school IS NULL AND depa

institution = 'University of Nottingham' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(4371, 'School of Chemistry, University of Nottingham, University Park, Nottingham NG7 2RD, United Kingdom', 1671, 2104, '2021-06-29 11:01:38.090007', '2021-06-29 12:17:17.074896')
Assigned ID: 154 Recovered ID: 154
True
Article Author:  1674
****************************CR Affilitations found:***************************** 
 [(4372, 'Department of Chemistry', 1674, 2105, '2021-06-29 11:01:39.085730', '2021-06-29 12:17:17.163232'), (4373, 'University College London', 1674, 2105, '2021-06-29 11:01:39.107485', '2021-06-29 12:17:17.189706'), (4374, 'London', 1674, 2105, '2021-06-29 11:01:39.132462', '2021-06-29 12:17:17.209956'), (4375, 'UK', 1674, 2105, '2021-06-29 11:01:39.157563', '2021-06-29 12:17:17.227023')]
***********************Check if CR lines are one liners:************************
Department of Chemistry Fal

institution IS NULL AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Laboratory of Organic Chemistry' AND country = 'Switzerland'
(4396, 'Laboratory of Organic Chemistry, ETH Zurich, 8093 Zurich, Switzerland', 1690, 2112, '2021-06-29 11:01:46.855364', '2021-06-29 12:17:18.216007')
Assigned ID: 17 Recovered ID: 17
True
Article Author:  1691
****************************CR Affilitations found:***************************** 
 [(4397, 'School of Cellular and Molecular Medicine, Biomedical Sciences Building, University of Bristol, Bristol, United Kingdom', 1691, 2113, '2021-06-29 11:01:47.340088', '2021-06-29 12:17:18.285345')]
***********************Check if CR lines are one liners:************************
School of Cellular and Molecular Medicine, Biomedical Sciences Building, University of Bristol, Bristol, United Kingdom True
*******************************verify one liners********************************
institution = 'University of Bristol' AND school = 'S

Assigned ID: 111 in recoverd IDs list: [111]
Article Author:  1708
****************************CR Affilitations found:***************************** 
 [(4433, 'Department of Chemistry, Technical University of Denmark, 2800 Kgs. Lyngby, Denmark', 1708, 2121, '2021-06-29 11:01:56.104192', '2021-06-29 12:17:19.835349')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, Technical University of Denmark, 2800 Kgs. Lyngby, Denmark True
*******************************verify one liners********************************
institution = 'Technical University of Denmark' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'Denmark'
(4433, 'Department of Chemistry, Technical University of Denmark, 2800 Kgs. Lyngby, Denmark', 1708, 2121, '2021-06-29 11:01:56.104192', '2021-06-29 12:17:19.835349')
Assigned ID: 79 Recovered ID: 79
True
Article Author:  1709
****************************

institution = 'University of Bristol' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Centre for Computational Chemistry' AND country = 'United Kingdom'
(4459, 'Centre for Computational Chemistry, School of Chemistry, University of Bristol, Bristol, U.K. BS8 1TS', 1726, 2132, '2021-06-29 11:02:04.845370', '2021-06-29 12:17:21.066926')
Assigned ID: 133 Recovered ID: 133
True
Article Author:  1727
****************************CR Affilitations found:***************************** 
 [(4460, 'Centre for Computational Chemistry, School of Chemistry, University of Bristol, Bristol, U.K. BS8 1TS', 1727, 2133, '2021-06-29 11:02:04.881018', '2021-06-29 12:17:21.115813')]
***********************Check if CR lines are one liners:************************
Centre for Computational Chemistry, School of Chemistry, University of Bristol, Bristol, U.K. BS8 1TS True
*******************************verify one liners********************************
institution = '

****************************CR Affilitations found:***************************** 
 [(4494, 'Centre for High Resolution Transmission Electron Microscopy', 1754, 2151, '2021-06-29 11:02:17.765275', '2021-06-29 12:17:22.831503'), (4495, 'Physics Department', 1754, 2151, '2021-06-29 11:02:17.797670', '2021-06-29 12:17:22.850279'), (4496, 'Nelson Mandela University', 1754, 2151, '2021-06-29 11:02:17.813537', '2021-06-29 12:17:22.884769'), (4497, 'Port Elizabeth', 1754, 2151, '2021-06-29 11:02:17.832345', '2021-06-29 12:17:22.900621'), (4498, 'South Africa', 1754, 2151, '2021-06-29 11:02:17.848972', '2021-06-29 12:17:22.927091')]
***********************Check if CR lines are one liners:************************
Centre for High Resolution Transmission Electron Microscopy False
Physics Department False
Nelson Mandela University False
Port Elizabeth False
South Africa False
verify multiline affi
institution = 'Nelson Mandela University' AND school IS NULL AND department = 'Physics Department' AND

Assigned ID: 72 in recoverd IDs list: [72, 67]
Article Author:  1774
****************************CR Affilitations found:***************************** 
 [(4522, 'Department of Physics, University of Antwerpen, Groenenborgerlaan 171, B-2020 Antwerpen, Belgium', 1774, 2161, '2021-06-29 11:02:27.346760', '2021-06-29 12:17:24.478188')]
***********************Check if CR lines are one liners:************************
Department of Physics, University of Antwerpen, Groenenborgerlaan 171, B-2020 Antwerpen, Belgium True
*******************************verify one liners********************************
institution = 'University of Antwerpen' AND school IS NULL AND department = 'Department of Physics' AND faculty IS NULL AND work_group IS NULL AND country = 'Belgium'
(4522, 'Department of Physics, University of Antwerpen, Groenenborgerlaan 171, B-2020 Antwerpen, Belgium', 1774, 2161, '2021-06-29 11:02:27.346760', '2021-06-29 12:17:24.478188')
Assigned ID: 122 Recovered ID: 122
True
Article Author:  

institution = 'The University of Manchester' AND school = 'School of Chemistry' AND department IS NULL AND faculty = 'Faculty of Science and Engineering' AND work_group IS NULL AND country = 'United Kingdom'
(4548, 'School of ChemistryFaculty of Science and EngineeringUniversity of Manchester 131 Princess Street Manchester M1 7DN UK', 1796, 2180, '2021-06-29 11:02:47.532885', '2021-06-29 12:17:26.562267')
Assigned ID: 86 Recovered ID: 86
True
Article Author:  1799
****************************CR Affilitations found:***************************** 
 [(4549, 'Centre for Computational Chemistry, School of Chemistry, University of Bristol, Bristol, U.K. BS8 1TS', 1799, 2181, '2021-06-29 11:02:49.663627', '2021-06-29 12:17:26.622998')]
***********************Check if CR lines are one liners:************************
Centre for Computational Chemistry, School of Chemistry, University of Bristol, Bristol, U.K. BS8 1TS True
*******************************verify one liners**************************

Assigned ID: 102 in recoverd IDs list: [102]
Article Author:  1821
****************************CR Affilitations found:***************************** 
 [(4572, 'Department of Chemical Engineering, University College London, Torrington Place, WC1E 7JE London, United Kingdom', 1821, 2193, '2021-06-29 11:03:05.635849', '2021-06-29 12:17:27.886881')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering, University College London, Torrington Place, WC1E 7JE London, United Kingdom True
*******************************verify one liners********************************
institution = 'University College London' AND school IS NULL AND department = 'Department of Chemical Engineering' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(4572, 'Department of Chemical Engineering, University College London, Torrington Place, WC1E 7JE London, United Kingdom', 1821, 2193, '2021-06-29 11:03:05.635849', '2021-06-29 12:17:2

institution = 'Keysight Technologies, Inc.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Semiconductor Measurement Solutions' AND country = 'United States of America'
Assigned ID: 38 in recoverd IDs list: [38]
Article Author:  1836
****************************CR Affilitations found:***************************** 
 [(4595, 'Department of Chemical Engineering, University College London, Torrington Place, WC1E 7JE London, United Kingdom', 1836, 2199, '2021-06-29 11:03:15.814675', '2021-06-29 12:17:28.867835')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering, University College London, Torrington Place, WC1E 7JE London, United Kingdom True
*******************************verify one liners********************************
institution = 'University College London' AND school IS NULL AND department = 'Department of Chemical Engineering' AND faculty IS NULL AND work_group IS NULL AND country = '

Assigned ID: 142 Recovered ID: 142
True
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(4611, 'UK Catalysis HubResearch Complex at Harwell Rutherford Appleton Laboratory Harwell Oxon Didcot OX11 0FA UK', 1849, 1925, '2021-06-29 11:03:21.055204', '2021-06-29 12:14:45.576715')
Assigned ID: 67 Recovered ID: 67
True
Article Author:  1850
****************************CR Affilitations found:***************************** 
 [(4612, 'School of Chemical Engineering and Analytical ScienceThe University of Manchester Oxford Road Manchester M13 9PL UK', 1850, 1540, '2021-06-29 11:03:21.080130', '2021-06-29 12:03:28.065367')]
***********************Check if CR lines are one liners:************************
School of Chemical Engineering and Analytical ScienceThe University of Manchester Oxford Road Manchester M13 9PL UK True
*******************************verify one liners*****************************

Assigned ID: 224 Recovered ID: 446
False
Problems with  4623
[[{'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0DE'}, [4623]]]
verifying affiliations for article author 1936
Article author affiliations: 1 [2219]
Parsed article author affiliations: 1
processing [{'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0DE'}, [4623]]
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
************************** Update Author Affiliation ***************************
Update ID: 2219 with values: {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': 

****************************CR Affilitations found:***************************** 
 [(4634, 'Interdisciplinary\rNanoscience Centre (iNANO), Aarhus University, DK-8000 Aarhus\rC, Denmark', 1947, 2036, '2021-06-29 11:03:31.413413', '2021-06-29 12:17:06.632279'), (4635, 'SynCat@Beijing,\rSynfuelsChina Co. Ltd., Leyuan South\rStreet II, No.1, Yanqi Economic Development Zone C#, Huairou District, Beijing 101407, China', 1947, 2037, '2021-06-29 11:03:31.431923', '2021-06-29 12:17:06.680220')]
***********************Check if CR lines are one liners:************************
C, Denmark Truetre (iNANO), Aarhus University, DK-8000 Aarhus
Street II, No.1, Yanqi Economic Development Zone C#, Huairou District, Beijing 101407, China True
*******************************verify one liners********************************
institution = 'Aarhus University' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Denmark'
(4634, 'Interdisciplinary\rNanoscience Centr

****************************CR Affilitations found:***************************** 
 [(4650, 'Interdisciplinary\rNanoscience Centre (iNANO), Aarhus University, DK-8000 Aarhus\rC, Denmark', 1957, 2240, '2021-06-29 11:03:31.812033', '2021-06-29 12:17:33.607865'), (4651, 'Department\rof Chemistry, Aarhus University, Langelandsgade 140, DK-8000 Aarhus C, Denmark', 1957, 2239, '2021-06-29 11:03:31.838023', '2021-06-29 12:17:33.525916')]
***********************Check if CR lines are one liners:************************
C, Denmark Truetre (iNANO), Aarhus University, DK-8000 Aarhus
of Chemistry, Aarhus University, Langelandsgade 140, DK-8000 Aarhus C, Denmark True
*******************************verify one liners********************************
institution = 'Aarhus University' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Denmark'
(4650, 'Interdisciplinary\rNanoscience Centre (iNANO), Aarhus University, DK-8000 Aarhus\rC, Denmark', 1957, 2240, 

************************** Update Author Affiliation ***************************
Update ID: 2220 with values: {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Ltd, Harwell Science &amp; Innovation Campus, Didcot, Oxfordshire OX11 0DE'}
Updating 2220 {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Ltd, Harwell Science &amp; Innovation Campus, Didcot, Oxfordshire OX11 0DE'}
{'article_author_id': 0, 'name': 'Diamond Light Source Ltd.', 'short_name': 'Diamond Light Source Ltd.', 'add_01': 'Ltd, Harwell Science &amp; Innovation Campus, Didcot, Oxfordshire OX11 0DE', 'add_02': None, 'add_03': None, 'add_04': None, 'country': 'United Kingdom', 'affiliation_id': 446, 'created_at': '2024-08-06 16:36:40', 'updated_at': '2024-08-06 16:36:40'}
updating aut_affi_id: 2220 column: name value: Diamon

{'article_author_id': 0, 'name': 'UK Catalysis Hub', 'short_name': 'UK Catalysis Hub', 'add_01': 'Research Complex at Harwell, Didcot, Oxfordshire OX11 0FA', 'add_02': None, 'add_03': None, 'add_04': None, 'country': 'United Kingdom', 'affiliation_id': 67, 'created_at': '2024-08-06 16:36:41', 'updated_at': '2024-08-06 16:36:41'}
updating aut_affi_id: 1904 column: name value: UK Catalysis Hub
updating aut_affi_id: 1904 column: short_name value: UK Catalysis Hub
updating aut_affi_id: 1904 column: add_01 value: Research Complex at Harwell, Didcot, Oxfordshire OX11 0FA
updating aut_affi_id: 1904 column: add_02 value: None
updating aut_affi_id: 1904 column: add_03 value: None
updating aut_affi_id: 1904 column: add_04 value: None
updating aut_affi_id: 1904 column: country value: United Kingdom
updating aut_affi_id: 1904 column: affiliation_id value: 67
updating aut_affi_id: 1904 column: updated_at value: 2024-08-06 16:36:41
processing [{'institution': 'Diamond Light Source Ltd.', 'school': '

institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(4681, 'Rutherford Appleton\rLaboratory, UK Catalysis Hub, Research Complex at Harwell (RCaH), Harwell, Oxon OX11 0FA, United Kingdom', 1993, 2046, '2021-06-29 11:03:33.788068', '2021-06-29 12:17:07.393218')
Assigned ID: 67 Recovered ID: 67
True
institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(4682, 'Department\rof Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, United Kingdom', 1993, 2047, '2021-06-29 11:03:33.811160', '2021-06-29 12:17:07.434301')
Assigned ID: 114 Recovered ID: 114
True
Article Author:  1994
****************************CR Affilitations found:***************************** 
 [(4683, 'Rutherford Appleton\rLaboratory, UK Catalysis Hub, Research Complex at Harwell (RCaH), H

Assigned ID: 148 Recovered ID: 148
True
Article Author:  2005
****************************CR Affilitations found:***************************** 
 [(4696, 'Department of Physics, University of Liverpool, Liverpool L69 7ZE, United Kingdom', 2005, 2410, '2021-06-29 11:03:35.051091', '2021-06-29 14:39:15.119201'), (4697, 'XMaS CRG, European Synchrotron Radiation Facility, 38043 Cedex, Grenoble, France', 2005, 2275, '2021-06-29 11:03:35.068519', '2021-06-29 12:29:40.617659')]
***********************Check if CR lines are one liners:************************
Department of Physics, University of Liverpool, Liverpool L69 7ZE, United Kingdom True
XMaS CRG, European Synchrotron Radiation Facility, 38043 Cedex, Grenoble, France True
*******************************verify one liners********************************
institution = 'University of Liverpool' AND school IS NULL AND department = 'Department of Physics' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(4696, 'Departme

institution = 'University of Southampton' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 171 in recoverd IDs list: [171]
Article Author:  2032
****************************CR Affilitations found:***************************** 
 [(4724, 'Faculty of Natural and Environmental Sciences', 2032, 2285, '2021-06-29 11:03:37.446721', '2021-06-29 11:03:37.446721'), (4725, 'School of Chemistry', 2032, 2285, '2021-06-29 11:03:37.476018', '2021-06-29 12:29:41.791679'), (4726, 'University of Southampton', 2032, 2285, '2021-06-29 11:03:37.505936', '2021-06-29 12:29:41.819126'), (4727, 'UK', 2032, 2285, '2021-06-29 11:03:37.523703', '2021-06-29 12:29:41.840370')]
***********************Check if CR lines are one liners:************************
Faculty of Natural and Environmental Sciences False
School of Chemistry False
University of Southampton False
UK False
verify multiline affi
institution = 'University of Southampton' 

Assigned ID: 8 in recoverd IDs list: [8]
Article Author:  2041
****************************CR Affilitations found:***************************** 
 [(4759, 'Cardiff Catalysis Institute', 2041, 2292, '2021-06-29 11:03:39.890516', '2021-06-29 12:29:43.625380'), (4760, 'Cardiff University', 2041, 2292, '2021-06-29 11:03:39.915720', '2021-06-29 12:29:43.683758'), (4761, 'UK', 2041, 2292, '2021-06-29 11:03:39.933093', '2021-06-29 12:29:43.705956')]
***********************Check if CR lines are one liners:************************
Cardiff Catalysis Institute False
Cardiff University False
UK False
verify multiline affi
institution = 'Cardiff University' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
Assigned ID: 8 in recoverd IDs list: [8]
Article Author:  2042
****************************CR Affilitations found:***************************** 
 [(4762, 'Cardiff Catalysis Institute', 2042, 2293, '2021-06-29

****************************CR Affilitations found:***************************** 
 [(4806, 'Centro de Química Estrutural', 2064, 2419, '2021-06-29 11:03:42.018587', '2021-06-29 14:55:47.769803'), (4807, 'Departamento de Engenharia Química', 2064, 2419, '2021-06-29 11:03:42.035578', '2021-06-29 14:55:47.786514'), (4808, 'Instituto Superior Técnico', 2064, 2419, '2021-06-29 11:03:42.061656', '2021-06-29 14:55:47.811157'), (4809, 'Universidade de Lisboa', 2064, 2419, '2021-06-29 11:03:42.084311', '2021-06-29 14:55:47.835797'), (4810, '1049-001 Lisboa', 2064, 2419, '2021-06-29 11:03:42.099679', '2021-06-29 14:55:47.869476')]
***********************Check if CR lines are one liners:************************
Centro de Química Estrutural False
Departamento de Engenharia Química False
Instituto Superior Técnico False
Universidade de Lisboa False
1049-001 Lisboa False
verify multiline affi
institution = 'Universidade de Lisboa' AND school IS NULL AND department IS NULL AND faculty IS NULL AND wor

institution = 'The University of Edinburgh' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'EaStCHEM' AND country = 'United Kingdom'
Assigned ID: 82 in recoverd IDs list: [82]
Article Author:  2074
****************************CR Affilitations found:***************************** 
 [(4850, 'Defence Science Technology Laboratory (DSTL)', 2074, 2423, '2021-06-29 11:03:44.172350', '2021-06-29 14:55:48.568418'), (4851, 'Salisbury', 2074, 2423, '2021-06-29 11:03:44.195433', '2021-06-29 14:55:48.589743'), (4852, 'UK', 2074, 2423, '2021-06-29 11:03:44.223611', '2021-06-29 14:55:48.616171')]
***********************Check if CR lines are one liners:************************
Defence Science Technology Laboratory (DSTL) True
Salisbury False
UK False
verify multiline affi
institution = 'Defence Science Technology Laboratory ' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 2

institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 114 in recoverd IDs list: [114, 67]
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country IS NULL
Assigned ID: 67 in recoverd IDs list: [114, 67]
Article Author:  2083
****************************CR Affilitations found:***************************** 
 [(4890, 'UNILAB, State Key Lab of Chemical Engineering, School of Chemical Engineering', 2083, 435, '2021-06-29 11:03:47.017714', '2021-06-29 11:03:47.017714'), (4891, 'East China University of Science and Technology', 2083, 435, '2021-06-29 11:03:47.040971', '2021-06-29 12:34:17.990869'), (4892, 'Shanghai 200237', 2083, 435, '2021-06-29 11:03:47.057496', '2021-06-29 12:34:18.014088'), (4893, 'P. R. China', 2083, 435, '2021-06-29 11:03:47.074415', '2021-06-29 12:34:18.042522'),

institution = 'East China University of Science and Technology' AND school = 'School of Chemical Engineering' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Peoples Republic of China'
Assigned ID: 435 in recoverd IDs list: [435]
Article Author:  2091
****************************CR Affilitations found:***************************** 
 [(4924, 'EaStCHEM School of Chemistry', 2091, 1790, '2021-06-29 11:03:48.492323', '2021-06-29 12:14:32.705388'), (4925, 'Joseph Black Building', 2091, 1790, '2021-06-29 11:03:48.514252', '2021-06-29 12:14:32.727417'), (4926, "The King's Buildings", 2091, 1790, '2021-06-29 11:03:48.528377', '2021-06-29 12:14:32.750709'), (4927, 'The University of Edinburgh', 2091, 1790, '2021-06-29 11:03:48.550543', '2021-06-29 12:14:32.771400'), (4928, 'Edinburgh', 2091, 1790, '2021-06-29 11:03:48.565443', '2021-06-29 12:14:32.786129')]
***********************Check if CR lines are one liners:************************
EaStCHEM School of Chemis

institution = 'Heriot-Watt University' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Institute of Chemical Sciences' AND country = 'United Kingdom'
Assigned ID: 24 in recoverd IDs list: [24]
Article Author:  2099
****************************CR Affilitations found:***************************** 
 [(4962, 'School of Chemistry', 2099, 1703, '2021-06-29 11:03:50.349299', '2021-06-29 12:10:32.000741'), (4963, 'University of Bristol', 2099, 1703, '2021-06-29 11:03:50.376758', '2021-06-29 12:10:32.034395'), (4964, "Cantock's Close", 2099, 1703, '2021-06-29 11:03:50.400573', '2021-06-29 12:10:32.057105'), (4965, 'Bristol', 2099, 1703, '2021-06-29 11:03:50.413933', '2021-06-29 12:10:32.073806'), (4966, 'UK', 2099, 1703, '2021-06-29 11:03:50.436126', '2021-06-29 12:10:32.099182')]
***********************Check if CR lines are one liners:************************
School of Chemistry False
University of Bristol False
Cantock's Close False
Bristol False
UK False
verif

****************************CR Affilitations found:***************************** 
 [(5006, 'EaStCHEM School of Chemistry', 2108, 2327, '2021-06-29 11:03:52.107844', '2021-06-29 12:34:21.362174'), (5007, 'University of Edinburgh', 2108, 2327, '2021-06-29 11:03:52.133853', '2021-06-29 12:34:21.384139'), (5008, "The King's Buildings", 2108, 2327, '2021-06-29 11:03:52.149895', '2021-06-29 12:34:21.414580'), (5009, 'Edinburgh', 2108, 2327, '2021-06-29 11:03:52.174297', '2021-06-29 12:34:21.431256'), (5010, 'UK', 2108, 2327, '2021-06-29 11:03:52.197768', '2021-06-29 12:34:21.455378')]
***********************Check if CR lines are one liners:************************
EaStCHEM School of Chemistry True
University of Edinburgh False
The King's Buildings False
Edinburgh False
UK False
verify multiline affi
institution = 'The University of Edinburgh' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'EaStCHEM' AND country = 'United Kingdom'
Assigned ID: 8

****************************CR Affilitations found:***************************** 
 [(2050, 'University of ManchesterUK', 2453, 1054, '2020-10-17 15:59:20.060107', '2020-10-19 12:45:45.500278')]
***********************Check if CR lines are one liners:************************
University of ManchesterUK True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(2050, 'University of ManchesterUK', 2453, 1054, '2020-10-17 15:59:20.060107', '2020-10-19 12:45:45.500278')
Assigned ID: 88 Recovered ID: 88
True
Article Author:  2454
****************************CR Affilitations found:***************************** 
 [(2051, 'Johnson Matthey Technology CentreUK', 2454, 1066, '2020-10-17 15:59:20.121159', '2020-10-19 12:45:47.812225')]
***********************Check if CR lines are one liners:***********************

Assigned ID: 67 in recoverd IDs list: [114, 67]
Article Author:  2481
****************************CR Affilitations found:***************************** 
 [(2106, 'Centre for Advanced Materials and Industrial Chemistry (CAMIC)', 2481, 1116, '2020-10-19 09:31:04.525151', '2020-10-19 13:08:54.233079'), (2107, 'School of Science', 2481, 1116, '2020-10-19 09:31:04.564442', '2020-10-19 13:08:54.258810'), (2108, 'RMIT University', 2481, 1116, '2020-10-19 09:31:04.594223', '2020-10-19 13:08:54.278907'), (2109, 'Melbourne', 2481, 1116, '2020-10-19 09:31:04.633759', '2020-10-19 13:08:54.300067'), (2110, 'Australia', 2481, 1116, '2020-10-19 09:31:04.664906', '2020-10-19 12:59:32.205354')]
***********************Check if CR lines are one liners:************************
Centre for Advanced Materials and Industrial Chemistry (CAMIC) False
School of Science False
RMIT University False
Melbourne False
Australia False
verify multiline affi
institution = 'RMIT University' AND school = 'School of Science'

institution = 'The University of Manchester' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Manchester Institute of Biotechnology' AND country = 'United Kingdom'
Assigned ID: 85 in recoverd IDs list: [85]
Article Author:  2493
****************************CR Affilitations found:***************************** 
 [(2150, 'School of Chemistry', 2493, 1150, '2020-10-19 09:45:35.553067', '2020-10-19 13:30:57.858207'), (2151, 'University of Manchester', 2493, 1150, '2020-10-19 09:45:35.593305', '2020-10-19 13:30:57.883582'), (2152, 'Manchester Institute of Biotechnology', 2493, 1150, '2020-10-19 09:45:35.635125', '2020-10-19 13:30:57.903596'), (2153, 'Manchester', 2493, 1150, '2020-10-19 09:45:35.671294', '2020-10-19 13:30:57.932839'), (2154, 'UK', 2493, 1150, '2020-10-19 09:45:35.712606', '2020-10-19 13:30:57.977686')]
***********************Check if CR lines are one liners:************************
School of Chemistry False
University of Manches

****************************CR Affilitations found:***************************** 
 [(2184, 'Cardiff Catalysis Institute', 2503, 1026, '2020-10-19 09:56:34.624106', '2020-10-19 12:37:35.039979'), (2185, 'School of Chemistry', 2503, 1026, '2020-10-19 09:56:34.722046', '2020-10-19 12:37:35.060670'), (2186, 'Cardiff University', 2503, 1026, '2020-10-19 09:56:34.762622', '2020-10-19 12:37:35.083246'), (2187, 'Cardiff', 2503, 1026, '2020-10-19 09:56:34.804039', '2020-10-19 12:37:35.104901'), (2188, 'UK', 2503, 1026, '2020-10-19 09:56:34.860252', '2020-10-19 12:37:35.126148')]
***********************Check if CR lines are one liners:************************
Cardiff Catalysis Institute False
School of Chemistry False
Cardiff University False
Cardiff False
UK False
verify multiline affi
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
Assigned ID: 8 in rec

(5234, 'Department of Chemical Engineering and Analytical Science, School of Engineering The University of Manchester  Manchester UK', 3090, 2489, '2021-08-24 20:26:44.841334', '2022-05-16 20:00:00')
Assigned ID: 218 Recovered ID: 218
True
Article Author:  3091
****************************CR Affilitations found:***************************** 
 [(5235, 'Department of ChemistryUniversity of OxfordChemistry Research Laboratory Mansfield Road Oxford OX1 3TA UK', 3091, 2490, '2021-10-05 08:51:32.445082', '2022-05-16 20:00:00'), (5236, 'Current address: Institute of Materials Research and Engineering 2 Fusionopolis Way Singapore 138634 Singapore', 3091, 2491, '2021-10-05 08:51:32.467559', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of ChemistryUniversity of OxfordChemistry Research Laboratory Mansfield Road Oxford OX1 3TA UK True
Current address: Institute of Materials Research and Engineering 2 Fusionopolis Way Singapore

****************************CR Affilitations found:***************************** 
 [(5249, 'Department of Chemistry, Chemistry Research Laboratory, University of Oxford, 12 Mansfield Road, Oxford OX1 3TA, U.K.', 3104, 2504, '2021-10-05 08:55:52.457803', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, Chemistry Research Laboratory, University of Oxford, 12 Mansfield Road, Oxford OX1 3TA, U.K. True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND country = 'United Kingdom'
(5249, 'Department of Chemistry, Chemistry Research Laboratory, University of Oxford, 12 Mansfield Road, Oxford OX1 3TA, U.K.', 3104, 2504, '2021-10-05 08:55:52.457803', '2022-05-16 20:00:00')
Assigned ID: 217 Recovered ID: 217
True
Artic

****************************CR Affilitations found:***************************** 
 [(5263, 'Department of Chemistry, Chemistry Research Laboratory, University of Oxford, 12 Mansfield Road, Oxford, OX1 3TA, U.K.', 3118, 2518, '2021-10-05 09:03:25.439182', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, Chemistry Research Laboratory, University of Oxford, 12 Mansfield Road, Oxford, OX1 3TA, U.K. True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND country = 'United Kingdom'
(5263, 'Department of Chemistry, Chemistry Research Laboratory, University of Oxford, 12 Mansfield Road, Oxford, OX1 3TA, U.K.', 3118, 2518, '2021-10-05 09:03:25.439182', '2022-05-16 20:00:00')
Assigned ID: 217 Recovered ID: 217
True
Ar

****************************CR Affilitations found:***************************** 
 [(5293, 'Department of Chemistry, Chemistry Research Laboratory, 12 Mansfield Road, Oxford, OX1 3TA, U.K.', 3130, 2741, '2021-10-05 09:09:43.802879', '2022-05-17 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, Chemistry Research Laboratory, 12 Mansfield Road, Oxford, OX1 3TA, U.K. True
*******************************verify one liners********************************
institution IS NULL AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND country = 'United Kingdom'
(5293, 'Department of Chemistry, Chemistry Research Laboratory, 12 Mansfield Road, Oxford, OX1 3TA, U.K.', 3130, 2741, '2021-10-05 09:09:43.802879', '2022-05-17 20:00:00')
Assigned ID: 217 Recovered ID: 217
True
Article Author:  3131
****************************CR Affilitations found:************

institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND country = 'United Kingdom'
(5323, 'Department of Chemistry University of Oxford Chemistry Research Laboratory  Oxford UK', 3142, 2540, '2021-10-05 09:19:41.931038', '2022-05-16 20:00:00')
Assigned ID: 217 Recovered ID: 217
True
Article Author:  3143
****************************CR Affilitations found:***************************** 
 [(5324, 'Department of Chemistry University of Oxford Chemistry Research Laboratory  Oxford UK', 3143, 2541, '2021-10-05 09:19:42.018661', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemistry University of Oxford Chemistry Research Laboratory  Oxford UK True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department = 'Depar

****************************CR Affilitations found:***************************** 
 [(5338, 'School of Chemistry', 3154, 3031, '2021-10-05 09:23:34.179904', '2021-10-05 09:23:34.179904'), (5339, 'University of Edinburgh', 3154, 3031, '2021-10-05 09:23:34.204799', '2021-10-05 09:23:34.204799'), (5340, 'UK', 3154, 3031, '2021-10-05 09:23:34.252267', '2021-10-05 09:23:34.252267')]
***********************Check if CR lines are one liners:************************
School of Chemistry False
University of Edinburgh False
UK False
verify multiline affi
institution = 'The University of Edinburgh' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 83 in recoverd IDs list: [83]
Article Author:  3155
****************************CR Affilitations found:***************************** 
 [(5341, 'Chemistry Research Laboratory', 3155, 3032, '2021-10-05 09:23:34.301535', '2021-10-05 09:23:34.301535'), (5342, 'Univer

****************************CR Affilitations found:***************************** 
 [(5374, 'Chemistry Research Laboratory', 3172, 3038, '2021-10-05 09:29:22.952455', '2021-10-05 09:29:22.952455'), (5375, '12 Mansfield Road', 3172, 3038, '2021-10-05 09:29:22.978005', '2021-10-05 09:29:22.978005'), (5376, 'University of Oxford', 3172, 3038, '2021-10-05 09:29:23.001239', '2021-10-05 09:29:23.001239'), (5377, 'Oxford', 3172, 3038, '2021-10-05 09:29:23.017962', '2021-10-05 09:29:23.017962'), (5378, 'UK', 3172, 3038, '2021-10-05 09:29:23.037403', '2021-10-05 09:29:23.037403')]
***********************Check if CR lines are one liners:************************
Chemistry Research Laboratory False
12 Mansfield Road False
University of Oxford False
Oxford False
UK False
verify multiline affi
institution = 'University of Oxford' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND country = 'United Kingdom'
Assigned ID: 217 in recoverd ID

****************************CR Affilitations found:***************************** 
 [(5407, 'Department of Chemistry, Imperial College London, London SW7 2AZ, United Kingdom', 3182, 2565, '2021-10-05 09:31:43.475546', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, Imperial College London, London SW7 2AZ, United Kingdom True
*******************************verify one liners********************************
institution = 'Imperial College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(5407, 'Department of Chemistry, Imperial College London, London SW7 2AZ, United Kingdom', 3182, 2565, '2021-10-05 09:31:43.475546', '2022-05-16 20:00:00')
Assigned ID: 29 Recovered ID: 29
True
Article Author:  3183
****************************CR Affilitations found:***************************** 
 [(5408, 'Department of Chemistry, Ch

institution = 'University of Glasgow' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 142 in recoverd IDs list: [142]
Article Author:  3216
****************************CR Affilitations found:***************************** 
 [(5487, 'School of Chemistry', 3216, 3058, '2021-10-05 11:28:12.826262', '2021-10-05 11:28:12.826262'), (5488, 'University of Glasgow', 3216, 3058, '2021-10-05 11:28:12.848864', '2021-10-05 11:28:12.848864'), (5489, 'Glasgow', 3216, 3058, '2021-10-05 11:28:12.871307', '2021-10-05 11:28:12.871307'), (5490, 'UK', 3216, 3058, '2021-10-05 11:28:12.890286', '2021-10-05 11:28:12.890286')]
***********************Check if CR lines are one liners:************************
School of Chemistry False
University of Glasgow False
Glasgow False
UK False
verify multiline affi
institution = 'University of Glasgow' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NUL

Assigned ID: 8 Recovered ID: 8
True
Article Author:  3227
****************************CR Affilitations found:***************************** 
 [(5523, 'Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff, CF10 3AT, U.K.', 3227, 2588, '2021-10-05 11:32:41.195247', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff, CF10 3AT, U.K. True
*******************************verify one liners********************************
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(5523, 'Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff, CF10 3AT, U.K.', 3227, 2588, '2021-10-05 11:32:41.195247', '2022-05-16 20:00:00')
Assigned ID: 8 Recovered ID: 8
True
Article Author:  3228
*********

(5539, 'Rutherford Appleton Laboratory, Research Complex at Harwell, Didcot, Oxfordshire OX11 0FA, U.K.', 3238, 2431, '2021-10-05 11:37:39.603411', '2021-11-23 17:48:56.601047')
Assigned ID: 373 Recovered ID: 373
True
Article Author:  3239
****************************CR Affilitations found:***************************** 
 [(5540, 'Central Laser Facility, Science and Technology Facilities Council, Research Complex at Harwell, Didcot, Oxfordshire OX11 0FA, U.K.', 3239, 2596, '2021-10-05 11:37:39.659225', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Central Laser Facility, Science and Technology Facilities Council, Research Complex at Harwell, Didcot, Oxfordshire OX11 0FA, U.K. True
*******************************verify one liners********************************
institution = 'Central Laser Facility' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(5540, 'Central 

Assigned ID: 67 in recoverd IDs list: [114, 67]
Article Author:  3265
****************************CR Affilitations found:***************************** 
 [(5589, 'School of Materials, University of Manchester, Manchester, Lancashire M13 9PL, UK', 3265, 2865, '2021-10-05 11:50:13.133452', '2021-10-05 11:50:13.133452'), (5590, 'Research Complex at Harwell, Harwell Science and Innovation Campus, Rutherford Appleton Laboratory, Didcot, Oxon, OX11 0FA, UK', 3265, 2983, '2021-10-05 11:50:13.153413', '2021-10-05 11:50:13.153413')]
***********************Check if CR lines are one liners:************************
School of Materials, University of Manchester, Manchester, Lancashire M13 9PL, UK True
Research Complex at Harwell, Harwell Science and Innovation Campus, Rutherford Appleton Laboratory, Didcot, Oxon, OX11 0FA, UK True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school = 'School of Materials' AND depart

****************************CR Affilitations found:***************************** 
 [(5604, 'Finden Limited  Merchant House, 5 East St Helens Street Abingdon OX14 5EG UK', 3275, 2781, '2021-10-05 12:12:58.759574', '2021-10-05 12:12:58.759574')]
***********************Check if CR lines are one liners:************************
Finden Limited  Merchant House, 5 East St Helens Street Abingdon OX14 5EG UK True
*******************************verify one liners********************************
institution = 'Finden Ltd' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(5604, 'Finden Limited  Merchant House, 5 East St Helens Street Abingdon OX14 5EG UK', 3275, 2781, '2021-10-05 12:12:58.759574', '2021-10-05 12:12:58.759574')
Assigned ID: 277 Recovered ID: 277
True
Article Author:  3276
****************************CR Affilitations found:***************************** 
 [(5605, 'The Faraday Institution Quad One Harwell Science and Inn

institution = 'Coelho Software' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Australia'
(5608, 'Coelho Software  72 Cedar Street, Wynnum Brisbane Queensland 4178 Australia', 3278, 2783, '2021-10-05 12:12:58.992925', '2021-10-05 12:12:58.992925')
Assigned ID: 304 Recovered ID: 304
True
Article Author:  3279
****************************CR Affilitations found:***************************** 
 [(5609, 'Department of Chemistry University College London  20 Gordon Street London WC1H 0AJ UK', 3279, 2608, '2021-10-05 12:12:59.068541', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemistry University College London  20 Gordon Street London WC1H 0AJ UK True
*******************************verify one liners********************************
institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_gro

institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 67 in recoverd IDs list: [67]
Article Author:  3305
****************************CR Affilitations found:***************************** 
 [(5627, 'Johnson Matthey Technology Centre', 3305, 3073, '2021-10-05 12:26:44.844563', '2021-10-05 12:26:44.844563'), (5628, 'Blounts Court Road', 3305, 3073, '2021-10-05 12:26:44.860903', '2021-10-05 12:26:44.860903'), (5629, 'Sonning Common', 3305, 3073, '2021-10-05 12:26:44.886022', '2021-10-05 12:26:44.886022'), (5630, 'Reading', 3305, 3073, '2021-10-05 12:26:44.904403', '2021-10-05 12:26:44.904403'), (5631, 'UK', 3305, 3073, '2021-10-05 12:26:44.925137', '2021-10-05 12:26:44.925137')]
***********************Check if CR lines are one liners:************************
Johnson Matthey Technology Centre False
Blounts Court Road False
Sonning Common False
Reading False
UK False
verify multiline a

{'article_author_id': 0, 'name': 'Diamond Light Source Ltd.', 'short_name': 'Diamond Light Source Ltd.', 'add_01': 'Harwell Science & Innovation Campus, Didcot', 'add_02': None, 'add_03': None, 'add_04': None, 'country': 'United Kingdom', 'affiliation_id': 446, 'created_at': '2024-08-06 16:36:53', 'updated_at': '2024-08-06 16:36:53'}
updating aut_affi_id: 3078 column: name value: Diamond Light Source Ltd.
updating aut_affi_id: 3078 column: short_name value: Diamond Light Source Ltd.
updating aut_affi_id: 3078 column: add_01 value: Harwell Science & Innovation Campus, Didcot
updating aut_affi_id: 3078 column: add_02 value: None
updating aut_affi_id: 3078 column: add_03 value: None
updating aut_affi_id: 3078 column: add_04 value: None
updating aut_affi_id: 3078 column: country value: United Kingdom
updating aut_affi_id: 3078 column: affiliation_id value: 446
updating aut_affi_id: 3078 column: updated_at value: 2024-08-06 16:36:53
Article Author:  3311
****************************CR Affil

Article Author:  3314
****************************CR Affilitations found:***************************** 
 [(5668, 'UK Catalysis Hub, Research Complex at Harwell', 3314, 3082, '2021-10-05 12:26:46.060166', '2021-10-05 12:26:46.060166'), (5669, 'Rutherford Appleton Laboratories', 3314, 3082, '2021-10-05 12:26:46.077634', '2021-10-05 12:26:46.077634'), (5670, 'Harwell Science & Innovation Campus', 3314, 3082, '2021-10-05 12:26:46.094958', '2021-10-05 12:26:46.094958'), (5671, 'Didcot', 3314, 3082, '2021-10-05 12:26:46.124646', '2021-10-05 12:26:46.124646'), (5672, 'UK', 3314, 3082, '2021-10-05 12:26:46.144165', '2021-10-05 12:26:46.144165')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub, Research Complex at Harwell True
Rutherford Appleton Laboratories False
Harwell Science & Innovation Campus False
Didcot False
UK False
verify multiline affi
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AN

institution = 'Cardiff University' AND school = 'School of Chemistry' AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: None not in recoverd IDs list: [10]
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Problems with:
 3323 3323
[[{'institution': 'Cardiff University', 'school': 'School of Chemistry', 'department': 'Department of Chemistry', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Cardiff'}, [5709, 5710, 5711, 5712, 5713]]]
verifying affiliations for article author 3323
Article author affiliations: 1 [2457]
Parsed article author affiliations: 1
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Affi does not exist
{'institution': 'Cardiff University', 'school': 'School of Chemistry', 'department': 'Departmen

****************************CR Affilitations found:***************************** 
 [(5745, 'Department of Chemical Engineering and Analytical Science, The University of Manchester, The Mill, Sackville Street, Manchester M13 9PL, United Kingdom', 3336, 2630, '2021-10-05 12:34:56.354341', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering and Analytical Science, The University of Manchester, The Mill, Sackville Street, Manchester M13 9PL, United Kingdom True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school IS NULL AND department = 'Department of Chemical Engineering and Analytical Science' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(5745, 'Department of Chemical Engineering and Analytical Science, The University of Manchester, The Mill, Sackville Street, Manchester M13 9PL, United Ki

****************************CR Affilitations found:***************************** 
 [(5762, 'School of Chemistry; University of Southampton; University Road Southampton SO17 1BJ UK', 3345, 2885, '2021-10-05 12:37:21.185571', '2021-10-05 12:37:21.185571'), (5763, 'UK Catalysis Hub, Research Complex at Harwell; Science and Technology Facilities Council Rutherford Appleton Laboratory, Harwell Science and Innovation Campus; Oxon OX11 0QX UK', 3345, 2950, '2021-10-05 12:37:21.213934', '2021-10-05 12:37:21.213934')]
***********************Check if CR lines are one liners:************************
School of Chemistry; University of Southampton; University Road Southampton SO17 1BJ UK True
UK Catalysis Hub, Research Complex at Harwell; Science and Technology Facilities Council Rutherford Appleton Laboratory, Harwell Science and Innovation Campus; Oxon OX11 0QX UK True
*******************************verify one liners********************************
institution = 'University of Southampton' AND sc

Assigned ID: 235 in recoverd IDs list: [235]
Article Author:  3368
****************************CR Affilitations found:***************************** 
 [(5792, 'Department of Chemical Engineering and Analytical Science, School of Engineering, The University of Manchester, Oxford Road, Manchester M13 9PL, United Kingdom', 3368, 2730, '2021-10-05 14:20:54.303311', '2022-05-17 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering and Analytical Science, School of Engineering, The University of Manchester, Oxford Road, Manchester M13 9PL, United Kingdom True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school = 'School of Engineering' AND department = 'Department of Chemical Engineering and Analytical Science' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(5792, 'Department of Chemical Engineering and Analyt

(5803, 'Chemistry Research Laboratory University of Oxford  12 Mansfield Road Oxford OX1 3TA UK', 3405, 2637, '2021-10-05 14:30:54.756203', '2022-05-16 20:00:00')
Assigned ID: 217 Recovered ID: 217
True
Article Author:  3406
****************************CR Affilitations found:***************************** 
 [(5804, 'Chemistry Research Laboratory University of Oxford  12 Mansfield Road Oxford OX1 3TA UK', 3406, 2638, '2021-10-05 14:30:54.869383', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Chemistry Research Laboratory University of Oxford  12 Mansfield Road Oxford OX1 3TA UK True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND country = 'United Kingdom'
(5804, 'Chemistry Research Laboratory University of Oxford  12 Mansfield Road Oxford OX1 3TA 

****************************CR Affilitations found:***************************** 
 [(5815, 'School of Chemistry, Food and PharmacyUniversity of Reading Reading RG6 6AT UK', 3416, 2792, '2021-10-05 14:32:22.557465', '2021-10-05 14:32:22.557465')]
***********************Check if CR lines are one liners:************************
School of Chemistry, Food and PharmacyUniversity of Reading Reading RG6 6AT UK True
*******************************verify one liners********************************
institution = 'University of Reading' AND school = 'School of Chemistry, Food and Pharmacy' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(5815, 'School of Chemistry, Food and PharmacyUniversity of Reading Reading RG6 6AT UK', 3416, 2792, '2021-10-05 14:32:22.557465', '2021-10-05 14:32:22.557465')
Assigned ID: 307 Recovered ID: 307
True
Article Author:  3417
****************************CR Affilitations found:***************************** 
 [(5816, 'Scho

Assigned ID: 159 Recovered ID: 159
True
Article Author:  3428
****************************CR Affilitations found:***************************** 
 [(5827, 'Inorganic\rChemistry Laboratory,\rDept of Chemistry, University of Oxford, South Parks Road, Oxford OX1 3QR, United Kingdom', 3428, 2653, '2021-11-30 19:25:48.005248', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Dept of Chemistry, University of Oxford, South Parks Road, Oxford OX1 3QR, United Kingdom True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Inorganic Chemistry Laboratory' AND country = 'United Kingdom'
(5827, 'Inorganic\rChemistry Laboratory,\rDept of Chemistry, University of Oxford, South Parks Road, Oxford OX1 3QR, United Kingdom', 3428, 2653, '2021-11-30 19:25:48.005248', '2022-05-16 20:00:00')
Assigned ID: 159 R

****************************CR Affilitations found:***************************** 
 [(5839, 'Cardiff\rCatalysis Institute, School of Chemistry, Main Building, Park Place, Cardiff University, Cardiff, CF10 3AT, U.K.', 3440, 2737, '2021-11-30 20:31:05.300388', '2022-05-17 20:00:00')]
***********************Check if CR lines are one liners:************************
Catalysis Institute, School of Chemistry, Main Building, Park Place, Cardiff University, Cardiff, CF10 3AT, U.K. True
*******************************verify one liners********************************
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(5839, 'Cardiff\rCatalysis Institute, School of Chemistry, Main Building, Park Place, Cardiff University, Cardiff, CF10 3AT, U.K.', 3440, 2737, '2021-11-30 20:31:05.300388', '2022-05-17 20:00:00')
Assigned ID: 8 Recovered ID: 8
True
Article Author

****************************CR Affilitations found:***************************** 
 [(5852, 'Inorganic\rChemistry Laboratory,\rDepartment of Chemistry, University of Oxford, South Parks Road, Oxford OX1 3QR, United Kingdom', 3458, 2668, '2021-11-30 20:51:37.385367', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, University of Oxford, South Parks Road, Oxford OX1 3QR, United Kingdom True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Inorganic Chemistry Laboratory' AND country = 'United Kingdom'
(5852, 'Inorganic\rChemistry Laboratory,\rDepartment of Chemistry, University of Oxford, South Parks Road, Oxford OX1 3QR, United Kingdom', 3458, 2668, '2021-11-30 20:51:37.385367', '2022-05-16 20:00:00')
Assigned ID: 159 Recovered ID: 159
True
Ar

institution = 'Institute of Materials Research and Engineering' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'Singapore'
Affi does not exist
{'institution': 'Institute of Materials Research and Engineering', 'school': '', 'department': 'Department of Chemistry', 'faculty': '', 'work_group': '', 'country': 'Singapore', 'address': 'IMRE, Agency for Science, Agency for Science, Technology and Research, Technology and Research ('}
Article Author:  3503
****************************CR Affilitations found:***************************** 
 [(5905, 'Institute of Materials Research and Engineering (IMRE)', 3503, 3107, '2021-11-30 23:59:25.360089', '2021-11-30 23:59:25.360089'), (5906, 'Agency for Science', 3503, 3107, '2021-11-30 23:59:25.451121', '2021-11-30 23:59:25.451121'), (5907, 'Technology and Research (A*STAR)', 3503, 3107, '2021-11-30 23:59:25.503388', '2021-11-30 23:59:25.503388'), (5908, 'Singapore', 3503, 3107, '

Assigned ID: 160 Recovered ID: 160
True
Article Author:  3512
****************************CR Affilitations found:***************************** 
 [(5928, 'Department of Chemistry', 3512, 3111, '2021-12-01 00:02:22.857129', '2021-12-01 00:02:22.857129'), (5929, 'University of Southampton', 3512, 3111, '2021-12-01 00:02:22.909533', '2021-12-01 00:02:22.909533'), (5930, 'Department of Chemistry', 3512, 3111, '2021-12-01 00:02:22.960907', '2021-12-01 00:02:22.960907'), (5931, 'Southampton', 3512, 3111, '2021-12-01 00:02:23.018499', '2021-12-01 00:02:23.018499'), (5932, 'UK', 3512, 3111, '2021-12-01 00:02:23.068575', '2021-12-01 00:02:23.068575')]
***********************Check if CR lines are one liners:************************
Department of Chemistry False
University of Southampton False
Department of Chemistry False
Southampton False
UK False
verify multiline affi
institution = 'University of Southampton' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND 

institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Inorganic Chemistry Laboratory' AND country = 'United Kingdom'
(5988, 'Inorganic Chemistry Laboratory, Department of ChemistryUniversity of Oxford South Parks Road Oxford OX1 3QR UK', 3544, 2699, '2021-12-04 18:32:51.752785', '2022-05-16 20:00:00')
Assigned ID: 159 Recovered ID: 159
True
Article Author:  3545
****************************CR Affilitations found:***************************** 
 [(5989, 'Inorganic Chemistry Laboratory, Department of ChemistryUniversity of Oxford South Parks Road Oxford OX1 3QR UK', 3545, 2700, '2021-12-04 18:32:52.028451', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Inorganic Chemistry Laboratory, Department of ChemistryUniversity of Oxford South Parks Road Oxford OX1 3QR UK True
*******************************verify one liners********************************
ins

institution = 'Chulalongkorn University' AND school IS NULL AND department = 'Department of Biochemistry' AND faculty = 'Faculty of Science' AND work_group = 'Structural and Computational Biology Research Unit' AND country IS NULL
Assigned ID: 343 in recoverd IDs list: [343]
Article Author:  3603
****************************CR Affilitations found:***************************** 
 [(6146, 'Department of Chemistry and Center of Excellence for Innovation in Chemistry', 3603, 3143, '2021-12-04 18:53:38.766092', '2021-12-04 18:53:38.766092'), (6147, 'Faculty of Science', 3603, 3143, '2021-12-04 18:53:38.858920', '2021-12-04 18:53:38.858920'), (6148, 'Mahidol University', 3603, 3143, '2021-12-04 18:53:38.939809', '2021-12-04 18:53:38.939809'), (6149, 'Bangkok 10400', 3603, 3143, '2021-12-04 18:53:38.969311', '2021-12-04 18:53:38.969311'), (6150, 'Thailand', 3603, 3143, '2021-12-04 18:53:39.019805', '2021-12-04 18:53:39.019805')]
***********************Check if CR lines are one liners:*********

institution = 'University of Bristol' AND school = 'School of Cellular and Molecular Medicine' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 225 in recoverd IDs list: [225]
Article Author:  3611
****************************CR Affilitations found:***************************** 
 [(6186, 'Centre for Computational Chemistry', 3611, 3151, '2021-12-04 18:53:42.361872', '2021-12-04 18:53:42.361872'), (6187, 'School of Chemistry', 3611, 3151, '2021-12-04 18:53:42.415517', '2021-12-04 18:53:42.415517'), (6188, 'University of Bristol', 3611, 3151, '2021-12-04 18:53:42.513992', '2021-12-04 18:53:42.513992'), (6189, "Cantock's Close", 3611, 3151, '2021-12-04 18:53:42.579819', '2021-12-04 18:53:42.579819'), (6190, 'Bristol', 3611, 3151, '2021-12-04 18:53:42.629085', '2021-12-04 18:53:42.629085')]
***********************Check if CR lines are one liners:************************
Centre for Computational Chemistry False
School of Chemistry

(6202, 'Department of Chemistry, KU Leuven, Leuven, Belgium', 3622, 2725, '2021-12-04 18:57:01.460773', '2022-05-16 20:00:00')
Assigned ID: 284 Recovered ID: 284
True
Article Author:  3623
****************************CR Affilitations found:***************************** 
 [(6203, 'Centre for Computational Chemistry, School of Chemistry, University of Bristol, Bristol, United Kingdom', 3623, 2536, '2021-12-04 18:57:01.632841', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Centre for Computational Chemistry, School of Chemistry, University of Bristol, Bristol, United Kingdom True
*******************************verify one liners********************************
institution = 'University of Bristol' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Centre for Computational Chemistry' AND country = 'United Kingdom'
(6203, 'Centre for Computational Chemistry, School of Chemistry, University

institution = 'University of Bristol' AND school = 'School of Cellular and Molecular Medicine' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 225 in recoverd IDs list: [225]
Article Author:  3637
****************************CR Affilitations found:***************************** 
 [(6249, 'Centre for Computational Chemistry', 3637, 3160, '2021-12-04 19:00:08.007199', '2021-12-04 19:00:08.007199'), (6250, 'School of Chemistry', 3637, 3160, '2021-12-04 19:00:08.117347', '2021-12-04 19:00:08.117347'), (6251, 'University of Bristol', 3637, 3160, '2021-12-04 19:00:08.206440', '2021-12-04 19:00:08.206440'), (6252, 'Bristol BS8 1TS', 3637, 3160, '2021-12-04 19:00:08.306854', '2021-12-04 19:00:08.306854'), (6253, 'UK', 3637, 3160, '2021-12-04 19:00:08.373857', '2021-12-04 19:00:08.373857')]
***********************Check if CR lines are one liners:************************
Centre for Computational Chemistry False
School of Chemistry Fals

Assigned ID: 133 Recovered ID: 133
True
Article Author:  3658
****************************CR Affilitations found:***************************** 
 [(6276, 'School of Biochemistry, University of Bristol, Bristol BS8 1TD, U.K.', 3658, 2549, '2021-12-04 19:05:01.116665', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
School of Biochemistry, University of Bristol, Bristol BS8 1TD, U.K. True
*******************************verify one liners********************************
institution = 'University of Bristol' AND school = 'School of Biochemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6276, 'School of Biochemistry, University of Bristol, Bristol BS8 1TD, U.K.', 3658, 2549, '2021-12-04 19:05:01.116665', '2022-05-16 20:00:00')
Assigned ID: 132 Recovered ID: 132
True
Article Author:  3659
****************************CR Affilitations found:***************************** 
 [(6277, 'Ce

Assigned ID: 133 Recovered ID: 133
True
Article Author:  3678
****************************CR Affilitations found:***************************** 
 [(6289, 'School of Cellular and Molecular Medicine, University of Bristol, Bristol BS8 1TD, U.K.', 3678, 2552, '2021-12-04 19:08:05.887652', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
School of Cellular and Molecular Medicine, University of Bristol, Bristol BS8 1TD, U.K. True
*******************************verify one liners********************************
institution = 'University of Bristol' AND school = 'School of Cellular and Molecular Medicine' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6289, 'School of Cellular and Molecular Medicine, University of Bristol, Bristol BS8 1TD, U.K.', 3678, 2552, '2021-12-04 19:08:05.887652', '2022-05-16 20:00:00')
Assigned ID: 225 Recovered ID: 225
True
Article Author:  3679
******************

Assigned ID: 142 Recovered ID: 142
True
Article Author:  3690
****************************CR Affilitations found:***************************** 
 [(6303, 'BM28/XMaS UK CRG ESRF  38043 Grenoble France', 3690, 2915, '2021-12-04 19:09:53.086985', '2021-12-04 19:09:53.086985'), (6304, 'Oliver Lodge Laboratory Department of Physics University of Liverpool  Liverpool L69 7ZE UK', 3690, 2932, '2021-12-04 19:09:53.191435', '2021-12-04 19:09:53.191435')]
***********************Check if CR lines are one liners:************************
BM28/XMaS UK CRG ESRF  38043 Grenoble France True
Oliver Lodge Laboratory Department of Physics University of Liverpool  Liverpool L69 7ZE UK True
*******************************verify one liners********************************
institution = 'European Synchrotron Radiation Facility' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'France'
(6303, 'BM28/XMaS UK CRG ESRF  38043 Grenoble France', 3690, 2915, '2021-12-04

institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
************************** Update Author Affiliation ***************************
Update ID: 2455 with values: {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and Innovation Campus Chilton, Didcot OX11 0DE'}
Updating 2455 {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and Innovation Campus Chilton, Didcot OX11 0DE'}
{'article_author_id': 0, 'name': 'Diamond Light Source Ltd.', 'short_name': 'Diamond Light Source Ltd.', 'add_01': 'Harwell Science and Innovation Campus Chilton, Didcot OX11 0DE', 'add_02': None, 'add_03': None, 'add_04': None, 'country': 'United Kingdom', 'affiliation_id': 446, 'crea

institution = 'Queen''s University Belfast' AND school = 'School of Chemistry and Chemical Engineering' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6338, 'School of Chemistry and Chemical Engineering, Queen’s University of Belfast, University Road, Belfast BT9 5AG, Northern Ireland, United Kingdom', 3717, 2749, '2022-04-04 09:52:33.019154', '2022-05-17 20:00:00')
Assigned ID: 399 Recovered ID: 399
True
Article Author:  3718
****************************CR Affilitations found:***************************** 
 [(6339, 'School of Chemistry and Chemical Engineering, Queen’s University of Belfast, University Road, Belfast BT9 5AG, Northern Ireland, United Kingdom', 3718, 2750, '2022-04-04 09:52:33.310381', '2022-05-17 20:00:00')]
***********************Check if CR lines are one liners:************************
School of Chemistry and Chemical Engineering, Queen’s University of Belfast, University Road, Belfast BT9 5AG, Northern Ireland, Unit

institution = 'University of Salford' AND school = 'School of Science, Engineering and Environment' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
************************** Update Author Affiliation ***************************
Update ID: 2948 with values: {'institution': 'University of Salford', 'school': 'School of Science, Engineering and Environment', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Cockcroft Building, Greater Manchester M5 4WT'}
Updating 2948 {'institution': 'University of Salford', 'school': 'School of Science, Engineering and Environment', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Cockcroft Building, Greater Manchester M5 4WT'}
{'article_author_id': 0, 'name': 'School of Science, Engineering and Environment, University of Salford', 'short_name': 'University of Salford', 'add_01': 'Cockcroft Building, Greater Manchester M5

****************************CR Affilitations found:***************************** 
 [(6360, 'Department of Inorganic Chemistry, Fritz-Haber-Institut der Max-Planck Gesellschaft, Faradayweg 4-6, 14195 Berlin, Germany', 3736, 2841, '2022-04-04 09:55:29.835104', '2022-04-04 09:55:29.835104'), (6361, 'Max-Planck-Institut für Chemische Energiekonversion, Stiftstrasse 34-36, 45470 Mülheim an der Ruhr, Germany', 3736, 2956, '2022-04-04 09:55:29.921139', '2022-04-04 09:55:29.921139')]
***********************Check if CR lines are one liners:************************
Department of Inorganic Chemistry, Fritz-Haber-Institut der Max-Planck Gesellschaft, Faradayweg 4-6, 14195 Berlin, Germany True
Max-Planck-Institut für Chemische Energiekonversion, Stiftstrasse 34-36, 45470 Mülheim an der Ruhr, Germany True
*******************************verify one liners********************************
institution = 'Fritz-Haber-Institut der Max-Planck Gesellschaft' AND school IS NULL AND department = 'Department of 

****************************CR Affilitations found:***************************** 
 [(6376, 'UK Catalysis Hub, Research Complex at Harwell, STFC Rutherford Appleton Laboratory, Didcot, Oxfordshire OX11 0FA, United Kingdom', 3744, 2996, '2022-04-04 09:58:33.154033', '2022-04-04 09:58:33.154033'), (6377, 'School of Chemistry and Chemical Engineering, Queen’s University Belfast, David-Keir Building, Stranmillis Road, Belfast BT9 5AG, United Kingdom', 3744, 2847, '2022-04-04 09:58:33.223332', '2022-04-04 09:58:33.223332')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub, Research Complex at Harwell, STFC Rutherford Appleton Laboratory, Didcot, Oxfordshire OX11 0FA, United Kingdom True
School of Chemistry and Chemical Engineering, Queen’s University Belfast, David-Keir Building, Stranmillis Road, Belfast BT9 5AG, United Kingdom True
*******************************verify one liners********************************
institution = 'UK Catalysis Hu

Assigned ID: 295 Recovered ID: 295
True
Article Author:  3754
****************************CR Affilitations found:***************************** 
 [(6388, 'Energy Policy Research Group, Judge Business School, University of Cambridge, Trumpington Street, Cambridge, CB2 1AG, UK', 3754, 2598, '2022-04-04 10:00:59.054392', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Energy Policy Research Group, Judge Business School, University of Cambridge, Trumpington Street, Cambridge, CB2 1AG, UK True
*******************************verify one liners********************************
institution = 'University of Cambridge' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6388, 'Energy Policy Research Group, Judge Business School, University of Cambridge, Trumpington Street, Cambridge, CB2 1AG, UK', 3754, 2598, '2022-04-04 10:00:59.054392', '2022-05-16 20:00:00')
Assigned ID: 184 

institution = 'ISIS Neutron and Muon Source' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6402, 'ISIS Pulsed Neutron and Muon Facility, Science and Technology Facilities Council, Rutherford Appleton Laboratory, Didcot, OX11 0QX, UK', 3768, 2975, '2022-04-04 10:05:11.994338', '2022-04-04 10:05:11.994338')
Assigned ID: 71 Recovered ID: 71
True
Article Author:  3769
****************************CR Affilitations found:***************************** 
 [(6403, 'School of Chemistry, Cardiff University, Cardiff CF10 3AT, UK', 3769, 2976, '2022-04-04 10:05:12.190208', '2022-04-04 10:05:12.190208'), (6404, 'School of Chemistry, University of Leeds, Leeds LT2 9JT, UK', 3769, 2858, '2022-04-04 10:05:12.269860', '2022-04-04 10:05:12.269860')]
***********************Check if CR lines are one liners:************************
School of Chemistry, Cardiff University, Cardiff CF10 3AT, UK True
School of Chemistry, University of Leeds, 

****************************CR Affilitations found:***************************** 
 [(6417, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Oxford, OX11 0FA, UK', 3782, 2984, '2022-04-04 10:10:41.221364', '2022-04-04 10:10:41.221364'), (6418, 'Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff CF10 3AT, UK', 3782, 2866, '2022-04-04 10:10:41.279353', '2022-04-04 10:10:41.279353')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Oxford, OX11 0FA, UK True
Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff CF10 3AT, UK True
*******************************verify one liners********************************
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6417, 'UK Catalysis Hub, Research Complex at H

****************************CR Affilitations found:***************************** 
 [(6427, 'Department of Chemistry University of Oxford Chemistry Research Laboratory  12 Mansfield Rd Oxford OX1 3TA UK', 3801, 2612, '2022-04-04 10:16:43.412351', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Department of Chemistry University of Oxford Chemistry Research Laboratory  12 Mansfield Rd Oxford OX1 3TA UK True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND country = 'United Kingdom'
(6427, 'Department of Chemistry University of Oxford Chemistry Research Laboratory  12 Mansfield Rd Oxford OX1 3TA UK', 3801, 2612, '2022-04-04 10:16:43.412351', '2022-05-16 20:00:00')
Assigned ID: 217 Recovered ID: 217
True
Article Author:  3802
*******

Assigned ID: 114 Recovered ID: 114
True
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6441, 'UK Catalysis Hub Research Complex at Harwell Rutherford Appleton Laboratories  Harwell Science and Innovation Campus Harwell Didcot Oxon OX11 0FA UK', 3813, 2990, '2022-04-04 10:19:41.699635', '2022-04-04 10:19:41.699635')
Assigned ID: 67 Recovered ID: 67
True
Article Author:  3814
****************************CR Affilitations found:***************************** 
 [(6442, 'Department of Chemistry University College London  20 Gordon Street London WC1H 0AJ UK', 3814, 2870, '2022-04-04 10:19:42.071420', '2022-04-04 10:19:42.071420'), (6443, 'UK Catalysis Hub Research Complex at Harwell Rutherford Appleton Laboratories  Harwell Science and Innovation Campus Harwell Didcot Oxon OX11 0FA UK', 3814, 2991, '2022-04-04 10:19:42.143341', '2022-04-04 10:19:42.143341')]
***********************Check if CR

****************************CR Affilitations found:***************************** 
 [(6454, 'Department of Chemistry, Chemical Sciences Division, Lawrence Berkeley National Laboratory, University of California, Berkeley, Berkeley, California 94720, United States', 3822, 2873, '2022-04-04 10:21:49.857349', '2022-04-04 10:21:49.857349'), (6455, 'EaStCHEM School of Chemistry, The University of Edinburgh, Joseph Black Building, David Brewster Road, Edinburgh EH9 3FJ, United Kingdom', 3822, 2994, '2022-04-04 10:21:49.921692', '2022-04-04 10:21:49.921692')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, Chemical Sciences Division, Lawrence Berkeley National Laboratory, University of California, Berkeley, Berkeley, California 94720, United States True
EaStCHEM School of Chemistry, The University of Edinburgh, Joseph Black Building, David Brewster Road, Edinburgh EH9 3FJ, United Kingdom True
*******************************verify one lin

(6467, 'Department of Chemical Engineering and Analytical Science, University of Manchester, Oxford Road, Greater Manchester, M1 3BB, UK', 3832, 2758, '2022-04-04 10:23:22.995919', '2022-05-17 20:00:00')
Assigned ID: 235 Recovered ID: 235
True
Article Author:  3840
****************************CR Affilitations found:***************************** 
 [(6468, 'Chemistry Research Laboratory, Department of Chemistry, University of Oxford, 12 Mansfield Road, Oxford OX1 3TA, U.K.', 3840, 2627, '2022-04-04 10:25:32.650611', '2022-05-16 20:00:00')]
***********************Check if CR lines are one liners:************************
Chemistry Research Laboratory, Department of Chemistry, University of Oxford, 12 Mansfield Road, Oxford OX1 3TA, U.K. True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND

****************************CR Affilitations found:***************************** 
 [(6483, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, R92 Harwell, Oxfordshire OX11 0FA, UK', 3853, 2475, '2022-04-04 10:27:32.879215', '2022-05-09 15:05:49.921313'), (6484, 'School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff, CF10 3AT, UK', 3853, 2476, '2022-04-04 10:27:32.970874', '2022-05-09 15:05:50.066247'), (6485, 'Department of Chemistry, University College London, London WC1E 6BT, UK', 3853, 2477, '2022-04-04 10:27:33.063060', '2022-05-09 15:05:50.147692')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, R92 Harwell, Oxfordshire OX11 0FA, UK True
School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff, CF10 3AT, UK True
Department of Chemistry, University College London, London WC1E 6BT, UK True
*************

institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 114 in recoverd IDs list: [114, 67]
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country IS NULL
Assigned ID: 67 in recoverd IDs list: [114, 67]
Article Author:  3866
****************************CR Affilitations found:***************************** 
 [(6507, 'Institute for Integrated Catalysis', 3866, 3172, '2022-05-27 13:54:53.444316', '2022-05-27 13:54:53.444316'), (6508, 'Pacific Northwest National Laboratory', 3866, 3172, '2022-05-27 13:54:53.509761', '2022-05-27 13:54:53.509761'), (6509, 'Richland', 3866, 3172, '2022-05-27 13:54:53.611832', '2022-05-27 13:54:53.611832'), (6510, 'USA', 3866, 3172, '2022-05-27 13:54:53.678022', '2022-05-27 13:54:53.678022')]
***********************Check if CR lines are one liners:*******

Assigned ID: 365 in recoverd IDs list: [365]
Article Author:  3877
****************************CR Affilitations found:***************************** 
 [(6552, 'Department of Chemical Engineering', 3877, 3183, '2022-05-27 13:55:56.851248', '2022-05-27 13:55:56.851248'), (6553, 'University College London', 3877, 3183, '2022-05-27 13:55:56.925270', '2022-05-27 13:55:56.925270'), (6554, 'London WC1E 7JE', 3877, 3183, '2022-05-27 13:55:57.012029', '2022-05-27 13:55:57.012029'), (6555, 'UK', 3877, 3183, '2022-05-27 13:55:57.110393', '2022-05-27 13:55:57.110393')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering False
University College London False
London WC1E 7JE False
UK False
verify multiline affi
institution = 'University College London' AND school IS NULL AND department = 'Department of Chemical Engineering' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 111 in recoverd IDs list: [1

****************************CR Affilitations found:***************************** 
 [(6587, 'Dipartimento di Chimica and NIS Centre of Excellence', 3885, 3191, '2022-05-27 13:57:42.656876', '2022-05-27 13:57:42.656876'), (6588, 'Università di Torino', 3885, 3191, '2022-05-27 13:57:42.749785', '2022-05-27 13:57:42.749785'), (6589, '10125 Torino', 3885, 3191, '2022-05-27 13:57:42.800035', '2022-05-27 13:57:42.800035'), (6590, 'Italy', 3885, 3191, '2022-05-27 13:57:42.866235', '2022-05-27 13:57:42.866235')]
***********************Check if CR lines are one liners:************************
Dipartimento di Chimica and NIS Centre of Excellence True
Università di Torino False
10125 Torino False
Italy False
verify multiline affi
institution = 'Università di Torino' AND school IS NULL AND department = 'Dipartimento di Chimica' AND faculty IS NULL AND work_group = 'NIS Centre of Excellence' AND country = 'Italy'
Assigned ID: 367 in recoverd IDs list: [367]
Article Author:  3886
********************

(6614, 'Institut\rde Recherches sur la Catalyse et l’Environnement de Lyon, CNRS, Université Lyon 1, 2 Ave. A. Einstein, F-69626 Villeurbanne, France', 3893, 3215, '2022-05-27 14:06:23.015928', '2022-05-27 14:06:23.015928')
Assigned ID: 368 Recovered ID: 368
True
Article Author:  3894
****************************CR Affilitations found:***************************** 
 [(6615, 'State Key Laboratory of Materials-Oriented Chemical Engineering', 3894, 3216, '2022-05-27 14:09:14.715118', '2022-05-27 14:09:14.715118'), (6616, 'College of Chemical Engineering', 3894, 3216, '2022-05-27 14:09:14.770969', '2022-05-27 14:09:14.770969'), (6617, 'Nanjing Tech University', 3894, 3216, '2022-05-27 14:09:14.853894', '2022-05-27 14:09:14.853894'), (6618, 'Nanjing 210009', 3894, 3216, '2022-05-27 14:09:14.925862', '2022-05-27 14:09:14.925862'), (6619, 'China', 3894, 3216, '2022-05-27 14:09:14.962083', '2022-05-27 14:09:14.962083')]
***********************Check if CR lines are one liners:******************

****************************CR Affilitations found:***************************** 
 [(6648, 'School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff CF10 3AT, U.K.', 3902, 3256, '2022-05-27 14:12:44.402028', '2022-05-27 14:12:44.402028'), (6649, 'UK Catalysis Hub, Research Complex at Harwell, STFC Rutherford Appleton Laboratory, Didcot, Oxfordshire OX11 0FA, U.K.', 3902, 3239, '2022-05-27 14:12:44.448445', '2022-05-27 14:12:44.448445')]
***********************Check if CR lines are one liners:************************
School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff CF10 3AT, U.K. True
UK Catalysis Hub, Research Complex at Harwell, STFC Rutherford Appleton Laboratory, Didcot, Oxfordshire OX11 0FA, U.K. True
*******************************verify one liners********************************
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United 

****************************CR Affilitations found:***************************** 
 [(6664, 'UK Catalysis Hub; Oxford OX11 0FA UK', 3912, 3243, '2022-05-27 14:14:02.215216', '2022-05-27 14:14:02.215216'), (6665, 'University College London; London WC1H 0AJ UK', 3912, 3260, '2022-05-27 14:14:02.294466', '2022-05-27 14:14:02.294466')]
***********************Check if CR lines are one liners:************************
UK Catalysis Hub; Oxford OX11 0FA UK True
University College London; London WC1H 0AJ UK True
*******************************verify one liners********************************
institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6664, 'UK Catalysis Hub; Oxford OX11 0FA UK', 3912, 3243, '2022-05-27 14:14:02.215216', '2022-05-27 14:14:02.215216')
Assigned ID: 67 Recovered ID: 67
True
institution = 'University College London' AND school IS NULL AND department IS NULL AND faculty IS NULL AN

institution = 'University College London' AND school IS NULL AND department = 'Department of Chemical Engineering' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 111 in recoverd IDs list: [111]
Article Author:  3924
****************************CR Affilitations found:***************************** 
 [(6686, 'Diamond Light Source Ltd.', 3924, 3227, '2022-05-27 14:16:43.992500', '2022-05-27 14:16:43.992500'), (6687, 'Didcot OX11 0DE', 3924, 3227, '2022-05-27 14:16:44.092970', '2022-05-27 14:16:44.092970'), (6688, 'UK', 3924, 3227, '2022-05-27 14:16:44.150007', '2022-05-27 14:16:44.150007')]
***********************Check if CR lines are one liners:************************
Diamond Light Source Ltd. False
Didcot OX11 0DE False
UK False
verify multiline affi
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 446 not in recoverd IDs list:

institution = 'University College London' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
Assigned ID: 114 in recoverd IDs list: [114]
Article Author:  3931
****************************CR Affilitations found:***************************** 
 [(6718, 'Department of Chemical Engineering', 3931, 3234, '2022-05-27 14:16:46.626957', '2022-05-27 14:16:46.626957'), (6719, 'University College London', 3931, 3234, '2022-05-27 14:16:46.702690', '2022-05-27 14:16:46.702690'), (6720, 'London WC1E 7JE', 3931, 3234, '2022-05-27 14:16:46.766696', '2022-05-27 14:16:46.766696'), (6721, 'UK', 3931, 3234, '2022-05-27 14:16:46.820034', '2022-05-27 14:16:46.820034')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering False
University College London False
London WC1E 7JE False
UK False
verify multiline affi
institution = 'University College London' AND school

institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(6733, 'Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff CF10 3AT, United Kingdom', 3945, 3210, '2022-05-27 14:22:12.627349', '2022-05-27 14:22:12.627349')
Assigned ID: 8 Recovered ID: 8
True
Article Author:  3946
****************************CR Affilitations found:***************************** 
 [(6734, 'Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff CF10 3AT, United Kingdom', 3946, 3246, '2022-05-27 14:22:12.738221', '2022-05-27 14:22:12.738221'), (6735, 'now at: Clean Energy Cluster, International Iberian Nanotechnology Laboratory (INL), Av. Mestre José Veiga, 4715-330 Braga, Portugal', 3946, 3263, '2022-05-27 14:22:12.788818', '2022-05-27 14:22:12.788818')]
***********************Check if CR lines are one liners:************************

{'article_author_id': 0, 'name': 'Cardiff Catalysis Institute, Cardiff University', 'short_name': 'Cardiff University', 'add_01': 'School of Chemistry', 'add_02': 'Cardiff CF10 3AT', 'add_03': None, 'add_04': None, 'add_05': None, 'country': 'United Kingdom', 'affiliation_id': 8, 'created_at': '2024-08-06 16:37:12', 'updated_at': '2024-08-06 16:37:12'}
updating aut_affi_id: 3248 column: name value: Cardiff Catalysis Institute, Cardiff University
updating aut_affi_id: 3248 column: short_name value: Cardiff University
updating aut_affi_id: 3248 column: add_01 value: School of Chemistry
updating aut_affi_id: 3248 column: add_02 value: Cardiff CF10 3AT
updating aut_affi_id: 3248 column: add_03 value: None
updating aut_affi_id: 3248 column: add_04 value: None
updating aut_affi_id: 3248 column: add_05 value: None
updating aut_affi_id: 3248 column: country value: United Kingdom
updating aut_affi_id: 3248 column: affiliation_id value: 8
updating aut_affi_id: 3248 column: updated_at value: 2024

****************************CR Affilitations found:***************************** 
 [(6753, 'Department of Chemical Engineering The University of Manchester  Oxford Road Manchester M13 9PL UK', 3957, 3427, '2022-07-06 12:35:08.396231', '2022-07-06 12:35:08.396231'), (6754, 'Department of Chemical Pharmaceutical and Agricultural Sciences University of Ferrara  Via L. Borsari, 46 44121 Ferrara Italy', 3957, 3428, '2022-07-06 12:35:08.407394', '2022-07-06 12:35:08.407394')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering The University of Manchester  Oxford Road Manchester M13 9PL UK True
Department of Chemical Pharmaceutical and Agricultural Sciences University of Ferrara  Via L. Borsari, 46 44121 Ferrara Italy True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school IS NULL AND department = 'Department of Chemical Engineering' AND facult

institution = 'Tianjin University' AND school = 'School of Chemical Engineering and Technology' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Peoples Republic of China'
(6765, 'School of Chemical Engineering and Technology, Tianjin University, Tianjin 30072, China', 3967, 3436, '2022-07-06 12:37:13.965818', '2022-07-06 12:37:13.965818')
Assigned ID: 377 Recovered ID: 377
True
Article Author:  3968
****************************CR Affilitations found:***************************** 
 [(6766, 'School of Environmental Science and Engineering, Tianjin University, Tianjin 300072, China', 3968, 3437, '2022-07-06 12:37:14.005837', '2022-07-06 12:37:14.005837')]
***********************Check if CR lines are one liners:************************
School of Environmental Science and Engineering, Tianjin University, Tianjin 300072, China True
*******************************verify one liners********************************
institution = 'Tianjin University' AND school = 

****************************CR Affilitations found:***************************** 
 [(6777, 'Department of Chemistry, University of Manchester, Manchester M13 9PL, U.K.', 3979, 3360, '2022-07-06 12:39:13.570672', '2022-07-06 12:39:13.570672'), (6778, 'Photon Science Institute, University of Manchester, Manchester M13 9PL, U.K.', 3979, 3359, '2022-07-06 12:39:13.581707', '2022-07-06 12:39:13.581707')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, University of Manchester, Manchester M13 9PL, U.K. True
Photon Science Institute, University of Manchester, Manchester M13 9PL, U.K. True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6777, 'Department of Chemistry, University of Manchester, Manchester M13 9PL, U.K.', 3979,

****************************CR Affilitations found:***************************** 
 [(6792, 'ISIS Facility, Science and Technology Facilities Council, Rutherford Appleton Laboratory, Chilton OX11 0QX, U.K.', 3989, 3283, '2022-07-06 12:39:14.100429', '2022-07-06 12:39:14.100429')]
***********************Check if CR lines are one liners:************************
ISIS Facility, Science and Technology Facilities Council, Rutherford Appleton Laboratory, Chilton OX11 0QX, U.K. True
*******************************verify one liners********************************
institution = 'ISIS Neutron and Muon Source' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6792, 'ISIS Facility, Science and Technology Facilities Council, Rutherford Appleton Laboratory, Chilton OX11 0QX, U.K.', 3989, 3283, '2022-07-06 12:39:14.100429', '2022-07-06 12:39:14.100429')
Assigned ID: 71 Recovered ID: 71
True
Article Author:  3990
************************

Assigned ID: 291 Recovered ID: 291
True
institution = 'Max-Planck-Institut für Chemische Energiekonversion' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Germany'
(6800, 'Max-Planck-Institut für Chemische Energiekonversion, Stiftstrasse 34-36, 45470 Mülheim an der Ruhr, Germany', 3995, 3380, '2022-07-06 13:08:23.015129', '2022-07-06 13:08:23.015129')
Assigned ID: 194 Recovered ID: 194
True
Article Author:  3996
****************************CR Affilitations found:***************************** 
 [(6801, 'Diamond Light Source Ltd, Harwell Science & Innovation Campus, Didcot, Oxfordshire OX11 0DE, UK', 3996, 3381, '2022-07-06 13:08:23.058722', '2022-07-06 13:08:23.058722')]
***********************Check if CR lines are one liners:************************
Diamond Light Source Ltd, Harwell Science & Innovation Campus, Didcot, Oxfordshire OX11 0DE, UK True
*******************************verify one liners********************************
insti

(6809, 'Centre for Sustainable and Circular Technologies, University of Bath, Claverton Down, BA2 7AY, UK', 4001, 3388, '2022-07-06 13:11:36.048465', '2022-07-06 13:11:36.048465')
Assigned ID: 127 Recovered ID: 127
True
institution = 'University of Bath' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6810, 'Department of Chemistry, University of Bath, Claverton Down, BA2 7AY, UK', 4001, 3389, '2022-07-06 13:11:36.066571', '2022-07-06 13:11:36.066571')
Assigned ID: 124 Recovered ID: 124
True
Article Author:  4002
****************************CR Affilitations found:***************************** 
 [(6811, 'Centre for Sustainable and Circular Technologies, University of Bath, Claverton Down, BA2 7AY, UK', 4002, 3296, '2022-07-06 13:11:36.101011', '2022-09-26 17:18:26.714361'), (6812, 'Department of Chemistry, University of Bath, Claverton Down, BA2 7AY, UK', 4002, 3297, '2022-07-06 13:11:36.114696', '2

Assigned ID: 382 Recovered ID: 382
True
institution = 'Academy of Scientific and Innovative Research (AcSIR)' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'India'
(6825, 'Academy of Scientific and Innovative Research (AcSIR), Ghaziabad-201002, India', 4028, 3446, '2022-07-15 12:43:40.076291', '2022-07-15 12:43:40.076291')
Assigned ID: 383 Recovered ID: 383
True
Article Author:  4029
****************************CR Affilitations found:***************************** 
 [(6826, 'Catalysis & Fine Chemicals Division, CSIR-Indian Institute of Chemical Technology, Uppal Road, Hyderabad 500007, India', 4029, 3447, '2022-07-15 12:43:40.086997', '2022-07-15 12:43:40.086997'), (6827, 'Academy of Scientific and Innovative Research (AcSIR), Ghaziabad-201002, India', 4029, 3448, '2022-07-15 12:43:40.089736', '2022-07-15 12:43:40.089736')]
***********************Check if CR lines are one liners:************************
Catalysis & Fine Chemicals Divi

****************************CR Affilitations found:***************************** 
 [(6839, 'Department of Chemical Engineering, School of Engineering, The University of Manchester, Oxford Road, Manchester M13 9PL, U.K.', 4039, 3458, '2022-07-15 12:46:02.944604', '2022-07-15 12:46:02.944604')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering, School of Engineering, The University of Manchester, Oxford Road, Manchester M13 9PL, U.K. True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school = 'School of Engineering' AND department = 'Department of Chemical Engineering' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6839, 'Department of Chemical Engineering, School of Engineering, The University of Manchester, Oxford Road, Manchester M13 9PL, U.K.', 4039, 3458, '2022-07-15 12:46:02.944604', '2022-07-15 12:46:02.94

institution = 'University of Bath' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Centre for Sustainable and Circular Technologies' AND country = 'United Kingdom'
(6854, 'Department of Chemistry University of Bath Centre for Sustainable and Circular Technologies  Claverton Down, Bath BA2 7AY UK', 4050, 3402, '2022-07-15 12:48:36.286346', '2022-07-15 12:48:36.286346')
Assigned ID: 251 Recovered ID: 251
True
Article Author:  4051
****************************CR Affilitations found:***************************** 
 [(6855, 'Department of Chemistry University of Oxford Chemistry Research Laboratory  12 Mansfield Rd Oxford OX1 3TA UK', 4051, 3403, '2022-07-15 12:48:36.295086', '2022-07-15 12:48:36.295086')]
***********************Check if CR lines are one liners:************************
Department of Chemistry University of Oxford Chemistry Research Laboratory  12 Mansfield Rd Oxford OX1 3TA UK True
*******************************verify one 

Assigned ID: 88 Recovered ID: 88
True
Article Author:  4082
****************************CR Affilitations found:***************************** 
 [(6871, 'Department of Materials, University of Manchester, Oxford Road, Manchester, M13 9PL, UK', 4082, 3417, '2022-08-02 14:20:22.283455', '2022-08-02 14:20:22.283455')]
***********************Check if CR lines are one liners:************************
Department of Materials, University of Manchester, Oxford Road, Manchester, M13 9PL, UK True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school IS NULL AND department = 'Department of Materials' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6871, 'Department of Materials, University of Manchester, Oxford Road, Manchester, M13 9PL, UK', 4082, 3417, '2022-08-02 14:20:22.283455', '2022-08-02 14:20:22.283455')
Assigned ID: 285 Recovered ID: 285
True
Article Author:  4083
*****************

****************************CR Affilitations found:***************************** 
 [(6885, "School of Maths and Physics, Queen's University Belfast, David-Keir Building, Stranmillis Road, Belfast BT9 5AG, UK", 4102, 3426, '2022-08-02 14:22:36.336917', '2022-08-02 14:22:36.336917')]
***********************Check if CR lines are one liners:************************
School of Maths and Physics, Queen's University Belfast, David-Keir Building, Stranmillis Road, Belfast BT9 5AG, UK True
*******************************verify one liners********************************
institution = 'Queen''s University Belfast' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6885, "School of Maths and Physics, Queen's University Belfast, David-Keir Building, Stranmillis Road, Belfast BT9 5AG, UK", 4102, 3426, '2022-08-02 14:22:36.336917', '2022-08-02 14:22:36.336917')
Assigned ID: 241 Recovered ID: 241
True
Article Author:  4103
**************

Assigned ID: 401 Recovered ID: 401
True
Article Author:  4121
****************************CR Affilitations found:***************************** 
 [(6899, 'Chemistry Research Laboratory, University of Oxford, 12 Mansfield Road, Oxford OX1 3TA, U.K.', 4121, 3751, '2022-10-16 06:22:49.468897', '2022-10-16 06:22:49.468897')]
***********************Check if CR lines are one liners:************************
Chemistry Research Laboratory, University of Oxford, 12 Mansfield Road, Oxford OX1 3TA, U.K. True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND country = 'United Kingdom'
(6899, 'Chemistry Research Laboratory, University of Oxford, 12 Mansfield Road, Oxford OX1 3TA, U.K.', 4121, 3751, '2022-10-16 06:22:49.468897', '2022-10-16 06:22:49.468897')
Assigned ID: 217 Recovered ID: 217
True
Article Author:  4130
**

institution = 'Research Complex at Harwell' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6912, 'Research Complex at Harwell (RCaH), Harwell, Didcot OX11 0FA, Oxfordshire, U.K.', 4138, 3477, '2022-10-16 06:25:49.221835', '2022-10-16 06:25:49.221835')
Assigned ID: 373 Recovered ID: 373
True
Article Author:  4139
****************************CR Affilitations found:***************************** 
 [(6913, 'Diamond Light Source, Harwell, Didcot OX11 0DE, Oxfordshire, U.K.', 4139, 3478, '2022-10-16 06:25:49.252540', '2022-10-16 06:25:49.252540')]
***********************Check if CR lines are one liners:************************
Diamond Light Source, Harwell, Didcot OX11 0DE, Oxfordshire, U.K. True
*******************************verify one liners********************************
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United K

institution IS NULL AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6928, 'Nanotechnology and Integrated Bio‐Engineering Centre (NIBEC) Ulster University  Newtownabbey BT37 0QB UK', 4156, 3490, '2022-11-01 13:43:35.140975', '2022-11-01 13:43:35.140975')
Assigned ID: 5 Recovered ID: 5
True
Article Author:  4157
****************************CR Affilitations found:***************************** 
 [(6929, 'School of Engineering Newcastle University  Newcastle upon Tyne NE1 7RU UK', 4157, 4614, '2022-11-01 13:43:35.174385', '2022-11-01 13:43:35.174385')]
***********************Check if CR lines are one liners:************************
School of Engineering Newcastle University  Newcastle upon Tyne NE1 7RU UK True
*******************************verify one liners********************************
institution = 'Newcastle University' AND school = 'School of Engineering ' AND department IS NULL AND faculty IS NULL AND work_group IS

****************************CR Affilitations found:***************************** 
 [(6942, 'Department of Energy Science and Engineering, Daegu Gyeongbuk Institute of Science and Technology (DGIST), 333 Techno Jungang-daero, Hyeonpung-eup, Dalseong-gun, Daegu42988, Republic of Korea', 4170, 3768, '2022-11-01 13:52:56.794228', '2022-11-01 13:52:56.794228'), (6943, 'Catalysis & Fine Chemicals Division, CSIR-Indian Institute of Chemical Technology, Uppal Road, Hyderabad 500007, India', 4170, 3769, '2022-11-01 13:52:56.802909', '2022-11-01 13:52:56.802909'), (6944, 'Academy of Scientific and Innovative Research (AcSIR), Ghaziabad-201002, India', 4170, 3770, '2022-11-01 13:52:56.809207', '2022-11-01 13:52:56.809207')]
***********************Check if CR lines are one liners:************************
Department of Energy Science and Engineering, Daegu Gyeongbuk Institute of Science and Technology (DGIST), 333 Techno Jungang-daero, Hyeonpung-eup, Dalseong-gun, Daegu42988, Republic of Korea True

****************************CR Affilitations found:***************************** 
 [(6956, 'Max Planck-Cardiff Centre on the Fundamentals of Heterogeneous Catalysis FUNCAT, Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff, CF10 3AT, UK', 4180, 3780, '2022-11-01 13:54:43.523877', '2022-11-01 13:54:43.523877')]
***********************Check if CR lines are one liners:************************
Max Planck-Cardiff Centre on the Fundamentals of Heterogeneous Catalysis FUNCAT, Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff, CF10 3AT, UK True
*******************************verify one liners********************************
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(6956, 'Max Planck-Cardiff Centre on the Fundamentals of Heterogeneous Cata

Assigned ID: 8 Recovered ID: 8
True
Article Author:  4187
****************************CR Affilitations found:***************************** 
 [(6963, 'Max Planck-Cardiff Centre on the Fundamentals of Heterogeneous Catalysis FUNCAT, Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff, CF10 3AT, UK', 4187, 3787, '2022-11-01 13:54:43.789675', '2022-11-01 13:54:43.789675')]
***********************Check if CR lines are one liners:************************
Max Planck-Cardiff Centre on the Fundamentals of Heterogeneous Catalysis FUNCAT, Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff, CF10 3AT, UK True
*******************************verify one liners********************************
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(6963, 'Max Planc

institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Inorganic Chemistry Laboratory' AND country = 'United Kingdom'
(6974, 'Inorganic Chemistry Laboratory, Department of Chemistry, University of Oxford, South Parks Road, Oxford, OX1 3QR, UK', 4198, 3796, '2022-11-01 13:58:15.348900', '2022-11-01 13:58:15.348900')
Assigned ID: 159 Recovered ID: 159
True
Article Author:  4199
****************************CR Affilitations found:***************************** 
 [(6975, 'Department of Chemistry, University of Helsinki, PO Box 55, 00014, Helsinki, Finland', 4199, 3797, '2022-11-01 13:58:15.374102', '2022-11-01 13:58:15.374102')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, University of Helsinki, PO Box 55, 00014, Helsinki, Finland True
*******************************verify one liners********************************
institution IS NULL AND school IS N

Assigned ID: 112 Recovered ID: 112
True
Article Author:  4229
****************************CR Affilitations found:***************************** 
 [(6988, 'Scientific Computing Department, STFC Daresbury Laboratory, Keckwick Lane, Daresbury, WarringtonWA4 4AD, U.K.', 4229, 3808, '2023-01-13 10:48:54.344011', '2023-01-13 10:48:54.344011')]
***********************Check if CR lines are one liners:************************
Scientific Computing Department, STFC Daresbury Laboratory, Keckwick Lane, Daresbury, WarringtonWA4 4AD, U.K. True
*******************************verify one liners********************************
institution = 'Science and Technology Facilities Council' AND school IS NULL AND department = 'Scientific Computing Department' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(6988, 'Scientific Computing Department, STFC Daresbury Laboratory, Keckwick Lane, Daresbury, WarringtonWA4 4AD, U.K.', 4229, 3808, '2023-01-13 10:48:54.344011', '2023-01-13 10:48:54

****************************CR Affilitations found:***************************** 
 [(7003, 'School of Environmental Science and Engineering, Guangdong Provincial Key Laboratory of Environmental Pollution Control and Remediation Technology, Sun Yat-sen University, Guangzhou 510275, China', 4239, 3505, '2023-01-13 10:50:12.217563', '2023-01-13 10:50:12.217563')]
***********************Check if CR lines are one liners:************************
School of Environmental Science and Engineering, Guangdong Provincial Key Laboratory of Environmental Pollution Control and Remediation Technology, Sun Yat-sen University, Guangzhou 510275, China True
*******************************verify one liners********************************
institution = 'Sun Yat-sen University' AND school = 'School of Environmental Science and Engineering' AND department IS NULL AND faculty IS NULL AND work_group = 'Guangdong Provincial Key Laboratory of Environmental Pollution Control and Remediation Technology' AND country 

****************************CR Affilitations found:***************************** 
 [(7010, 'School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff, CF10 3AT, UK', 4245, 3511, '2023-01-13 10:50:12.372034', '2023-01-13 10:50:12.372034'), (7011, 'UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell, OX11 0FA, UK', 4245, 3512, '2023-01-13 10:50:12.381149', '2023-01-13 10:50:12.381149'), (7012, 'Department of Chemistry, University College London, 20 Gordon Street, London, WC1H 0AJ, UK', 4245, 3513, '2023-01-13 10:50:12.389367', '2023-01-13 10:50:12.389367')]
***********************Check if CR lines are one liners:************************
School of Chemistry, Cardiff University, Main Building, Park Place, Cardiff, CF10 3AT, UK True
UK Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, Harwell, OX11 0FA, UK True
Department of Chemistry, University College London, 20 Gordon Street, London, WC1H 0AJ, UK True
*******

Assigned ID: 316 Recovered ID: 316
True
Article Author:  4254
****************************CR Affilitations found:***************************** 
 [(7022, 'LPCNO, Université de Toulouse, 135 Avenue de Rangueil, 31077 Toulouse, France', 4254, 3814, '2023-01-13 11:37:52.799040', '2023-01-13 11:37:52.799040')]
***********************Check if CR lines are one liners:************************
LPCNO, Université de Toulouse, 135 Avenue de Rangueil, 31077 Toulouse, France True
*******************************verify one liners********************************
institution = 'Université de Toulouse' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'France'
(7022, 'LPCNO, Université de Toulouse, 135 Avenue de Rangueil, 31077 Toulouse, France', 4254, 3814, '2023-01-13 11:37:52.799040', '2023-01-13 11:37:52.799040')
Assigned ID: 310 Recovered ID: 310
True
Article Author:  4255
****************************CR Affilitations found:****************************

****************************CR Affilitations found:***************************** 
 [(7033, 'School of Chemistry, Cardiff University, Cardiff, CF10 3AT, UK', 4278, 3821, '2023-02-10 10:17:39.029576', '2023-02-10 10:17:39.029576'), (7034, 'UK Catalysis Hub, Research Complex at Harwell, STFC Rutherford Appleton Laboratory, Didcot, Oxfordshire, OX11 0FA, UK', 4278, 3822, '2023-02-10 10:17:39.039650', '2023-02-10 10:17:39.039650')]
***********************Check if CR lines are one liners:************************
School of Chemistry, Cardiff University, Cardiff, CF10 3AT, UK True
UK Catalysis Hub, Research Complex at Harwell, STFC Rutherford Appleton Laboratory, Didcot, Oxfordshire, OX11 0FA, UK True
*******************************verify one liners********************************
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7033, 'School of Chemistry, Cardiff University, 

(7048, 'Institute of Science and Technology for Ceramics (ISTEC) of the National Research Council (CNR), Via Granarolo 64, I-48018 Faenza (RA), Italy', 4286, 3831, '2023-02-10 10:18:57.723022', '2023-02-10 10:18:57.723022')
Assigned ID: 34 Recovered ID: 34
True
Article Author:  4287
****************************CR Affilitations found:***************************** 
 [(7049, 'Diamond Light Source, Harwell Science and Innovation Campus, Chilton, Didcot, OX11 0DE, UK', 4287, 3832, '2023-02-10 10:18:57.747649', '2023-02-10 10:18:57.747649')]
***********************Check if CR lines are one liners:************************
Diamond Light Source, Harwell Science and Innovation Campus, Chilton, Didcot, OX11 0DE, UK True
*******************************verify one liners********************************
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7049, 'Diamond Light Source, Harwell Scie

Assigned ID: 193 Recovered ID: 193
True
Article Author:  4297
****************************CR Affilitations found:***************************** 
 [(7060, 'School of Electrical Engineering and Automation Wuhan University Wuhan China', 4297, 3528, '2023-02-24 13:02:59.140887', '2023-02-24 13:02:59.140887')]
***********************Check if CR lines are one liners:************************
School of Electrical Engineering and Automation Wuhan University Wuhan China True
*******************************verify one liners********************************
institution IS NULL AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Peoples Republic of China'
(7060, 'School of Electrical Engineering and Automation Wuhan University Wuhan China', 4297, 3528, '2023-02-24 13:02:59.140887', '2023-02-24 13:02:59.140887')
Assigned ID: 6 Recovered ID: 6
True
Article Author:  4298
****************************CR Affilitations found:***************************** 
 [(7

institution = 'Kyoto University' AND school = 'Graduate School of Engineering' AND department = 'Department of Molecular Engineering' AND faculty IS NULL AND work_group IS NULL AND country = 'Japan'
(7068, 'Department of Molecular Engineering Graduate School of Engineering Kyoto University Kyotodaigaku Katsura Nishikyo-ku Kyoto 6158510 Japan', 4305, 4225, '2023-02-24 13:02:59.429711', '2023-02-24 13:02:59.429711')
Assigned ID: 414 Recovered ID: 414
True
Article Author:  4306
****************************CR Affilitations found:***************************** 
 [(7069, 'Department of Materials Science and Engineering National University of Singapore Singapore 117575 Singapore', 4306, 3843, '2023-02-24 13:02:59.454932', '2023-02-24 13:02:59.454932')]
***********************Check if CR lines are one liners:************************
Department of Materials Science and Engineering National University of Singapore Singapore 117575 Singapore True
*******************************verify one liners***

****************************CR Affilitations found:***************************** 
 [(7083, "Shenyang National Laboratory for Materials Science, Institute of Metal Research, Chinese Academy of Sciences, 72 Wenhua Road, 110016 Liaoning, People's Republic of China", 4332, 3534, '2023-02-24 13:03:23.819726', '2023-02-24 13:03:23.819726')]
***********************Check if CR lines are one liners:************************
Shenyang National Laboratory for Materials Science, Institute of Metal Research, Chinese Academy of Sciences, 72 Wenhua Road, 110016 Liaoning, People's Republic of China True
*******************************verify one liners********************************
institution = 'Institute of Metal Research' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group = 'Shenyang National Laboratory for Materials Science' AND country = 'Peoples Republic of China'
(7083, "Shenyang National Laboratory for Materials Science, Institute of Metal Research, Chinese Academy of 

updating aut_affi_id: 3858 column: short_name value: The University of Manchester
updating aut_affi_id: 3858 column: add_01 value: at Harwell, Diamond Light Source, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0DE
updating aut_affi_id: 3858 column: add_02 value: None
updating aut_affi_id: 3858 column: add_03 value: None
updating aut_affi_id: 3858 column: add_04 value: None
updating aut_affi_id: 3858 column: country value: United Kingdom
updating aut_affi_id: 3858 column: affiliation_id value: 88
updating aut_affi_id: 3858 column: updated_at value: 2024-08-06 16:37:23
processing [{'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0DE'}, [7094]]
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
*******

****************************CR Affilitations found:***************************** 
 [(7105, 'Department of Chemical Engineering and Analytical Science (CEAS), The University of Manchester, M13 9PL, Manchester, UK', 4374, 3866, '2023-02-24 13:08:39.331244', '2023-02-24 13:08:39.331244'), (7106, 'Diamond Light Source, Harwell Science and Innovation Campus, OX11 0DE, Didcot, Oxfordshire, UK', 4374, 3867, '2023-02-24 13:08:39.338969', '2023-02-24 13:08:39.338969'), (7107, 'The University of Manchester at Harwell, Harwell Science and Innovation Campus, OX11 0DE, Didcot, Oxfordshire, UK', 4374, 3868, '2023-02-24 13:08:39.345744', '2023-02-24 13:08:39.345744'), (7108, 'Catalysis Hub, Research Complex at Harwell, Rutherford Appleton Laboratory, OX11 0FA, Harwell, Oxfordshire, UK', 4374, 3869, '2023-02-24 13:08:39.353315', '2023-02-24 13:08:39.353315')]
***********************Check if CR lines are one liners:************************
Department of Chemical Engineering and Analytical Science (CEAS

************************** Update Author Affiliation ***************************
Update ID: 3869 with values: {'institution': 'Research Complex at Harwell', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Catalysis Hub Rutherford Appleton Laboratory, OX11 0FA, Harwell, Oxfordshire'}
Updating 3869 {'institution': 'Research Complex at Harwell', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Catalysis Hub Rutherford Appleton Laboratory, OX11 0FA, Harwell, Oxfordshire'}
{'article_author_id': 0, 'name': 'School of Chemistry, Research Complex at Harwell', 'short_name': 'Research Complex at Harwell', 'add_01': 'Catalysis Hub Rutherford Appleton Laboratory, OX11 0FA, Harwell, Oxfordshire', 'add_02': None, 'add_03': None, 'add_04': None, 'country': 'United Kingdom', 'affiliation_id': 373, 'created_at': '2024-08-06 16:37:24', 'updated_at': '2024-08-06 16:37:24'}
updating aut_affi_

****************************CR Affilitations found:***************************** 
 [(7121, 'ISIS Facility, STFC Rutherford Appleton Laboratory, ChiltonOX11 0QX, Oxon, U.K.', 4390, 3880, '2023-02-24 13:08:53.591677', '2023-02-24 13:08:53.591677')]
***********************Check if CR lines are one liners:************************
ISIS Facility, STFC Rutherford Appleton Laboratory, ChiltonOX11 0QX, Oxon, U.K. True
*******************************verify one liners********************************
institution = 'ISIS Neutron and Muon Source' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7121, 'ISIS Facility, STFC Rutherford Appleton Laboratory, ChiltonOX11 0QX, Oxon, U.K.', 4390, 3880, '2023-02-24 13:08:53.591677', '2023-02-24 13:08:53.591677')
Assigned ID: 71 Recovered ID: 71
True
Article Author:  4391
****************************CR Affilitations found:***************************** 
 [(7122, 'ISIS Facility, STFC Rutherford 

****************************CR Affilitations found:***************************** 
 [(7136, 'Department of Materials, University of Manchester, Oxford Road, Manchester M13 9PL, U.K.', 4403, 3555, '2023-02-24 13:09:00.688522', '2023-02-24 13:09:00.688522')]
***********************Check if CR lines are one liners:************************
Department of Materials, University of Manchester, Oxford Road, Manchester M13 9PL, U.K. True
*******************************verify one liners********************************
institution = 'The University of Manchester' AND school IS NULL AND department = 'Department of Materials' AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7136, 'Department of Materials, University of Manchester, Oxford Road, Manchester M13 9PL, U.K.', 4403, 3555, '2023-02-24 13:09:00.688522', '2023-02-24 13:09:00.688522')
Assigned ID: 285 Recovered ID: 285
True
Article Author:  4422
****************************CR Affilitations found:***********************

institution = 'Beijing University of Chemical Technology' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Peoples Republic of China'
(7148, 'Beijing Engineering Center for Hierarchical Catalysts, Beijing University of Chemical Technology, Beijing 100029, China', 4452, 3562, '2023-02-24 13:11:16.842942', '2023-02-24 13:11:16.842942')
Assigned ID: 7 Recovered ID: 7
True
Article Author:  4453
****************************CR Affilitations found:***************************** 
 [(7149, 'State Key Laboratory of Chemical Engineering, Beijing University of Chemical Technology, Beijing 100029, China', 4453, 3563, '2023-02-24 13:11:16.872674', '2023-02-24 13:11:16.872674'), (7150, 'Beijing Engineering Center for Hierarchical Catalysts, Beijing University of Chemical Technology, Beijing 100029, China', 4453, 3564, '2023-02-24 13:11:16.880187', '2023-02-24 13:11:16.880187')]
***********************Check if CR lines are one liners:******************

institution = 'UK Catalysis Hub' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7167, 'UK Catalysis Hub, Research Complex at Harwell, Didcot OX11 0FA, U.K.', 4529, 3578, '2023-02-24 13:25:16.078960', '2023-02-24 13:25:16.078960')
Assigned ID: 67 Recovered ID: 67
True
institution = 'Cardiff University' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group = 'Cardiff Catalysis Institute' AND country = 'United Kingdom'
(7168, 'Cardiff Catalysis Institute, School of Chemistry, Cardiff University, Cardiff CF10 3AT, U.K.', 4529, 3579, '2023-02-24 13:25:16.085718', '2023-02-24 13:25:16.085718')
Assigned ID: 8 Recovered ID: 8
True
Article Author:  4530
****************************CR Affilitations found:***************************** 
 [(7169, 'Department of Materials, School of Natural Sciences, The University of Manchester, Oxford Road, Manchester M13 9PL, U.K.', 4530, 4571, '2023-02-24

Article author affiliations: 1 [3898]
Parsed article author affiliations: 1
processing [{'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science & Innovation Campus, Didcot, Oxfordshire OX11 0DE'}, [7194]]
institution = 'Diamond Light Source Ltd.' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
************************** Update Author Affiliation ***************************
Update ID: 3898 with values: {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science & Innovation Campus, Didcot, Oxfordshire OX11 0DE'}
Updating 3898 {'institution': 'Diamond Light Source Ltd.', 'school': '', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'Harwell Science & Innovation Cam

****************************CR Affilitations found:***************************** 
 [(7204, 'Department of ChiBioFarAm, ERIC aisbl and CASPE/INSTM, University of Messina, V. le F.Stagno D’ Alcontres 31, 98166 Messina, Italy', 4577, 3906, '2023-05-05 11:55:10.628673', '2023-05-05 11:55:10.628673')]
***********************Check if CR lines are one liners:************************
Department of ChiBioFarAm, ERIC aisbl and CASPE/INSTM, University of Messina, V. le F.Stagno D’ Alcontres 31, 98166 Messina, Italy True
*******************************verify one liners********************************
institution = 'University of Messina' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'Italy'
(7204, 'Department of ChiBioFarAm, ERIC aisbl and CASPE/INSTM, University of Messina, V. le F.Stagno D’ Alcontres 31, 98166 Messina, Italy', 4577, 3906, '2023-05-05 11:55:10.628673', '2023-05-05 11:55:10.628673')
Assigned ID: 292 Recovered ID: 292
True
Articl

****************************CR Affilitations found:***************************** 
 [(7526, 'School of Chemistry University of Glasgow  Glasgow G12 8QQ UK', 4871, 4100, '2023-11-09 10:28:33.958578', '2023-11-09 10:28:33.958578')]
***********************Check if CR lines are one liners:************************
School of Chemistry University of Glasgow  Glasgow G12 8QQ UK True
*******************************verify one liners********************************
institution = 'University of Glasgow' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7526, 'School of Chemistry University of Glasgow  Glasgow G12 8QQ UK', 4871, 4100, '2023-11-09 10:28:33.958578', '2023-11-09 10:28:33.958578')
Assigned ID: 142 Recovered ID: 142
True
Article Author:  4872
****************************CR Affilitations found:***************************** 
 [(7527, 'School of Chemistry University of Southampton  Southampton SO17 1BJ UK', 4

Assigned ID: 114 Recovered ID: 114
True
institution = 'Research Complex at Harwell' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7801, 'Research Complex at Harwell (RCaH), Harwell, Didcot, Oxfordshire OX11 0FA, U.K.', 5161, 4353, '2023-12-18 10:11:17.854842', '2023-12-18 10:11:17.854842')
Assigned ID: 373 Recovered ID: 373
True
Article Author:  5162
****************************CR Affilitations found:***************************** 
 [(7802, 'Department of Chemistry, University College London, 20 Gordon Street, London WC1H 0AJ, U.K.', 5162, 4354, '2023-12-18 10:11:17.892424', '2023-12-18 10:11:17.892424'), (7803, 'Research Complex at Harwell (RCaH), Harwell, Didcot, Oxfordshire OX11 0FA, U.K.', 5162, 4355, '2023-12-18 10:11:17.905132', '2023-12-18 10:11:17.905132')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, University College London, 20 Gordon Street, Lon

****************************CR Affilitations found:***************************** 
 [(7822, 'School of Chemistry University of Glasgow  Glasgow G12 8QQ UK', 5177, 4374, '2023-12-18 10:14:22.373298', '2023-12-18 10:14:22.373298')]
***********************Check if CR lines are one liners:************************
School of Chemistry University of Glasgow  Glasgow G12 8QQ UK True
*******************************verify one liners********************************
institution = 'University of Glasgow' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7822, 'School of Chemistry University of Glasgow  Glasgow G12 8QQ UK', 5177, 4374, '2023-12-18 10:14:22.373298', '2023-12-18 10:14:22.373298')
Assigned ID: 142 Recovered ID: 142
True
Article Author:  5178
****************************CR Affilitations found:***************************** 
 [(7823, 'School of Chemistry University of Glasgow  Glasgow G12 8QQ UK', 5178, 4375

(7836, 'Centre for Glycoscience Keele University  Keele Staffordshire ST5 5BG UK', 5185, 4388, '2023-12-18 10:15:57.730567', '2023-12-18 10:15:57.730567')
Assigned ID: 5 Recovered ID: 5
True
Article Author:  5186
****************************CR Affilitations found:***************************** 
 [(7837, 'Croda Europe Ltd. Croda Europe Ltd.  Cowick Hall, Snaith Goole DN14 9AA UK', 5186, 4389, '2023-12-18 10:15:57.784228', '2023-12-18 10:15:57.784228')]
***********************Check if CR lines are one liners:************************
Croda Europe Ltd. Croda Europe Ltd.  Cowick Hall, Snaith Goole DN14 9AA UK True
*******************************verify one liners********************************
institution IS NULL AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7837, 'Croda Europe Ltd. Croda Europe Ltd.  Cowick Hall, Snaith Goole DN14 9AA UK', 5186, 4389, '2023-12-18 10:15:57.784228', '2023-12-18 10:15:57.784228')
Assigned I

****************************CR Affilitations found:***************************** 
 [(7849, 'Johnson Matthey Technology Centre, Sonning Common, Reading RG4 9NH, UK', 5200, 4401, '2023-12-18 10:18:05.886467', '2023-12-18 10:18:05.886467')]
***********************Check if CR lines are one liners:************************
Johnson Matthey Technology Centre, Sonning Common, Reading RG4 9NH, UK True
*******************************verify one liners********************************
institution = 'Johnson Matthey Technology Centre' AND school IS NULL AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7849, 'Johnson Matthey Technology Centre, Sonning Common, Reading RG4 9NH, UK', 5200, 4401, '2023-12-18 10:18:05.886467', '2023-12-18 10:18:05.886467')
Assigned ID: 37 Recovered ID: 37
True
Article Author:  5201
****************************CR Affilitations found:***************************** 
 [(7850, 'Centre for High Resolution Transmission Electron Micr

institution = 'University of Southampton' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
************************** Update Author Affiliation ***************************
Update ID: 4406 with values: {'institution': 'University of Southampton', 'school': 'School of Chemistry', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'University Road, Southampton SO17 1BJ'}
Updating 4406 {'institution': 'University of Southampton', 'school': 'School of Chemistry', 'department': '', 'faculty': '', 'work_group': '', 'country': 'United Kingdom', 'address': 'University Road, Southampton SO17 1BJ'}
{'article_author_id': 0, 'name': 'School of Chemistry, University of Southampton', 'short_name': 'University of Southampton', 'add_01': 'University Road, Southampton SO17 1BJ', 'add_02': None, 'add_03': None, 'add_04': None, 'country': 'United Kingdom', 'affiliation_id': 166, 'crea

****************************CR Affilitations found:***************************** 
 [(7867, 'School of Chemistry, University of Nottingham, NG7 2RD, Nottingham, UK', 5214, 4418, '2023-12-18 10:19:30.831688', '2023-12-18 10:19:30.831688')]
***********************Check if CR lines are one liners:************************
School of Chemistry, University of Nottingham, NG7 2RD, Nottingham, UK True
*******************************verify one liners********************************
institution = 'University of Nottingham' AND school = 'School of Chemistry' AND department IS NULL AND faculty IS NULL AND work_group IS NULL AND country = 'United Kingdom'
(7867, 'School of Chemistry, University of Nottingham, NG7 2RD, Nottingham, UK', 5214, 4418, '2023-12-18 10:19:30.831688', '2023-12-18 10:19:30.831688')
Assigned ID: 154 Recovered ID: 154
True
Article Author:  5215
****************************CR Affilitations found:***************************** 
 [(7868, 'Department of Interface Chemistry and Surfac

(7915, 'The Faraday Institution, Quad One, Harwell Science and Innovation Campus, Didcot, OX11 0RA, UK', 5281, 4463, '2024-02-21 22:44:41.430609', '2024-02-21 22:44:41.430609')
Assigned ID: 5 Recovered ID: 5
True
Article Author:  5282
****************************CR Affilitations found:***************************** 
 [(7916, 'Department of Chemistry, University of Oxford, Chemistry Research Laboratory, 12 Mansfield Road, Oxford, OX1 3TA, UK', 5282, 4464, '2024-02-21 22:44:41.454636', '2024-02-21 22:44:41.454636')]
***********************Check if CR lines are one liners:************************
Department of Chemistry, University of Oxford, Chemistry Research Laboratory, 12 Mansfield Road, Oxford, OX1 3TA, UK True
*******************************verify one liners********************************
institution = 'University of Oxford' AND school IS NULL AND department = 'Department of Chemistry' AND faculty IS NULL AND work_group = 'Chemistry Research Laboratory' AND country = 'United Kingdom

In [66]:
already_ok.sort()
print("OK:", len(already_ok))
print("Last 50 OK:", already_ok[-50:])
not_ok_list = list(set(list_art_aut_ids)-set(already_ok))
not_ok_list.sort()
print("Not OK:", len(not_ok_list))
print(not_ok_list)

OK: 9052
Last 50 OK: [13455, 13456, 13457, 13458, 13459, 13460, 13461, 13462, 13463, 13464, 13465, 13466, 13467, 13468, 13469, 13470, 13471, 13472, 13473, 13474, 13475, 13476, 13477, 13478, 13479, 13480, 13481, 13482, 13519, 13520, 13521, 13555, 13556, 13571, 13572, 13573, 13574, 13575, 13661, 13662, 13663, 13664, 13665, 13680, 13681, 13682, 13683, 13684, 13685, 13686]
Not OK: 62
[576, 577, 589, 591, 644, 655, 781, 783, 788, 790, 855, 919, 923, 924, 929, 931, 934, 948, 949, 1604, 1635, 1636, 1667, 1683, 1829, 1936, 1977, 1978, 1992, 2038, 2044, 2085, 3309, 3310, 3311, 3312, 3313, 3323, 3502, 3504, 3505, 3784, 3785, 3817, 3869, 3922, 3924, 3929, 3990, 3996, 4000, 4139, 4140, 4287, 4302, 4303, 4567, 4570, 4572, 4573, 5163, 5164]


In [67]:
already_ok.sort()
print("OK:", len(already_ok))
print(already_ok)
save_ok_list(already_ok, 'ok_affi_list.txt')


OK: 9052
[1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 2

In [65]:
def correct_oneline_1(db_name, cr_parser, cr_affi):
    # get a list of parsed affis with the ids of the corresponding cr_records
    parsed_affi = cr_parser.parse_and_map_single(cr_affi)
    print("Single Line Parsed:", parsed_affi)
    # all belong to same article author
    art_author_id = cr_affi[2]
    
    print ("verifying affiliations for article author", art_author_id)
    
    art_auth_affis = get_auth_affi_id_for_author(db_name, art_author_id)
    
    print ("Article author affiliations:", len(art_auth_affis), art_auth_affis )
    
    
    print('processing', parsed_affi[0])
    affi_vals = parsed_affi[0][0]
    cr_affi_ids = parsed_affi[0][1]
    correct_this = 0
    # strip all values
    for a_val in affi_vals:
        affi_vals[a_val] = affi_vals[a_val].strip()
    
    affi_id = get_affiliation_id(db_name, affi_vals)
    print ("recovered affi ID", affi_id)
    if affi_id == None:
        parsed_no_blanks = {k:v for k,v in affi_vals.items() if v != ''}
        affi_id = get_close_affiliation_id(db_name, parsed_no_blanks)
    if correct_this != 0:
        # the affiliation does not exist but something was assigned to author affi
        if affi_id == None:
            print('{0:*^80}'.format('Affi does not exist'))
            print(affi_vals)
            affi_id = add_new_affiliation(db_name, affi_vals)
        # if the affiliation exists    
        if affi_id != None:
            print('{0:*^80}'.format(' Update Author Affiliation '))
            print('Update ID:', correct_this, 'with values:', affi_vals )
            # verify that country is not empty 
            update_author_affiliation(db_name, correct_this, affi_id, affi_vals)
            update_cr_aai(db_name, cr_affi_ids[0], correct_this)


    else:
        if affi_id != None :
            print("Add author affiliation for author: ", art_author_id, 'with affi:', affi_vals) 
            new_affi_id = add_author_affiliation(db_name, art_author_id, affi_id, affi_vals)
            #update cr_affis (assign author_affi_id)
            for cr_id in cr_affi_ids:
                update_cr_aai(db_name, cr_id, new_affi_id)
                    
import craffiparser
import importlib
importlib.reload(craffiparser)

affi_parser = get_parser(ukchapp_db)

for an_aai in notebook.tqdm(not_ok_list):
    cr_lines = get_cr_lines_for_article_author_ids(ukchapp_db, an_aai)
    for a_line in cr_lines:
        if a_line[3] == None:
            print(a_line)
            parsed_line = affi_parser.parse_and_map_single(a_line)
            print (parsed_line)
            assigned_ok = check_assigned_affi_ol(ukchapp_db, affi_parser, a_line)
            print("Need to correct ", a_line)
            print("*"*80)
            correct_oneline_1(ukchapp_db, affi_parser, a_line)
            
    #break

Refreshing lists


  0%|          | 0/35 [00:00<?, ?it/s]

## CR_AFFI processing
Check if cr_affis can be assigned or parsed

In [68]:
def get_not_parsed_cr_lines(db_name):
    db_conn = dbh.DataBaseAdapter(db_name)
    s_table = 'cr_affiliations'
    s_fields = '*'
    s_where = "author_affiliation_id IS NULL"
    cr_affi_list = db_conn.get_values(s_table, s_fields, s_where)
    return cr_affi_list

not_parsed = get_not_parsed_cr_lines(ukchapp_db)
not_parsed

[]

In [69]:
for a_cr_line in not_parsed:
    if a_cr_line[3] == None:
        print(a_cr_line)
        parsed_line = affi_parser.parse_and_map_single(a_cr_line)
        print (parsed_line)
        assigned_ok = check_assigned_affi_ol(ukchapp_db, affi_parser, a_cr_line)
        print("Need to correct ", a_cr_line)
        print("*"*80)
        correct_oneline_1(ukchapp_db, affi_parser, a_cr_line)
        break

## Check that pdf files exist 

Use the data on the articles table to verify if file are stored in the corresponding folder
We also check that the files in the folder are all accounted for (have a corersponding record)

In [77]:
if current_step == 2:
    # get publication data from the ukch app
    app_pubs = pr_fns.get_pub_data(ukchapp_db)

    dups = []
    for idx, a_pub in enumerate(notebook.tqdm(app_pubs)):
        pub_id = a_pub[0]
        pub_title = a_pub[1]
        pub_doi = a_pub[2]
        pub_url = a_pub[3]
        for i_indx in range(idx,len(app_pubs)):
            if not (pub_doi is None) and pub_doi.strip().lower() ==  app_pubs[i_indx][2]:
                print(pub_doi, "duplicated at:", i_indx) 

  0%|          | 0/750 [00:00<?, ?it/s]

10.1038/s41929-019-0334-3 duplicated at: 0
10.1021/acscatal.9b00685 duplicated at: 1
10.1002/cctc.201901268 duplicated at: 2
10.1002/chem.201805250 duplicated at: 3
10.1016/j.bmc.2018.10.015 duplicated at: 4
10.1021/acssuschemeng.8b03568 duplicated at: 5
10.1002/ejoc.201800799 duplicated at: 6
10.1039/c8ob00066b duplicated at: 7
10.1016/j.bmc.2017.03.068 duplicated at: 8
10.1021/acs.biochem.8b00169 duplicated at: 9
10.1021/acscatal.8b00389 duplicated at: 10
10.1021/acscatal.8b00624 duplicated at: 11
10.1021/jacs.7b12621 duplicated at: 12
10.1021/acscatal.8b03169 duplicated at: 13
10.1021/acscatal.9b01820 duplicated at: 14
10.1039/c9cc02459j duplicated at: 15
10.1038/s41929-018-0213-3 duplicated at: 16
10.1016/j.apcata.2018.10.010 duplicated at: 17
10.1016/j.apcata.2018.11.026 duplicated at: 18
10.1021/acs.jpcc.8b08420 duplicated at: 19
10.1016/j.apcatb.2018.07.008 duplicated at: 20
10.1039/c8cc07444e duplicated at: 21
10.1021/acscatal.8b02232 duplicated at: 22
10.1039/c8cy00422f duplic

In [75]:
current_step+=1

In [79]:
if current_step == 2:
    for infile in notebook.tqdm(Path("pdf_files").glob('*.pdf')):
        file_found = False
        for a_pub in app_pubs:
            if infile.name == a_pub[4]:
                file_found = True
                break
        if not file_found:
            print("Not in DB:", infile.name, "DB Name", a_pub[4])

0it [00:00, ?it/s]

Not in DB: 10.1002_cplu.202300413.pdf DB Name d4ey00044g.pdf
Not in DB: ChemBioChem-2023-Wahart-Harnessing_a_Biocatalyst_to_Bioremediate.pdf DB Name d4ey00044g.pdf
Not in DB: ChemPlusChem - 2023 - Price - Impact of Porous Silica Nanosphere Architectures on the Catalytic Performance of Supported.pdf DB Name d4ey00044g.pdf
Not in DB: ChemPlusChem-2023-Aljohani-Enhancing_Hydrogen_Production_from_the_Photoreforming _of_Lignin.pdf DB Name d4ey00044g.pdf
Not in DB: ChemPlusChem-2023-Peng-A_Facile_Synthesis_Route_to_AuPd_Alloys.pdf DB Name d4ey00044g.pdf
Not in DB: ChemSusChem-2023-Al_Sobhi-A_Comparison_of_the_Reactivity_of_the_Lattice_Nitrogen.pdf DB Name d4ey00044g.pdf
Not in DB: dorota_matras_phd.PDF DB Name d4ey00044g.pdf


## Get missing pdfs

In [82]:
# use regular expression to check if a given string
# is a valid DOI, using pattern from CR
def valid_doi(cr_doi):
    # CR DOIS: https://www.crossref.org/blog/dois-and-matching-regular-expressions/
    # CR DOIs re1
    # /^10.\d{4,9}/[-._;()/:A-Z0-9]+$/i
    if cr_doi == None:
        return False
    cr_re_01 = '^10.\d{4,9}/[-._;()/:A-Z0-9]+'
    compare = re.match(cr_re_01, cr_doi, re.IGNORECASE)
    if compare != None and cr_doi == compare.group():
        return True
    else:
        return False
# get publication data from the ukch app
db_pubs = pr_fns.get_pub_data(ukchapp_db)

if current_step == 2:
    for a_pub in notebook.tqdm(db_pubs):
        if a_pub[0] > 616:
            pub_id = a_pub[0]
            pub_title = a_pub[1]
            pub_doi = a_pub[2]
            pub_url = a_pub[3]
            pub_pdf = a_pub[4]
            #pub_html = a_pub[5]
            if pub_pdf == None:
                not_in_url = True
                print("ID: ", pub_id, "Publication: ",pub_title,
                      "\n\tDOI: ", pub_doi, " URL: ", pub_url)
                if "pdf" in pub_url:
                    print ("\tTry to get the pdf from URL: ", pub_url)
                    try:
                        response = requests.get(pub_url)
                        content_type = response.headers['content-type']
                        if not 'text' in content_type:
                            #print(response.headers)
                            cd= response.headers['content-disposition']
                            #print(cd)
                            fname = re.findall("filename=(.+)", cd)[0]
                            #print(fname)
                            if not Path('pdf_files/' + pdf_file).is_file():
                                with open('pdf_files/'+ fname +'.pdf', 'wb') as f:
                                    f.write(response.content)
                            else:
                                set_pdf_file_value(pdf_file, pub_id, ukchapp_db)
                            not_in_url = False
                    except:
                        print("ID: ", pub_id, "\nPublication: ",pub_title, 
                               "\nDOI: ", pub_doi, "\nDOI: ", pub_url) 
                if not_in_url:
                    print("\tTry to see if json file has link to pdf: ")
                    if valid_doi(pub_doi):
                        crjd, doi_file = pr_fns.get_cr_json_object(pub_doi)
                        got_pdf = False
                        if "link" in crjd.keys():
                            for a_link in crjd["link"]:
                                if "\tURL" in a_link.keys() and ("pdf" in a_link["URL"] or "pdf" in a_link["content-type"]):
                                    cr_url = a_link["URL"]
                                    #print("URL: ", cr_url)
                                    pdf_file = get_pdf_from_url(cr_url)
                                    # if the name corresponds to a existing file, assign value to db_record
                                    if Path('pdf_files/' + pdf_file).is_file():
                                        print("\tFile name:", pdf_file)
                                        set_pdf_file_value(pdf_file, pub_id, ukchapp_db)
                                        got_pdf = True
                                    else:
                                        print("\tcould not get file from", cr_url)
                        else: 
                            print("\tno links in json", pub_doi)
                    if not got_pdf and "elsevier" in pub_url:
                        print("\tTrying elsevier doi:" )
                        pdf_file = pr_fns.get_elsevier_pdf(pub_doi)
                        if Path('pdf_files/' + pdf_file).is_file():
                            print("\tFile name:", pdf_file)
                            pr_fns.set_pdf_file_value(pdf_file, pub_id, ukchapp_db)
                            got_pdf = True
                    elif not got_pdf and "wiley" in pub_url:
                        print("\tTrying wiley doi:" )
                        pdf_file = pr_fns.get_wiley_pdf(pub_doi)
                        if Path('pdf_files/' + pdf_file).is_file():
                            print("\tFile name:", pdf_file)
                            pr_fns.set_pdf_file_value(pdf_file, pub_id, ukchapp_db)
                            got_pdf = True
                    elif not got_pdf and "pubs.acs" in pub_url:
                        print("\tTrying acs doi:" )
                        pdf_file = pr_fns.get_acs_pdf(pub_doi)
                        if Path('pdf_files/' + pdf_file).is_file():
                            print("\tFile name:", pdf_file)
                            pr_fns.set_pdf_file_value(pdf_file, pub_id, ukchapp_db)
                            got_pdf = True
                    if not got_pdf:
                        print("\tTry doi:  https://doi.org/" + pub_doi)
    


  0%|          | 0/750 [00:00<?, ?it/s]

## Use pdfminer to get metadata from pdf file

In [83]:
import pdfminer
from pdfminer import high_level as pdfmnr_hl

# functions for PDFminer

def get_pdf_text(pdf_file):
    return pdfmnr_hl.extract_text(pdf_file)

# get the paragraph fragments with references to data
def get_ref_sentences(pdf_text):
    sentences = pdf_text.split("\n")
    groups=[]
    for sentence in sentences:
        if pr_fns.is_data_stmt(sentence.lower()):
            idx = sentences.index(sentence)
            groups.append([idx-1,idx,idx+1])
    reduced_groups = []
    for group in groups:
        idx_group = groups.index(group)
        if groups.index(group) > 0:
            set_g = set(group)
            # make the array before current a set
            set_bg = set(groups[idx_group - 1])
            # make the array after current a set
            set_ag = set()
            if idx_group + 1 < len(groups):    
                set_ag = set(groups[idx_group + 1])
            if len(set_bg.intersection(set_g)) > 0:
                ordered_union = list(set_bg.union(set_g))
                ordered_union.sort()
                reduced_groups.append(ordered_union)
            if len(set_ag.intersection(set_g)) > 0:
                ordered_union = list(set_ag.union(set_g))
                ordered_union.sort()
                reduced_groups.append(ordered_union)
            if len(reduced_groups) > 0:
                is_in_rg = False
                for a_rg in reduced_groups:
                    if set_g.issubset(a_rg):
                        is_in_rg = True
                        break
                if not is_in_rg:
                    reduced_groups.append(list(set_g))
    return_group = []
    for sentence_group in reduced_groups:
        full_sentence = ""
        for single_sentence in sentence_group:
            full_sentence += sentences[single_sentence].strip()
        return_group.append(full_sentence)
    return return_group

# get the paragraph fragments with references to data
def get_all_data_sentences(pdf_text):
    sentences = pdf_text.split("\n")
    groups=[]
    for sentence in sentences:
        if 'data' in sentence.lower() or 'inform' in sentence.lower():
            idx = sentences.index(sentence)
            groups.append([idx-1, idx, idx+1])
    reduced_groups = []
    for group in groups:
        idx_group = groups.index(group)
        if groups.index(group) > 0:
            set_g = set(group)
            # make the array before current a set
            set_bg = set(groups[idx_group - 1])
            # make the array after current a set
            set_ag = set()
            if idx_group + 1 < len(groups):    
                set_ag = set(groups[idx_group + 1])
            if len(set_bg.intersection(set_g)) > 0:
                ordered_union = list(set_bg.union(set_g))
                ordered_union.sort()
                reduced_groups.append(ordered_union)
            if len(set_ag.intersection(set_g)) > 0:
                ordered_union = list(set_ag.union(set_g))
                ordered_union.sort()
                reduced_groups.append(ordered_union)
            if len(reduced_groups) > 0:
                is_in_rg = False
                for a_rg in reduced_groups:
                    if set_g.issubset(a_rg):
                        is_in_rg = True
                        break
                if not is_in_rg:
                    reduced_groups.append(list(set_g))
    return_group = []
    for sentence_group in reduced_groups:
        full_sentence = ""
        for single_sentence in sentence_group:
            full_sentence += sentences[single_sentence].strip()
        if not full_sentence in return_group:
            return_group.append(full_sentence)
    return return_group

# get the http strings from references to data
def get_http_ref(sentence):
    http_frag = ""
    if 'http' in sentence.lower():
        idx_http = sentence.lower().index('http')
        http_frag = sentence[idx_http:]
        space_in_ref = True
        while " " in http_frag:
            space_idx = http_frag.rfind(" ")
            http_frag = http_frag[:space_idx]
        if(http_frag[-1:]=="."):
            http_frag = http_frag[:-1]
    return http_frag

In [None]:
if current_step == 2:
    # get publication data from the ukch app
    db_pubs = pr_fns.get_pub_data(ukchapp_db)

    # get the list of dois already mined for data 
    input_file = './data_load/pub_data_add202012.csv'
    id_field = 'num'
    processed, headings = csvh.get_csv_data(input_file, id_field)
    for id_num in processed:
        current_title = processed[id_num]['doi']
    processed[1]['num']

    processed_dois = []
    for entry in processed:
        if not processed[entry]['doi'] in processed_dois:
            processed_dois.append( processed[entry]['doi'])
    last_checked = 970 # id of the last article checked for data
    data_records = {}
    data_mentions = {}
    ref_count = mention_count = 0
    for a_pub in notebook.tqdm(db_pubs):
        data_refs = []
        data_sents = []
        if a_pub[0] > last_checked:
            pub_id = a_pub[0]
            pub_title = a_pub[1]
            pub_doi = a_pub[2]
            pub_url = a_pub[3]
            pub_pdf = a_pub[4]
            #pub_html = a_pub[5]
            if pub_pdf == 'None':
                print("*************************")
                print("Missing PDF for:", pub_doi)
                print("*************************")
            else:
                pdf_file = "pdf_files/" + pub_pdf
                if not Path(pdf_file).is_file():
                    print("*************************")
                    print("Missing file for:", pdf_file, "for", pub_doi)
                    print("*************************")
                else: 
                    print("PDF filename", pdf_file)
                    pdf_text = get_pdf_text(pdf_file)
                    ref_sentences = get_ref_sentences(pdf_text)
                    data_sentences = get_all_data_sentences(pdf_text)
                    for r_sentence in ref_sentences:
                        dt_link = get_http_ref(r_sentence)
                        if 'supplem' in r_sentence.lower():
                            data_refs.append({'type':'supplementary',"desc":r_sentence, 'data_url':dt_link})
                        else:
                            data_refs.append({'type':'supporting',"desc":r_sentence, 'data_url':dt_link})
                    for d_sentence in data_sentences:
                        dt_link = get_http_ref(d_sentence)
                        if 'supplem' in d_sentence.lower():
                            data_sents.append({'type':'supplementary',"desc":d_sentence, 'data_url':dt_link})
                        else:
                            data_sents.append({'type':'supporting',"desc":d_sentence, 'data_url':dt_link})
            if data_refs != []:
                for data_ref in data_refs:
                    data_record = {'id':pub_id, 'doi':pub_doi}    
                    data_record.update(data_ref)
                    data_records[ref_count] = data_record
                    ref_count += 1
            if data_sents != []:
                for data_sent in data_sents:
                    sentence_record = {'id':pub_id, 'doi':pub_doi}    
                    sentence_record.update(data_sent)
                    data_mentions[mention_count] = sentence_record
                    mention_count += 1

  0%|          | 0/750 [00:00<?, ?it/s]

PDF filename pdf_files/s41570-023-00470-5.pdf
PDF filename pdf_files/c6fd00202a.pdf
PDF filename pdf_files/ijms-24-14779-v2.pdf
PDF filename pdf_files/ChemPlusChem-2023-Price-Impact_of_Porous_Silica_Nanosphere_Architectures.pdf
PDF filename pdf_files/10.1016_j.apcata.2023.119442.pdf
PDF filename pdf_files/ChemPlusChem - 2023 - Aljohani - Enhancing Hydrogen Production from the Photoreforming of Lignin.pdf
PDF filename pdf_files/ChemPlusChem - 2023 - Peng - A Facile Synthesis Route to AuPd Alloys for the Selective Oxidation of 5‐Hydroxymethylfurfural.pdf
PDF filename pdf_files/d3cp03167e.pdf
PDF filename pdf_files/sun-et-al-2023-potassium-promoted-limestone-for-preferential-direct-hydrogenation-of-carbonates-in-integrated-co2.pdf
PDF filename pdf_files/smith-et-al-2023-evaluating-heterodinuclear-mg(ii)m(ii)-(m-mn-fe-ni-cu-and-zn)-catalysts-for-the-chemical-recycling-of.pdf
PDF filename pdf_files/catalysts-13-01489-v2.pdf
PDF filename pdf_files/qiu-et-al-2023-compositional-evolution-of-in

#### Write to csv
Write the results to a csv file to be checked to verify if data mentions actually point to a data object

In [29]:
#if len(data_records) > 0:
#    csvh.write_csv_data(data_records, 'pdf_data.csv')
if current_step == 2:    
    if len(data_mentions) > 0:
        csvh.write_csv_data(data_mentions, 'pdf_mentions202408.csv')

Verify if the mentions of data or information actually can be linked to data objects

In [30]:
from IPython.display import clear_output

if current_step == 3:
    print(ukchapp_db)
    print(len(app_pubs))
    # Open results file
    data_mentions, dm_headers = csvh.get_csv_data('pdf_mentions202110.csv')
    print(dm_headers)
    art_id = ''
    for dm in data_mentions:
        if data_mentions[dm]['action']=='':
            clear_output()
            print ("*******************************************")
            print ("Article id  :", data_mentions[dm]['id'])
            print ("DOI         :", data_mentions[dm]['doi'])
            print ("Type        :", data_mentions[dm]['type'], '\tLine:', dm)
            print ("Description :\n\t", data_mentions[dm]['desc'])
            print ("data_url :", data_mentions[dm]['data_url'])
            print ("*******************************************")
            decide_action = False
            while not decide_action:
                print('Action:')
                print('\ta) review')
                print('\tb) none')
                print('\tSelect a or b:')
                lts = input()
                if lts == "a":
                    data_mentions[dm]['action'] = 'review'
                    decide_action = True
                elif lts == "b":
                    data_mentions[dm]['action'] = 'none'
                    decide_action = True
        art__id = data_mentions[dm]['id']
        if dm > 1700:
            break
    if len(data_mentions) > 0:
       csvh.write_csv_data(data_mentions, 'pdf_mentions202110.csv')
    

In [31]:
# clear the output after each loop cycle
from IPython.display import clear_output

# display editable spreadsheet
import ipysheet


# show gds parameters in a spreadsheet on jupyter
def show_gds(gds_group):
    gds_list = gds_to_list(gds_group)
    #print(gds_list)
    #add 10 more rows in case we need more parameters
    for i in range(10):
        gds_list.append([(len(gds_list)-1)+1,None,None,None,None])
    a_sheet = ipysheet.sheet(rows=len(gds_list), columns=len(gds_list[0]))
    ipysheet.cell_range(gds_list)
    display(a_sheet)
    return a_sheet

if current_step == 3:
    print(ukchapp_db)
    print(len(app_pubs))
    # Open results file
    data_mentions, dm_headers = csvh.get_csv_data('pdf_mentions202110.csv')
    print(dm_headers)
    art_id = ''
    terminate = False
    additional_rows = {}
    for dm in data_mentions:
        if data_mentions[dm]['action']=='review':
            clear_output()
            print ("*******************************************")
            print ("Article id  :", data_mentions[dm]['id'])
            print ("DOI         :", data_mentions[dm]['doi'])
            print ("Type        :", data_mentions[dm]['type'], '\tLine:', dm)
            print ("Description :\n\t", data_mentions[dm]['desc'])
            print ("data_url :", data_mentions[dm]['data_url'])
            print ("*******************************************")
            decide_action = False
            while not decide_action:
                print('Action:')
                print('\ta) review: https://doi.org/'+data_mentions[dm]['doi'])
                print('\ts) add new row')
                print('\td) next')
                print('\tf) terminate')
                print('\tSelect a, s, d, f:')
                lts = input()
                if lts == "a":
                    data_mentions[dm]['action'] = 'reviewed'
                    print ('https://doi.org/'+data_mentions[dm]['doi'])
                    print ('link:',data_mentions[dm]['link'])
                    add_this = input()
                    data_mentions[dm]['link'] = add_this
                    print ('issue:',data_mentions[dm]['issue'])
                    add_this = input()
                    data_mentions[dm]['issue'] = add_this
                    print ('name:',data_mentions[dm]['name'])
                    add_this = input()
                    data_mentions[dm]['name'] = add_this
                    print ('file:',data_mentions[dm]['file'])
                    add_this = input()
                    data_mentions[dm]['file'] = add_this
                if lts == "s":
                    #add a new row
                    new_idx = len(data_mentions) + len(additional_rows) + 1
                    additional_rows[new_idx] = data_mentions[dm]
                    print ('link:',additional_rows[new_idx]['link'])
                    add_this = input()
                    additional_rows[new_idx]['link'] = add_this
                    print ('issue:',additional_rows[new_idx]['issue'])
                    add_this = input()
                    additional_rows[new_idx]['issue'] = add_this
                    print ('name:',additional_rows[new_idx]['name'])
                    add_this = input()
                    additional_rows[new_idx]['name'] = add_this
                    print ('file:',additional_rows[new_idx]['file'])
                    add_this = input()
                    additional_rows[new_idx]['file'] = add_this
                elif lts == "d":
                    if data_mentions[dm]['action'] != 'reviewed':
                        data_mentions[dm]['action'] = 'none'
                    decide_action = True
                elif lts == 'f':
                    decide_action = True
                    terminate = True
        art__id = data_mentions[dm]['id']
        if dm > 1700 or terminate:
            break
    if len(additional_rows)> 0 :
        for nr in additional_rows:
            for a_field in dm_headers:
                data_mentions[nr][a_field] = additional_rows[nr][a_field]
    if len(data_mentions) > 0:
       csvh.write_csv_data(data_mentions, 'pdf_mentions202110.csv')

In [None]:
filter_mentions = {}
for dm in data_mentions:
    if data_mentions[dm]['action'] in ['add', 'reviewed']:
        filter_mentions[dm]={}
        for a_field in dm_headers:
            filter_mentions[dm][a_field] = data_mentions[dm][a_field]
print('filtered mentions:', len(filter_mentions))

In [None]:
new_do_id_list =[]
for fm in filter_mentions:
    art_id = int(filter_mentions[fm]["id"])
    if not art_id in new_do_id_list:
        new_do_id_list.append(art_id)

# currend app DB
ukchapp_db = "db_files/app_db20211005.sqlite3"

no_data_pubs = pr_fns.get_pub_app_no_data(ukchapp_db)
#print(len(ids_w_data))
print(len(no_data_pubs))
print(new_do_id_list, len(new_do_id_list))
filter_mentions


int_idx = 0
revised_list = {}
if Path("./html_revised202111.csv").is_file():
    revised_list, rl_headers = csvh.get_csv_data('html_revised202111.csv')
    int_idx = len(revised_list)
    
already_revised =[]
for fm in revised_list:
    art_id = int(revised_list[fm]["id"])
    if not art_id in already_revised:
        already_revised.append(art_id)
    
for ndp in no_data_pubs:
    if not ndp[0] in new_do_id_list and ndp[0] > 616 and not ndp[0] in already_revised:
        int_idx += 1
        pub_id = ndp[0]
        pub_title = ndp[1]
        pub_doi = ndp[2]
        pub_url = ndp[3]
        data_record = {'id':pub_id, 'doi':pub_doi, 'title':pub_title} 
        print ('id',pub_id, '\n', pub_title)
        decide_action = False
        terminate = False
        while not decide_action:
            print('Action:')
            print(pub_url)
            print("https://doi.org/"+pub_doi)
            print('\ta) no data' )
            print('\ts) review')
            print('\td) next')
            print('\tf) terminate')
            print('\tSelect a, s, d, f:')
            lts = input()
            if lts == "a":
                data_record['action'] = 'no data'
                data_record['issue'] = "no data availability or supplementary data mentioned in html or pdf versions or article"
                revised_list[int_idx] = data_record
                decide_action = True
            if lts == "s":
                data_record['action'] = 'review'
                print ('issue:',data_mentions[dm]['issue'])
                add_this = input()
                data_record['issue'] = add_this
                revised_list[int_idx] = data_record
                decide_action = True
            if lts == "d":
                decide_action = True
            elif lts == 'f':
                decide_action = True
                terminate = True
        if terminate:
            break

if len(revised_list) > 0:
    csvh.write_csv_data(revised_list, 'html_revised202111.csv')

In [None]:
if len(revised_list) > 0:
    csvh.write_csv_data(revised_list, 'html_revised202111.csv')
revised_list

In [None]:
# functions for ChemDataExtractor
# not used for mining data references (suplementary/raw) or to get pdf metadata
from chemdataextractor import Document

# A function for getting a list of files from the directory
# This will be modified to get the list from a csv file
def get_files_list (source_dir):
    i_counter = 0
    files_list = []
    for filepath in sorted(source_dir.glob('*.pdf')):
        i_counter += 1
        files_list.append(filepath)
    return files_list

def cde_read_pdfs(a_file):
    pdf_f = open(a_file, 'rb')
    doc = Document.from_file(pdf_f)
    return doc

def find_doi(element_text):
    cr_re_01 = '10.\d{4,9}/[-._;()/:A-Z0-9]+'
    compare = re.search(cr_re_01, element_text, re.IGNORECASE)
    if compare != None:
        return compare.group()
    return ""

def get_db_id(doi_value, db_name = "app_db.sqlite3"):
    db_conn = dbh.DataBaseAdapter(db_name)
    table = 'articles'   
    id_val = db_conn.get_value(table, "id", "doi", doi_value)
    db_conn.close()
    if id_val != None:
        return id_val[0]
    else:
        return 0

def get_db_title(doi_value, db_name = "app_db.sqlite3"):
    db_conn = dbh.DataBaseAdapter(db_name)
    table = 'articles'   
    id_val = db_conn.get_value(table, "title", "doi", doi_value)
    db_conn.close()
    if id_val != None:
        return id_val[0]
    else:
        return 0

def get_close_dois(str_name, db_name = "prev_search.sqlite3"):
    db_conn = dbh.DataBaseAdapter(db_name)
    search_in = 'articles'
    fields_required = "id, doi, title, pdf_file"
    filter_str = "doi like '%"+str_name+"%';"

    db_titles = db_conn.get_values(search_in, fields_required, filter_str)
    db_conn.close()
    return db_titles

Get the name of the current app db file:

In [None]:
# app db file with path: db_files/app_db.sqlite3
ukchapp_db = "db_files/app_db2.sqlite3"
while not Path(ukchapp_db).is_file():
    print('Please enter the name of app db file:')
    ukchapp_db = input()
ukchapp_db



In [None]:
# get names and links for references in data mentions
data_mentions, dm_fields = csvh.get_csv_data('pdf_mentions_filtered_02.csv', 'num')

for dm in data_mentions:
    print("https://doi.org/" + data_mentions[dm]['doi'])
    ref_name = data_mentions[dm]['ref_name']
    while ref_name == "":
        print('Please enter the name of data object:')
        ref_name = input()
    ref_link = data_mentions[dm]['ref_link']
    while ref_link == "":
        print('Please enter the data object link:')
        ref_link = input()
    data_mentions[dm]['ref_name'] = ref_name
    data_mentions[dm]['ref_link'] = ref_link


In [None]:
len(data_records)

In [None]:
data_mentions

In [None]:
from inspect import getmembers, isfunction

In [None]:
help(pdfminer.high_level)

In [20]:
!jupyter --version

'jupyter' is not recognized as an internal or external command,
operable program or batch file.


In [21]:
from platform import python_version
python_version()

'3.9.2rc1'

In [22]:
db_conn = dbh.DataBaseAdapter(ukchapp_db)
table_name = "Articles"
column_name = "pdf_file"
db_conn.connection.execute("ALTER TABLE " + table_name + " DROP COLUMN " + column_name + ";");


OperationalError: near "DROP": syntax error