# CR Vs DB Verify
Check whether the results of the Crossref (CR) searches are already in the DB:
- get list of CSV search results files
- join searches as a set (unique references)
- open DB
- check whether each reference already occurs in the DB
- create a list of new candidates

In [1]:
# Connecting to the db
import lib.handle_db as dbh

# read and write csv files
import lib.handle_csv as csv_rw

# date functions
from datetime import datetime, date, timedelta

# managing files and file paths
from pathlib import Path

# import custom functions (common to various notebooks)
import processing_functions as pr_fns

In [2]:
# Where the search result CSV files live and which ones to combine.
# NOTE: the name `csv_flies_list` is read by a later cell, so it is kept as is.
base_dir = './pub_search_crossref'
csv_flies_list = [
    'cr_ol_202204.csv',
    'cr_online_lookup.csv',
    'cr_archive_lookup.csv',
]
# starts empty; the merge cell adds one entry per unique reference
collect_csv = dict()
# file the combined list is written to
out_file = Path(base_dir) / "cr_all202204.csv"

In [3]:
# join searches as a set (unique references)
#
# Each file named in csv_flies_list that exists under base_dir is read. A row
# whose DOI has not been seen yet is added to collect_csv under the next
# numeric id with 'occurrence' = 1; a row whose DOI was already collected only
# increments that record's 'occurrence'. The "/anie." and "/ange." forms of a
# DOI are treated as the same reference.


def _alternate_doi(doi):
    """Return the "/anie." <-> "/ange." twin of doi, or "" when it has none."""
    if "/anie." in doi:
        return doi.replace("/anie.", "/ange.")
    if "/ange." in doi:
        return doi.replace("/ange.", "/anie.")
    return ""


# DOI -> id in collect_csv. One dictionary lookup per row replaces a scan of
# every collected record. Seeded from collect_csv in case it is not empty.
collected_by_doi = {}
for collected_id, collected in collect_csv.items():
    collected_by_doi.setdefault(collected['doi'], collected_id)

for a_csv in csv_flies_list:
    a_file = Path(base_dir, a_csv)
    if not a_file.is_file():
        # files that are not present are skipped
        continue
    print("**********************************************")
    print("checking ", a_file)
    csv_data, csv_headings = csv_rw.get_csv_data(a_file)
    print("Found: ", a_file, " entries: ", len(csv_data))
    for an_id in csv_data:
        a_doi = csv_data[an_id]['doi']
        alt_doi = _alternate_doi(a_doi)

        collected_id = collected_by_doi.get(a_doi)
        matched_on_alt = False
        # Only try the alternate form when there is one: an empty alt_doi
        # must never match a collected record that has an empty DOI.
        if collected_id is None and alt_doi:
            collected_id = collected_by_doi.get(alt_doi)
            matched_on_alt = collected_id is not None

        if collected_id is not None:
            print ('reviewing:', csv_data[an_id]['doi'],"\nFound ", collect_csv[collected_id])
            collect_csv[collected_id]['occurrence'] += 1
            # 'alt_doi' holds the twin of the DOI stored in the record. When
            # the match came through the alternate form, the stored DOI is
            # alt_doi, so its twin is the DOI of the current row.
            collect_csv[collected_id]['alt_doi'] = a_doi if matched_on_alt else alt_doi
        else:
            new_id = len(collect_csv) + 1
            print("adding", new_id, a_doi)
            collect_csv[new_id] = csv_data[an_id]
            collect_csv[new_id]['occurrence'] = 1
            collect_csv[new_id]['alt_doi'] = ''
            collected_by_doi.setdefault(a_doi, new_id)

# WRITE TO FILE
if collect_csv:
    csv_rw.write_csv_data(collect_csv, out_file)

**********************************************
checking  pub_search_crossref\cr_ol_202204.csv
Found:  pub_search_crossref\cr_ol_202204.csv  entries:  45
adding 1 10.1016/j.catcom.2022.106421
adding 2 10.1002/anie.202015016
reviewing: 10.1002/ange.202015016 
Found  {'authors': 'Omori, Naomi, Candeo, Alessia, Mosca, Sara, Lezcano‐Gonzalez, Ines, Robinson, Ian K., Li, Luxi, Greenaway, Alex G., Collier, Paul, Beale, Andrew M.', 'year': '2021', 'title': 'Multimodal Imaging of Autofluorescent Sites Reveals Varied Chemical Speciation in SSZ‐13 Crystals', 'doi': '10.1002/anie.202015016', 'award': 'EP/K014714/1', 'ukch_affi': 'FALSE', 'search': '2022-03-01 to 2022-03-07', 'occurrence': 1, 'alt_doi': ''}
adding 3 10.1039/d1cy02001c
adding 4 10.1002/chem.202104198
adding 5 10.1021/acs.chemrev.1c00493
adding 6 10.1021/acscatal.9b01820
adding 7 10.1098/rsos.211353
adding 8 10.1021/acs.nanolett.9b01733
adding 9 10.1016/j.catcom.2021.106392
adding 10 10.1021/acs.jcim.8b00940
adding 11 10.1021/acscata

adding 284 10.1039/c6fd00010j
adding 285 10.1039/c6fd00189k
adding 286 10.1039/c6fd00195e
adding 287 10.1039/c6me00061d
adding 288 10.1039/c6nr00053c
adding 289 10.1039/c6re00140h
adding 290 10.1039/c6sc04130b
adding 291 10.1039/c6tb01774f
adding 292 10.1039/c7cc05982e
adding 293 10.1039/c7cp04144f
adding 294 10.1039/c7cy00965h
adding 295 10.1039/c7dt01022b
adding 296 10.1039/c7dt03395h
adding 297 10.1039/c7dt04805j
adding 298 10.1039/c7fd00210f
adding 299 10.1039/c7fd00216e
adding 300 10.1039/c7fd00221a
adding 301 10.1039/c7ta10892c
adding 302 10.1039/c8cc01880d
adding 303 10.1039/c8cc07444e
adding 304 10.1039/c8cp01022f
adding 305 10.1039/c8cp02381f
adding 306 10.1039/c8cp04216k
adding 307 10.1039/c8cp06736h
adding 308 10.1039/c8cy00422f
adding 309 10.1039/c8cy90033g
adding 310 10.1039/c8dt05051a
adding 311 10.1039/c8fd00002f
adding 312 10.1039/c8fd00005k
adding 313 10.1039/c8nj03632b
adding 314 10.1039/c8ra10509j
adding 315 10.1039/c8sc03312a
adding 316 10.1039/c8ta12263f
adding 317

In [12]:
# CR search results
#
# Reads the input CSV, looks up every record's DOI among the publications
# returned by pr_fns.get_pub_data and stores the outcome (True/False) in the
# record's 'in_db' field. The "/anie." and "/ange." forms of a DOI are treated
# as the same publication.
base_dir = './WebOfScience'
in_file_name = "cr_check_wos_2023b.csv"
out_file_name = "cr_check_wos_2023c.csv"

# open DB
db_name = 'production'

# 1 current app DB
ukchapp_db = "db_files/" + db_name + ".sqlite3"

out_file = Path(base_dir, out_file_name)
in_file = Path(base_dir, in_file_name)

csv_data, csv_headings = csv_rw.get_csv_data(in_file)

# get publication data from the ukch app
app_pubs = pr_fns.get_pub_data(ukchapp_db)

# DOIs already in the DB, collected once so each record needs a set lookup
# instead of a scan over every publication. The DOI is the third field of a
# publication row. Rows without a DOI are left out so that they can never
# "match" a record. DOIs are case-insensitive, so both sides are compared in
# lower case.
db_dois = {str(a_pub[2]).lower() for a_pub in app_pubs if a_pub[2]}

for collected_id in csv_data:
    a_doi = csv_data[collected_id]['doi'].lower()
    alt_doi = ""
    if "/anie." in a_doi:
        alt_doi = a_doi.replace("/anie.", "/ange.")
    elif "/ange." in a_doi:
        alt_doi = a_doi.replace("/ange.", "/anie.")
    # the alternate form is only consulted when there is one
    found_in_db = a_doi in db_dois or (alt_doi != "" and alt_doi in db_dois)
    csv_data[collected_id]['in_db'] = found_in_db

In [9]:
# WRITE TO FILE
# nothing is written when there are no records
if csv_data:
    csv_rw.write_csv_data(csv_data, out_file)

In [17]:
## Run the first cell and then run this one
# Reload the check file and go through the records that have no 'ask' answer
# yet: each one is printed with its doi.org link, then the two typed answers
# are stored in its 'ask' and 'Reason' fields.
base_dir = './WebOfScience'
out_file_name = "cr_check_wos_2023c.csv"

out_file = Path(base_dir) / out_file_name

csv_data, csv_headings = csv_rw.get_csv_data(out_file)

for record in csv_data.values():
    if record['ask'] != '':
        # already answered
        continue
    print (record)
    print ("https://doi.org/" + record['doi'])
    print("ask if should be added")
    record['ask'] = input()
    print("reason for asking")
    record['Reason'] = input()
        
        

{'authors': 'Brookes, Catherine, Bowker, Michael, Gibson, Emma K., Gianolio, Diego, Mohammed, Khaled M. H., Parry, Stephen, Rogers, Scott M., Silverwood, Ian P., Wells, Peter P.', 'year': '2018', 'title': 'Correction: <i>In situ</i> spectroscopic investigations of MoO<sub>x</sub>/Fe<sub>2</sub>O<sub>3</sub> catalysts for the selective oxidation of methanol', 'doi': '10.1039/c8cy90033g', 'awards': '', 'mcc': '0', 'ukch': '0', 'ukch_affiliation': '1', 'found_by_cr_search': '0', 'in_DB': 'No', 'ask': '', 'acknowledgement': '', 'Reason': 'one or more authors with UKCH affiliation', 'in_db': 'FALSE', 'award_in_ack': '', 'error': 'one or more'}
https://doi.org/10.1039/c8cy90033g
ask if should be added
no
reason for asking
correction to article
{'authors': 'Jabłońska, Magdalena, Arán, Miren Agote, Beale, Andrew M., Góra-Marek, Kinga, Delahay, Gérard, Petitto, Carolina, Pacultová, Kateřina, Palkovits, Regina', 'year': '2019', 'title': 'Catalytic decomposition of N<sub>2</sub>O over Cu–Al–O<sub

yes
reason for asking
affiliation, Peter and Richard
{'authors': 'Ortiz-Roldan, Jose M., Balestra, Salvador R. G., Bueno-Perez, Rocio, Calero, Sofía, Garcia-Perez, Elena, Catlow, C. Richard A., Ruiz-Salvador, A. Rabdel, Hamad, Said', 'year': '2022', 'title': 'Understanding the stability and structural properties of ordered nanoporous metals towards their rational synthesis', 'doi': '10.1098/rspa.2022.0201', 'awards': '', 'mcc': '0', 'ukch': '0', 'ukch_affiliation': '1', 'found_by_cr_search': '0', 'in_DB': 'No', 'ask': '', 'acknowledgement': '', 'Reason': 'one or more authors with UKCH affiliation', 'in_db': 'FALSE', 'award_in_ack': '', 'error': 'one or more'}
https://doi.org/10.1098/rspa.2022.0201
ask if should be added
yes
reason for asking
affiliation, Richard
{'authors': 'Wilding, Martin C., Benmore, Chris, Headen, Thomas F., Di Mino, Camilla, Miller, Thomas S., Suter, Theo M., Corà, Furio, Clancy, Adam J., Sella, Andrea, McMillan, Paul, Howard, Christopher A.', 'year': '2023', 'tit

In [18]:
# WRITE TO FILE
# save the records back to out_file; skipped when csv_data is empty
if csv_data:
    csv_rw.write_csv_data(csv_data, out_file)

In [19]:
# Award reference strings to look for in each record.
ukch_awards = [
    'EP/R026939/1',
    'EP/R026815/1',
    'EP/R026645/1',
    'EP/R027129/1',
    'EP/M013219/1',
    'EP/K014706/2',
    'EP/K014668/1',
    'EP/K014854/1',
    'EP/K014714/1',
]

# For every record with a non-empty 'Reason', list the awards whose reference
# appears in that text and store the list (as a string) in 'award_in_ack'.
# NOTE(review): the search reads 'Reason' while the result goes to
# 'award_in_ack' -- confirm that 'Reason' is the intended field.
for record in csv_data.values():
    reason = record['Reason']
    if reason == '':
        continue
    matched = [award for award in ukch_awards if award in reason]
    if matched:
        record['award_in_ack'] = str(matched)

# WRITE TO FILE
if csv_data:
    csv_rw.write_csv_data(csv_data, out_file)

In [8]:
# bare expression: the notebook shows the current contents of csv_data as the cell output
csv_data

{1: {'authors': 'Wang, Mei, Papaioannou, Evangelos I., Metcalfe, Ian S., Naden, Aaron, Savaniu, Cristian D., Irvine, John T. S.',
  'year': '2023',
  'title': 'The Exsolution of Cu Particles from Doped Barium Cerate Zirconate via Barium Cuprate Intermediate Phases',
  'doi': '10.1002/adfm.202302102',
  'awards': '',
  'mcc': '0',
  'ukch': '0',
  'ukch_affiliation': '0',
  'found_by_cr_search': '0',
  'in_DB': 'No',
  'ask': 'yes',
  'acknowledgement': ' the China Scholarship Commission (MW) received financial support from the UK Catalysis Hub funded by EPSRC Grant reference EP/R027129/1.',
  'Reason': 'crossref does not include the UKCH grant number',
  'in_db': 'FALSE'},
 2: {'authors': 'Pi, Yutong, Ma, Yanfu, Wang, Xinyao, Price, Cameron‐Alexander Hurd, Li, Haitao, Liu, Qinglong, Wang, Liwei, Chen, Hongyu, Hou, Guangjin, Su, Bao‐Lian, Liu, Jian',
  'year': '2022',
  'title': 'Multilevel Hollow Phenolic Resin Nanoreactors with Precise Metal Nanoparticles Spatial Location toward Promi