# CR Vs DB Verify
check if the results from cr search are in the DB
- get list of CSV search results files
- join searches as a set (unique references)
- open DB
- search if occurrences in DB
- create a list of new candidates

In [1]:
# Connecting to the db
import lib.handle_db as dbh

# read and write csv files
import lib.handle_csv as csv_rw

# date functions
from datetime import datetime, date, timedelta

# managing files and file paths
from pathlib import Path

# import custom functions (common to various notebooks)
import processing_functions as pr_fns

In [2]:
# get list of CSV search results files
base_dir = './pub_search_crossref'
csv_flies_list = ['cr_ol_202204.csv','cr_online_lookup.csv','cr_archive_lookup.csv']
collect_csv = {}
out_file = Path(base_dir, "cr_all202204.csv")

In [3]:
# join searches as a set (unique references)
for a_csv in csv_flies_list:
    a_file = Path(base_dir,a_csv)
    if a_file.is_file():
        print("**********************************************")
        print("checking ", a_file)
        csv_data, csv_headings = csv_rw.get_csv_data(a_file)
        print("Found: ", a_file, " entries: ", len(csv_data))
        for an_id in csv_data:
            a_doi = csv_data[an_id]['doi']
            alt_doi = "";
            if ("/anie." in a_doi):
                alt_doi = a_doi.replace("/anie.","/ange.")
            elif ("/ange." in a_doi):
                alt_doi = a_doi.replace("/ange.","/anie.")
            new_id = len(collect_csv)+1
            already_collected = False
            for collected_id in collect_csv:
                if a_doi == collect_csv[collected_id]['doi'] or \
                   alt_doi == collect_csv[collected_id]['doi']:
                    print ('reviewing:', csv_data[an_id]['doi'],"\nFound ", collect_csv[collected_id])
                    collect_csv[collected_id]['occurrence'] += 1
                    collect_csv[collected_id]['alt_doi'] = alt_doi
                    already_collected = True
                    break
            if not already_collected: 
                print("adding", new_id, a_doi)
                collect_csv[new_id] = csv_data[an_id]
                collect_csv[new_id]['occurrence'] = 1
                collect_csv[new_id]['alt_doi'] = ''
# WRITE TO FILE
if len(collect_csv) > 0:
    csv_rw.write_csv_data(collect_csv, out_file) 

**********************************************
checking  pub_search_crossref\cr_ol_202204.csv
Found:  pub_search_crossref\cr_ol_202204.csv  entries:  45
adding 1 10.1016/j.catcom.2022.106421
adding 2 10.1002/anie.202015016
reviewing: 10.1002/ange.202015016 
Found  {'authors': 'Omori, Naomi, Candeo, Alessia, Mosca, Sara, Lezcano‐Gonzalez, Ines, Robinson, Ian K., Li, Luxi, Greenaway, Alex G., Collier, Paul, Beale, Andrew M.', 'year': '2021', 'title': 'Multimodal Imaging of Autofluorescent Sites Reveals Varied Chemical Speciation in SSZ‐13 Crystals', 'doi': '10.1002/anie.202015016', 'award': 'EP/K014714/1', 'ukch_affi': 'FALSE', 'search': '2022-03-01 to 2022-03-07', 'occurrence': 1, 'alt_doi': ''}
adding 3 10.1039/d1cy02001c
adding 4 10.1002/chem.202104198
adding 5 10.1021/acs.chemrev.1c00493
adding 6 10.1021/acscatal.9b01820
adding 7 10.1098/rsos.211353
adding 8 10.1021/acs.nanolett.9b01733
adding 9 10.1016/j.catcom.2021.106392
adding 10 10.1021/acs.jcim.8b00940
adding 11 10.1021/acscata

adding 284 10.1039/c6fd00010j
adding 285 10.1039/c6fd00189k
adding 286 10.1039/c6fd00195e
adding 287 10.1039/c6me00061d
adding 288 10.1039/c6nr00053c
adding 289 10.1039/c6re00140h
adding 290 10.1039/c6sc04130b
adding 291 10.1039/c6tb01774f
adding 292 10.1039/c7cc05982e
adding 293 10.1039/c7cp04144f
adding 294 10.1039/c7cy00965h
adding 295 10.1039/c7dt01022b
adding 296 10.1039/c7dt03395h
adding 297 10.1039/c7dt04805j
adding 298 10.1039/c7fd00210f
adding 299 10.1039/c7fd00216e
adding 300 10.1039/c7fd00221a
adding 301 10.1039/c7ta10892c
adding 302 10.1039/c8cc01880d
adding 303 10.1039/c8cc07444e
adding 304 10.1039/c8cp01022f
adding 305 10.1039/c8cp02381f
adding 306 10.1039/c8cp04216k
adding 307 10.1039/c8cp06736h
adding 308 10.1039/c8cy00422f
adding 309 10.1039/c8cy90033g
adding 310 10.1039/c8dt05051a
adding 311 10.1039/c8fd00002f
adding 312 10.1039/c8fd00005k
adding 313 10.1039/c8nj03632b
adding 314 10.1039/c8ra10509j
adding 315 10.1039/c8sc03312a
adding 316 10.1039/c8ta12263f
adding 317

In [6]:
# CR search results
base_dir = './WebOfScience'
in_file_name = "cr_check_wos_2023b.csv"
out_file_name = "cr_check_wos_2023c.csv"

# open DB
db_name = 'production'

# 1 currend app DB
ukchapp_db = "db_files/" + db_name + ".sqlite3"

out_file = Path(base_dir, out_file_name)
in_file = Path(base_dir, in_file_name)

csv_data, csv_headings = csv_rw.get_csv_data(in_file)

# get publication data from the ukch app
app_pubs = pr_fns.get_pub_data(ukchapp_db)

for collected_id in csv_data:
    a_doi = csv_data[collected_id]['doi']
    alt_doi = "";
    if ("/anie." in a_doi):
        alt_doi = a_doi.replace("/anie.","/ange.")
    elif ("/ange." in a_doi):
        alt_doi = a_doi.replace("/ange.","/anie.")
    found_in_db = False
    for a_pub in app_pubs:
        pub_id = a_pub[0]
        pub_title = a_pub[1]
        pub_doi = a_pub[2]
        pub_url = a_pub[3]
        pub_pdf = a_pub[4]
        if a_doi == pub_doi or \
            alt_doi == pub_doi:
            found_in_db = True
    csv_data[collected_id]['in_db'] = found_in_db

In [9]:
# WRITE TO FILE
if len(csv_data) > 0:
    csv_rw.write_csv_data(csv_data, out_file) 

In [23]:
## Run the first cell and then run this one
base_dir = './WebOfScience'
out_file_name = "cr_check_wos_2023c.csv"

out_file = Path(base_dir, out_file_name)

csv_data, csv_headings = csv_rw.get_csv_data(out_file)

for an_id in csv_data:
    if (csv_data[an_id]['ask']==''):
        print (csv_data[an_id])
        print ("https://doi.org/"+csv_data[an_id]['doi'])
        print("ask if should be added")
        should_ask = input()
        csv_data[an_id]['ask']=should_ask
        print("reason for asking")
        reason_to_ask = input()
        csv_data[an_id]['Reason']=reason_to_ask
        
        

{'authors': 'Mattey, Ashley P., Ford, Grayson J., Citoler, Joan, Baldwin, Christopher, Marshall, James R., Palmer, Ryan B., Thompson, Matthew, Turner, Nicholas J., Cosgrove, Sebastian C., Flitsch, Sabine L.', 'year': '2021', 'title': 'Development of Continuous Flow Systems to Access Secondary Amines Through Previously Incompatible Biocatalytic Cascades**', 'doi': '10.1002/anie.202103805', 'awards': '', 'mcc': '0', 'ukch': '0', 'ukch_affiliation': '0', 'found_by_cr_search': '0', 'in_DB': '', 'ask': '', 'acknowledgement': '', 'Reason': '', 'in_db': 'FALSE'}
https://doi.org/10.1002/anie.202103805
ask if should be added
yes
reason for asking
S.C.C. acknowledges the UK Catalysis Hub and Keele University for funding.
{'authors': 'Radley, Emily, Davidson, John, Foster, Jake, Obexer, Richard, Bell, Elizabeth L., Green, Anthony P.', 'year': '2023', 'title': 'Engineering Enzymes for Environmental Sustainability', 'doi': '10.1002/anie.202309305', 'awards': '', 'mcc': '0', 'ukch': '0', 'ukch_affil

yes
reason for asking
The authors wish to acknowledge the Diamond Light Source and the UK Catalysis Hub for provision of beamtime (proposal number SP19850-1).
{'authors': 'Islam, Mohammed J., Granollers Mesa, Marta, Osatiashtiani, Amin, Manayil, Jinesh C., Isaacs, Mark A., Taylor, Martin J., Tsatsos, Sotirios, Kyriakou, Georgios', 'year': '2021', 'title': 'PdCu single atom alloys supported on alumina for the selective hydrogenation of furfural', 'doi': '10.1016/j.apcatb.2021.120652', 'awards': '', 'mcc': '0', 'ukch': '0', 'ukch_affiliation': '0', 'found_by_cr_search': '0', 'in_DB': '', 'ask': '', 'acknowledgement': '', 'Reason': '', 'in_db': 'FALSE'}
https://doi.org/10.1016/j.apcatb.2021.120652
ask if should be added
yes
reason for asking
The authors wish to acknowledge the Diamond Light Source and the UK Catalysis Hub for provision of beamtime (proposal number SP19850-5 and SP19850-6)
{'authors': 'Hoekstra, Jacco, Beale, Andrew M., Soulimani, Fouad, Versluijs-Helder, Marjan, van de Kl

KeyboardInterrupt: Interrupted by user

no


In [24]:
# WRITE TO FILE
if len(csv_data) > 0:
    csv_rw.write_csv_data(csv_data, out_file) 