# Search publications using the Crossref API

Use the Crossref REST API to retrieve works based on their award codes.

This approach (filtering funded works by deposit date and matching the award codes locally) still works better than searching directly by award number.

In [1]:
# Connecting to the db
import lib.handle_db as dbh

# read and write csv files
import lib.handle_csv as csv_rw

# file management
from pathlib import Path

# date functions
from datetime import datetime, date, timedelta

#CR libraries
from crossref.restful import Works, Etiquette

# search for UKCH Awards in CR record
def award_in_crossref(aw, award_list):
    """Split Crossref works into those citing a listed award and those without funders.

    Parameters
    ----------
    aw : iterable of dict
        Crossref work records; it is traversed exactly once.
    award_list : container of str
        Award codes to look for (exact match against each funder's 'award' entries).

    Returns
    -------
    tuple(list, list)
        First list: works where at least one funder lists an award in ``award_list``.
        Second list: works that have no 'funder' key at all (they could not be checked).
    """
    matched_works = []
    unfunded_works = []
    for work in aw:
        if 'funder' not in work:
            unfunded_works.append(work)
            continue
        funders_with_hit = 0
        for funder in work['funder']:
            if 'award' not in funder:
                continue
            # One hit per funder is enough; any() stops at the first match
            if any(code in award_list for code in funder['award']):
                funders_with_hit += 1
        if funders_with_hit:
            matched_works.append(work)
    return matched_works, unfunded_works

def collect_keys(aw, wk_keys):
    """Count how often each top-level key occurs across the works in ``aw``.

    The tallies are added to ``wk_keys`` in place (existing counts are kept and
    incremented) and the same dict is returned.
    """
    for work in aw:
        for key_name in work.keys():
            wk_keys[key_name] = wk_keys.get(key_name, 0) + 1
    return wk_keys

def collect_fdr_keys(aw, wk_keys):
    """Count how often each key occurs inside the 'funder' entries of the works.

    Works without a 'funder' key are ignored. Counts are accumulated into
    ``wk_keys`` in place and that same dict is returned.
    """
    for work in aw:
        if 'funder' not in work:
            continue
        for funder in work['funder']:
            for key_name in funder.keys():
                wk_keys[key_name] = wk_keys.get(key_name, 0) + 1
    return wk_keys

def collect_awds(aw, wk_keys):
    """Count how often each award code is listed by the funders of the works.

    Only funders that carry an 'award' entry contribute. Counts are accumulated
    into ``wk_keys`` in place and that same dict is returned.
    """
    for work in aw:
        if 'funder' not in work:
            continue
        for funder in work['funder']:
            if 'award' not in funder:
                continue
            for award_code in funder['award']:
                wk_keys[award_code] = wk_keys.get(award_code, 0) + 1
    return wk_keys

# search for UKCH Affiliation in CR record
def affi_in_crossref(aw, affi_names=("UK Catalysis Hub",)):
    """Select the works in which at least one author affiliation names the hub.

    Parameters
    ----------
    aw : iterable of dict
        Crossref work records.
    affi_names : iterable of str, optional
        Affiliation strings to look for; a work matches when any of them is a
        substring of an affiliation 'name'. Defaults to "UK Catalysis Hub",
        which is the only string the function looked for before the parameter
        was added.

    Returns
    -------
    list
        The matching works, in input order, each at most once.
    """
    targets = tuple(affi_names)

    def _names_hub(affi):
        # Affiliation entries without a usable 'name' are skipped
        # instead of raising KeyError.
        name = affi.get('name') or ""
        return any(target in name for target in targets)

    ukch_wks = []
    for wk in aw:
        if 'author' not in wk:
            continue
        if any(_names_hub(affi)
               for autr in wk['author']
               for affi in (autr.get('affiliation') or [])):
            ukch_wks.append(wk)
    return ukch_wks

Define the awards to look for. We also include each award code without its final suffix (/1 or /2), because publications sometimes drop that last part.


In [5]:
# Client identification handed to the Crossref Works endpoint below
my_etiquette = Etiquette(
    'UK Catalysis Hub - Catalysis Data Infrastructure',
    'Prototype 1',
    'https://ukcatalysishub.co.uk/core/',
    'nieva@rc-harwell.ac.uk',
)

# Full award codes; the suffix-less variants (without the trailing /1 or /2)
# are derived from them and appended in the same order.
_ukch_full_codes = [
    'EP/R026939/1', 'EP/R026815/1', 'EP/R026645/1', 'EP/R027129/1', 'EP/M013219/1',
    'EP/K014706/2', 'EP/K014668/1', 'EP/K014854/1', 'EP/K014714/1',
]
ukch_awards = _ukch_full_codes + [code.rsplit('/', 1)[0] for code in _ukch_full_codes]

_mcc_full_codes = [
    'EP/R029431/1', 'EP/P020194/1', 'EP/T022213/1', 'EP/D504872/1',
    'EP/F067496/1', 'EP/X035859/1', 'EP/W032260/1', 'EP/L000202/1',
]
mcc_awards = _mcc_full_codes + [code.rsplit('/', 1)[0] for code in _mcc_full_codes]

ukch_affi_synonyms = ["UK Catalysis Hub"]

# Every award code that marks a publication as relevant
award_list = ukch_awards + mcc_awards

works = Works(etiquette=my_etiquette)

# Date range to harvest; end_date starts equal to start_date and is advanced by the loop
start_date = date(2024, 4, 30) #started 7,1
end_date = start_date
stop_date = date(2024, 6, 25)

out_dir = './pub_search_crossref/cr_results_202406/'

# Accumulators (each one is its own, initially empty, container)
pubs_with_award, skiped_works = [], []
wk_keys, fd_keys, awds_lst, foud_pubs = {}, {}, {}, {}
def _author_names(wk):
    """Return the authors of ``wk`` as one 'Family, Given, Family, Given, ...' string."""
    parts = []
    for autr in wk.get('author', []):
        # Author entries may lack 'family' (e.g. group authors given only
        # as 'name'); fall back to 'name' rather than raising KeyError.
        label = autr.get('family') or autr.get('name')
        if not label:
            continue
        parts.append(label)
        if autr.get('given'):
            parts.append(autr['given'])
    return ", ".join(parts)


def _matched_awards(wk, wanted):
    """Return every award code listed by the funders of ``wk`` that is in ``wanted``."""
    return [awd
            for fdr in wk.get('funder', [])
            for awd in fdr.get('award', [])
            if awd in wanted]


def _year_of(wk, field):
    """Return the year stored in wk[field]['date-parts'], or 0 when it is absent."""
    date_parts = (wk.get(field) or {}).get('date-parts') or [[]]
    first_part = date_parts[0]
    if not first_part or first_part[0] is None:
        return 0
    return int(first_part[0])


# Make sure the output folder exists before the first CSV is written
Path(out_dir).mkdir(parents=True, exist_ok=True)

# Each pass queries the two-day window start_date..end_date and then moves
# start_date to the day after end_date, until end_date has passed stop_date.
while end_date < stop_date + timedelta(days=1):
    end_date = start_date + timedelta(days=1)
    print ("From:", str(start_date), "to",  str(end_date))
    # works with from_published_date and until_published_date 
    # next test with from_deposit_date and until_deposit_date
    # Valid filters for this route are: alternative_id, archive, article_number, assertion, assertion-group, 
    #    award.funder, award.number, category-name, clinical-trial-number, container-title, content-domain,
    #    directory, doi, from-accepted-date, from-created-date, from-deposit-date, from-event-end-date,
    #    from-event-start-date, from-index-date, from-issued-date, from-online-pub-date, from-posted-date,
    #    from-print-pub-date, from-pub-date, from-update-date, full-text.application, full-text.type, 
    #    full-text.version, funder, funder-doi-asserted-by, group-title, has-abstract, has-affiliation,
    #    has-archive, has-assertion, has-authenticated-orcid, has-award, has-clinical-trial-number,
    #    has-content-domain, has-domain-restriction, has-event, has-full-text, has-funder, has-funder-doi,
    #    has-license, has-orcid, has-references, has-relation, has-update, has-update-policy, is-update, 
    #    isbn, issn, license.delay, license.url, license.version, location, member, orcid, prefix,
    #    relation.object, relation.object-type, relation.type, type, type-name, until-accepted-date,
    #    until-created-date, until-deposit-date, until-event-end-date, until-event-start-date,
    #    until-index-date, until-issued-date, until-online-pub-date, until-posted-date,
    #    until-print-pub-date, until-pub-date, until-update-date, update-type, updates

    # Build the query once and reuse it for both the URL print-out and the search
    pub_w_grant = (works.filter(has_funder='true')
                        .filter(from_deposit_date=str(start_date))
                        .filter(until_deposit_date=str(end_date)))
    print(pub_w_grant.url)

    # award_in_crossref already keeps only the works that cite a listed award,
    # so no second filtering pass is needed.
    aw, _  = award_in_crossref(pub_w_grant, award_list)
    ukch_wks = list(aw)

    # Publications found in this window, keyed by DOI (reset on every pass)
    foud_pubs = {}
    for wk in ukch_wks:
        art_authors = _author_names(wk)
        awards_found = _matched_awards(wk, award_list)
        fund_award = ", ".join(awards_found)
        ukch_pub = any(awd in ukch_awards for awd in awards_found)
        mcc_pub = any(awd in mcc_awards for awd in awards_found)

        # Use the earlier of the online and print years; 0 means unknown
        known_years = [yr for yr in (_year_of(wk, 'published-online'),
                                     _year_of(wk, 'published-print')) if yr > 0]
        pub_year = min(known_years) if known_years else 0

        # A record without a title is reported with an empty title
        title = (wk.get('title') or [""])[0]

        print(art_authors,"|",pub_year,"|",title,
              "|", wk['DOI'],"|", fund_award)
        this_pub = {}
        this_pub['authors'] = art_authors
        this_pub['year'] = pub_year
        this_pub['title'] = title
        this_pub['DOI'] = wk['DOI']
        this_pub['awards'] = fund_award

        this_pub['mcc'] = 1 if mcc_pub else 0
        this_pub['ukch'] = 1 if ukch_pub else 0
        if wk['DOI'] not in foud_pubs:
            foud_pubs[wk['DOI']] = this_pub

    # WRITE TO FILE
    if len(foud_pubs) > 0:
        csv_rw.write_csv_data(foud_pubs, out_dir+'cr_check_'+str(end_date)+'a.csv')

    start_date = end_date + timedelta(days=1)

From: 2024-04-30 to 2024-05-01
https://api.crossref.org/works?filter=has-funder%3Atrue%2Cfrom-deposit-date%3A2024-04-30%2Cuntil-deposit-date%3A2024-05-01
From: 2024-05-02 to 2024-05-03
https://api.crossref.org/works?filter=has-funder%3Atrue%2Cfrom-deposit-date%3A2024-05-02%2Cuntil-deposit-date%3A2024-05-03
Gautom, Trishnamoni, Dheeman, Dharmendra, Levy, Colin, Butterfield, Thomas, Alvarez Gonzalez, Guadalupe, Le Roy, Philip, Caiger, Lewis, Fisher, Karl, Johannissen, Linus, Dixon, Neil | 2021 | Structural basis of terephthalate recognition by solute binding protein TphC | 10.1038/s41467-021-26508-0 | EP/M013219/1
Zhu, Qiang, Qu, Hang, Avci, Gokay, Hafizi, Roohollah, Zhao, Chengxi, Day, Graeme M., Jelfs, Kim E., Little, Marc A., Cooper, Andrew I. | 2024 | Computationally guided synthesis of a hierarchical [4[2+3]+6] porous organic ‘cage of cages’ | 10.1038/s44160-024-00531-7 | EP/R029431, EP/X035859
From: 2024-05-04 to 2024-05-05
https://api.crossref.org/works?filter=has-funder%3Atrue%2C

Al-Ajeil, Ruba, Mohammed, Abdul Khayum, Pal, Pratibha, Addicoat, Matthew A., Nair, Surabhi Suresh, Kumar, Dayanand, Syed, Abdul Momin, Rezk, Ayman, Singh, Nirpendra, Nayfeh, Ammar, El-Atab, Nazek, Shetty, Dinesh | 2024 | A carbonyl-decorated two-dimensional polymer as a charge-trapping layer for non-volatile memory storage devices with a high endurance and wide memory window | 10.1039/d4mh00201f | EP/X035859, EP/T022213
From: 2024-05-26 to 2024-05-27
https://api.crossref.org/works?filter=has-funder%3Atrue%2Cfrom-deposit-date%3A2024-05-26%2Cuntil-deposit-date%3A2024-05-27
From: 2024-05-28 to 2024-05-29
https://api.crossref.org/works?filter=has-funder%3Atrue%2Cfrom-deposit-date%3A2024-05-28%2Cuntil-deposit-date%3A2024-05-29
From: 2024-05-30 to 2024-05-31
https://api.crossref.org/works?filter=has-funder%3Atrue%2Cfrom-deposit-date%3A2024-05-30%2Cuntil-deposit-date%3A2024-05-31
Rogers, Matthew, Habib, Ahasan, Teobaldi, Gilberto, Moorsom, Timothy, Johansson, J. Olof, Hedley, Luke, Keatley, P

Hao, Xiaoge, Quirk, James A., Zhao, Feipeng, Alahakoon, Sandamini H, Ma, Jiabin, Fu, Jiamin, Kim, Jung Tae, Li, Weihan, Li, Minsi, Zhang, Shumin, Duan, Hui, Huang, Yining, Dawson, James A., Sun, Xueliang | 2024 | Regulating Ion Diffusion and Stability in Amorphous Thiosilicate‐Based Solid Electrolytes Through Edge‐Sharing Local Structures | 10.1002/aenm.202304556 | EP/L000202, EP/L000202/1, EP/R029431, EP/T022213
Newland, Stephanie H., Sinkler, Wharton, Mezza, Thomas, Bare, Simon R., Raja, Robert | 2016 | Influence of dopant substitution mechanism on catalytic properties within hierarchical architectures | 10.1098/rspa.2016.0095 | EP/K014714/1
Conway, Lewis J., Hermann, Andreas | 2019 | High Pressure Hydrocarbons Revisited: From van der Waals Compounds to Diamond | 10.3390/geosciences9050227 | EP/P020194
From: 2024-06-19 to 2024-06-20
https://api.crossref.org/works?filter=has-funder%3Atrue%2Cfrom-deposit-date%3A2024-06-19%2Cuntil-deposit-date%3A2024-06-20
Jyoti, Shaswati, Vijay, Aditi,

In [8]:
# Write the current contents of foud_pubs to CSV, then move the window start
# to the day after end_date.
if foud_pubs:
    csv_rw.write_csv_data(foud_pubs, f"{out_dir}cr_check_{end_date}a.csv")
start_date = end_date + timedelta(days=1)

In [None]:
# Move the window start to the day after the last processed end date
start_date = end_date + timedelta(days=1)
# Bare expression: show the new start date as the cell output
start_date

In [None]:
# Display the current contents of foud_pubs (DOI -> publication record)
foud_pubs

In [None]:
# Query by creation date instead of deposit date for the current start/end dates
works.filter(from_created_date=str(start_date)).filter(until_created_date=str(end_date))

In [None]:
works = Works(etiquette=my_etiquette)
# Filters are passed as keyword arguments; the double underscore stands for the
# dot in the Crossref filter name, so this filters on award.number.
works.filter(award__number='EP/K014706/2').url


In [9]:
# Display the current contents of foud_pubs (DOI -> publication record)
foud_pubs

{'10.1016/j.cattod.2024.114867': {'authors': 'Bowker, M.',
  'year': 2024,
  'title': 'The importance of Pd carbide formation for reactions with ethene and other organic molecules',
  'DOI': '10.1016/j.cattod.2024.114867',
  'awards': 'EP/R026939/1, EP/R026815/1',
  'mcc': 0,
  'ukch': 1},
 '10.1021/acs.jpclett.4c00865': {'authors': 'Campbell, Emma, Sazanovich, Igor V., Towrie, Michael, Watson, Michael J., Lezcano-Gonzalez, Ines, Beale, Andrew M.',
  'year': 2024,
  'title': 'Methanol-to-Olefins Studied by UV Raman Spectroscopy as Compared to Visible Wavelength: Capitalization on Resonance Enhancement',
  'DOI': '10.1021/acs.jpclett.4c00865',
  'awards': 'EP/R026815/1',
  'mcc': 0,
  'ukch': 1},
 '10.1039/d4ey00026a': {'authors': 'Belami, Debora, Lindley, Matthew, Jonnalagadda, Umesh S., Goncalves Bullock, Annie Mae, Fan, Qianwenhao, Liu, Wen, Haigh, Sarah J., Kwan, James, Regmi, Yagya N., King, Laurie A.',
  'year': 2024,
  'title': 'Active and highly durable supported catalysts for p