In [None]:
# Connecting to the db
import lib.handle_db as dbh

# read and write csv files
import lib.handle_csv as csv_rw

# date functions
from datetime import datetime, date, timedelta

# use habanero to query crossref api
from habanero import Crossref
cr = Crossref()

# search for UKCH Awards in CR record
def award_in_crossref(aw, awards=None):
    """Split Crossref work records by whether they cite one of the given awards.

    Parameters
    ----------
    aw : iterable of dict
        Work records. Each record is checked for a 'funder' list whose
        entries may carry an 'award' list of award numbers.
    awards : iterable of str, optional
        Award numbers to look for. When omitted, the UK Catalysis Hub grant
        numbers listed below are used.

    Returns
    -------
    tuple of (list, list)
        ``(ukch_wks, not_revised)`` where ``ukch_wks`` holds the works with at
        least one funder award found in ``awards`` and ``not_revised`` holds
        the works that have no 'funder' key at all (they could not be checked).
        Works that have funders but no matching award appear in neither list.
    """
    if awards is None:
        awards = ('EP/R026939/1', 'EP/R026815/1', 'EP/R026645/1', 'EP/R027129/1', 'EP/M013219/1',
                  'EP/K014706/2', 'EP/K014668/1', 'EP/K014854/1', 'EP/K014714/1')
    else:
        # Materialise once so generators/iterators can be tested repeatedly.
        awards = tuple(awards)

    ukch_wks = []
    not_revised = []
    for wk in aw:
        if 'funder' not in wk:
            not_revised.append(wk)
            continue
        # Funder entries without an 'award' key are simply ignored.
        has_award = any(
            awd in awards
            for fdr in wk['funder']
            if 'award' in fdr
            for awd in fdr['award']
        )
        if has_award:
            ukch_wks.append(wk)
    return ukch_wks, not_revised

# search for UKCH Affiliation in CR record
def affi_in_crossref(aw, affiliation="UK Catalysis Hub"):
    """Return the works in which some author lists the given affiliation.

    Parameters
    ----------
    aw : iterable of dict
        Work records. Each may carry an 'author' list whose entries may have
        an 'affiliation' list of dicts with a 'name' string.
    affiliation : str, optional
        Text searched for (substring match) in every affiliation name.
        Defaults to "UK Catalysis Hub".

    Returns
    -------
    list of dict
        The matching works, in input order, each included once.
    """
    ukch_wks = []
    for wk in aw:
        # Missing 'author', 'affiliation' or 'name' keys count as "no match"
        # instead of raising, so one incomplete record cannot abort the scan.
        matched = any(
            affiliation in (affi.get('name') or '')
            for autr in wk.get('author', [])
            for affi in autr.get('affiliation', [])
        )
        if matched:
            ukch_wks.append(wk)
    return ukch_wks

In [None]:
# Harvest Crossref works that declare a funder, walking forward from
# 2021-01-01 to today in consecutive 10-day windows (start .. start + 9 days),
# and keep the ones that cite a UKCH award.
start_date = date(2021, 1, 1)
end_date = date(2021, 1, 1)

pubs_with_award = []   # works citing at least one UKCH award
skiped_works = []      # works returned without a 'funder' key
while end_date < datetime.now().date():
    end_date = start_date + timedelta(days=9)
    print("From:", str(start_date), "to", str(end_date))
    # Windows are selected by deposit date. (A publication-date filter used to
    # be built here as well, but it was overwritten before use, so it has been
    # dropped.)
    cr_filter = {'has_funder': True,
                 'from-deposit-date': str(start_date),
                 'until-deposit-date': str(end_date)}
    # NOTE(review): with cursor paging habanero stops after `cursor_max`
    # records (documented default 5000), so a window with more matches may be
    # silently truncated -- confirm and pass a larger cursor_max if needed.
    result = cr.works(filter=cr_filter, cursor="*", limit=500)
    print(cr_filter)
    # `result` is iterated as a sequence of response pages.
    for res_500 in result:
        page_items = res_500['message']['items']
        print("Results:", len(page_items))
        pubs_found, wks_ignored = award_in_crossref(page_items)
        pubs_with_award += pubs_found
        skiped_works += wks_ignored
    start_date = end_date + timedelta(days=1)

In [None]:
# Award numbers that identify UK Catalysis Hub funding.
ukch_awards = ('EP/R026939/1', 'EP/R026815/1', 'EP/R026645/1', 'EP/R027129/1', 'EP/M013219/1',
               'EP/K014706/2', 'EP/K014668/1', 'EP/K014854/1', 'EP/K014714/1')

# Build one summary row per matched work, keyed by DOI so that a work returned
# by more than one query window is kept only once (first occurrence wins).
foud_pubs = {}
for wk in pubs_with_award:
    # Authors as "Family, Given", joined with ", ". The first author used to
    # get a doubled separator ("Family, , Given"); all authors now share one
    # format.
    author_names = []
    for autr in wk.get('author', []):
        # NOTE(review): contributors without a 'family' key fall back to
        # 'name' instead of raising KeyError -- confirm this is the desired
        # label for such entries.
        name_parts = (autr.get('family') or autr.get('name'), autr.get('given'))
        author_names.append(", ".join(part for part in name_parts if part))
    art_authors = ", ".join(name for name in author_names if name)

    # UKCH awards cited by this work, in funder order.
    fund_award = ", ".join(
        awd
        for fdr in wk['funder']
        if 'award' in fdr
        for awd in fdr['award']
        if awd in ukch_awards
    )

    # Publication year: the earlier of the online and print years when both
    # are present, whichever one exists otherwise, and 0 when neither does.
    years = [
        int(wk[key]['date-parts'][0][0])
        for key in ('published-online', 'published-print')
        if key in wk and 'date-parts' in wk[key]
    ]
    years = [yr for yr in years if yr > 0]
    pub_year = min(years) if years else 0

    # Records without a title get an empty string instead of raising.
    title = (wk.get('title') or [""])[0]

    print(art_authors, "|", pub_year, "|", title,
          "|", wk['DOI'], "|", fund_award)
    # NOTE(review): fund_award is printed above but not stored in the exported
    # record -- confirm whether it should become a CSV column.
    this_pub = {
        'authors': art_authors,
        'year': pub_year,
        'title': title,
        'DOI': wk['DOI'],
    }
    if wk['DOI'] not in foud_pubs:
        foud_pubs[wk['DOI']] = this_pub

if len(foud_pubs) > 0:
    csv_rw.write_csv_data(foud_pubs, 'cr_check_202111x.csv')
if len(skiped_works) > 0:
    print(skiped_works)

In [None]:
# Show the works that had no 'funder' entry, followed by how many there are.
# Nothing is printed when the list is empty.
if skiped_works:
    skipped_total = len(skiped_works)
    print(skiped_works)
    print(skipped_total)

In [None]:
# Re-check a single page of the last Crossref response by hand.
# `result` is whatever the harvest loop assigned on its final iteration.
# NOTE(review): index 6 (the 7th page) is hard-coded and raises IndexError
# when that response has fewer pages -- confirm which page is intended.
aw = result[6]['message']['items']

# The inline copy of the award scan tested `'award' in wk` where the funder
# entry (`fdr`) was meant, so it either skipped the award loop or failed on
# fdr['award']. Reuse award_in_crossref, which checks each funder entry.
ukch_wks, no_funder_wks = award_in_crossref(aw)

# Print the records that have no 'funder' key, as the original cell did.
for wk in no_funder_wks:
    print(wk)

ukch_wks

In [None]:
# Example record of type 'grant' with source 'Crossref', kept for reference.
# It has no top-level 'funder' key (the funder is nested under
# project -> funding -> funder), so award_in_crossref would put a record like
# this in its not_revised list rather than scanning it for awards.
# NOTE(review): presumably copied from the skipped-works output -- confirm.
{'indexed': {'date-parts': [[2021, 11, 2]], 
             'date-time': '2021-11-02T17:07:03Z', 'timestamp': 1635872823737}, 
 'publisher': 'James S. McDonnell Foundation', 
 'award-start': {'date-parts': [[2021, 2, 1]]}, 
 'award': '2020-1456', 'DOI': '10.37717/2020-1456', 'type': 'grant', 
 'created': {'date-parts': [[2021, 1, 8]], 'date-time': '2021-01-08T15:52:55Z', 'timestamp': 1610121175000},
 'source': 'Crossref',
 'prefix': '10.37717', 
 'member': '24568', 
 'project': [{'project-title': [{'title': "In support of the postdoctoral fellow's salary, fringe benefits and research plan.", 'language': 'en'}], 'project-description': [{'description': "In support of the postdoctoral fellow's salary, fringe benefits and research plan.", 'language': 'en'}], 
              'lead-investigator': [{'given': 'Stefany', 'family': 'Moreno-Gámez', 'affiliation': [{'name': 'Massachusetts Institute of Technology'}]}], 'award-amount': {'amount': 200000.0, 'currency': 'USD'}, 'award-start': {'date-parts': [[2021, 2, 1]]}, 'award-end': {'date-parts': [[2023, 1, 31]]}, 'funding': [{'type': 'fellowship', 'scheme': 'Postdoctoral Fellowship Award', 'award-amount': {'amount': 200000.0, 'currency': 'USD', 'percentage': 100}, 'funder': {'name': 'James S. McDonnell Foundation', 'id': [{'id': '10.13039/100000913', 'id-type': 'DOI', 'asserted-by': 'publisher'}]}}]}], 'deposited': {'date-parts': [[2021, 1, 8]], 
              'date-time': '2021-01-08T15:52:56Z', 'timestamp': 1610121176000}, 
 'score': 0.0, 
 'issued': {'date-parts': [[2021, 2, 1]]},
 'URL': 'http://dx.doi.org/10.37717/2020-1456'}