# Analyze Canada-Spain Academic Publications II

#### Updated: Sep 12, 2022

#  

Unpack and surface concepts data for each publication. Identify Spanish experts in key areas of interest to GAC-MDRID.

In [18]:
import pandas as pd
import numpy as np
import ast
import requests
import time
import os

In [2]:
dataDir = '/Users/rnaidoo/Documents/Canada-Secure/GAC/2022_MDRID/Projects_data/OpenAlex/'

#  

#### Functions:

In [3]:
def construct_query_string_cursor(endpoint, filters, cursor='*'):
    
    filter_param = f'filter={",".join(filters)}'
    filtered_works_url = f'https://api.openalex.org/{endpoint}?mailto=naidoo@ualberta.ca&{filter_param}&per-page=200&cursor={cursor}'
    print(filtered_works_url)
    
    return filtered_works_url

In [4]:
def isolate_esp_first_auth_pubs(df_resp):

    esp_first_indicies = []
    for i in range(0, len(df_resp)):
        authorships_dict = df_resp['authorships'][i]
        for j in range(0, len(authorships_dict)):
            if authorships_dict[j]['author_position'] == 'first': 
                first_auth_insts = authorships_dict[j]['institutions']
                if len(first_auth_insts) > 0:
                    for k in range(0, len(first_auth_insts)): 
                        if len(first_auth_insts[k]) > 0:
                            if first_auth_insts[k]['country_code'] == 'ES':
                                esp_first_indicies.append(i)
                        
    df_esp_first = df_resp.iloc[esp_first_indicies]
    
    return df_esp_first

#  

#### Find publications with concepts relevant to foreign policy and Canada-Spain relations - Canada-Spain first author + collaborator subset:

Combine Canadian and Spanish first-author publications:

In [5]:
df_can = pd.read_csv(dataDir + 'works_can_first_auth_esp_col_proc.csv')
df_esp = pd.read_csv(dataDir + 'works_esp_first_auth_can_col_proc.csv')
df_tot = pd.concat([df_can, df_esp]).sort_values('cited_by_count', ascending=False)
print(len(df_tot))
df_tot.head()

118


Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
0,https://openalex.org/W2277258071,https://doi.org/10.1016/s0140-6736(16)00163-x,Endovascular thrombectomy after large-vessel i...,Endovascular thrombectomy after large-vessel i...,2016,2016-04-23,{'openalex': 'https://openalex.org/W2277258071...,"{'id': 'https://openalex.org/V49861241', 'issn...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'In': [0, 30, 88], '2015,': [1], 'five': [2, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 466}, {'year...",2022-09-02T12:08:38.820533,2016-06-24,Mayank Goyal,https://orcid.org/0000-0001-9060-2109,University of Calgary,CA,{'Antoni Dávalos': {'col_auth_orcid': 'https:/...
1,https://openalex.org/W2112455323,https://doi.org/10.1002/mds.26424,MDS clinical diagnostic criteria for Parkinson...,MDS clinical diagnostic criteria for Parkinson...,2015,2015-10-01,{'openalex': 'https://openalex.org/W2112455323...,"{'id': 'https://openalex.org/V163027424', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'green', 'oa_url'...",...,"{'This': [0], 'document': [1], 'presents': [2]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 500}, {'year...",2022-09-06T03:07:23.885671,2016-06-24,Ronald B. Postuma,https://orcid.org/0000-0002-6468-4734,Montreal General Hospital,CA,"{'Jose A. Obeso': {'col_auth_orcid': None, 'co..."
2,https://openalex.org/W2467227824,https://doi.org/10.1038/kisup.2012.73,Notice,Notice,2013,2013-01-01,{'openalex': 'https://openalex.org/W2467227824...,"{'id': 'https://openalex.org/V4210177655', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,,https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 132}, {'year...",2022-09-02T09:43:19.149729,2016-07-22,Adeera Levin,,University of British Columbia,CA,"{'Alm deFrancisco': {'col_auth_orcid': None, '..."
0,https://openalex.org/W2280783985,https://doi.org/10.1016/s1474-4422(15)00401-9,A clinical approach to diagnosis of autoimmune...,A clinical approach to diagnosis of autoimmune...,2016,2016-04-01,{'openalex': 'https://openalex.org/W2280783985...,"{'id': 'https://openalex.org/V70053155', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'green', 'oa_url'...",...,"{'Encephalitis': [0], 'is': [1], 'a': [2, 14, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 259}, {'year...",2022-09-01T04:18:12.009049,2016-06-24,Francesc Graus,https://orcid.org/0000-0002-8924-8322,Consorci Institut D'Investigacions Biomediques...,ES,{'Susanne M. Benseler': {'col_auth_orcid': 'ht...
1,https://openalex.org/W2116358746,https://doi.org/10.1093/gerona/gls119,Searching for an Operational Definition of Fra...,Searching for an Operational Definition of Fra...,2013,2013-01-01,{'openalex': 'https://openalex.org/W2116358746...,"{'id': 'https://openalex.org/V969433497', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,"{'There': [0, 88], 'is': [1, 128, 149], 'no': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 45}, {'year'...",2022-09-07T09:57:34.467656,2016-06-24,Leocadio Rodríguez-Mañas,https://orcid.org/0000-0002-6551-1333,Hospital Universitario de Getafe,ES,"{'Howard Bergman': {'col_auth_orcid': None, 'c..."


Surface concepts related to each publication:

In [6]:
concepts_ = df_tot.iloc[0]['concepts']
concepts = ast.literal_eval(concepts_)

In [7]:
len(concepts)

14

In [8]:
concepts[0]

{'id': 'https://openalex.org/C71924100',
 'wikidata': 'https://www.wikidata.org/wiki/Q11190',
 'display_name': 'Medicine',
 'level': 0,
 'score': '0.88283634'}

In [9]:
for concept in concepts:
    print(concept['display_name'])

Medicine
Stroke (engine)
Modified Rankin Scale
Population
Meta-analysis
Randomized controlled trial
Odds ratio
Clinical trial
Occlusion
Random effects model
Internal medicine
Logistic regression
Physical therapy
Emergency medicine


In [10]:
df_concepts = pd.DataFrame()
ind = 0
for i in range(0, len(df_tot)):
    pub_id = df_tot.iloc[i]['id']
    
    #Get concepts
    concepts_ = df_tot.iloc[i]['concepts']
    concepts = ast.literal_eval(concepts_)
    for j in range(0, len(concepts)):
        ind+=1
        row = pd.DataFrame(data={
            'pub_id': pub_id,
            'concept': concepts[j]['display_name'],
            'concept_id': concepts[j]['id'],
            'concept_level': concepts[j]['level']
        }, index=[ind])
        df_concepts = pd.concat([df_concepts, row])
        

In [11]:
df_concepts['concept'] = df_concepts['concept'].str.lower()
df_concepts

Unnamed: 0,pub_id,concept,concept_id,concept_level
1,https://openalex.org/W2277258071,medicine,https://openalex.org/C71924100,0
2,https://openalex.org/W2277258071,stroke (engine),https://openalex.org/C2780645631,2
3,https://openalex.org/W2277258071,modified rankin scale,https://openalex.org/C2780931571,4
4,https://openalex.org/W2277258071,population,https://openalex.org/C2908647359,2
5,https://openalex.org/W2277258071,meta-analysis,https://openalex.org/C95190672,2
...,...,...,...,...
1114,https://openalex.org/W2888310913,multiple sclerosis,https://openalex.org/C2780640218,2
1115,https://openalex.org/W2888310913,medicine,https://openalex.org/C71924100,0
1116,https://openalex.org/W2888310913,adverse effect,https://openalex.org/C197934379,2
1117,https://openalex.org/W2888310913,immunology,https://openalex.org/C203014093,1


In [12]:
df_concepts.to_csv(dataDir + 'concepts_by_pub_can_esp_first_auth.csv', index=False)

Find publications with concepts relevant to foreign policy and Canada-Spain relations:

In [13]:
df_concepts = pd.read_csv(dataDir + 'concepts_by_pub_can_esp_first_auth.csv')
df_concepts

Unnamed: 0,pub_id,concept,concept_id,concept_level
0,https://openalex.org/W2277258071,medicine,https://openalex.org/C71924100,0
1,https://openalex.org/W2277258071,stroke (engine),https://openalex.org/C2780645631,2
2,https://openalex.org/W2277258071,modified rankin scale,https://openalex.org/C2780931571,4
3,https://openalex.org/W2277258071,population,https://openalex.org/C2908647359,2
4,https://openalex.org/W2277258071,meta-analysis,https://openalex.org/C95190672,2
...,...,...,...,...
1113,https://openalex.org/W2888310913,multiple sclerosis,https://openalex.org/C2780640218,2
1114,https://openalex.org/W2888310913,medicine,https://openalex.org/C71924100,0
1115,https://openalex.org/W2888310913,adverse effect,https://openalex.org/C197934379,2
1116,https://openalex.org/W2888310913,immunology,https://openalex.org/C203014093,1


In [14]:
df_concepts.loc[df_concepts['concept'].str.contains('foreign')]

Unnamed: 0,pub_id,concept,concept_id,concept_level


Using a variety of search terms, there do not seem to be any publications with concepts-of-interest to foreign policy and Canada-Spain relations.

Broaden search to publications with a spanish first author, and then to publications with Spanish collaborator.

#  

#### Publications with Spanish first author

Query yearly batches, for the last 5 years. Isolate those publications with Spanish first authors.

In [16]:
endpoint = 'works'
filters = ['institutions.country_code:ES', 'publication_year:2022'] #'publication_year:>2017'
save_filename = 'works_esp_first_auth_2022.csv'
batch_size = 10

start = time.time()

#Start cursor pagination
filtered_works_url = construct_query_string_cursor(endpoint=endpoint, filters=filters)
response = requests.get(filtered_works_url)
response_count = response.json()['meta']['count']
print('Total number of results: ' + str(response_count))
pages = round(np.floor(response_count/200))
remainder = np.remainder(response_count, 200)
print('Processing ' + str(pages) + ' additional pages...')
df_response = pd.DataFrame(response.json()['results'])
df_esp = isolate_esp_first_auth_pubs(df_response)
df_esp.to_csv(dataDir + save_filename, index=False)
next_cursor = response.json()['meta']['next_cursor']

#Process batches of pages
batches = round(np.floor(pages/batch_size))
batch_remainder = np.remainder(pages, batch_size)

#Loop through pages in batch
for i in range(0, batches):
    df_batch = pd.DataFrame()
    for j in range(0, batch_size):
        filtered_works_url = construct_query_string_cursor(endpoint=endpoint, filters=filters, cursor=next_cursor)
        response = requests.get(filtered_works_url)
        df_response = pd.DataFrame(response.json()['results'])
        df_resp_proc = isolate_esp_first_auth_pubs(df_response)
        df_batch = pd.concat([df_batch, df_resp_proc])
        next_cursor = response.json()['meta']['next_cursor']
    df_esp_load = pd.read_csv(dataDir + save_filename)
    df_esp = pd.concat([df_esp_load, df_batch])
    df_esp.to_csv(dataDir + save_filename, index=False)
        
#Loop through pages in final batch
df_batch = pd.DataFrame()
for i in range(0, batch_remainder):
    filtered_works_url = construct_query_string_cursor(endpoint=endpoint, filters=filters, cursor=next_cursor)
    response = requests.get(filtered_works_url)
    df_response = pd.DataFrame(response.json()['results'])
    df_resp_proc = isolate_esp_first_auth_pubs(df_response)
    df_batch = pd.concat([df_batch, df_resp_proc])
    next_cursor = response.json()['meta']['next_cursor']
df_esp_load = pd.read_csv(dataDir + save_filename)
df_esp = pd.concat([df_esp_load, df_batch])
df_esp.to_csv(dataDir + save_filename, index=False)
    
end = time.time()   
t = end - start
print('Runtime: ' + str(round(t/60, 1)) + ' min')

https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=*
Total number of results: 72807
Processing 364 additional pages...
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlsxMiwgJ2h0dHBzOi8vb3BlbmFsZXgub3JnL1c0MjA1MDkxODkwJ10i
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=Ils4LCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzMxMTcwNDYyNTEnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=Ils3LCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTMzMzUxOTQnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=Ils2LCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTQ0NzkzNzknXSI=
https://api.openalex.org/work

https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlsxLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTA0MjA1NTUnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlsxLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTA1OTM2NDYnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlsxLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTA3ODk0OTInXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlsxLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTEwNDQyNzUnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlsxLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTI4NjYyNTknXSI=
https://api.openalex.org/works?mailto=na

https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMDAyNzAyOTQnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMDA1Mzc4ODAnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMDUxNzM3OTInXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMDUyOTcwMzcnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMDU0MDc3ODcnXSI=
https://api.openalex.org/works?mailto=na

https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTMxODgwMzcnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTMyNTU0NTcnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTMzMjI3MzAnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTMzNzk1NTMnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMTM0NDY0MTknXSI=
https://api.openalex.org/works?mailto=na

https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjExMjczMjUnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjExNDgyNzQnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjM0NDc3NjknXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjM1MDc2MDUnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjM1NzIyNDYnXSI=
https://api.openalex.org/works?mailto=na

https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjkwMDM2NjYnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjkwMTU3OTQnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjkwMjg4NDInXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjkwNDQyNTQnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyMjkwNjE4ODknXSI=
https://api.openalex.org/works?mailto=na

https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODI1ODQyNDMnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODI4MzUwOTYnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODI5MjE0MzUnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODI5MzU0NTInXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODI5NDY5OTYnXSI=
https://api.openalex.org/works?mailto=na

https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODU3MzU3MTUnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODU4MDE5OTYnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODYwMDE1MzEnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODYyMTgzMjAnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyODYyODY5MjEnXSI=
https://api.openalex.org/works?mailto=na

https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyOTMxNzMzNjYnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyOTMyMjU1MDEnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyOTMzMzA1MzcnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyOTM0Njk4MDYnXSI=
https://api.openalex.org/works?mailto=naidoo@ualberta.ca&filter=institutions.country_code:ES,publication_year:2022&per-page=200&cursor=IlswLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQyOTM1NTc3NjMnXSI=
https://api.openalex.org/works?mailto=na

#  

Further process data, as yearly batches:

In [26]:
for file in os.listdir(dataDir + 'works_esp_first_auth/'):
    if 'works_esp_first_auth_' in file:
        print('Processing... ' + file)
        df1 = pd.read_csv(dataDir + 'works_esp_first_auth/' + file)
        df1['first_author'] = pd.Series()
        df1['first_auth_orcid'] = pd.Series()
        df1['first_auth_inst'] = pd.Series()
        df1['first_auth_country'] = pd.Series()
        df1['collaborators_of_interest'] = pd.Series()
        
        for i in range(0, len(df1)):
            auth_list_ = df1.iloc[i]['authorships']
            auth_list = ast.literal_eval(auth_list_)

            #Get first author details
            if auth_list[0]['author_position'] == 'first':
                if len(auth_list[0]['institutions'][0]) > 0:
                    if auth_list[0]['institutions'][0]['country_code'] == 'ES':
                        first_auth = auth_list[0]['author']['display_name']
                        first_auth_orcid = auth_list[0]['author']['orcid']
                        first_auth_inst = auth_list[0]['institutions'][0]['display_name']
                        first_auth_country = auth_list[0]['institutions'][0]['country_code']

            #Get collaborator authors' details
            collab_list = {}
            df_collabs = pd.DataFrame()
            for auth in auth_list:
                for inst in auth['institutions']:
                    if len(inst) > 0:
                        if inst['country_code'] == 'CA':
                            col_auth = auth['author']['display_name']
                            col_auth_orcid = auth['author']['orcid']
                            col_auth_inst = inst['display_name']
                            col_auth_country = inst['country_code']
                            col_auth_dict = {
                                'col_auth_orcid': col_auth_orcid,
                                'col_auth_inst': col_auth_inst,
                                'col_auth_country': col_auth_country
                            }
                            collab_list[col_auth] = col_auth_dict

            #Add columns
            df1.iloc[i,26] = first_auth
            df1.iloc[i,27] = first_auth_orcid
            df1.iloc[i,28] = first_auth_inst
            df1.iloc[i,29] = first_auth_country
            df1.iloc[i,30] = str(collab_list)
    
        df1.to_csv(dataDir + 'works_esp_first_auth/' + file[:-4] + '_proc.csv', index=False)

Processing... works_esp_first_auth_2017.csv


  df1['first_author'] = pd.Series()
  df1['first_auth_orcid'] = pd.Series()
  df1['first_auth_inst'] = pd.Series()
  df1['first_auth_country'] = pd.Series()
  df1['collaborators_of_interest'] = pd.Series()


Processing... works_esp_first_auth_2018.csv


  df1['first_author'] = pd.Series()
  df1['first_auth_orcid'] = pd.Series()
  df1['first_auth_inst'] = pd.Series()
  df1['first_auth_country'] = pd.Series()
  df1['collaborators_of_interest'] = pd.Series()


Processing... works_esp_first_auth_2019.csv


  df1['first_author'] = pd.Series()
  df1['first_auth_orcid'] = pd.Series()
  df1['first_auth_inst'] = pd.Series()
  df1['first_auth_country'] = pd.Series()
  df1['collaborators_of_interest'] = pd.Series()


Processing... works_esp_first_auth_2021.csv


  df1['first_author'] = pd.Series()
  df1['first_auth_orcid'] = pd.Series()
  df1['first_auth_inst'] = pd.Series()
  df1['first_auth_country'] = pd.Series()
  df1['collaborators_of_interest'] = pd.Series()


Processing... works_esp_first_auth_2020.csv


  df1['first_author'] = pd.Series()
  df1['first_auth_orcid'] = pd.Series()
  df1['first_auth_inst'] = pd.Series()
  df1['first_auth_country'] = pd.Series()
  df1['collaborators_of_interest'] = pd.Series()


Processing... works_esp_first_auth_2022.csv


  df1['first_author'] = pd.Series()
  df1['first_auth_orcid'] = pd.Series()
  df1['first_auth_inst'] = pd.Series()
  df1['first_auth_country'] = pd.Series()
  df1['collaborators_of_interest'] = pd.Series()


#  