# Analyze Spain First Author Publications

#### Updated: Sep 22, 2022

#  

Analyze subset of publications with Spanish first authors. Use this subset to identify top Spanish researchers related to concepts of interest to GAC-MDRID.

In [45]:
import pandas as pd
import numpy as np
import os
import ast
import time

In [46]:
dataDir = '/Users/rnaidoo/Documents/Canada-Secure/GAC/2022_MDRID/Projects_data/OpenAlex/works_esp_first_auth/'

#  

#### Functions:

In [3]:
def search_concepts(search_query, df_concepts):
    
    search_terms = search_query.split(' AND ')
    for i in range(0, len(search_terms)):
        search_terms[i] = search_terms[i].lower()
    print(search_terms)
    for i in range(0, len(search_terms)):
        if i == 0:
            df_search = df_concepts.loc[df_concepts['concept'].str.contains(search_terms[i])]
        else:
            df_search_ = df_concepts.loc[df_concepts['concept'].str.contains(search_terms[i])]
            df_search_ = df_search_.rename(columns={
                'concept': 'concept'+str(i+1),
                'concept_id': 'concept_id'+str(i+1),
                'concept_level': 'concept_level'+str(i+1)
            })
            df_search = df_search.merge(df_search_, how='inner', on='pub_id')
    print(str(len(df_search)) + ' papers related to search...')
    
    return df_search

In [62]:
def search_title(search_query, df_pubs, keep_case=False):
    
    search_terms = search_query.split(' AND ')
    if not keep_case:
        for i in range(0, len(search_terms)):
            search_terms[i] = search_terms[i].lower()
        df_pubs['title'] = df_pubs['title'].str.lower()
    print(search_terms)
    for i in range(0, len(search_terms)):
        if i == 0:
            df_search = df_pubs.loc[df_pubs['title'].str.contains(search_terms[i])]
        else:
            df_search = df_search.loc[df_search['title'].str.contains(search_terms[i])]
            #df_search = pd.concat([df_search, df_search_])
    print(str(len(df_search)) + ' papers related to search...')
    
    return df_search

In [4]:
def retrieve_papers_byConcept(search_query, df_search, df_pubs):
    
    df_merge = df_pubs.merge(df_search, left_on='id', right_on='pub_id')
    print(str(len(df_merge)) + ' papers retrieved...')
    df_result = df_merge[['first_author', 'first_auth_inst', 'concept', 'concept_level', 'title', 'publication_date', 'cited_by_count', 'collaborators_of_interest', 'first_auth_orcid', 'id']]
    df_result = df_result.sort_values(['cited_by_count', 'publication_date', 'concept_level'], ascending=[False, False, False]).reset_index(drop=True)
    df_result = df_result.drop_duplicates(subset=['id'], keep='first')
    print(str(len(df_result)) + ' unique papers retrieved.')
    df_result = df_result.drop(columns=['concept_level'])
    df_result = df_result.rename(columns={
        'first_author': 'Spanish Researcher',
        'first_auth_inst': 'Institution',
        'concept': 'Concept',
        'title': 'Publication Title',
        'publication_date': 'Publication Date',
        'cited_by_count': 'Citation Count',
        'collaborators_of_interest': 'Canadian Collaborators',
        'first_auth_orcid': 'Academic Profile (ORCID)',
        'id': 'Publication Profile'
    })
    df_result = df_result.replace(np.nan, '')
    df_result['Canadian Collaborators'] = df_result['Canadian Collaborators'].replace('{}', 'N')

    #Process Canadian Collaborators
    for i in range(0, len(df_result)):
        collab_dict_ = df_result['Canadian Collaborators'].iloc[i]
        if collab_dict_ != 'N':
            collab_dict = ast.literal_eval(collab_dict_)
            if len(collab_dict) > 0:
                collab_str = ''
                for auth in collab_dict:
                    if collab_str != '':
                        collab_str += ', '
                    collab_str += auth + ' (' + collab_dict[auth]['col_auth_inst']
                    if collab_dict[auth]['col_auth_orcid'] == None:
                        collab_str += ')'
                    else: 
                        collab_str += ', ' + collab_dict[auth]['col_auth_orcid'] + ')'
                df_result.iloc[i,6] = collab_str
                
    return df_result        

In [47]:
def retrieve_papers_byTitle(search_query, df_search):
    
    print(str(len(df_search)) + ' papers retrieved...')
    df_result = df_search[['first_author', 'first_auth_inst', 'title', 'publication_date', 'cited_by_count', 'collaborators_of_interest', 'first_auth_orcid', 'id']]
    df_result = df_result.sort_values(['cited_by_count', 'publication_date'], ascending=[False, False]).reset_index(drop=True)
    df_result = df_result.drop_duplicates(subset=['id'], keep='first')
    print(str(len(df_result)) + ' unique papers retrieved.')
    df_result = df_result.rename(columns={
        'first_author': 'Spanish Researcher',
        'first_auth_inst': 'Institution',
        'title': 'Publication Title',
        'publication_date': 'Publication Date',
        'cited_by_count': 'Citation Count',
        'collaborators_of_interest': 'Canadian Collaborators',
        'first_auth_orcid': 'Academic Profile (ORCID)',
        'id': 'Publication Profile'
    })
    df_result = df_result.replace(np.nan, '')
    df_result['Canadian Collaborators'] = df_result['Canadian Collaborators'].replace('{}', 'N')
    
    #Process Canadian Collaborators
    for i in range(0, len(df_result)):
        collab_dict_ = df_result['Canadian Collaborators'].iloc[i]
        if collab_dict_ != 'N':
            collab_dict = ast.literal_eval(collab_dict_)
            if len(collab_dict) > 0:
                collab_str = ''
                for auth in collab_dict:
                    if collab_str != '':
                        collab_str += ', '
                    collab_str += auth + ' (' + collab_dict[auth]['col_auth_inst']
                    if collab_dict[auth]['col_auth_orcid'] == None:
                        collab_str += ')'
                    else: 
                        collab_str += ', ' + collab_dict[auth]['col_auth_orcid'] + ')'
                df_result.iloc[i,5] = collab_str
                
    return df_result        

In [39]:
def write_result(search_query, df_result, dataDir_save, search_by_title=False):
    
    sheet_name_ = search_query[0:30]
    if search_by_title:
        sbt_str = 'search_title '
    else:
        sbt_str = ''
    writer = pd.ExcelWriter(dataDir_save + 'works_by_search/' + 'works_by_' + sbt_str + search_query + '.xlsx')
    df_result.to_excel(writer, sheet_name=sheet_name_, index=False)
    # Auto-adjust columns' width
    for column in df_result:
        if column == 'Publication Title':
            column_width = 100
        elif column == 'Canadian Collaborators':
            if len(df_result['Canadian Collaborators'].unique()) > 1:
                column_width = 75
            else:
                column_width = max(df_result[column].astype(str).map(len).max(), len(column))
        else:
            column_width = max(df_result[column].astype(str).map(len).max(), len(column))
        col_idx = df_result.columns.get_loc(column)
        writer.sheets[sheet_name_].set_column(col_idx, col_idx, column_width)

    writer.save()

#  

#### Combine yearly batches, remove duplicate publications, split by year again:

In [None]:
df_total = pd.DataFrame()
for file in os.listdir(dataDir):
    if 'works_esp_first_auth_20' in file:
        if '_proc' in file:
            #print(file)
            df_load = pd.read_csv(dataDir + file)
            print('Opening ' + file + '...rows: ' + str(len(df_load)))
            df_total = pd.concat([df_total, df_load])

In [None]:
df_total

In [None]:
df_total = df_total.sort_values('publication_date')
df_total.tail()

In [None]:
df1 = df_total.drop_duplicates(subset=['id'])
df1

In [None]:
df1.to_csv(dataDir + 'works_esp_first_auth_since2017.csv', index=False)

In [None]:
df1 = pd.read_csv(dataDir + 'works_esp_first_auth_since2017.csv')
df1

In [None]:
pub_years = df1['publication_year'].unique()
pub_years

In [None]:
for year in pub_years:
    df1_y = df1.loc[df1['publication_year'] == year]
    df1_y.to_csv(dataDir + 'works_esp_first_auth_' + str(year) + '_proc2.csv', index=False)

#  

#### Process data to surface concepts related to each publication, by year:

Manually process each year:

In [3]:
year = 2022
df_pubs = pd.read_csv(dataDir + 'works_esp_first_auth_' + str(year) + '_proc2.csv')
print(len(df_pubs))
df_pubs.head()

51883


Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
0,https://openalex.org/W4226281971,https://doi.org/10.1093/ageing/afac101,Reduced humoral response 3 months following BN...,Reduced humoral response 3 months following BN...,2022,2022-01-01,{'openalex': 'https://openalex.org/W4226281971...,"{'id': 'https://openalex.org/V31768639', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'hybrid', 'oa_url...",...,"{'SARS-CoV-2': [0], 'vaccination': [1, 63, 138...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2022, 'cited_by_count': 1}]",2022-08-31T18:40:51.450873,2022-05-05,Macedonia Trigueros,,Institut d'Investigació en Ciències de la Salu...,ES,{}
1,https://openalex.org/W4226221802,https://doi.org/10.3390/su14031232,Residents’ Perception of the Impact of Sports ...,Residents’ Perception of the Impact of Sports ...,2022,2022-01-01,{'openalex': 'https://openalex.org/W4226221802...,"{'id': 'https://openalex.org/V10134376', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'gold', 'oa_url':...",...,"{'The': [0, 101, 191], 'analysis': [1], 'of': ...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2022, 'cited_by_count': 1}]",2022-09-02T17:53:17.853093,2022-05-05,Rómulo Jacobo González-García,,Valencia Catholic University Saint Vincent Martyr,ES,{}
2,https://openalex.org/W4226080913,https://doi.org/10.1016/j.redare.2020.10.012,Difficult airway due to angioedema caused afte...,Difficult airway due to angioedema caused afte...,2022,2022-01-01,{'openalex': 'https://openalex.org/W4226080913...,"{'id': 'https://openalex.org/V4210180527', 'is...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,,https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2022, 'cited_by_count': 1}]",2022-08-19T00:56:23.484775,2022-05-05,M Laurens Acevedo,,Vall d'Hebron Hospital Universitari,ES,{}
3,https://openalex.org/W4225485825,https://doi.org/10.5603/cj.a2021.0125,Atrial low voltage areas: A comparison between...,Atrial low voltage areas: A comparison between...,2022,2022-01-01,{'openalex': 'https://openalex.org/W4225485825...,"{'id': 'https://openalex.org/V9665008', 'issn_...",journal-article,"{'is_oa': True, 'oa_status': 'gold', 'oa_url':...",...,"{'Background:': [0], 'Atrial': [1], 'fibrosis'...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2022, 'cited_by_count': 1}]",2022-07-29T00:48:42.569626,2022-05-05,Ana Andrés Lahuerta,https://orcid.org/0000-0002-5432-4364,Valencia Catholic University Saint Vincent Martyr,ES,{}
4,https://openalex.org/W4285817857,https://doi.org/10.1093/gigascience/giac065,Toward global integration of biodiversity big ...,Toward global integration of biodiversity big ...,2022,2022-01-01,{'openalex': 'https://openalex.org/W4285817857...,"{'id': 'https://openalex.org/V2735135405', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'gold', 'oa_url':...",...,"{'Abstract': [0], 'Metazoan': [1], 'metabarcod...",https://api.openalex.org/works?filter=cites:W4...,[],2022-09-03T20:34:58.392970,2022-07-19,Paula Arribas,https://orcid.org/0000-0002-0358-8271,Instituto de Productos Naturales y Agrobiología,ES,"{'Jeremy R deWaard': {'col_auth_orcid': None, ..."


In [4]:
batch_size = 10000

start = time.time()
ind = 0
for i in range(0, len(df_pubs)):
    if i == 0:
        start_batch = time.time()
        df_concepts_batch = pd.DataFrame()
    
    pub_id = df_pubs.iloc[i]['id']
    #Get concepts
    concepts_ = df_pubs.iloc[i]['concepts']
    concepts = ast.literal_eval(concepts_)
    for j in range(0, len(concepts)):
        ind+=1
        row = pd.DataFrame(data={
            'pub_id': pub_id,
            'concept': concepts[j]['display_name'],
            'concept_id': concepts[j]['id'],
            'concept_level': concepts[j]['level']
        }, index=[ind])
        df_concepts_batch = pd.concat([df_concepts_batch, row]) 
    
    if (np.remainder(i, batch_size) == 0) & (i>0):
        if i == batch_size:
            df_concepts_batch.to_csv(dataDir + 'concepts_by_pub_esp_first_auth_' + str(year) + '.csv', index=False)
        else:
            df_concepts = pd.read_csv(dataDir + 'concepts_by_pub_esp_first_auth_' + str(year) + '.csv')
            df_concepts = pd.concat([df_concepts, df_concepts_batch])
            df_concepts.to_csv(dataDir + 'concepts_by_pub_esp_first_auth_' + str(year) + '.csv', index=False)
            print(str(len(df_concepts)) + ' rows saved.')
        end_batch = time.time()
        t_batch = end_batch - start_batch
        rate_batch = batch_size/t_batch
        t_remaining = round(((len(df_pubs) - i)/rate_batch)/60,0)
        print('Processing row ' + str(i) + '...at rate ' + str(round(rate_batch,0)) + ' rows/s...' + str(t_remaining) + ' min estimated remaining.')
        start_batch = time.time()
        df_concepts_batch = pd.DataFrame()
        
end = time.time()   
t = end - start
print('Runtime: ' + str(round(t/60, 1)) + ' min')

Processing row 10000...at rate 122.0 rows/s...6.0 min estimated remaining.
168388 rows saved.
Processing row 20000...at rate 119.0 rows/s...4.0 min estimated remaining.
253246 rows saved.
Processing row 30000...at rate 119.0 rows/s...3.0 min estimated remaining.
336955 rows saved.
Processing row 40000...at rate 120.0 rows/s...2.0 min estimated remaining.
415842 rows saved.
Processing row 50000...at rate 134.0 rows/s...0.0 min estimated remaining.
Runtime: 6.9 min


In [5]:
df_concepts = pd.read_csv(dataDir + 'concepts_by_pub_esp_first_auth_' + str(year) + '.csv')
df_concepts['concept'] = df_concepts['concept'].str.lower()
df_concepts

Unnamed: 0,pub_id,concept,concept_id,concept_level
0,https://openalex.org/W4226281971,medicine,https://openalex.org/C71924100,0
1,https://openalex.org/W4226281971,vaccination,https://openalex.org/C22070199,2
2,https://openalex.org/W4226281971,neutralization,https://openalex.org/C14086860,3
3,https://openalex.org/W4226281971,antibody,https://openalex.org/C159654299,2
4,https://openalex.org/W4226281971,immune system,https://openalex.org/C8891405,2
...,...,...,...,...
415837,https://openalex.org/W4294489704,sarcoidosis,https://openalex.org/C2781301800,2
415838,https://openalex.org/W4294489704,dactylitis,https://openalex.org/C2778019847,5
415839,https://openalex.org/W4294489704,medicine,https://openalex.org/C71924100,0
415840,https://openalex.org/W4294489704,dermatology,https://openalex.org/C16005928,1


In [6]:
df_concepts.to_csv(dataDir + 'concepts_by_pub_esp_first_auth_' + str(year) + '.csv', index=False)

Combine yearly batches

In [10]:
df_total = pd.DataFrame()
for file in os.listdir(dataDir):
    if 'concepts_by_pub_esp_first_auth_' in file:
        #print(file)
        df_load = pd.read_csv(dataDir + file)
        print('Opening ' + file + '...rows: ' + str(len(df_load)))
        df_total = pd.concat([df_total, df_load])

concepts_by_pub_esp_first_auth_2017.csv
Opening concepts_by_pub_esp_first_auth_2017.csv...rows: 598153
concepts_by_pub_esp_first_auth_2019.csv
Opening concepts_by_pub_esp_first_auth_2019.csv...rows: 512535
concepts_by_pub_esp_first_auth_2018.csv
Opening concepts_by_pub_esp_first_auth_2018.csv...rows: 528216
concepts_by_pub_esp_first_auth_2022.csv
Opening concepts_by_pub_esp_first_auth_2022.csv...rows: 415842
concepts_by_pub_esp_first_auth_2020.csv
Opening concepts_by_pub_esp_first_auth_2020.csv...rows: 542484
concepts_by_pub_esp_first_auth_2021.csv
Opening concepts_by_pub_esp_first_auth_2021.csv...rows: 685239


In [11]:
df_total

Unnamed: 0,pub_id,concept,concept_id,concept_level
0,https://openalex.org/W2963641747,political science,https://openalex.org/C17744445,0
1,https://openalex.org/W2963641747,humanities,https://openalex.org/C15708023,1
2,https://openalex.org/W2472444605,software,https://openalex.org/C2777904410,2
3,https://openalex.org/W2472444605,unit (ring theory),https://openalex.org/C122637931,2
4,https://openalex.org/W2472444605,engineering,https://openalex.org/C127413603,0
...,...,...,...,...
685234,https://openalex.org/W4200444401,surface finish,https://openalex.org/C71039073,2
685235,https://openalex.org/W4200444401,dioptre,https://openalex.org/C259533,3
685236,https://openalex.org/W4200444401,surface roughness,https://openalex.org/C107365816,2
685237,https://openalex.org/W4200444401,root mean square,https://openalex.org/C71907059,2


In [13]:
df_total = df_total.sort_values('concept_level').reset_index(drop=True)
df_total.tail()

Unnamed: 0,pub_id,concept,concept_id,concept_level
3282464,https://openalex.org/W3048970145,metapneumovirus,https://openalex.org/C2911218186,5
3282465,https://openalex.org/W3112118800,cell-free fetal dna,https://openalex.org/C152110520,5
3282466,https://openalex.org/W3083927243,transimpedance amplifier,https://openalex.org/C92631468,5
3282467,https://openalex.org/W2990774577,autotransformer,https://openalex.org/C22958824,5
3282468,https://openalex.org/W3088816417,magnetocrystalline anisotropy,https://openalex.org/C56803174,5


In [14]:
df_total.to_csv(dataDir + 'concepts_by_pub_esp_first_auth_since2017.csv', index=False)

#  

#### Identify top Spanish researchers related to concepts of interest to GAC-MDRID:

Ref: evernote:///view/10251849/s92/c2b57ab9-cbf3-423c-86e7-5e2094ce32a3/8137299e-1b5c-c964-fcc6-1a32ffc0d0f9

In [6]:
df_pubs = pd.read_csv(dataDir + 'works_esp_first_auth_since2017.csv')
print(len(df_pubs))
df_pubs.head()

458602


Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
0,https://openalex.org/W2963641747,,"Joan Marcet e Lucía Medina (eds.), La política...","Joan Marcet e Lucía Medina (eds.), La política...",2017,2017-01-01,{'openalex': 'https://openalex.org/W2963641747...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,,https://api.openalex.org/works?filter=cites:W2...,[],2022-08-08T10:54:07.500056,2019-07-30,Steven Forti,https://orcid.org/0000-0002-7027-0220,Instituto de Historia,ES,{}
1,https://openalex.org/W2472444605,https://doi.org/10.1061/(asce)ei.1943-5541.000...,Developing Topographic Surveying Software to T...,Developing Topographic Surveying Software to T...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2472444605...,"{'id': 'https://openalex.org/V170370859', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'AbstractIn': [0], 'this': [1], 'study,': [2]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-09T14:16:07.295905,2016-07-22,Miguel Castro-García,https://orcid.org/0000-0003-4157-6164,University of Castilla-La Mancha,ES,{}
2,https://openalex.org/W2478043544,https://doi.org/10.1016/j.rpsm.2016.04.002,Inducción de hipocapnia e hiperoxia con maniob...,Inducción de hipocapnia e hiperoxia con maniob...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2478043544...,"{'id': 'https://openalex.org/V2898614270', 'is...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Resumen': [0], 'Introduccion': [3], 'La': [5...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 1}, {'year':...",2022-08-22T06:24:56.670484,2016-08-23,Aida de Arriba-Arnau,https://orcid.org/0000-0002-7877-7341,University of Barcelona,ES,{}
3,https://openalex.org/W2482508491,https://doi.org/10.1016/j.spinee.2016.08.007,Preoperative and postoperative sagittal plane ...,Preoperative and postoperative sagittal plane ...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2482508491...,"{'id': 'https://openalex.org/V112180307', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Abstract': [0], 'Background': [3], 'Context'...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 2}, {'year':...",2022-06-24,2016-08-23,Felisa Sánchez-Mariscal,,Hospital Universitario de Getafe,ES,{}
4,https://openalex.org/W2484989076,https://doi.org/10.1007/978-3-319-09096-2_9,Looking Into the Profile of Music Audiences,Looking Into the Profile of Music Audiences,2017,2017-01-01,{'openalex': 'https://openalex.org/W2484989076...,"{'id': 'https://openalex.org/V3121261024', 'is...",book-chapter,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'The': [0], 'main': [1, 98], 'aims': [2], 'of...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-08T13:09:36.373379,2016-08-23,Víctor Fernández-Blanco,https://orcid.org/0000-0003-2096-9460,University of Oviedo,ES,{}


In [7]:
df_concepts = pd.read_csv(dataDir + 'concepts_by_pub_esp_first_auth_since2017.csv')
df_concepts

Unnamed: 0,pub_id,concept,concept_id,concept_level
0,https://openalex.org/W2963641747,political science,https://openalex.org/C17744445,0
1,https://openalex.org/W2784174820,political science,https://openalex.org/C17744445,0
2,https://openalex.org/W3042865676,sociology,https://openalex.org/C144024400,0
3,https://openalex.org/W2783557752,medicine,https://openalex.org/C71924100,0
4,https://openalex.org/W3042865676,psychology,https://openalex.org/C15744967,0
...,...,...,...,...
3282464,https://openalex.org/W3048970145,metapneumovirus,https://openalex.org/C2911218186,5
3282465,https://openalex.org/W3112118800,cell-free fetal dna,https://openalex.org/C152110520,5
3282466,https://openalex.org/W3083927243,transimpedance amplifier,https://openalex.org/C92631468,5
3282467,https://openalex.org/W2990774577,autotransformer,https://openalex.org/C22958824,5


'federalism'

In [29]:
search_query = 'cities'

df_search = search_concepts(search_query=search_query, df_concepts=df_concepts)
df_result = retrieve_papers(search_query=search_query, df_search=df_search, df_pubs=df_pubs)
write_result(search_query=search_query, df_result=df_result, dataDir_save=(dataDir))
df_result.head()

['cities']
5 papers related to search...
5 papers retrieved...
5 unique papers retrieved.


Unnamed: 0,Spanish Researcher,Institution,Concept,Publication Title,Publication Date,Citation Count,Canadian Collaborators,Academic Profile (ORCID),Publication Profile
0,Maria Patricio Mulero,University of Barcelona,creative cities,From creative city to generative governance of...,2017-09-01,13,N,,https://openalex.org/W2612447370
1,Marc Pradel-Miquel,University of Barcelona,creative cities,Kiezkulturnetz vs. Kreativquartier: Social inn...,2017-03-01,10,N,https://orcid.org/0000-0001-5067-2866,https://openalex.org/W2404090734
2,Diana Gutiérrez Posada,University of Oviedo,creative cities,Creative Clusters and Creative Multipliers: Ev...,2022-07-28,0,N,https://orcid.org/0000-0002-0435-8056,https://openalex.org/W4288084280
3,Joaquim Rius-Ulldemolins,University of Valencia,creative cities,"Regional Entrepreneurialism, ‘Creative City’ a...",2021-01-01,0,N,,https://openalex.org/W4205354168
4,María F Carrascal,University of Seville,creative cities,"“Laboratorio Q”, Seville: creative production ...",2019-01-02,0,N,,https://openalex.org/W2891143210


#  

For a series of search queries:

In [233]:
search_queries = [
    'European Union',
    'Ukraine',
]

In [234]:
for search_query in search_queries:
    df_search = search_concepts(search_query=search_query, df_concepts=df_concepts)
    df_result = retrieve_papers(search_query=search_query, df_search=df_search, df_pubs=df_pubs)
    write_result(search_query=search_query, df_result=df_result)
    print('')

['european union']
1735 papers related to search...
1735 papers retrieved...
1704 unique papers retrieved.

['ukraine']
0 papers related to search...
0 papers retrieved...
0 unique papers retrieved.



#  

#### Search keywords by title:

In [33]:
df_pubs2 = df_pubs.fillna('')
df_pubs2.head()

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
0,https://openalex.org/W2963641747,,"Joan Marcet e Lucía Medina (eds.), La política...","Joan Marcet e Lucía Medina (eds.), La política...",2017,2017-01-01,{'openalex': 'https://openalex.org/W2963641747...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,,https://api.openalex.org/works?filter=cites:W2...,[],2022-08-08T10:54:07.500056,2019-07-30,Steven Forti,https://orcid.org/0000-0002-7027-0220,Instituto de Historia,ES,{}
1,https://openalex.org/W2472444605,https://doi.org/10.1061/(asce)ei.1943-5541.000...,Developing Topographic Surveying Software to T...,Developing Topographic Surveying Software to T...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2472444605...,"{'id': 'https://openalex.org/V170370859', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'AbstractIn': [0], 'this': [1], 'study,': [2]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-09T14:16:07.295905,2016-07-22,Miguel Castro-García,https://orcid.org/0000-0003-4157-6164,University of Castilla-La Mancha,ES,{}
2,https://openalex.org/W2478043544,https://doi.org/10.1016/j.rpsm.2016.04.002,Inducción de hipocapnia e hiperoxia con maniob...,Inducción de hipocapnia e hiperoxia con maniob...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2478043544...,"{'id': 'https://openalex.org/V2898614270', 'is...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Resumen': [0], 'Introduccion': [3], 'La': [5...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 1}, {'year':...",2022-08-22T06:24:56.670484,2016-08-23,Aida de Arriba-Arnau,https://orcid.org/0000-0002-7877-7341,University of Barcelona,ES,{}
3,https://openalex.org/W2482508491,https://doi.org/10.1016/j.spinee.2016.08.007,Preoperative and postoperative sagittal plane ...,Preoperative and postoperative sagittal plane ...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2482508491...,"{'id': 'https://openalex.org/V112180307', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Abstract': [0], 'Background': [3], 'Context'...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 2}, {'year':...",2022-06-24,2016-08-23,Felisa Sánchez-Mariscal,,Hospital Universitario de Getafe,ES,{}
4,https://openalex.org/W2484989076,https://doi.org/10.1007/978-3-319-09096-2_9,Looking Into the Profile of Music Audiences,Looking Into the Profile of Music Audiences,2017,2017-01-01,{'openalex': 'https://openalex.org/W2484989076...,"{'id': 'https://openalex.org/V3121261024', 'is...",book-chapter,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'The': [0], 'main': [1, 98], 'aims': [2], 'of...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-08T13:09:36.373379,2016-08-23,Víctor Fernández-Blanco,https://orcid.org/0000-0003-2096-9460,University of Oviedo,ES,{}


In [66]:
search_query = 'Canada-EU Free Trade Agreement'

df_search = search_title(search_query=search_query, df_pubs=df_pubs2, keep_case=False)
df_result = retrieve_papers_byTitle(search_query=search_query, df_search=df_search)
write_result(search_query=search_query, df_result=df_result, dataDir_save=(dataDir), search_by_title=True)
df_result.head()

['canada-eu free trade agreement']
0 papers related to search...
0 papers retrieved...
0 unique papers retrieved.


Unnamed: 0,Spanish Researcher,Institution,Publication Title,Publication Date,Citation Count,Canadian Collaborators,Academic Profile (ORCID),Publication Profile
