# Regional Profile

#### Updated: Sep 15, 2022

#  

The Ambassador will make a visit to the University of Granada in December 2022. Prepare a profile of the university, to surface the top academics overall, the top academics for particular subject-areas, and the top collaborators with Canada. Generalize the production of regional profiles:

In [2]:
import pandas as pd
import numpy as np
import ast

In [3]:
dataDir = '/Users/rnaidoo/Documents/Canada-Secure/GAC/2022_MDRID/Projects_data/OpenAlex/'

#  

#### Functions:

In [4]:
def search_institutions(df_pubs, inst_search_terms=[]):
    
    inst_search_results = []
    for search_term in inst_search_terms:
        inst_search_results = inst_search_results + list(df_pubs.loc[df_pubs['first_auth_inst'].str.contains(search_term)]['first_auth_inst'].unique())
    df_insts = pd.DataFrame({'inst_search': inst_search_results})
    
    df_pubs2 = df_pubs.merge(df_insts, how='inner', left_on='first_auth_inst', right_on='inst_search')
    
    return [df_insts, df_pubs2]

In [5]:
def top_institutions(df_pubs, reg_name, top=100):
    
    df_pubs['publications'] = 1
    df_top_inst = df_pubs[['cited_by_count', 'publications', 'first_auth_inst']].groupby(['first_auth_inst']).sum()
    df_top_inst = df_top_inst.sort_values('cited_by_count', ascending=False)
    df_top_inst = df_top_inst.replace(np.nan, '')
    df_top_inst = df_top_inst.rename(columns={
        'cited_by_count': 'by citations',
        'publications': 'by publications'
    })
    if len(df_top_inst) < top:
        top = len(df_top_inst)
    df_top_inst.index.names = ['Top-' + str(top) + ' research institutions in ' + reg_name]
    
    return df_top_inst.head(top)

In [6]:
def top_researchers(df_pubs, reg_name, top=100):
    
    df_pubs['publications'] = 1
    df_top_ac = df_pubs[['cited_by_count', 'publications', 'first_author']].groupby(['first_author']).sum()
    df_top_ac = df_top_ac.sort_values('cited_by_count', ascending=False)
    df_top_ac2 = df_top_ac.merge(df_pubs[['first_author', 'first_auth_inst', 'first_auth_orcid']], how='left', on='first_author')
    df_top_ac2 = df_top_ac2.replace(np.nan, '')
    df_top_ac2 = df_top_ac2.rename(columns={
        'cited_by_count': 'by citations',
        'publications': 'by publications',
        'first_auth_inst': 'institution',
        'first_auth_orcid': 'orcid'
    })
    df_top_ac2 = df_top_ac2.drop_duplicates(subset=['orcid'])
    df_top_ac2 = df_top_ac2.drop_duplicates(subset=['first_author'])
    df_top_ac2 = df_top_ac2.set_index('first_author')
    if len(df_top_ac2) < top:
        top = len(df_top_ac2)
    df_top_ac2.index.names = ['Top-' + str(top) + ' academics in ' + reg_name]
    
    return df_top_ac2.head(top)

In [7]:
def search_concepts(search_query, df_concepts):
    
    search_terms = search_query.split(' AND ')
    for i in range(0, len(search_terms)):
        search_terms[i] = search_terms[i].lower()
    print(search_terms)
    for i in range(0, len(search_terms)):
        if i == 0:
            df_search = df_concepts.loc[df_concepts['concept'].str.contains(search_terms[i])]
        else:
            df_search_ = df_concepts.loc[df_concepts['concept'].str.contains(search_terms[i])]
            df_search_ = df_search_.rename(columns={
                'concept': 'concept'+str(i+1),
                'concept_id': 'concept_id'+str(i+1),
                'concept_level': 'concept_level'+str(i+1)
            })
            df_search = df_search.merge(df_search_, how='inner', on='pub_id')
    print(str(len(df_search)) + ' papers related to search...')
    
    return df_search

In [8]:
def write_pubs_by_concept_search(search_query, df_result, dataDir_save, fn_prefix=''):
    
    sheet_name_ = search_query[0:30]
    writer = pd.ExcelWriter(dataDir_save + fn_prefix + 'works_by_' + search_query + '.xlsx')
    df_result.to_excel(writer, sheet_name=sheet_name_, index=False)
    # Auto-adjust columns' width
    for column in df_result:
        if column == 'Publication Title':
            column_width = 100
        elif column == 'Canadian Collaborators':
            if len(df_result['Canadian Collaborators'].unique()) > 1:
                column_width = 75
            else:
                column_width = max(df_result[column].astype(str).map(len).max(), len(column))
        else:
            column_width = max(df_result[column].astype(str).map(len).max(), len(column))
        col_idx = df_result.columns.get_loc(column)
        writer.sheets[sheet_name_].set_column(col_idx, col_idx, column_width)

    writer.save()

In [9]:
def top_researchers_by_concept(search_query, df_pubs, df_concepts, reg_name, dataDir_save, top=100):

    df_search = search_concepts(search_query=search_query, df_concepts=df_concepts)
    df_pubs_search = df_pubs.merge(df_search, left_on='id', right_on='pub_id')
    print(str(len(df_pubs_search)) + ' papers retrieved...')
    
    df_result = df_pubs_search[['first_author', 'first_auth_inst', 'concept', 'concept_level', 'title', 'publication_date', 'cited_by_count', 'collaborators_of_interest', 'first_auth_orcid', 'id']]
    df_result = df_result.sort_values(['cited_by_count', 'publication_date', 'concept_level'], ascending=[False, False, False]).reset_index(drop=True)
    df_result = df_result.drop_duplicates(subset=['id'], keep='first')
    print(str(len(df_result)) + ' unique papers retrieved.')
    df_result = df_result.drop(columns=['concept_level'])
    df_result = df_result.rename(columns={
        'first_author': 'Spanish Researcher',
        'first_auth_inst': 'Institution',
        'concept': 'Concept',
        'title': 'Publication Title',
        'publication_date': 'Publication Date',
        'cited_by_count': 'Citation Count',
        'collaborators_of_interest': 'Canadian Collaborators',
        'first_auth_orcid': 'Academic Profile (ORCID)',
        'id': 'Publication Profile'
    })
    df_result = df_result.replace(np.nan, '')
    df_result['Canadian Collaborators'] = df_result['Canadian Collaborators'].replace('{}', 'N')

    #Process Canadian Collaborators
    for i in range(0, len(df_result)):
        collab_dict_ = df_result['Canadian Collaborators'].iloc[i]
        if collab_dict_ != 'N':
            collab_dict = ast.literal_eval(collab_dict_)
            if len(collab_dict) > 0:
                collab_str = ''
                for auth in collab_dict:
                    if collab_str != '':
                        collab_str += ', '
                    collab_str += auth + ' (' + collab_dict[auth]['col_auth_inst']
                    if collab_dict[auth]['col_auth_orcid'] == None:
                        collab_str += ')'
                    else: 
                        collab_str += ', ' + collab_dict[auth]['col_auth_orcid'] + ')'
                df_result.iloc[i,6] = collab_str
                
    #Write results to Excel spreadsheet
    write_pubs_by_concept_search(search_query=search_query, df_result=df_result, dataDir_save=dataDir_save, fn_prefix=reg_name+'_')
    
    df_pubs_search['publications'] = 1
    df_top_ac = df_pubs_search[['cited_by_count', 'publications', 'first_author']].groupby(['first_author']).sum()
    df_top_ac = df_top_ac.sort_values('cited_by_count', ascending=False)
    df_top_ac2 = df_top_ac.merge(df_pubs[['first_author', 'first_auth_inst', 'first_auth_orcid']], how='left', on='first_author')
    df_top_ac2 = df_top_ac2.replace(np.nan, '')
    df_top_ac2 = df_top_ac2.rename(columns={
        'cited_by_count': 'by citations',
        'publications': 'by publications',
        'first_auth_inst': 'institution',
        'first_auth_orcid': 'orcid'
    })
    df_top_ac2 = df_top_ac2.drop_duplicates(subset=['orcid'])
    df_top_ac2 = df_top_ac2.drop_duplicates(subset=['first_author'])
    df_top_ac2 = df_top_ac2.set_index('first_author')
    if len(df_top_ac2) < top:
        top = len(df_top_ac2)
    df_top_ac2.index.names = ['Top-' + str(top) + ' academics in ' + reg_name + ' for ' + search_query]
    
    return df_top_ac2.head(top)

In [10]:
def write_pubs_by_top_collab_Can(df_result, dataDir_save, fn_prefix=''):

    sheet_name_ = 'top-pubs with Canadian collab'
    writer = pd.ExcelWriter(dataDir_save + fn_prefix + 'works_can_col.xlsx')
    df_result.to_excel(writer, sheet_name=sheet_name_, index=False)
    # Auto-adjust columns' width
    for column in df_result:
        if column == 'Publication Title':
            column_width = 100
        elif column == 'Canadian Collaborators':
            column_width = 75
        else:
            column_width = max(df_result[column].astype(str).map(len).max(), len(column))
        col_idx = df_result.columns.get_loc(column)
        writer.sheets[sheet_name_].set_column(col_idx, col_idx, column_width)

    writer.save()

In [11]:
def top_collab_Can(df_pubs, reg_name, dataDir_save, top=100):
    
    df_can_col = df_pubs.loc[df_pubs['collaborators_of_interest'] != '{}']
    
    df_result = df_can_col[['first_author', 'first_auth_inst', 'title', 'publication_date', 'cited_by_count', 'collaborators_of_interest', 'first_auth_orcid', 'id']]
    df_result = df_result.sort_values(['cited_by_count', 'publication_date'], ascending=[False, False]).reset_index(drop=True)
    df_result = df_result.drop_duplicates(subset=['id'], keep='first')
    print(str(len(df_result)) + ' unique papers retrieved.')
    df_result = df_result.rename(columns={
        'first_author': 'Spanish Researcher',
        'first_auth_inst': 'Institution',
        'title': 'Publication Title',
        'publication_date': 'Publication Date',
        'cited_by_count': 'Citation Count',
        'collaborators_of_interest': 'Canadian Collaborators',
        'first_auth_orcid': 'Academic Profile (ORCID)',
        'id': 'Publication Profile'
    })
    df_result = df_result.replace(np.nan, '')

    #Process Canadian Collaborators
    for i in range(0, len(df_result)):
        collab_dict_ = df_result['Canadian Collaborators'].iloc[i]
        if collab_dict_ != 'N':
            collab_dict = ast.literal_eval(collab_dict_)
            if len(collab_dict) > 0:
                collab_str = ''
                for auth in collab_dict:
                    if collab_str != '':
                        collab_str += ', '
                    collab_str += auth + ' (' + collab_dict[auth]['col_auth_inst']
                    if collab_dict[auth]['col_auth_orcid'] == None:
                        collab_str += ')'
                    else: 
                        collab_str += ', ' + collab_dict[auth]['col_auth_orcid'] + ')'
                df_result.iloc[i,5] = collab_str
    
    #Write results to Excel spreadsheet
    write_pubs_by_top_collab_Can(df_result=df_result, dataDir_save=dataDir_save, fn_prefix=reg_name+'_')
    
    df_can_col['publications'] = 1
    df_top_ac = df_can_col[['cited_by_count', 'publications', 'first_author']].groupby(['first_author']).sum()
    df_top_ac = df_top_ac.sort_values('cited_by_count', ascending=False)
    df_top_ac2 = df_top_ac.merge(df_pubs[['first_author', 'first_auth_inst', 'first_auth_orcid']], how='left', on='first_author')
    df_top_ac2 = df_top_ac2.replace(np.nan, '')
    df_top_ac2 = df_top_ac2.rename(columns={
        'cited_by_count': 'by citations',
        'publications': 'by publications',
        'first_auth_inst': 'institution',
        'first_auth_orcid': 'orcid'
    })
    df_top_ac2 = df_top_ac2.drop_duplicates(subset=['orcid'])
    df_top_ac2 = df_top_ac2.drop_duplicates(subset=['first_author'])
    df_top_ac2 = df_top_ac2.set_index('first_author')
    if len(df_top_ac2) < top:
        top = len(df_top_ac2)
    df_top_ac2.index.names = ['Top-' + str(top) + ' academics in ' + reg_name + ' that collaborate with Canadian researchers']
    
    return df_top_ac2.head(top)

#  

#### Load requisite data:

In [12]:
df_pubs = pd.read_csv(dataDir + 'works_esp_first_auth/' + 'works_esp_first_auth_since2017.csv')
print(len(df_pubs))
df_pubs.head()

458602


Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
0,https://openalex.org/W2963641747,,"Joan Marcet e Lucía Medina (eds.), La política...","Joan Marcet e Lucía Medina (eds.), La política...",2017,2017-01-01,{'openalex': 'https://openalex.org/W2963641747...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,,https://api.openalex.org/works?filter=cites:W2...,[],2022-08-08T10:54:07.500056,2019-07-30,Steven Forti,https://orcid.org/0000-0002-7027-0220,Instituto de Historia,ES,{}
1,https://openalex.org/W2472444605,https://doi.org/10.1061/(asce)ei.1943-5541.000...,Developing Topographic Surveying Software to T...,Developing Topographic Surveying Software to T...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2472444605...,"{'id': 'https://openalex.org/V170370859', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'AbstractIn': [0], 'this': [1], 'study,': [2]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-09T14:16:07.295905,2016-07-22,Miguel Castro-García,https://orcid.org/0000-0003-4157-6164,University of Castilla-La Mancha,ES,{}
2,https://openalex.org/W2478043544,https://doi.org/10.1016/j.rpsm.2016.04.002,Inducción de hipocapnia e hiperoxia con maniob...,Inducción de hipocapnia e hiperoxia con maniob...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2478043544...,"{'id': 'https://openalex.org/V2898614270', 'is...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Resumen': [0], 'Introduccion': [3], 'La': [5...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 1}, {'year':...",2022-08-22T06:24:56.670484,2016-08-23,Aida de Arriba-Arnau,https://orcid.org/0000-0002-7877-7341,University of Barcelona,ES,{}
3,https://openalex.org/W2482508491,https://doi.org/10.1016/j.spinee.2016.08.007,Preoperative and postoperative sagittal plane ...,Preoperative and postoperative sagittal plane ...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2482508491...,"{'id': 'https://openalex.org/V112180307', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Abstract': [0], 'Background': [3], 'Context'...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 2}, {'year':...",2022-06-24,2016-08-23,Felisa Sánchez-Mariscal,,Hospital Universitario de Getafe,ES,{}
4,https://openalex.org/W2484989076,https://doi.org/10.1007/978-3-319-09096-2_9,Looking Into the Profile of Music Audiences,Looking Into the Profile of Music Audiences,2017,2017-01-01,{'openalex': 'https://openalex.org/W2484989076...,"{'id': 'https://openalex.org/V3121261024', 'is...",book-chapter,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'The': [0], 'main': [1, 98], 'aims': [2], 'of...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-08T13:09:36.373379,2016-08-23,Víctor Fernández-Blanco,https://orcid.org/0000-0003-2096-9460,University of Oviedo,ES,{}


In [13]:
df_concepts = pd.read_csv(dataDir + 'works_esp_first_auth/' + 'concepts_by_pub_esp_first_auth_since2017.csv')
df_concepts

Unnamed: 0,pub_id,concept,concept_id,concept_level
0,https://openalex.org/W2963641747,political science,https://openalex.org/C17744445,0
1,https://openalex.org/W2784174820,political science,https://openalex.org/C17744445,0
2,https://openalex.org/W3042865676,sociology,https://openalex.org/C144024400,0
3,https://openalex.org/W2783557752,medicine,https://openalex.org/C71924100,0
4,https://openalex.org/W3042865676,psychology,https://openalex.org/C15744967,0
...,...,...,...,...
3282464,https://openalex.org/W3048970145,metapneumovirus,https://openalex.org/C2911218186,5
3282465,https://openalex.org/W3112118800,cell-free fetal dna,https://openalex.org/C152110520,5
3282466,https://openalex.org/W3083927243,transimpedance amplifier,https://openalex.org/C92631468,5
3282467,https://openalex.org/W2990774577,autotransformer,https://openalex.org/C22958824,5


#  

#### Analyze a region of interest:

In [16]:
reg_name = 'Madrid'

dataDir_reg = dataDir+'works_esp_first_auth/Regional/'+reg_name+'/'

In [19]:
inst_search_terms = [
    
    #Madrid
    'Madrid',
    'IE',
    'Consejo Superior',
    'Spanish National Research Council'
    
    #Barcelona
    #'Barc',
    #'Cata'
    
    #Granada
    #'Granada',
    #'Andalu'
]

df_insts, df_pubs_reg = search_institutions(df_pubs=df_pubs, inst_search_terms=inst_search_terms)
df_insts['inst_search'].unique()

array(['Technical University of Madrid',
       'Complutense University of Madrid',
       'Autonomous University of Madrid',
       'Carlos III University of Madrid',
       'Madrid Institute for Advanced Studies',
       'European University of Madrid', 'Comunidad de Madrid',
       'MD Anderson Cancer Center Madrid',
       'Instituto de Ciencia de Materiales de Madrid',
       'Universidad a Distancia de Madrid', 'IVI Madrid Clinic',
       'Real Conservatorio Superior de Música de Madrid',
       'Hospital Universitario Quirónsalud Madrid',
       'Ayuntamiento de Madrid', 'Hospital Universitario HM Madrid',
       'Madrid Health Service',
       'Empresa Municipal de Transportes de Madrid', 'IE University',
       'Consejo Superior de Deportes',
       'Spanish National Research Council'], dtype=object)

In [30]:
top_institutions(df_pubs=df_pubs_reg, reg_name=reg_name, top=10)

Unnamed: 0_level_0,by citations,by publications
Top-10 research institutions in Madrid,Unnamed: 1_level_1,Unnamed: 2_level_1
Complutense University of Madrid,75243,16730
Autonomous University of Madrid,57436,10015
Technical University of Madrid,41565,8008
Carlos III University of Madrid,20935,5357
Madrid Institute for Advanced Studies,6812,564
European University of Madrid,6614,1002
Instituto de Ciencia de Materiales de Madrid,5620,517
IE University,2191,650
Hospital Universitario Quirónsalud Madrid,406,118
MD Anderson Cancer Center Madrid,321,68


In [31]:
top_researchers(df_pubs=df_pubs_reg, reg_name=reg_name, top=10)

Unnamed: 0_level_0,by citations,by publications,institution,orcid
Top-10 academics in Madrid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Joan B. Soriano,1644,21,Autonomous University of Madrid,https://orcid.org/0000-0001-9740-2994
Luis Paz-Ares,1328,18,Complutense University of Madrid,https://orcid.org/0000-0002-1947-3364
Mariano Sanz,1210,19,Complutense University of Madrid,https://orcid.org/0000-0002-6293-5755
Ernesto Panadero,847,8,Autonomous University of Madrid,https://orcid.org/0000-0003-0859-3616
Susana Campuzano,822,39,Complutense University of Madrid,https://orcid.org/0000-0002-9928-6613
Clara González-Sanguino,752,11,Complutense University of Madrid,https://orcid.org/0000-0001-7020-0604
Elena P. Moreno-Jiménez,730,3,Autonomous University of Madrid,https://orcid.org/0000-0002-9865-4875
Riccardo Frisenda,723,12,Madrid Institute for Advanced Studies,https://orcid.org/0000-0003-1728-7354
Luis Sordo,700,1,Complutense University of Madrid,https://orcid.org/0000-0003-1760-9472
Ruben Tolosana,525,26,Autonomous University of Madrid,https://orcid.org/0000-0002-9393-3066


In [32]:
top_collab_Can(df_pubs=df_pubs_reg, reg_name=reg_name, dataDir_save=dataDir_reg, top=10)

291 unique papers retrieved.


Unnamed: 0_level_0,by citations,by publications,institution,orcid
Top-10 academics in Madrid that collaborate with Canadian researchers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Joan B. Soriano,359,1,Autonomous University of Madrid,https://orcid.org/0000-0001-9740-2994
Miguel Martin,323,1,Complutense University of Madrid,https://orcid.org/0000-0001-6156-0739
Luis Paz-Ares,206,2,Complutense University of Madrid,https://orcid.org/0000-0002-1947-3364
Mikko Ketokivi,169,2,IE University,https://orcid.org/0000-0003-4510-4949
Pablo A. García-Salaberri,109,3,Carlos III University of Madrid,https://orcid.org/0000-0002-3918-5415
Raúl Ochoa-Hueso,83,1,Autonomous University of Madrid,https://orcid.org/0000-0002-1839-6926
Weiguang Cui,68,1,Autonomous University of Madrid,https://orcid.org/0000-0002-2113-4863
Javier Escaned,62,1,Complutense University of Madrid,https://orcid.org/0000-0003-4932-0112
Mohamed Khayet,59,2,Complutense University of Madrid,https://orcid.org/0000-0002-5117-2975
Erving Ximendes,54,1,Autonomous University of Madrid,https://orcid.org/0000-0001-7182-0573


In [33]:
search_query = 'foreign policy'
top_researchers_by_concept(search_query=search_query, df_pubs=df_pubs_reg, df_concepts=df_concepts, reg_name=reg_name, dataDir_save=dataDir_reg, top=10)

['foreign policy']
58 papers related to search...
11 papers retrieved...
11 unique papers retrieved.


Unnamed: 0_level_0,by citations,by publications,institution,orcid
Top-2 academics in Madrid for foreign policy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
José Miguel Calvillo Cisneros,1,1,Complutense University of Madrid,
Javier de Carlos Izquierdo,0,1,Complutense University of Madrid,https://orcid.org/0000-0001-5118-9681


In [34]:
search_query = 'international relations'
top_researchers_by_concept(search_query=search_query, df_pubs=df_pubs_reg, df_concepts=df_concepts, reg_name=reg_name, dataDir_save=dataDir_reg, top=10)

['international relations']
345 papers related to search...
57 papers retrieved...
57 unique papers retrieved.


Unnamed: 0_level_0,by citations,by publications,institution,orcid
Top-10 academics in Madrid for international relations,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Eduardo Romanos,13,1,Complutense University of Madrid,https://orcid.org/0000-0002-0200-3470
Ruth Cobos,12,1,Autonomous University of Madrid,https://orcid.org/0000-0002-3411-3009
Emanuela Lombardo,12,1,Complutense University of Madrid,https://orcid.org/0000-0001-7644-6891
Marta Domínguez Pérez,6,1,Complutense University of Madrid,https://orcid.org/0000-0002-3145-6671
Javier Catalán,6,2,Autonomous University of Madrid,https://orcid.org/0000-0001-7284-143X
María Navas-Loro,5,1,Technical University of Madrid,https://orcid.org/0000-0003-1011-5023
Bryan Rooney,5,1,Carlos III University of Madrid,https://orcid.org/0000-0003-4314-4699
Cándida Gago García,4,1,Complutense University of Madrid,https://orcid.org/0000-0003-2315-7943
Luisa Martín Rojo,3,1,Autonomous University of Madrid,https://orcid.org/0000-0003-2462-1955
Alice Martini,3,2,Autonomous University of Madrid,https://orcid.org/0000-0003-0513-4422


In [35]:
search_query = 'climate change'
top_researchers_by_concept(search_query=search_query, df_pubs=df_pubs_reg, df_concepts=df_concepts, reg_name=reg_name, dataDir_save=dataDir_reg, top=10)

['climate change']
2032 papers related to search...
201 papers retrieved...
187 unique papers retrieved.


Unnamed: 0_level_0,by citations,by publications,institution,orcid
Top-10 academics in Madrid for climate change,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Hilal M.S. Al-Maamary,134,2,Complutense University of Madrid,
Raúl Ochoa-Hueso,71,1,Autonomous University of Madrid,https://orcid.org/0000-0002-1839-6926
Margarita Ruiz-Ramos,57,2,Technical University of Madrid,https://orcid.org/0000-0003-0212-3381
Eduardo Moreno-Jiménez,56,2,Autonomous University of Madrid,https://orcid.org/0000-0002-2125-1197
Alfredo Rodríguez,55,3,Technical University of Madrid,https://orcid.org/0000-0001-7987-1623
David Santillán,54,3,Technical University of Madrid,https://orcid.org/0000-0002-9749-0522
Eduardo Aguilera,51,3,Technical University of Madrid,https://orcid.org/0000-0003-4382-124X
Sonia Condés,48,2,Technical University of Madrid,https://orcid.org/0000-0002-4438-8305
Kinfe Asayehegn,48,3,Technical University of Madrid,https://orcid.org/0000-0001-7247-2574
Álvaro Rubio-Cuadrado,48,4,Technical University of Madrid,https://orcid.org/0000-0001-5299-6063


#  

## Development

#### Determine all of the institutions in Granada:

In [None]:
df_pubs = pd.read_csv(dataDir + 'works_esp_first_auth/' + 'works_esp_first_auth_since2017.csv')
df_pubs.head()

In [None]:
inst_search_terms = [
    'Granada',
    'Andalu'
]

In [None]:
inst_search_results = []
for search_term in inst_search_terms:
    inst_search_results = inst_search_results + list(df_pubs.loc[df_pubs['first_auth_inst'].str.contains(search_term)]['first_auth_inst'].unique())

In [None]:
df_insts = pd.DataFrame({'inst_search': inst_search_results})
df_insts

In [None]:
df_pubs2 = df_pubs.merge(df_insts, how='inner', left_on='first_auth_inst', right_on='inst_search')
print(len(df_pubs2))
df_pubs2.head()

Top regional institutions, overall:

In [None]:
df_pubs2['publications'] = 1

df_top_inst = df_pubs2[['cited_by_count', 'publications', 'first_auth_inst']].groupby(['first_auth_inst']).sum()
df_top_inst = df_top_inst.sort_values('cited_by_count', ascending=False)
df_top_inst = df_top_inst.rename(columns={
    'cited_by_count': 'by citations',
    'publications': 'by publications'
})
df_top_inst.index.names = ['top research institutions in Andalucía']
df_top_inst.head(10)

Top regional researchers, overall:

In [None]:
df_top_ac = df_pubs2[['cited_by_count', 'publications', 'first_author']].groupby(['first_author']).sum()
df_top_ac = df_top_ac.sort_values('cited_by_count', ascending=False)
df_top_ac2 = df_top_ac.merge(df_pubs2[['first_author', 'first_auth_inst', 'first_auth_orcid']], how='left', on='first_author')
df_top_ac2 = df_top_ac2.rename(columns={
    'cited_by_count': 'by citations',
    'publications': 'by publications',
    'first_auth_inst': 'institution',
    'first_auth_orcid': 'orcid'
})
df_top_ac2 = df_top_ac2.drop_duplicates(subset=['orcid'])
df_top_ac2 = df_top_ac2.set_index('first_author')
df_top_ac2.index.names = ['Top-20 academics in Andalucía']
df_top_ac2.head(20)

Top regional researchers in a particular subject area:

In [None]:
df_concepts = pd.read_csv(dataDir + 'works_esp_first_auth/' + 'concepts_by_pub_esp_first_auth_since2017.csv')
df_concepts

'climate change'

In [None]:
search_query = 'climate change'

dataDir_save = dataDir + 'works_esp_first_auth/Regional/Granada/'
df_search = search_concepts(search_query=search_query, df_concepts=df_concepts)
df_result = retrieve_papers(search_query=search_query, df_search=df_search, df_pubs=df_pubs2)
write_result(search_query=search_query, df_result=df_result, dataDir_save=dataDir_save, fn_prefix='Granada_')
df_result.head()

Top collaborators with Canada, overall:

In [None]:
df_can_col = df_pubs2.loc[df_pubs2['collaborators_of_interest'] != '{}']
print(len(df_can_col))
df_can_col.head()

In [None]:
df_result = df_can_col[['first_author', 'first_auth_inst', 'title', 'publication_date', 'cited_by_count', 'collaborators_of_interest', 'first_auth_orcid', 'id']]
df_result = df_result.sort_values(['cited_by_count', 'publication_date'], ascending=[False, False]).reset_index(drop=True)
df_result = df_result.drop_duplicates(subset=['id'], keep='first')
print(str(len(df_result)) + ' unique papers retrieved.')
df_result = df_result.rename(columns={
    'first_author': 'Spanish Researcher',
    'first_auth_inst': 'Institution',
    'title': 'Publication Title',
    'publication_date': 'Publication Date',
    'cited_by_count': 'Citation Count',
    'collaborators_of_interest': 'Canadian Collaborators',
    'first_auth_orcid': 'Academic Profile (ORCID)',
    'id': 'Publication Profile'
})
df_result = df_result.replace(np.nan, '')

#Process Canadian Collaborators
for i in range(0, len(df_result)):
    collab_dict_ = df_result['Canadian Collaborators'].iloc[i]
    if collab_dict_ != 'N':
        collab_dict = ast.literal_eval(collab_dict_)
        if len(collab_dict) > 0:
            collab_str = ''
            for auth in collab_dict:
                if collab_str != '':
                    collab_str += ', '
                collab_str += auth + ' (' + collab_dict[auth]['col_auth_inst']
                if collab_dict[auth]['col_auth_orcid'] == None:
                    collab_str += ')'
                else: 
                    collab_str += ', ' + collab_dict[auth]['col_auth_orcid'] + ')'
            df_result.iloc[i,5] = collab_str

In [None]:
df_result.head()

In [None]:
sheet_name_ = 'top-pubs with Canadian collab'
writer = pd.ExcelWriter(dataDir_save + 'Granada_works_can_col.xlsx')
df_result.to_excel(writer, sheet_name=sheet_name_, index=False)
# Auto-adjust columns' width
for column in df_result:
    if column == 'Publication Title':
        column_width = 100
    elif column == 'Canadian Collaborators':
        column_width = 75
    else:
        column_width = max(df_result[column].astype(str).map(len).max(), len(column))
    col_idx = df_result.columns.get_loc(column)
    writer.sheets[sheet_name_].set_column(col_idx, col_idx, column_width)

writer.save()

In [None]:
df_top_ac = df_can_col[['cited_by_count', 'publications', 'first_author']].groupby(['first_author']).sum()
df_top_ac = df_top_ac.sort_values('cited_by_count', ascending=False)
df_top_ac2 = df_top_ac.merge(df_pubs2[['first_author', 'first_auth_inst', 'first_auth_orcid']], how='left', on='first_author')
df_top_ac2 = df_top_ac2.rename(columns={
    'cited_by_count': 'by citations',
    'publications': 'by publications',
    'first_auth_inst': 'institution',
    'first_auth_orcid': 'orcid'
})
df_top_ac2 = df_top_ac2.drop_duplicates(subset=['orcid'])
df_top_ac2 = df_top_ac2.set_index('first_author')
df_top_ac2.index.names = ['Top-10 academics in Andalucía that collaborate with Canadian researchers']
df_top_ac2.head(10)