# Canada-IE Academic Collaborations

#### Updated: Sep 23, 2022

#  

Chris Berzins: "Could you do a search for Canadian academic collaboration with “IE University” (previously known an “Instituto Empresa”)? I don’t suspect there is a lot, if any, first author collaboration, so would be good to go broader. If anything much comes up, I’d like to send it to the Dean/Provost who I met yesterday, and discussed our project with."

- IE first authors, Canadian collaborators
- Canadian first authors, IE collaborators

Ultimatley, generalize this for all regions.

In [1]:
import pandas as pd
import numpy as np
import ast

In [2]:
dataDir = '/Users/rnaidoo/Documents/Canada-Secure/GAC/2022_MDRID/Projects_data/OpenAlex/'

#  

#### Functions:

In [3]:
def search_institutions(df_pubs, inst_search_terms=[]):
    
    inst_search_results = []
    for search_term in inst_search_terms:
        inst_search_results = inst_search_results + list(df_pubs.loc[df_pubs['first_auth_inst'].str.contains(search_term)]['first_auth_inst'].unique())
    df_insts = pd.DataFrame({'inst_search': inst_search_results})
    
    df_pubs2 = df_pubs.merge(df_insts, how='inner', left_on='first_auth_inst', right_on='inst_search')
    
    return [df_insts, df_pubs2]

In [4]:
def top_collab_Can(df_pubs, reg_name, dataDir_save, top=100):
    
    df_can_col = df_pubs.loc[df_pubs['collaborators_of_interest'] != '{}']
    
    df_result = df_can_col[['first_author', 'first_auth_inst', 'title', 'publication_date', 'cited_by_count', 'collaborators_of_interest', 'first_auth_orcid', 'id']]
    df_result = df_result.sort_values(['cited_by_count', 'publication_date'], ascending=[False, False]).reset_index(drop=True)
    df_result = df_result.drop_duplicates(subset=['id'], keep='first')
    print(str(len(df_result)) + ' unique papers retrieved.')
    df_result = df_result.rename(columns={
        'first_author': 'Spanish Researcher',
        'first_auth_inst': 'Institution',
        'title': 'Publication Title',
        'publication_date': 'Publication Date',
        'cited_by_count': 'Citation Count',
        'collaborators_of_interest': 'Canadian Collaborators',
        'first_auth_orcid': 'Academic Profile (ORCID)',
        'id': 'Publication Profile'
    })
    df_result = df_result.replace(np.nan, '')

    #Process Canadian Collaborators
    for i in range(0, len(df_result)):
        collab_dict_ = df_result['Canadian Collaborators'].iloc[i]
        if collab_dict_ != 'N':
            collab_dict = ast.literal_eval(collab_dict_)
            if len(collab_dict) > 0:
                collab_str = ''
                for auth in collab_dict:
                    if collab_str != '':
                        collab_str += ', '
                    collab_str += auth + ' (' + collab_dict[auth]['col_auth_inst']
                    if collab_dict[auth]['col_auth_orcid'] == None:
                        collab_str += ')'
                    else: 
                        collab_str += ', ' + collab_dict[auth]['col_auth_orcid'] + ')'
                df_result.iloc[i,5] = collab_str
    
    #Write results to Excel spreadsheet
    write_pubs_by_top_collab_Can(df_result=df_result, dataDir_save=dataDir_save, fn_prefix=reg_name+'_')
    
    #Produce summary table
    df_can_col['publications'] = 1
    df_top_ac = df_can_col[['cited_by_count', 'publications', 'first_author']].groupby(['first_author']).sum()
    df_top_ac = df_top_ac.sort_values('cited_by_count', ascending=False)
    df_top_ac2 = df_top_ac.merge(df_pubs[['first_author', 'first_auth_inst', 'first_auth_orcid']], how='left', on='first_author')
    df_top_ac2 = df_top_ac2.replace(np.nan, '')
    df_top_ac2 = df_top_ac2.rename(columns={
        'cited_by_count': 'by citations',
        'publications': 'by publications',
        'first_auth_inst': 'institution',
        'first_auth_orcid': 'orcid'
    })
    df_top_ac2 = df_top_ac2.drop_duplicates(subset=['orcid'])
    df_top_ac2 = df_top_ac2.drop_duplicates(subset=['first_author'])
    df_top_ac2 = df_top_ac2.set_index('first_author')
    if len(df_top_ac2) < top:
        top = len(df_top_ac2)
    df_top_ac2.index.names = ['Top-' + str(top) + ' academics in ' + reg_name + ' that collaborate with Canadian researchers']
    
    return df_top_ac2.head(top)

In [5]:
def write_pubs_by_top_collab_Can(df_result, dataDir_save, fn_prefix=''):

    sheet_name_ = 'top-pubs with Canadian collab'
    writer = pd.ExcelWriter(dataDir_save + fn_prefix + 'works_can_col.xlsx')
    df_result.to_excel(writer, sheet_name=sheet_name_, index=False)
    # Auto-adjust columns' width
    for column in df_result:
        if column == 'Publication Title':
            column_width = 100
        elif column == 'Canadian Collaborators':
            column_width = 75
        else:
            column_width = max(df_result[column].astype(str).map(len).max(), len(column))
        col_idx = df_result.columns.get_loc(column)
        writer.sheets[sheet_name_].set_column(col_idx, col_idx, column_width)

    writer.save()

#  

#### Find IE first authors, with Canadian collaborators:

In [6]:
df_pubs = pd.read_csv(dataDir + 'works_ES_first_auth/' + 'works_ES_first_auth_since2017.csv')
print(len(df_pubs))
df_pubs.head()

458602


Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
0,https://openalex.org/W2963641747,,"Joan Marcet e Lucía Medina (eds.), La política...","Joan Marcet e Lucía Medina (eds.), La política...",2017,2017-01-01,{'openalex': 'https://openalex.org/W2963641747...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,,https://api.openalex.org/works?filter=cites:W2...,[],2022-08-08T10:54:07.500056,2019-07-30,Steven Forti,https://orcid.org/0000-0002-7027-0220,Instituto de Historia,ES,{}
1,https://openalex.org/W2472444605,https://doi.org/10.1061/(asce)ei.1943-5541.000...,Developing Topographic Surveying Software to T...,Developing Topographic Surveying Software to T...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2472444605...,"{'id': 'https://openalex.org/V170370859', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'AbstractIn': [0], 'this': [1], 'study,': [2]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-09T14:16:07.295905,2016-07-22,Miguel Castro-García,https://orcid.org/0000-0003-4157-6164,University of Castilla-La Mancha,ES,{}
2,https://openalex.org/W2478043544,https://doi.org/10.1016/j.rpsm.2016.04.002,Inducción de hipocapnia e hiperoxia con maniob...,Inducción de hipocapnia e hiperoxia con maniob...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2478043544...,"{'id': 'https://openalex.org/V2898614270', 'is...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Resumen': [0], 'Introduccion': [3], 'La': [5...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 1}, {'year':...",2022-08-22T06:24:56.670484,2016-08-23,Aida de Arriba-Arnau,https://orcid.org/0000-0002-7877-7341,University of Barcelona,ES,{}
3,https://openalex.org/W2482508491,https://doi.org/10.1016/j.spinee.2016.08.007,Preoperative and postoperative sagittal plane ...,Preoperative and postoperative sagittal plane ...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2482508491...,"{'id': 'https://openalex.org/V112180307', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Abstract': [0], 'Background': [3], 'Context'...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 2}, {'year':...",2022-06-24,2016-08-23,Felisa Sánchez-Mariscal,,Hospital Universitario de Getafe,ES,{}
4,https://openalex.org/W2484989076,https://doi.org/10.1007/978-3-319-09096-2_9,Looking Into the Profile of Music Audiences,Looking Into the Profile of Music Audiences,2017,2017-01-01,{'openalex': 'https://openalex.org/W2484989076...,"{'id': 'https://openalex.org/V3121261024', 'is...",book-chapter,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'The': [0], 'main': [1, 98], 'aims': [2], 'of...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-08T13:09:36.373379,2016-08-23,Víctor Fernández-Blanco,https://orcid.org/0000-0003-2096-9460,University of Oviedo,ES,{}


In [7]:
reg_name = 'IE'

dataDir_reg = dataDir+'Regional/'+reg_name+'/'

In [8]:
inst_search_terms = [
    'IE',
    'Instituto Empresa'
]

df_insts, df_pubs_reg = search_institutions(df_pubs=df_pubs, inst_search_terms=inst_search_terms)
df_insts['inst_search'].unique()

array(['IE University'], dtype=object)

In [9]:
top_collab_Can(df_pubs=df_pubs_reg, reg_name=reg_name, dataDir_save=dataDir_reg)

22 unique papers retrieved.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_can_col['publications'] = 1


Unnamed: 0_level_0,by citations,by publications,institution,orcid
Top-5 academics in IE that collaborate with Canadian researchers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mikko Ketokivi,169,2,IE University,https://orcid.org/0000-0003-4510-4949
Alvaro E. Arenas,34,2,IE University,https://orcid.org/0000-0003-1183-1283
Stefanie Beninger,32,4,IE University,https://orcid.org/0000-0002-6956-7625
Chrystelle Sola,6,1,IE University,https://orcid.org/0000-0002-8375-9121
Kyle John Wilby,0,1,IE University,https://orcid.org/0000-0002-1670-2512


#  

#### Find Canadian first authors, with IE collaborators:

In [10]:
df_pubs = pd.read_csv(dataDir + 'works_CA_first_auth/' + 'works_CA_first_auth_since2017.csv')
print(len(df_pubs))
df_pubs.head()

506581


Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
0,https://openalex.org/W2582434976,https://doi.org/10.1007/978-1-4842-2598-1_12,Using Resource Quotas,Using Resource Quotas,2017,2017-01-01,{'openalex': 'https://openalex.org/W2582434976...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",book-chapter,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'In': [0], 'Chapter': [1], '10': [2], 'we': [...",https://api.openalex.org/works?filter=cites:W2...,[],2022-06-30,2017-02-03,Deepak Vohra,,Peace Arch Hospital,CA,{}
1,https://openalex.org/W2598730797,https://doi.org/10.1109/iccnc.2017.7876167,An improved SDN-based fabric for flexible data...,An improved SDN-based fabric for flexible data...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2598730797...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",proceedings-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Data': [0], 'centers': [1], 'play': [2], 'an...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2020, 'cited_by_count': 1}]",2022-08-22T07:21:54.950801,2017-04-07,Wei Hou,,University of Ottawa,CA,{}
2,https://openalex.org/W2523535834,https://doi.org/10.1007/978-3-319-42304-3_1,Power-Shaping Configurable Microprocessors for...,Power-Shaping Configurable Microprocessors for...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2523535834...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",book-chapter,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'The': [0], '“Internet': [1], 'of': [2, 8, 52...",https://api.openalex.org/works?filter=cites:W2...,[],2022-08-18T08:44:40.666311,2016-09-30,Fabio Campi,,Simon Fraser University,CA,{}
3,https://openalex.org/W2940082676,,Les contours de l'organisation du travail soum...,Les contours de l'organisation du travail soum...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2940082676...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,,https://api.openalex.org/works?filter=cites:W2...,[],2022-08-07T09:01:08.519143,2019-04-25,Marc Nihoul,,Université Laval Faculty of Law,CA,{}
4,https://openalex.org/W2732588845,https://doi.org/10.1161/strokeaha.117.017622,In-Patient Code Stroke,In-Patient Code Stroke,2017,2017-01-01,{'openalex': 'https://openalex.org/W2732588845...,"{'id': 'https://openalex.org/V62532593', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,"{'Stroke': [0], 'is': [1], 'a': [2], 'relative...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 5}, {'year':...",2022-08-30T10:39:20.384673,2017-07-14,Charles D. Kassardjian,https://orcid.org/0000-0001-7117-8787,University of Toronto,CA,{}


In [41]:
df_pubs_esp_col = df_pubs.loc[df_pubs['collaborators_of_interest'] != '{}']
print(len(df_pubs_esp_col))
df_pubs_esp_col.head()

3504


Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
234,https://openalex.org/W2915281934,,Stereotactic radiosurgery for vestibular schwa...,Stereotactic radiosurgery for vestibular schwa...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2915281934...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,"{'Objectives': [0], 'The': [1, 86], 'aim': [2]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-09-12T07:48:22.394298,2019-03-02,May N. Tsao,,Princess Margaret Cancer Centre,CA,{'Roberto Martinez': {'col_auth_orcid': 'https...
459,https://openalex.org/W2562287101,https://doi.org/10.1183/13993003.e0016-2016,Executive Summary: 2017 ERS/ATS standards for ...,Executive Summary: 2017 ERS/ATS standards for ...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2562287101...,"{'id': 'https://openalex.org/V4210207896', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,"{'This': [0], 'document': [1], 'summarises': [...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2022-09-03T06:43:42.662189,2017-01-06,Brian L. Graham,,University of Saskatchewan,CA,{'Felip Burgos': {'col_auth_orcid': 'https://o...
1560,https://openalex.org/W2342386562,https://doi.org/10.1007/s10494-016-9737-2,Coherent Structures in a Non-equilibrium Large...,Coherent Structures in a Non-equilibrium Large...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2342386562...,"{'id': 'https://openalex.org/V4210200752', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'green', 'oa_url'...",...,"{'The': [0, 19, 53, 147, 220], 'characteristic...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2022-09-06T07:41:49.345081,2016-06-24,Yvan Maciel,https://orcid.org/0000-0003-1993-472X,Université Laval,CA,"{'Mark P. Simens': {'col_auth_orcid': None, 'c..."
1951,https://openalex.org/W2273470132,https://doi.org/10.1016/j.jpubeco.2016.10.002,"Compulsory voting, turnout, and government spe...","Compulsory voting, turnout, and government spe...",2017,2017-01-01,{'openalex': 'https://openalex.org/W2273470132...,"{'id': 'https://openalex.org/V199447588', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,"{'Abstract': [0], 'We': [3], 'study': [4], 'a'...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 3}, {'year':...",2022-09-20T09:24:42.941174,2016-06-24,Mitchell Hoffman,,University of Toronto,CA,{'Gianmarco León': {'col_auth_orcid': 'https:/...
2474,https://openalex.org/W2474041279,https://doi.org/10.1016/j.proci.2016.06.119,Structural effects of biodiesel on soot format...,Structural effects of biodiesel on soot format...,2017,2017-01-01,{'openalex': 'https://openalex.org/W2474041279...,"{'id': 'https://openalex.org/V4210174958', 'is...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Abstract': [0], 'Structural': [3], 'effects'...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 9}, {'year':...",2022-09-14T20:38:19.828906,2016-07-22,Chiara Saggese,https://orcid.org/0000-0002-4043-1054,University of Toronto,CA,"{'Javier Barba': {'col_auth_orcid': None, 'col..."


In [42]:
df_pubs_esp_col['collaborators_of_interest'].iloc[0]

"{'Roberto Martinez': {'col_auth_orcid': 'https://orcid.org/0000-0001-7342-9332', 'col_auth_inst': 'Hospital Ruber Internacional', 'col_auth_country': 'ES'}}"

In [43]:
IE_col_indicies = []
for i in range(0, len(df_pubs_esp_col)):
    collab_dict_ = df_pubs_esp_col['collaborators_of_interest'].iloc[i]
    collab_dict = ast.literal_eval(collab_dict_)
    if len(collab_dict) > 0:
        for auth in collab_dict:
            if collab_dict[auth]['col_auth_inst'] == 'IE University':
                IE_col_indicies.append(i) 
                
df_pubs_IE_col = df_pubs_esp_col.iloc[IE_col_indicies]

In [44]:
print(len(df_pubs_IE_col))
df_pubs_IE_col.head()

16


Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
44284,https://openalex.org/W3124864154,https://doi.org/10.1111/1911-3846.12299,Popularizing a Management Accounting Idea: The...,Popularizing a Management Accounting Idea: The...,2017,2017-06-01,{'openalex': 'https://openalex.org/W3124864154...,"{'id': 'https://openalex.org/V65924262', 'issn...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'We': [0, 100], 'explore': [1], 'how': [2, 67...",https://api.openalex.org/works?filter=cites:W3...,"[{'year': 2022, 'cited_by_count': 4}, {'year':...",2022-08-18T20:04:36.345408,2021-02-01,David J. Cooper,,University of Alberta,CA,"{'Mahmoud Ezzamel': {'col_auth_orcid': None, '..."
162492,https://openalex.org/W2801245079,https://doi.org/10.1111/joop.12221,How does proactive personality promote creativ...,How does proactive personality promote creativ...,2018,2018-12-01,{'openalex': 'https://openalex.org/W2801245079...,"{'id': 'https://openalex.org/V87328381', 'issn...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,,https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 4}, {'year':...",2022-08-18T17:59:54.620476,2018-05-17,Jingzhou Pan,,College of Management and Economics,CA,"{'Bin Ma': {'col_auth_orcid': None, 'col_auth_..."
230479,https://openalex.org/W2967825003,https://doi.org/10.2134/jeq2019.02.0070,Options for Improved Phosphorus Cycling and Us...,Options for Improved Phosphorus Cycling and Us...,2019,2019-09-01,{'openalex': 'https://openalex.org/W2967825003...,"{'id': 'https://openalex.org/V81083566', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'hybrid', 'oa_url...",...,"{'Soil': [0], 'phosphorus': [1], '(P)': [2], '...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 11}, {'year'...",2022-09-18T11:01:02.692261,2019-08-22,Kimberley D. Schneider,,Agriculture and Agriculture-Food Canada,CA,{'Derek H. Lynch': {'col_auth_orcid': 'https:/...
243732,https://openalex.org/W2987653376,https://doi.org/10.2136/sssaj2019.03.0087,Spatial Variation of Soil Health Indices in a ...,Spatial Variation of Soil Health Indices in a ...,2019,2019-11-01,{'openalex': 'https://openalex.org/W2987653376...,"{'id': 'https://openalex.org/V59755854', 'issn...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,,https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-24T06:24:44.634774,2019-11-22,Bernie J. Zebarth,https://orcid.org/0000-0003-1465-5051,Agriculture and Agriculture-Food Canada,CA,{'David L. Burton': {'col_auth_orcid': 'https:...
265009,https://openalex.org/W2999020507,https://doi.org/10.1080/0960085x.2019.1708821,IT value creation in public sector: how IT-ena...,IT value creation in public sector: how IT-ena...,2020,2020-01-06,{'openalex': 'https://openalex.org/W2999020507...,"{'id': 'https://openalex.org/V116712389', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Governments': [0], 'today': [1], 'are': [2],...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 7}, {'year':...",2022-09-16T17:47:56.155670,2020-01-23,Jie Mein Goh,https://orcid.org/0000-0002-3067-0916,Simon Fraser University,CA,{'Jie Mein Goh': {'col_auth_orcid': 'https://o...


In [45]:
#Process IE collaborators
for i in range(0, len(df_pubs_IE_col)):
    collab_dict_ = df_pubs_IE_col['collaborators_of_interest'].iloc[i]
    collab_dict = ast.literal_eval(collab_dict_)
    if len(collab_dict) > 0:
        collab_str = ''
        for auth in collab_dict:
            if collab_str != '':
                collab_str += ', '
            collab_str += auth + ' (' + collab_dict[auth]['col_auth_inst']
            if collab_dict[auth]['col_auth_orcid'] == None:
                collab_str += ')'
            else: 
                collab_str += ', ' + collab_dict[auth]['col_auth_orcid'] + ')'
        df_pubs_IE_col.iloc[i,-1] = collab_str

In [46]:
df_pubs_IE_col.head()

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,first_author,first_auth_orcid,first_auth_inst,first_auth_country,collaborators_of_interest
44284,https://openalex.org/W3124864154,https://doi.org/10.1111/1911-3846.12299,Popularizing a Management Accounting Idea: The...,Popularizing a Management Accounting Idea: The...,2017,2017-06-01,{'openalex': 'https://openalex.org/W3124864154...,"{'id': 'https://openalex.org/V65924262', 'issn...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'We': [0, 100], 'explore': [1], 'how': [2, 67...",https://api.openalex.org/works?filter=cites:W3...,"[{'year': 2022, 'cited_by_count': 4}, {'year':...",2022-08-18T20:04:36.345408,2021-02-01,David J. Cooper,,University of Alberta,CA,Mahmoud Ezzamel (IE University)
162492,https://openalex.org/W2801245079,https://doi.org/10.1111/joop.12221,How does proactive personality promote creativ...,How does proactive personality promote creativ...,2018,2018-12-01,{'openalex': 'https://openalex.org/W2801245079...,"{'id': 'https://openalex.org/V87328381', 'issn...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,,https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 4}, {'year':...",2022-08-18T17:59:54.620476,2018-05-17,Jingzhou Pan,,College of Management and Economics,CA,Bin Ma (IE University)
230479,https://openalex.org/W2967825003,https://doi.org/10.2134/jeq2019.02.0070,Options for Improved Phosphorus Cycling and Us...,Options for Improved Phosphorus Cycling and Us...,2019,2019-09-01,{'openalex': 'https://openalex.org/W2967825003...,"{'id': 'https://openalex.org/V81083566', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'hybrid', 'oa_url...",...,"{'Soil': [0], 'phosphorus': [1], '(P)': [2], '...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 11}, {'year'...",2022-09-18T11:01:02.692261,2019-08-22,Kimberley D. Schneider,,Agriculture and Agriculture-Food Canada,CA,"Derek H. Lynch (IE University, https://orcid.o..."
243732,https://openalex.org/W2987653376,https://doi.org/10.2136/sssaj2019.03.0087,Spatial Variation of Soil Health Indices in a ...,Spatial Variation of Soil Health Indices in a ...,2019,2019-11-01,{'openalex': 'https://openalex.org/W2987653376...,"{'id': 'https://openalex.org/V59755854', 'issn...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,,https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2022-08-24T06:24:44.634774,2019-11-22,Bernie J. Zebarth,https://orcid.org/0000-0003-1465-5051,Agriculture and Agriculture-Food Canada,CA,"David L. Burton (IE University, https://orcid...."
265009,https://openalex.org/W2999020507,https://doi.org/10.1080/0960085x.2019.1708821,IT value creation in public sector: how IT-ena...,IT value creation in public sector: how IT-ena...,2020,2020-01-06,{'openalex': 'https://openalex.org/W2999020507...,"{'id': 'https://openalex.org/V116712389', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"{'Governments': [0], 'today': [1], 'are': [2],...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 7}, {'year':...",2022-09-16T17:47:56.155670,2020-01-23,Jie Mein Goh,https://orcid.org/0000-0002-3067-0916,Simon Fraser University,CA,"Jie Mein Goh (IE University, https://orcid.org..."


In [47]:
df_result = df_pubs_IE_col[['first_author', 'first_auth_inst', 'title', 'publication_date', 'cited_by_count', 'collaborators_of_interest', 'first_auth_orcid', 'id']]
df_result = df_result.sort_values(['cited_by_count', 'publication_date'], ascending=[False, False]).reset_index(drop=True)
df_result = df_result.drop_duplicates(subset=['id'], keep='first')
print(str(len(df_result)) + ' unique papers retrieved.')
df_result = df_result.rename(columns={
    'first_author': 'Canadian Researcher',
    'first_auth_inst': 'Institution',
    'title': 'Publication Title',
    'publication_date': 'Publication Date',
    'cited_by_count': 'Citation Count',
    'collaborators_of_interest': 'IE Collaborators',
    'first_auth_orcid': 'Academic Profile (ORCID)',
    'id': 'Publication Profile'
})
df_result = df_result.replace(np.nan, '')

15 unique papers retrieved.


In [48]:
dataDir_save = dataDir_reg
sheet_name_ = 'top-pubs with IE collab'
writer = pd.ExcelWriter(dataDir_save + 'CA_works_IE_col.xlsx')
df_result.to_excel(writer, sheet_name=sheet_name_, index=False)
# Auto-adjust columns' width
for column in df_pubs_IE_col:
    if column == 'Publication Title':
        column_width = 100
    elif column == 'IE Collaborators':
        column_width = 75
    else:
        column_width = max(df_pubs_IE_col[column].astype(str).map(len).max(), len(column))
    col_idx = df_pubs_IE_col.columns.get_loc(column)
    writer.sheets[sheet_name_].set_column(col_idx, col_idx, column_width)

writer.save()

In [49]:
df_result.head()

Unnamed: 0,Canadian Researcher,Institution,Publication Title,Publication Date,Citation Count,IE Collaborators,Academic Profile (ORCID),Publication Profile
0,David J. Cooper,University of Alberta,Popularizing a Management Accounting Idea: The...,2017-06-01,63,Mahmoud Ezzamel (IE University),,https://openalex.org/W3124864154
1,Kimberley D. Schneider,Agriculture and Agriculture-Food Canada,Options for Improved Phosphorus Cycling and Us...,2019-09-01,40,"Derek H. Lynch (IE University, https://orcid.o...",,https://openalex.org/W2967825003
2,Jingzhou Pan,College of Management and Economics,How does proactive personality promote creativ...,2018-12-01,24,Bin Ma (IE University),,https://openalex.org/W2801245079
3,Jie Mein Goh,Simon Fraser University,IT value creation in public sector: how IT-ena...,2020-01-06,22,"Jie Mein Goh (IE University, https://orcid.org...",https://orcid.org/0000-0002-3067-0916,https://openalex.org/W2999020507
5,Stanley J. Shapiro,Simon Fraser University,Macromarketing Pedagogy: Empowering Students t...,2021-03-01,10,"Stefanie Beninger (IE University, https://orci...",https://orcid.org/0000-0003-3013-2576,https://openalex.org/W3080720362


In [53]:
top = 10
df_pubs_IE_col['publications'] = 1
df_top_ac = df_pubs_IE_col[['cited_by_count', 'publications', 'first_author']].groupby(['first_author']).sum()
df_top_ac = df_top_ac.sort_values('cited_by_count', ascending=False)
df_top_ac2 = df_top_ac.merge(df_pubs[['first_author', 'first_auth_inst', 'first_auth_orcid']], how='left', on='first_author')
df_top_ac2 = df_top_ac2.replace(np.nan, '')
df_top_ac2 = df_top_ac2.rename(columns={
    'cited_by_count': 'by citations',
    'publications': 'by publications',
    'first_auth_inst': 'institution',
    'first_auth_orcid': 'orcid'
})
df_top_ac2 = df_top_ac2.drop_duplicates(subset=['orcid'])
df_top_ac2 = df_top_ac2.drop_duplicates(subset=['first_author'])
df_top_ac2 = df_top_ac2.set_index('first_author')
if len(df_top_ac2) < top:
    top = len(df_top_ac2)
df_top_ac2.index.names = ['Top-' + str(top) + ' academics in Canada that collaborate with IE researchers']

df_top_ac2.head(top)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pubs_IE_col['publications'] = 1


Unnamed: 0_level_0,by citations,by publications,institution,orcid
Top-10 academics in Canada that collaborate with IE researchers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
David J. Cooper,63,1,University of Alberta,
Jie Mein Goh,44,2,Simon Fraser University,https://orcid.org/0000-0002-3067-0916
Jingzhou Pan,24,1,College of Management and Economics,https://orcid.org/0000-0002-5484-7958
Stanley J. Shapiro,10,1,Simon Fraser University,https://orcid.org/0000-0003-3013-2576
Bernie J. Zebarth,6,1,Agriculture and Agriculture-Food Canada,https://orcid.org/0000-0003-1465-5051
William F. McIntyre,2,1,Population Health Research Institute,https://orcid.org/0000-0001-6082-7542
Zhanna Lyubykh,2,1,University of Calgary,https://orcid.org/0000-0002-6587-2200
Cody A. Freas,0,1,University of Alberta,https://orcid.org/0000-0001-7026-1255
Daniel Richard Clark,0,1,Western University,https://orcid.org/0000-0003-1645-4667
Karen Robson,0,1,University of Windsor,https://orcid.org/0000-0001-7933-3411
