In [1]:
import pandas as pd
from tables import *
import numpy as np
from tqdm import tqdm_notebook as tqdm
import csv
from scipy import sparse
import pickle
import os

In [2]:
path = '/home/User1/data/self_citations/'

In [3]:
min_year = 1980
max_year = 2020
nb_years = max_year-min_year

In [4]:
disciplines = pd.read_csv(path+'liste_discipline.txt',encoding='latin_1',sep='\t')
list_disciplines = pd.unique(disciplines['EDiscipline'])
nb_disciplines = len(list_disciplines)

In [5]:
from matplotlib import pyplot as plt

In [6]:
types_cit = ['self','co','others']

authors_info = {}
with open(path+'model/authors_infos_model.csv','r') as f:
    reader = csv.reader(f,delimiter='\t')
    next(reader)
    for line in reader:
        authors_info[int(line[0])] = [line[1],int(line[2]),int(line[4])]

In [7]:
bins_nb_arts = [0,1,3,5,8,14,20,30,40,50,70,100,150,200,300,500,10000]
labels=bins_nb_arts[1:-1] + ['501+']


authors_df = pd.read_csv(path+'model/authors_infos_model.csv',sep='\t')

authors_df.head()

authors_df['binned_arts'] = pd.cut(authors_df['nb_articles'],bins=bins_nb_arts,labels=labels)

authors_info = {}
for i in range(len(authors_df)):
    row = authors_df.iloc[i]
    authors_info[row['Cluster_ID']] = [row['discipline'],row['min_year'],row['binned_arts']]

with open(path+'authors_info_bin_arts.p','wb') as f:
    pickle.dump(authors_info,f)

In [None]:
with open(path+'authors_info_bin_arts.p','rb') as f:
    authors_info = pickle.load(f)

In [None]:
#Map to auth ID
with open(path+'authors_disc.p','rb') as f:
    authors_disc_idx = pickle.load(f)

In [None]:
#Keys:idx,values:authorID
for disc in list_disciplines:
    authors_disc_idx[disc] = {authors_disc_idx[disc][x]:x for x in authors_disc_idx[disc]}

In [None]:
disc_groups = {'Arts and Humanities':['Arts','Humanities'],
              'Natural Sciences and Engineering':['Biology','Physics','Mathematics','Engineering and Technology','Earth and Space','Chemistry'],
              'Social Sciences':['Social Sciences','Psychology','Professional Fields','Health'],
             'Health Sciences':['Biomedical Research','Clinical Medicine']}

nb_groups = len(disc_groups)
group_names = list(disc_groups.keys())
min_pubs = 5

In [None]:
nb_plots = len(labels)

### Porp with network

### With network and condensed disc

In [None]:
for i in tqdm(range(nb_groups)):
    group = group_names[i]   
    disc_group = disc_groups[group]
    arrays_refs = {}
    arrays_cits = {}
    fig1, ax1 = plt.subplots(4, 4,figsize=(25,25))
    fig2, ax2 = plt.subplots(4, 4,figsize=(25,25))
    nb_pubs = []
    for type_cit in types_cit:
        for disc in disc_group:
            load_ref = sparse.load_npz('{}/arrays/references_{}_{}_array_norm.npz'.format(path,type_cit,disc)).todense()
            load_cit = sparse.load_npz('{}/arrays/citations_{}_{}_array_norm.npz'.format(path,type_cit,disc)).todense()
            if type_cit == 'self':
                nb_auts = load_cit.shape[0]
                nb_pubs = nb_pubs + [authors_info[authors_disc_idx[disc][x]][2] for x in range(nb_auts)]
            if type_cit in arrays_cits: 
                arrays_cits[type_cit] = np.concatenate([arrays_cits[type_cit],load_cit],axis=0)
                arrays_refs[type_cit] = np.concatenate([arrays_refs[type_cit],load_ref],axis=0)
            else:
                arrays_cits[type_cit] = load_cit
                arrays_refs[type_cit] = load_ref


    for disc in disc_group:
        load_ref = sparse.load_npz('{}/arrays/references_self_{}_network_array_norm.npz'.format(path,disc)).todense()
        load_cit = sparse.load_npz('{}/arrays/citations_self_{}_network_array_norm.npz'.format(path,disc)).todense()

        if 'self_net' in arrays_cits:
            arrays_cits['self_net'] = np.concatenate([arrays_cits['self_net'],
                                                   load_cit],axis=0)
            arrays_refs['self_net'] = np.concatenate([arrays_refs['self_net'],
                                                   load_ref],axis=0)
        else:
            arrays_cits['self_net'] = load_cit
            arrays_refs['self_net'] = load_ref

    nb_pubs = np.array(nb_pubs)
    row_i = 0
    col_i = 0
    for j in range(nb_plots):
        bin_plot = str(labels[j])
        bin_idx = np.where(nb_pubs==bin_plot)[0]
        arrays_refs_bin = {}
        arrays_cits_bin = {}
        for type_cit in types_cit + ['self_net']:
            arrays_refs_bin[type_cit] = arrays_refs[type_cit][bin_idx,:]
            arrays_cits_bin[type_cit] = arrays_cits[type_cit][bin_idx,:]
        total_refs = arrays_refs_bin['self'] + arrays_refs_bin['co'] + arrays_refs_bin['others'] 
        total_cits =  arrays_cits_bin['self'] + arrays_cits_bin['co'] + arrays_cits_bin['others'] 
        total_refs_cumul = np.sum(total_refs,axis=1)
        total_cits_cumul = np.sum(total_cits,axis=1)
        rows_refs = np.where(total_refs_cumul)[0]
        rows_cits = np.where(total_cits_cumul)[0]

        plot_types = ['self','co','self_net']
        stats_refs = {}
        stats_cits = {}

        for type_cit in plot_types:
            arrays_refs_bin[type_cit + '_perc_year'] = np.zeros(arrays_refs_bin[type_cit].shape) 
            arrays_cits_bin[type_cit + '_perc_year'] = np.zeros(arrays_cits_bin[type_cit].shape) 

            idx_cits = total_cits.nonzero()
            idx_refs = total_refs.nonzero()
            arrays_refs_bin[type_cit + '_perc_year'][idx_refs] = (arrays_refs_bin[type_cit][idx_refs]/total_refs[idx_refs])*100
            arrays_cits_bin[type_cit + '_perc_year'][idx_cits] = (arrays_cits_bin[type_cit][idx_cits]/total_cits[idx_cits])*100



            count_cits = np.count_nonzero(total_cits,axis=0)
            idx_count_cits = np.where(count_cits)[1]
            stats_cits[type_cit + '_perc_year'] = np.zeros(nb_years)
            stats_cits[type_cit + '_perc_year'][idx_count_cits] = np.divide(np.sum(arrays_cits_bin[type_cit + '_perc_year'],axis=0)[idx_count_cits],
                                                                           count_cits[0,idx_count_cits])


            count_refs = np.count_nonzero(total_refs,axis=0)
            idx_count_refs = np.where(count_refs)[1]
            stats_refs[type_cit + '_perc_year'] = np.zeros(nb_years)
            stats_refs[type_cit + '_perc_year'][idx_count_refs] = np.divide(np.sum(arrays_refs_bin[type_cit + '_perc_year'],axis=0)[idx_count_refs],
                                                                            count_refs[0,idx_count_refs])                                     




        ax1[row_i,col_i].plot(stats_cits['self_perc_year'][:37],'r',label='% direct citations',linewidth=2)
        ax1[row_i,col_i].plot(stats_cits['co_perc_year'][:37],'r--',label='% coauthors citations',linewidth=2)
        ax1[row_i,col_i].plot(stats_cits['self_net_perc_year'][:37],'r-.',label='% collaborators citations',linewidth=2)
        ax1[row_i,col_i].plot(stats_refs['self_perc_year'][:37],'g',label='% direct references',linewidth=2)
        ax1[row_i,col_i].plot(stats_refs['co_perc_year'][:37],'g--',label='% coauthors references',linewidth=2)
        ax1[row_i,col_i].plot(stats_refs['self_net_perc_year'][:37],'g-.',label='% collaborators references',linewidth=2)




        ax2[row_i,col_i].plot(stats_cits['self_perc_year'][:37] + stats_cits['co_perc_year'][:37] ,'b',label='% self citations',linewidth=2)
        ax2[row_i,col_i].plot(stats_cits['self_perc_year'][:37] + stats_cits['co_perc_year'][:37] + stats_cits['self_net_perc_year'][:37],
                      'b--',label='% self citations + collaborators',linewidth=2)
        ax2[row_i,col_i].plot(stats_refs['self_perc_year'][:37]+stats_refs['co_perc_year'][:37],'orange',label = '% self references',linewidth=2)
        ax2[row_i,col_i].plot(stats_refs['self_perc_year'][:37] + stats_refs['co_perc_year'][:37] + stats_refs['self_net_perc_year'][:37],
                          color='orange',linestyle='--',label='% references + collaborators',linewidth=2)


        ax1[row_i,col_i].set_title(str(labels[j]) + ' papers',fontsize=20)
        ax2[row_i,col_i].set_title(str(labels[j]) + ' papers',fontsize=20)
        if col_i == 0:
            ax1[row_i,col_i].set_ylabel('%',fontsize=20)
            ax2[row_i,col_i].set_ylabel('%',fontsize=20)
        if row_i == 3:
            ax1[row_i,col_i].set_xlabel('Academic age',fontsize=20)
            ax2[row_i,col_i].set_xlabel('Academic age',fontsize=20)
        if j == 0:
            ax1[row_i,col_i].legend()
            ax2[row_i,col_i].legend()
            ax1[row_i,col_i].legend(prop=dict(size=15))
            ax2[row_i,col_i].legend(prop=dict(size=15))
        ax1[row_i,col_i].tick_params(labelsize=18)
        ax2[row_i,col_i].tick_params(labelsize=18)

        col_i += 1
        if col_i == 4:
            col_i = 0
            row_i+=1
    fig1.suptitle(group, fontsize=25)
    fig2.suptitle(group, fontsize=25)
    
    out_path = path+'results/perc_w_network_panels_nPubs/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    fig1.savefig(out_path + group + '.png')
    fig1.savefig(out_path + group + '.svg')
    
    fig2.savefig(out_path + group + '_combined.png')
    fig2.savefig(out_path + group + '_combined.svg')

    plt.close(fig1)
    plt.close(fig2)
    del stats_refs, stats_cits,load_ref,load_cit,arrays_cits,arrays_refs

### Get raw count of citations

#### Groups

In [16]:
plt.close()

In [17]:
for i in tqdm(range(nb_groups)):
    group = group_names[i]   
    disc_group = disc_groups[group]
    arrays_refs = {}
    arrays_cits = {}
    
    fig1, ax1 = plt.subplots(4, 4,figsize=(25,25))
    nb_pubs = []
    for type_cit in types_cit:
        for disc in disc_group:
            load_cit = sparse.load_npz('{}/arrays/citations_{}_{}_array_norm.npz'.format(path,type_cit,disc)).todense()
            if type_cit == 'self':
                nb_auts = load_cit.shape[0]
                nb_pubs = nb_pubs + [authors_info[authors_disc_idx[disc][x]][2] for x in range(nb_auts)]
            if type_cit in arrays_cits: 
                arrays_cits[type_cit] = np.concatenate([arrays_cits[type_cit],load_cit],axis=0)
            else:
                arrays_cits[type_cit] = load_cit

    nb_pubs = np.array(nb_pubs)
    row_i = 0
    col_i = 0
    for j in range(nb_plots):
        bin_plot = str(labels[j])
        bin_idx = np.where(nb_pubs==bin_plot)[0]
        arrays_cits_bin = {}
        for type_cit in types_cit:
            arrays_cits_bin[type_cit] = arrays_cits[type_cit][bin_idx,:]
            
        active = np.zeros(arrays_cits_bin[type_cit].shape)
        last_nz = (arrays_cits_bin[type_cit]!=0).cumsum(1).argmax(1)
        nb_cit = np.sum(arrays_cits_bin[type_cit],axis=1)
        for k in range(active.shape[0]):
            if nb_cit[k] > 0:
                last_idx = np.asarray(last_nz[k])[0][0]
                active[k,:last_idx+1] = 1
        nb_active = np.sum(active,axis=0)
        plot_types = ['self','co','others']
        stats_cits = {}
        #idx_active = np.where()
        for type_cit in plot_types:
            #stats_cits[type_cit + '_count_year'] = np.zeros(nb_years)
            stats_cits[type_cit + '_count_year'] = np.divide(np.squeeze(np.asarray(np.sum(arrays_cits_bin[type_cit],axis=0))),nb_active)    

        ax1[row_i,col_i].plot(stats_cits['self_count_year'][:37],'r',label='Self')
        ax1[row_i,col_i].plot(stats_cits['co_count_year'][:37],'r--',label='Coauthors')
        if j == 0:
            ax1[row_i,col_i].legend()
            ax1[row_i,col_i].legend(prop=dict(size=15))
      
        #ax1[row_i,col_i].twinx()
        ax1[row_i,col_i].plot(stats_cits['others_count_year'][:37],'k',label='Non-self')
        ax1[row_i,col_i].set_title(str(labels[j]) + ' papers',fontsize=20)
        if col_i == 0:
            ax1[row_i,col_i].set_ylabel('# citations',fontsize=20)
        if row_i == 3:
            ax1[row_i,col_i].set_xlabel('Academic age',fontsize=20)
        if j == 0:
            #ax1[row_i,col_i].legend()
            ax1[row_i,col_i].legend(prop=dict(size=15))

        ax1[row_i,col_i].tick_params(labelsize=18)

        col_i += 1
        if col_i == 4:
            col_i = 0
            row_i+=1
    fig1.suptitle(group, fontsize=25)

    out_path = path+'results/perc_w_network_panels_nPubs/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    fig1.savefig(out_path + group + '_count.png')
    plt.close(fig1)
    del  stats_cits,load_cit,arrays_cits

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


A Jupyter Widget




### Show cumulative data

In [19]:
for i in tqdm(range(nb_groups)):
    group = group_names[i]   
    disc_group = disc_groups[group]
    arrays_refs = {}
    arrays_cits = {}
    fig1, ax1 = plt.subplots(4, 4,figsize=(25,25))
    fig2, ax2 = plt.subplots(4, 4,figsize=(25,25))
    nb_pubs = []
    for type_cit in types_cit:
        for disc in disc_group:
            load_ref = sparse.load_npz('{}/arrays/references_{}_{}_array_norm.npz'.format(path,type_cit,disc)).todense()
            load_cit = sparse.load_npz('{}/arrays/citations_{}_{}_array_norm.npz'.format(path,type_cit,disc)).todense()
            if type_cit == 'self':
                nb_auts = load_cit.shape[0]
                nb_pubs = nb_pubs + [authors_info[authors_disc_idx[disc][x]][2] for x in range(nb_auts)]
            if type_cit in arrays_cits: 
                arrays_cits[type_cit] = np.concatenate([arrays_cits[type_cit],load_cit],axis=0)
                arrays_refs[type_cit] = np.concatenate([arrays_refs[type_cit],load_ref],axis=0)
            else:
                arrays_cits[type_cit] = load_cit
                arrays_refs[type_cit] = load_ref
    

    for disc in disc_group:
        load_ref = sparse.load_npz('{}/arrays/references_self_{}_network_array_norm.npz'.format(path,disc)).todense()
        load_cit = sparse.load_npz('{}/arrays/citations_self_{}_network_array_norm.npz'.format(path,disc)).todense()

        if 'self_net' in arrays_cits:
            arrays_cits['self_net'] = np.concatenate([arrays_cits['self_net'],
                                                   load_cit],axis=0)
            arrays_refs['self_net'] = np.concatenate([arrays_refs['self_net'],
                                                   load_ref],axis=0)
        else:
            arrays_cits['self_net'] = load_cit
            arrays_refs['self_net'] = load_ref
    del load_ref,load_cit
    nb_pubs = np.array(nb_pubs)
    row_i = 0
    col_i = 0
    for j in range(nb_plots):
        bin_plot = str(labels[j])
        bin_idx = np.where(nb_pubs==bin_plot)[0]
        arrays_refs_bin = {}
        arrays_cits_bin = {}
        for type_cit in types_cit + ['self_net']:
            arrays_refs_bin[type_cit] = arrays_refs[type_cit][bin_idx,:]
            arrays_cits_bin[type_cit] = arrays_cits[type_cit][bin_idx,:]
        for type_cit in types_cit:
            arrays_cits_bin[type_cit] = np.cumsum(arrays_cits_bin[type_cit],axis=1)
            arrays_refs_bin[type_cit] = np.cumsum(arrays_refs_bin[type_cit],axis=1)
        total_refs = arrays_refs_bin['self'] + arrays_refs_bin['co'] + arrays_refs_bin['others'] 
        total_cits =  arrays_cits_bin['self'] + arrays_cits_bin['co'] + arrays_cits_bin['others'] 
        total_refs_cumul = np.sum(total_refs,axis=1)
        total_cits_cumul = np.sum(total_cits,axis=1)
        rows_refs = np.where(total_refs_cumul)[0]
        rows_cits = np.where(total_cits_cumul)[0]

        plot_types = ['self','co','self_net']
        stats_refs = {}
        stats_cits = {}

        for type_cit in plot_types:
            arrays_refs_bin[type_cit + '_perc_year'] = np.zeros(arrays_refs_bin[type_cit].shape) 
            arrays_cits_bin[type_cit + '_perc_year'] = np.zeros(arrays_cits_bin[type_cit].shape) 

            idx_cits = total_cits.nonzero()
            idx_refs = total_refs.nonzero()
            arrays_refs_bin[type_cit + '_perc_year'][idx_refs] = (arrays_refs_bin[type_cit][idx_refs]/total_refs[idx_refs])*100
            arrays_cits_bin[type_cit + '_perc_year'][idx_cits] = (arrays_cits_bin[type_cit][idx_cits]/total_cits[idx_cits])*100



            count_cits = np.count_nonzero(total_cits,axis=0)
            idx_count_cits = np.where(count_cits)[1]
            stats_cits[type_cit + '_perc_year'] = np.zeros(nb_years)
            stats_cits[type_cit + '_perc_year'][idx_count_cits] = np.divide(np.sum(arrays_cits_bin[type_cit + '_perc_year'],axis=0)[idx_count_cits],
                                                                           count_cits[0,idx_count_cits])


            count_refs = np.count_nonzero(total_refs,axis=0)
            idx_count_refs = np.where(count_refs)[1]
            stats_refs[type_cit + '_perc_year'] = np.zeros(nb_years)
            stats_refs[type_cit + '_perc_year'][idx_count_refs] = np.divide(np.sum(arrays_refs_bin[type_cit + '_perc_year'],axis=0)[idx_count_refs],
                                                                            count_refs[0,idx_count_refs])                                   




        ax1[row_i,col_i].plot(stats_cits['self_perc_year'][:37],'r',label='% self citations',linewidth=2)
        ax1[row_i,col_i].plot(stats_cits['co_perc_year'][:37],'r--',label='% coauthors citations',linewidth=2)
        ax1[row_i,col_i].plot(stats_cits['self_net_perc_year'][:37],'r-.',label='% network citations',linewidth=2)
        ax1[row_i,col_i].plot(stats_refs['self_perc_year'][:37],'g',label='% self references',linewidth=2)
        ax1[row_i,col_i].plot(stats_refs['co_perc_year'][:37],'g--',label='% coauthors references',linewidth=2)
        ax1[row_i,col_i].plot(stats_refs['self_net_perc_year'][:37],'g-.',label='% network references',linewidth=2)




        ax2[row_i,col_i].plot(stats_cits['self_perc_year'][:37] + stats_cits['co_perc_year'][:37] ,'b',label='% citations',linewidth=2)
        ax2[row_i,col_i].plot(stats_cits['self_perc_year'][:37] + stats_cits['co_perc_year'][:37] + stats_cits['self_net_perc_year'][:37],
                      'b--',label='% citations w\\ network',linewidth=2)
        ax2[row_i,col_i].plot(stats_refs['self_perc_year'][:37]+stats_refs['co_perc_year'][:37],'orange',label = '% references',linewidth=2)
        ax2[row_i,col_i].plot(stats_refs['self_perc_year'][:37] + stats_refs['co_perc_year'][:37] + stats_refs['self_net_perc_year'][:37],
                          color='orange',linestyle='--',label='% references w\\ network',linewidth=2)


        ax1[row_i,col_i].set_title(str(labels[j]) + ' papers',fontsize=20)
        ax2[row_i,col_i].set_title(str(labels[j]) + ' papers',fontsize=20)
        if col_i == 0:
            ax1[row_i,col_i].set_ylabel('%',fontsize=20)
            ax2[row_i,col_i].set_ylabel('%',fontsize=20)
        if row_i == 3:
            ax1[row_i,col_i].set_xlabel('Academic age',fontsize=20)
            ax2[row_i,col_i].set_xlabel('Academic age',fontsize=20)
        if j == 0:
            ax1[row_i,col_i].legend()
            ax2[row_i,col_i].legend()
            ax1[row_i,col_i].legend(prop=dict(size=15))
            ax2[row_i,col_i].legend(prop=dict(size=15))
        ax1[row_i,col_i].tick_params(labelsize=18)
        ax2[row_i,col_i].tick_params(labelsize=18)

        col_i += 1
        if col_i == 4:
            col_i = 0
            row_i+=1
    fig1.suptitle(group, fontsize=25)
    fig2.suptitle(group, fontsize=25)
    
    out_path = path+'results/perc_w_network_panels_nPubs/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    fig1.savefig(out_path + group + '_cumul.png')
    fig2.savefig(out_path + group + '_cumul.svg')

    plt.close(fig1)
    plt.close(fig2)
    del stats_refs, stats_cits,arrays_cits,arrays_refs

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


A Jupyter Widget




In [18]:
for i in tqdm(range(nb_groups)):
    group = group_names[i]   
    disc_group = disc_groups[group]
    arrays_refs = {}
    arrays_cits = {}
    
    for type_cit in types_cit:
        for disc in disc_group:
            load_ref = sparse.load_npz('{}/arrays/references_{}_{}_array_norm.npz'.format(path,type_cit,disc)).todense()
            load_cit = sparse.load_npz('{}/arrays/citations_{}_{}_array_norm.npz'.format(path,type_cit,disc)).todense()
            nb_auts = load_cit.shape[0]
            nb_pubs = np.array([authors_info[authors_disc_idx[disc][x]][2] for x in range(nb_auts)])
            no_min_pub = np.where(nb_pubs<min_pubs)[0]
            load_cit[no_min_pub,:] = 0 

            if type_cit in arrays_cits: 
                arrays_cits[type_cit] = np.concatenate([arrays_cits[type_cit],load_cit],axis=0)
                arrays_refs[type_cit] = np.concatenate([arrays_refs[type_cit],load_ref],axis=0)
            else:
                arrays_cits[type_cit] = load_cit
                arrays_refs[type_cit] = load_ref
            
    for disc in disc_group:
        load_ref = sparse.load_npz('{}/arrays/references_self_{}_network_array_norm.npz'.format(path,disc)).todense()
        load_cit = sparse.load_npz('{}/arrays/citations_self_{}_network_array_norm.npz'.format(path,disc)).todense()
        nb_auts = load_cit.shape[0]
        nb_pubs = np.array([authors_info[authors_disc_idx[disc][x]][2] for x in range(nb_auts)])
        no_min_pub = np.where(nb_pubs<min_pubs)[0]
        load_cit[no_min_pub,:] = 0 
        if 'self_net' in arrays_cits:
            arrays_cits['self_net'] = np.concatenate([arrays_cits['self_net'],
                                                   load_cit],axis=0)
            arrays_refs['self_net'] = np.concatenate([arrays_refs['self_net'],
                                                   load_ref],axis=0)
        else:
            arrays_cits['self_net'] = load_cit
            arrays_refs['self_net'] = load_ref
            
    for type_cit in types_cit:
        arrays_cits[type_cit] = np.cumsum(arrays_cits[type_cit],axis=1)
        arrays_refs[type_cit] = np.cumsum(arrays_refs[type_cit],axis=1)
        
    #arrays_cits['self_net'] = sparse.load_npz('{}/arrays/citations_self_{}_network_array_norm.npz'.format(path,disc)).todense()
    #arrays_refs['self_net'] = sparse.load_npz('{}/arrays/references_self_{}_network_array_norm.npz'.format(path,disc)).todense()
    total_refs = arrays_refs['self'] + arrays_refs['co'] + arrays_refs['others'] 
    total_cits =  arrays_cits['self'] + arrays_cits['co'] + arrays_cits['others'] 
    total_refs_cumul = np.sum(total_refs,axis=1)
    total_cits_cumul = np.sum(total_cits,axis=1)
    rows_refs = np.where(total_refs_cumul)[0]
    rows_cits = np.where(total_cits_cumul)[0]
    
    plot_types = ['self','co','self_net']
    stats_refs = {}
    stats_cits = {}
        
    for type_cit in plot_types:
        arrays_refs[type_cit + '_perc_year'] = np.zeros(arrays_refs[type_cit].shape) 
        arrays_cits[type_cit + '_perc_year'] = np.zeros(arrays_cits[type_cit].shape) 
        
        idx_cits = total_cits.nonzero()
        idx_refs = total_refs.nonzero()
        arrays_refs[type_cit + '_perc_year'][idx_refs] = (arrays_refs[type_cit][idx_refs]/total_refs[idx_refs])*100
        arrays_cits[type_cit + '_perc_year'][idx_cits] = (arrays_cits[type_cit][idx_cits]/total_cits[idx_cits])*100
        

     
        count_cits = np.count_nonzero(total_cits,axis=0)
        idx_count_cits = np.where(count_cits)[1]
        stats_cits[type_cit + '_perc_year'] = np.zeros(nb_years)
        stats_cits[type_cit + '_perc_year'][idx_count_cits] = np.divide(np.sum(arrays_cits[type_cit + '_perc_year'],axis=0)[idx_count_cits],
                                                                       count_cits[0,idx_count_cits])
        
        
        count_refs = np.count_nonzero(total_refs,axis=0)
        idx_count_refs = np.where(count_refs)[1]
        stats_refs[type_cit + '_perc_year'] = np.zeros(nb_years)
        stats_refs[type_cit + '_perc_year'][idx_count_refs] = np.divide(np.sum(arrays_refs[type_cit + '_perc_year'],axis=0)[idx_count_refs],
                                                                        count_refs[0,idx_count_refs])                                     

    
    
    plt.figure(figsize=(15,6))
    plt.subplot(121)
    plt.plot(stats_cits['self_perc_year'][:37],'r',label='% self citations',linewidth=2)
    plt.plot(stats_cits['co_perc_year'][:37],'r--',label='% coauthors citations',linewidth=2)
    plt.plot(stats_cits['self_net_perc_year'][:37],'r-.',label='% network citations',linewidth=2)
    plt.plot(stats_refs['self_perc_year'][:37],'g',label='% self references',linewidth=2)
    plt.plot(stats_refs['co_perc_year'][:37],'g--',label='% coauthors references',linewidth=2)
    plt.plot(stats_refs['self_net_perc_year'][:37],'g-.',label='% network references',linewidth=2)
    plt.title(group,fontsize=20)
    plt.ylabel('%',fontsize=20)
    plt.xlabel('Year since first publication',fontsize=20)
    plt.legend()
    plt.tick_params(labelsize=18)


    plt.subplot(122)
    plt.plot(stats_cits['self_perc_year'][:37] + stats_cits['co_perc_year'][:37] ,'b',label='% citations',linewidth=2)
    plt.plot(stats_cits['self_perc_year'][:37] + stats_cits['co_perc_year'][:37] + stats_cits['self_net_perc_year'][:37],
             'b--',label='% citations w\\ network',linewidth=2)
    plt.plot(stats_refs['self_perc_year'][:37]+stats_refs['co_perc_year'][:37],'orange',label = '% references',linewidth=2)
    plt.plot(stats_refs['self_perc_year'][:37] + stats_refs['co_perc_year'][:37] + stats_refs['self_net_perc_year'][:37],
             color='orange',linestyle='--',label='% references w\\ network',linewidth=2)
    plt.title(group,fontsize=20)
    plt.ylabel('%',fontsize=20)
    plt.xlabel('Year since first publication',fontsize=20)
    plt.legend()
    plt.tick_params(labelsize=18)
    
    
    plt.legend()
    out_path = path+'results/perc_cumulative_w_network_min{}pubs/'.format(min_pubs)
    #plt.show()
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    plt.savefig(out_path + group + '.png')
    plt.savefig(out_path + group + '.svg')

    plt.close()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


A Jupyter Widget

TypeError: '<' not supported between instances of 'numpy.ndarray' and 'int'