In [1]:
import pandas as pd
from tables import *
import numpy as np
from tqdm import tqdm_notebook as tqdm
import csv
from scipy import sparse
import pickle
import os

In [2]:
# Root data directory. Defaults to the original location but can be redirected
# with the SELF_CITATIONS_PATH environment variable, so the notebook is not tied
# to one user's home directory. The other cells build file names with
# `path + '...'`, so joining with '' guarantees the trailing separator.
path = os.path.join(
    os.environ.get('SELF_CITATIONS_PATH', '/home/User1/data/self_citations/'),
    '')

In [14]:
# Two years identifying the data set to analyse; together they form `version`,
# which is embedded in the citations pickle name and in every output file name.
# NOTE(review): presumably the first/last publication year of the cohort — confirm.
# Alternative setting kept from the original notebook: np.array([1990,1991])
pub_years = np.array([1988, 1998])
version = '{}-{}'.format(*pub_years[:2])

# Not referenced by the cells visible in this notebook section.
max_year = 2018

# Length of the second axis of the per-article citation arrays.
# NOTE(review): 21 == max_year - pub_years[1] + 1 for the active settings;
# confirm whether it should be derived instead of hard-coded.
nb_years = 21

In [4]:
# Discipline table (tab-separated, Latin-1 encoded) and the distinct values of
# its 'EDiscipline' column, in order of first appearance.
disciplines = pd.read_csv(
    path+'liste_discipline.txt',
    sep='\t',
    encoding='latin_1',
)
list_disciplines = disciplines['EDiscipline'].unique()
nb_disciplines = len(list_disciplines)

In [5]:
from matplotlib import pyplot as plt

In [6]:
# Integer code of each citation type, numbered in the order listed.
# NOTE(review): presumably the row index along the first axis of the citation
# arrays — confirm against the code that builds them.
types_cit = {name: code
             for code, name in enumerate(('self', 'co', 'network', 'others'))}

In [7]:
# Per-author metadata keyed by integer author ID (column 0).
# Each value is [column 1 as text, column 2 as int, column 4 as int]; column 1
# is the discipline name that the grouping cell looks up. Column 3 is ignored.
# NOTE(review): the discipline list is read as Latin-1 but this file uses the
# platform default encoding — confirm which encoding the file really has.
authors_info = {}
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for file objects passed to csv.reader.
with open(path+'model/authors_infos_model.csv','r',newline='') as f:
    reader = csv.reader(f,delimiter='\t')
    next(reader, None)  # skip the header row; tolerate a completely empty file
    for line in reader:
        if not line:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            continue
        authors_info[int(line[0])] = [line[1],int(line[2]),int(line[4])]

In [8]:
# Citation arrays for the selected version: author ID -> {article -> array}.
# Each array is later copied into a (4, nb_years) slice by the plotting cells.
# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# load files produced by this project.
citations_file = '{}/citations_{}.p'.format(path, version)
with open(citations_file, 'rb') as pickle_file:
    arrays_cits = pickle.load(pickle_file)

In [9]:
# Condensed discipline groups: group name -> disciplines it contains.
# Insertion order matters: it fixes `group_names`, and therefore the position
# of each group's panel in the 2x2 figures.
disc_groups = {
    'Arts and Humanities': [
        'Arts',
        'Humanities',
    ],
    'Natural Sciences and Engineering': [
        'Biology',
        'Physics',
        'Mathematics',
        'Engineering and Technology',
        'Earth and Space',
        'Chemistry',
    ],
    'Social Sciences': [
        'Social Sciences',
        'Psychology',
        'Professional Fields',
        'Health',
    ],
    'Health Sciences': [
        'Biomedical Research',
        'Clinical Medicine',
    ],
}

group_names = list(disc_groups)
nb_groups = len(group_names)


In [10]:
# Every author ID that has citation arrays, in the key order of `arrays_cits`.
authors = list(arrays_cits.keys())
nb_authors = len(authors)

# Reverse lookup: discipline name -> name of the condensed group containing it.
# Written as a comprehension so that no loop variable is left in the notebook
# namespace: the original loop assigned each group's list to `disciplines`,
# replacing the DataFrame read from liste_discipline.txt.
map_discs = {disc: group
             for group in group_names
             for disc in disc_groups[group]}

In [11]:
# Author IDs of each condensed discipline group; authors whose discipline is
# 'Unknown' are left out.
authors_group = {x:[] for x in group_names}
# Walk `arrays_cits` itself instead of the notebook-level `authors` /
# `nb_authors` names: those are plain globals that any cell can rebind, and
# this cell must rebuild the grouping from the full data every time it runs.
for author_ID in arrays_cits.keys():
    disc_author = authors_info[author_ID][0]
    if disc_author != 'Unknown':
        # A discipline missing from `map_discs` raises KeyError on purpose:
        # it means the group definition is out of date.
        authors_group[map_discs[disc_author]].append(author_ID)
    
    

In [12]:
# Four RGB colours given as 0-255 triples, rescaled to 0-1 float arrays.
rgb_triples = (
    (53, 116, 172),
    (44, 164, 40),
    (164, 186, 183),
    (255, 232, 31),
)
colors = [np.array(rgb)/255 for rgb in rgb_triples]

### With network and condensed disciplines

#### Frequency

In [15]:
# Mean number of citations by publication age: one panel per discipline group,
# one curve per citation type. The figure is written to disk as PNG and SVG.
fig = plt.figure(figsize=(15,15))
for i, group in enumerate(tqdm(group_names)):
    # Group-local name on purpose: binding `authors` / `nb_authors` here would
    # overwrite the notebook-wide author list and corrupt re-runs of the cell
    # that builds `authors_group`.
    group_authors = authors_group[group]
    all_cits = np.zeros((4,nb_years,len(group_authors)))
    for j, author_ID in enumerate(group_authors):
        author_arts = arrays_cits[author_ID]
        cits_authors = np.zeros((4,nb_years,len(author_arts)))
        for k, art_cits in enumerate(author_arts.values()):
            cits_authors[:,:,k] = art_cits
        # Average within the author first, so each author carries the same
        # weight in the group mean however many articles they have.
        all_cits[:,:,j] = np.mean(cits_authors,axis=2)

    avg_cits = np.mean(all_cits,axis=2)
    # Drop the last column of the age axis.
    # NOTE(review): presumably because the final year is incomplete — confirm.
    avg_cits = avg_cits[:,:-1]

    ax = fig.add_subplot(2,2,i+1)
    # NOTE(review): rows 0-3 are assumed to follow the `types_cit` order
    # (self, co, network, others) — confirm.
    ax.plot(avg_cits[0,:],'r',label='Direct',linewidth=3)
    ax.plot(avg_cits[1,:],'r--',label='Coauthors',linewidth=3)
    ax.plot(avg_cits[2,:],'r-.',label='Collaborators',linewidth=3)
    ax.plot(avg_cits[3,:],'k',label='Others',linewidth=3)
    ax.set_ylabel('# citations',fontsize=20)
    # X label added for consistency with the normalised and percentage figures.
    ax.set_xlabel('Publication age',fontsize=20)
    ax.legend(loc='upper left')
    ax.tick_params(labelsize=18)
    ax.set_title(group,fontsize=20)

# The figure is only saved; call plt.show() before closing to display it inline.
out_path = path+'results/cits_dynamic_pub_age/'
os.makedirs(out_path, exist_ok=True)
fig.savefig(out_path + 'abs_{}.png'.format(version))
fig.savefig(out_path + 'abs_{}.svg'.format(version))
plt.close(fig)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


A Jupyter Widget




#### Normalised

In [16]:
# Min-max normalised mean citations by publication age: one panel per
# discipline group, each citation-type curve rescaled to [0, 1] on its own.
# The figure is written to disk as PNG and SVG.
fig = plt.figure(figsize=(15,15))
for i, group in enumerate(tqdm(group_names)):
    # Group-local name on purpose: binding `authors` / `nb_authors` here would
    # overwrite the notebook-wide author list and corrupt re-runs of the cell
    # that builds `authors_group`.
    group_authors = authors_group[group]
    all_cits = np.zeros((4,nb_years,len(group_authors)))
    for j, author_ID in enumerate(group_authors):
        author_arts = arrays_cits[author_ID]
        cits_authors = np.zeros((4,nb_years,len(author_arts)))
        for k, art_cits in enumerate(author_arts.values()):
            cits_authors[:,:,k] = art_cits
        # Average within the author first, so each author carries the same
        # weight in the group mean however many articles they have.
        all_cits[:,:,j] = np.mean(cits_authors,axis=2)

    avg_cits = np.mean(all_cits,axis=2)
    # Drop the last column of the age axis.
    # NOTE(review): presumably because the final year is incomplete — confirm.
    avg_cits = avg_cits[:,:-1]

    max_rows = np.max(avg_cits,axis=1)
    min_rows = np.min(avg_cits,axis=1)
    row_range = max_rows - min_rows
    # A constant row (e.g. a citation type that never occurs) has zero range;
    # divide by 1 instead so the curve is a flat 0 rather than NaN.
    safe_range = np.where(row_range > 0, row_range, 1.0)
    stats_cits = (avg_cits - min_rows[:,None]) / safe_range[:,None]

    ax = fig.add_subplot(2,2,i+1)
    # NOTE(review): rows 0-3 are assumed to follow the `types_cit` order
    # (self, co, network, others) — confirm.
    ax.plot(stats_cits[0,:],'r',label='Direct',linewidth=3)
    ax.plot(stats_cits[1,:],'r--',label='Coauthors',linewidth=3)
    ax.plot(stats_cits[2,:],'r-.',label='Collaborators',linewidth=3)
    ax.plot(stats_cits[3,:],'k',label='Others',linewidth=3)
    ax.set_ylabel('Norm. citations',fontsize=20)
    ax.set_xlabel('Publication age',fontsize=20)
    ax.legend(loc='upper left')
    ax.tick_params(labelsize=18)
    ax.set_title(group,fontsize=20)

# The figure is only saved; call plt.show() before closing to display it inline.
out_path = path+'results/cits_dynamic_pub_age/'
os.makedirs(out_path, exist_ok=True)
fig.savefig(out_path + 'norm_{}.png'.format(version))
fig.savefig(out_path + 'norm_{}.svg'.format(version))
plt.close(fig)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


A Jupyter Widget




#### Percentage


In [17]:
# Share of each citation type in the mean citations received at every
# publication age, in percent: one panel per discipline group.
# The figure is written to disk as PNG and SVG.
fig = plt.figure(figsize=(15,15))
for i, group in enumerate(tqdm(group_names)):
    # Group-local name on purpose: binding `authors` / `nb_authors` here would
    # overwrite the notebook-wide author list and corrupt re-runs of the cell
    # that builds `authors_group`.
    group_authors = authors_group[group]
    all_cits = np.zeros((4,nb_years,len(group_authors)))
    for j, author_ID in enumerate(group_authors):
        author_arts = arrays_cits[author_ID]
        cits_authors = np.zeros((4,nb_years,len(author_arts)))
        for k, art_cits in enumerate(author_arts.values()):
            cits_authors[:,:,k] = art_cits
        # Average within the author first, so each author carries the same
        # weight in the group mean however many articles they have.
        all_cits[:,:,j] = np.mean(cits_authors,axis=2)

    avg_cits = np.mean(all_cits,axis=2)
    # Drop the last column of the age axis.
    # NOTE(review): presumably because the final year is incomplete — confirm.
    avg_cits = avg_cits[:,:-1]

    total_cits = np.sum(avg_cits,axis=0)
    # An age with no citations at all would divide by zero; divide by 1 so the
    # shares are 0 instead of NaN (assumes counts are non-negative).
    safe_total = np.where(total_cits > 0, total_cits, 1.0)
    # Scaled by 100 so the values match the '% citations' axis label; the
    # original plotted fractions in [0, 1] under that label.
    stats_cits = 100 * avg_cits / safe_total[None,:]

    ax = fig.add_subplot(2,2,i+1)
    # NOTE(review): rows 0-3 are assumed to follow the `types_cit` order
    # (self, co, network, others) — confirm.
    ax.plot(stats_cits[0,:],'r',label='Direct',linewidth=3)
    ax.plot(stats_cits[1,:],'r--',label='Coauthors',linewidth=3)
    ax.plot(stats_cits[2,:],'r-.',label='Collaborators',linewidth=3)
    ax.plot(stats_cits[3,:],'k',label='Others',linewidth=3)
    ax.set_ylabel('% citations',fontsize=20)
    ax.set_xlabel('Publication age',fontsize=20)
    ax.legend(loc='upper left')
    ax.tick_params(labelsize=18)
    ax.set_title(group,fontsize=20)

# The figure is only saved; call plt.show() before closing to display it inline.
out_path = path+'results/cits_dynamic_pub_age/'
os.makedirs(out_path, exist_ok=True)
fig.savefig(out_path + 'perc_{}.png'.format(version))
fig.savefig(out_path + 'perc_{}.svg'.format(version))
plt.close(fig)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


A Jupyter Widget


