In [2]:
import os
import pandas as pd

data_dir = 'data'
results_dir = 'results'
metrics_file = 'metrics.csv'
affiliations = [
    folder
    for folder in os.listdir(data_dir)
    if not folder.startswith('.') and
        os.path.isdir(os.path.join(data_dir, folder))
]
metrics_files = [
    os.path.join(results_dir, affiliation, metrics_file)
    for affiliation in affiliations
]

affiliation_acronyms = {
    'Australian National University': 'ANU',
    'University of Adelaide': 'UoA',
    'Monash University': 'Monash',
    'University of New South Wales': 'UNSW',
    'University of Queensland': 'UQ',
    'University of Western Australia': 'UWA',
    'University of Sydney': 'USyd',
    'University of Melbourne': 'UniMelb',
}

In [11]:
affiliation_dfs = []
top5 = ['UniMelb', 'USyd', 'UNSW', 'ANU', 'Monash']

for file in metrics_files:
    uni_acronym = affiliation_acronyms[file.split('/')[1]]
    if uni_acronym not in top5: continue
    affil_df = pd.read_csv(file, usecols=lambda col: col != 'Name')
    affil_df['University'] = uni_acronym
    affil_df['% first, single, last authorship'] = affil_df['% first author'] + affil_df['% single author'] + affil_df['% last author']
    affil_df.rename(columns={'h-leadership-index': 'hl-index', 'Median number of Authors': 'Median number of co-authors'}, inplace=True)
    affil_df = affil_df[affil_df['h-index'] > 10]
    affil_df = affil_df.sample(10)
    affil_df['Median citations'] = affil_df['Median citations'].astype(int)
    affil_df['Median author position'] = affil_df['Median author position'].astype(int)
    affil_df['Median number of co-authors'] = affil_df['Median number of co-authors'].astype(int)
    affiliation_dfs.append(affil_df)

# Individual researcher metrics
metrics_df = pd.concat(affiliation_dfs)[['Median citations', 'h-index', 'hl-index', '% first, single, last authorship', 'Median author position', 'Median number of co-authors', 'University']]
metrics_df = metrics_df.reset_index(drop=True)
print(metrics_df.to_latex(index=False).replace('0000', ''))

\begin{tabular}{rrrrrrl}
\toprule
Median citations & h-index & hl-index & % first, single, last authorship & Median author position & Median number of co-authors & University \\
\midrule
19 & 32 & 26 & 27.10 & 4 & 9 & UniMelb \\
27 & 57 & 39 & 30.87 & 7 & 10 & UniMelb \\
11 & 38 & 32 & 38.59 & 3 & 6 & UniMelb \\
43 & 126 & 126 & 58.88 & 3 & 5 & UniMelb \\
23 & 104 & 104 & 62.97 & 3 & 4 & UniMelb \\
36 & 83 & 83 & 44.80 & 5 & 7 & UniMelb \\
36 & 101 & 101 & 71.80 & 3 & 4 & UniMelb \\
30 & 116 & 116 & 85.75 & 2 & 4 & UniMelb \\
51 & 134 & 134 & 76.68 & 5 & 6 & UniMelb \\
24 & 64 & 64 & 46.54 & 2 & 4 & UniMelb \\
7 & 61 & 61 & 121.38 & 3 & 3 & USyd \\
25 & 99 & 99 & 57.65 & 3 & 4 & USyd \\
8 & 26 & 20 & 34.05 & 7 & 18 & USyd \\
13 & 12 & 12 & 36.36 & 3 & 7 & USyd \\
38 & 120 & 116 & 29.62 & 4 & 7 & USyd \\
14 & 103 & 102 & 41.59 & 3 & 6 & USyd \\
6 & 79 & 79 & 83.04 & 4 & 4 & USyd \\
25 & 122 & 121 & 27.65 & 6 & 15 & USyd \\
15 & 62 & 59 & 32.71 & 4 & 7 & USyd \\
26 & 101 & 101 & 55.07 & 

In [16]:
metrics_df['% first, single, last authorship'].min()

0.89

In [2]:
affiliation_dfs = [
    pd.read_csv(file, usecols=lambda col: col != 'Name')
    for file in metrics_files
]
agg_df = pd.DataFrame([
    df.mean().round(2)
    for df in affiliation_dfs
], index=affiliations)
agg_df

Unnamed: 0,Publications,Total citations,Median citations,h-index,h-frac-index,hm-index,h-leadership-index,% first author,% last author,% single author,Median author position,i10-index,Average number of Authors,Median number of Authors
University of Melbourne,375.75,38062.45,31.55,77.71,18.39,29.24,73.92,14.93,32.92,3.37,4.32,266.8,14.55,10.77
University of Sydney,552.2,45958.72,24.94,86.06,16.54,32.3,75.88,15.99,33.59,6.33,3.91,369.44,16.28,12.41
University of Western Australia,348.3,36619.64,32.82,77.66,15.4,28.06,68.78,14.0,31.13,3.73,5.67,254.82,18.99,18.33
Australian National University,296.06,35000.98,36.16,67.6,17.9,23.46,53.2,18.3,32.13,8.15,3.52,201.14,24.47,24.87
University of Adelaide,418.46,38886.76,37.0,80.84,19.32,27.94,68.64,14.75,29.71,4.8,5.46,288.04,22.56,24.57
University of Queensland,442.24,52833.36,38.58,85.74,19.92,32.8,79.26,13.82,32.49,2.69,4.09,319.94,15.85,9.36
Monash University,481.06,47674.3,22.67,75.16,14.4,25.46,63.78,14.35,27.8,3.13,6.08,323.04,21.3,18.08
University of New South Wales,449.68,41158.32,25.59,80.1,18.26,34.34,77.02,21.46,33.79,6.04,3.6,306.96,10.6,5.68


In [14]:
print(agg_df.to_latex())

\begin{tabular}{lrrrrrrrrrrrrrr}
\toprule
 & Publications & Total citations & Median citations & h-index & h-frac-index & hm-index & h-leadership-index & % first author & % last author & % single author & Median author position & i10-index & Average number of Authors & Median number of Authors \\
\midrule
Australian National University & 296.060000 & 35000.980000 & 36.160000 & 67.600000 & 17.900000 & 23.460000 & 53.200000 & 18.300000 & 32.130000 & 8.150000 & 3.520000 & 201.140000 & 24.470000 & 24.870000 \\
University of Adelaide & 418.460000 & 38886.760000 & 37.000000 & 80.840000 & 19.320000 & 27.940000 & 68.640000 & 14.750000 & 29.710000 & 4.800000 & 5.460000 & 288.040000 & 22.560000 & 24.570000 \\
Monash University & 481.060000 & 47674.300000 & 22.670000 & 75.160000 & 14.400000 & 25.460000 & 63.780000 & 14.350000 & 27.800000 & 3.130000 & 6.080000 & 323.040000 & 21.300000 & 18.080000 \\
University of New South Wales & 449.680000 & 41158.320000 & 25.590000 & 80.100000 & 18.260000 & 34.