In [1]:
import os
import pandas as pd
import numpy as np
from collections import defaultdict
from tqdm.notebook import tqdm

### hyperparam

In [2]:
year = 2019

In [3]:
df = pd.read_csv(f'../result_{year}.csv')

In [4]:
df.head()

Unnamed: 0,centrality,name,group,country,organization
0,0.991648,USA,Country,,
1,0.050982,NBER,FullOrgName,USA,
2,0.026058,WORLD DEVELOPMENT,journaltitle,,
3,0.023323,APPLIED ECONOMICS,journaltitle,,
4,0.023008,JOURNAL OF ECONOMIC BEHAVIOR & ORGANIZATION,journaltitle,,


### Country ranking

In [5]:
topk = 10
country = df[df['group'] == 'Country']
country.head(topk)

Unnamed: 0,centrality,name,group,country,organization
0,0.991648,USA,Country,,
128,0.004464,Peoples R China,Country,,
254,0.002145,England,Country,,
278,0.001827,Germany,Country,,
362,0.001212,Australia,Country,,
368,0.001142,Italy,Country,,
437,0.000875,France,Country,,
462,0.000748,Spain,Country,,
488,0.000723,Canada,Country,,
546,0.000565,Netherlands,Country,,


### journal ranking

In [6]:
# centrality & h-index & impact_factor
topk = 10
journal = df[df['group'] == 'journaltitle'].reset_index()
journal = journal.drop(columns=['country', 'organization', 'index'])

# impact factor
impact_factor = pd.read_csv('../impact_factor/simple.csv')
h_index = pd.read_csv('../impact_factor/hindex.csv')

def ff(_df):
    def f(x):
        try:
            if type(x) is str:
                res = _df[_df['Journal'].apply(lambda y: x.lower() in y.lower())]
                return res.iloc[0][0]
            else:
                return np.nan
        except Exception as e:
            return np.nan
    return f

journal['impact_factor_optimal_rank'] = journal['name'].apply(ff(impact_factor))
journal['h_index_optimal_rank'] = journal['name'].apply(ff(h_index))

journal.head(topk)

Unnamed: 0,centrality,name,group,impact_factor_optimal_rank,h_index_optimal_rank
0,0.026058,WORLD DEVELOPMENT,journaltitle,112.0,34.0
1,0.023323,APPLIED ECONOMICS,journaltitle,18.0,49.0
2,0.023008,JOURNAL OF ECONOMIC BEHAVIOR & ORGANIZATION,journaltitle,117.0,44.0
3,0.021245,JOURNAL OF FINANCIAL ECONOMICS,journaltitle,5.0,6.0
4,0.020849,AMERICAN ECONOMIC REVIEW,journaltitle,8.0,1.0
5,0.018919,REVIEW OF FINANCIAL STUDIES,journaltitle,7.0,14.0
6,0.018888,APPLIED ECONOMICS LETTERS,journaltitle,470.0,210.0
7,0.016649,ECONOMICS LETTERS,journaltitle,183.0,39.0
8,0.015431,JOURNAL OF CONSUMER AFFAIRS,journaltitle,667.0,668.0
9,0.014778,ENVIRONMENTAL & RESOURCE ECONOMICS,journaltitle,130.0,110.0


In [7]:
journal.sort_values(by=['impact_factor_optimal_rank']).head(topk)

Unnamed: 0,centrality,name,group,impact_factor_optimal_rank,h_index_optimal_rank
129,0.002278,QUARTERLY JOURNAL OF ECONOMICS,journaltitle,1.0,2.0
245,0.000522,JOURNAL OF ECONOMICS,journaltitle,1.0,2.0
147,0.001773,JOURNAL OF ECONOMIC LITERATURE,journaltitle,2.0,12.0
203,0.001033,JOURNAL OF ECONOMIC GROWTH,journaltitle,4.0,58.0
3,0.021245,JOURNAL OF FINANCIAL ECONOMICS,journaltitle,5.0,6.0
39,0.006962,JOURNAL OF ECONOMIC PERSPECTIVES,journaltitle,6.0,10.0
5,0.018919,REVIEW OF FINANCIAL STUDIES,journaltitle,7.0,14.0
4,0.020849,AMERICAN ECONOMIC REVIEW,journaltitle,8.0,1.0
16,0.011486,JOURNAL OF POLITICAL ECONOMY,journaltitle,9.0,4.0
18,0.011383,JOURNAL OF FINANCE,journaltitle,10.0,3.0


In [8]:
journal.sort_values(by=['h_index_optimal_rank']).head(topk)

Unnamed: 0,centrality,name,group,impact_factor_optimal_rank,h_index_optimal_rank
4,0.020849,AMERICAN ECONOMIC REVIEW,journaltitle,8.0,1.0
245,0.000522,JOURNAL OF ECONOMICS,journaltitle,1.0,2.0
129,0.002278,QUARTERLY JOURNAL OF ECONOMICS,journaltitle,1.0,2.0
18,0.011383,JOURNAL OF FINANCE,journaltitle,10.0,3.0
16,0.011486,JOURNAL OF POLITICAL ECONOMY,journaltitle,9.0,4.0
3,0.021245,JOURNAL OF FINANCIAL ECONOMICS,journaltitle,5.0,6.0
11,0.013726,REVIEW OF ECONOMIC STUDIES,journaltitle,11.0,7.0
64,0.004805,JOURNAL OF MONETARY ECONOMICS,journaltitle,12.0,8.0
39,0.006962,JOURNAL OF ECONOMIC PERSPECTIVES,journaltitle,6.0,10.0
23,0.010246,REVIEW OF ECONOMICS AND STATISTICS,journaltitle,20.0,11.0


In [9]:
journal_rk = journal

### author ranking

In [10]:
topk = 10
author = df[df['group'] == 'Fullname']
author.head(topk)

Unnamed: 0,centrality,name,group,country,organization
319,0.001461,"Bahmani-Oskooee, Mohsen",Fullname,USA,Univ Wisconsin
344,0.001248,"Wohar, Mark E.",Fullname,USA,Univ Nebraska
382,0.001133,"Hassan, M. Kabir",Fullname,USA,Univ New Orleans
421,0.000931,"Goel, Rajeev K.",Fullname,USA,Illinois State Univ
449,0.000826,"Baltagi, Badi H.",Fullname,USA,Syracuse Univ
466,0.000733,"Samek, Anya",Fullname,USA,Univ Southern Calif
472,0.000728,"Hewings, Geoffrey J. D.",Fullname,USA,Univ Illinois
476,0.000727,"Hammoudeh, Shawkat",Fullname,USA,Drexel Univ
477,0.000727,"Johnston, Robert J.",Fullname,USA,Clark Univ
517,0.000624,"Munger, Michael C.",Fullname,USA,Duke Univ


### author ranking by country

In [11]:
countries = df[df['group'] == 'Country']['name'].tolist()
for country in countries:
    print(country)
    print('centrality = ', df[df['name'] == country]['centrality'].iloc[0], sep='')
    authors = df[(df['country'] == country) & (df['group'] == 'Fullname')]
    print(authors.sort_values(by='centrality', ascending=False).head())
    print('-'*50)

USA
centrality = 0.9916481863234224
     centrality                     name     group country  \
319    0.001461  Bahmani-Oskooee, Mohsen  Fullname     USA   
344    0.001248           Wohar, Mark E.  Fullname     USA   
382    0.001133         Hassan, M. Kabir  Fullname     USA   
421    0.000931          Goel, Rajeev K.  Fullname     USA   
449    0.000826         Baltagi, Badi H.  Fullname     USA   

            organization  
319       Univ Wisconsin  
344        Univ Nebraska  
382     Univ New Orleans  
421  Illinois State Univ  
449        Syracuse Univ  
--------------------------------------------------
Peoples R China
centrality = 0.004464139862145678
      centrality            name     group          country  \
9451    0.000019  Wang, Shouyang  Fullname  Peoples R China   
9453    0.000019        Ma, Feng  Fullname  Peoples R China   
9479    0.000018    Lin, Boqiang  Fullname  Peoples R China   
9638    0.000011       Yin, Libo  Fullname  Peoples R China   
9684    0.000

10357             Univ Otago  
--------------------------------------------------
Austria
centrality = 0.00017650285245771063
       centrality                    name     group  country  \
9867     0.000007     Kerschbamer, Rudolf  Fullname  Austria   
10241    0.000005          Kopel, Michael  Fullname  Austria   
10775    0.000003          Lackner, Mario  Fullname  Austria   
11090    0.000003           Loretz, Simon  Fullname  Austria   
11222    0.000003  Guentner, Jochen H. F.  Fullname  Austria   

                    organization  
9867              Univ Innsbruck  
10241    Karl Franzens Univ Graz  
10775  Johannes Kepler Univ Linz  
11090     Austrian Inst Econ Res  
11222  Johannes Kepler Univ Linz  
--------------------------------------------------
Belgium
centrality = 0.0001742889660085881
       centrality                name     group  country  \
10567    0.000004       Wang, Hwachyi  Fullname  Belgium   
10568    0.000004       Lauwers, Dirk  Fullname  Belgium   
11217

centrality = 5.066143317290507e-05
       centrality             name     group  country            organization
12815    0.000002      Dombi, Akos  Fullname  Hungary      Eotvos Lorand Univ
12816    0.000002    Dedak, Istvan  Fullname  Hungary  Eszterhazy Karoly Univ
12944    0.000002  Zrubka, Zsombor  Fullname  Hungary  Corvinus Univ Budapest
13167    0.000002    Pentek, Marta  Fullname  Hungary  Corvinus Univ Budapest
13168    0.000002  Gulacsi, Laszlo  Fullname  Hungary  Corvinus Univ Budapest
--------------------------------------------------
Argentina
centrality = 4.281750701955744e-05
       centrality               name     group    country  \
12286    0.000003    Elias, Julio J.  Fullname  Argentina   
12958    0.000002    Sigman, Mariano  Fullname  Argentina   
12959    0.000002  Galvez, Ramiro H.  Fullname  Argentina   
12960    0.000002      Shalom, Diego  Fullname  Argentina   
12961    0.000002      Freira, Lucia  Fullname  Argentina   

                 organization  
12

22676  Council Budget Responsibil  
--------------------------------------------------
Philippines
centrality = 1.594306711663569e-05
       centrality                       name     group      country  \
11817    0.000003                Balie, Jean  Fullname  Philippines   
12276    0.000003  Biona, Jose Bienvenido M.  Fullname  Philippines   
14126    0.000002             Park, Donghyun  Fullname  Philippines   
16587    0.000001            Fillone, Alexis  Fullname  Philippines   
16588    0.000001              Rith, Monorom  Fullname  Philippines   

            organization  
11817  Int Rice Res Inst  
12276   De La Salle Univ  
14126     Asian Dev Bank  
16587   De La Salle Univ  
16588   De La Salle Univ  
--------------------------------------------------
Egypt
centrality = 1.5317628477349487e-05
       centrality              name     group country     organization
10276    0.000005      Zaki, Chahir  Fullname   Egypt       Cairo Univ
12890    0.000002       Rizk, Reham  Fulln

37112             World Bank  
--------------------------------------------------
Tanzania
centrality = 5.373211833317926e-06
         centrality               name     group   country  \
11835  2.687446e-06  Komakech, Hans C.  Fullname  Tanzania   
11836  2.687446e-06    Mhede, Edwin P.  Fullname  Tanzania   
36087  1.110398e-09    Manyong, Victor  Fullname  Tanzania   
36192  5.552278e-10   Feleke, Shiferaw  Fullname  Tanzania   

                                    organization  
11835  Nelson Mandela African Inst Sci & Technol  
11836              Minist Ind Trade & Investment  
36087                                       IITA  
36192                                       IITA  
--------------------------------------------------
Sri Lanka
centrality = 4.8769147817167855e-06
         centrality                          name     group    country  \
15564  1.448231e-06                De Mel, Suresh  Fullname  Sri Lanka   
18718  9.082921e-07                de Mel, Suresh  Fullname  Sr

17213    0.000001  Ouedraogo, Jerome  Fullname   Niger        UNECA
--------------------------------------------------
Armenia
centrality = 1.1075288606697132e-06
         centrality                name     group  country       organization
21903  5.859654e-07  Barseghyan, Gayane  Fullname  Armenia  Amer Univ Armenia
22831  5.430449e-07   Khachatryan, Knar  Fullname  Armenia  Amer Univ Armenia
--------------------------------------------------
Panama
centrality = 7.944283084617845e-07
         centrality                        name     group country  \
19783  7.924233e-07          Hall, Jefferson S.  Fullname  Panama   
33471  6.603586e-08  de Sarralde, Santiago Diaz  Fullname  Panama   

                    organization  
19783  Smithsonian Trop Res Inst  
33471      Interamer Ctr Tax Adm  
--------------------------------------------------
Costa Rica
centrality = 6.206741807421912e-07
         centrality                name     group     country  \
25241  4.075096e-07  Cardoza, Guill

centrality = 1.1144341727858348e-08
         centrality                      name     group        country  \
35469  1.115477e-08  Dosa, Pacifique Mongongo  Fullname  DEM REP CONGO   

                 organization  
35469  Univ Nouveaux Horizons  
--------------------------------------------------
Iraq
centrality = 9.102113109189967e-10
         centrality            name     group country organization
36112  9.121532e-10  Taleb, Mushtaq  Fullname    Iraq   Univ Anbar
--------------------------------------------------
Dominican Rep
centrality = 6.970965576682598e-10
         centrality                        name     group        country  \
36149  7.281959e-10  Gomez-Luciano, Cristino A.  Fullname  Dominican Rep   

                                   organization  
36149  Inst Especializado Estudios Super Loyola  
--------------------------------------------------
Lesotho
centrality = 2.3178730979265256e-18
Empty DataFrame
Columns: [centrality, name, group, country, organization]
Inde

### author ranking by organization

In [12]:
topk = 50
orgs = df[df['group'] == 'FullOrgName']['name'].tolist()[:topk]
for org in orgs:
    print(org)
    print('centrality = ', df[df['name'] == org]['centrality'].iloc[0], sep='')
    authors = df[(df['organization'] == org) & (df['group'] == 'Fullname')]
    print(authors.sort_values(by='centrality', ascending=False).head())
    print('-'*50)

NBER
centrality = 0.050982293218899535
     centrality              name     group country organization
609    0.000434     Currie, Janet  Fullname     USA         NBER
610    0.000433    Neumark, David  Fullname     USA         NBER
733    0.000328   Bernstein, Shai  Fullname     USA         NBER
734    0.000327        He, Zhiguo  Fullname     USA         NBER
735    0.000326  Morales, Eduardo  Fullname     USA         NBER
--------------------------------------------------
World Bank
centrality = 0.021066145513592455
     centrality              name     group country organization
613    0.000424  Deininger, Klaus  Fullname     USA   World Bank
614    0.000422   McKenzie, David  Fullname     USA   World Bank
740    0.000316       Islam, Asif  Fullname     USA   World Bank
744    0.000316     Molini, Vasco  Fullname     USA   World Bank
745    0.000316   Mattoo, Aaditya  Fullname     USA   World Bank
--------------------------------------------------
Int Monetary Fund
centrality = 0.0

centrality = 0.007702256896655348
     centrality                 name     group country organization
555    0.000523    Cason, Timothy N.  Fullname     USA  Purdue Univ
622    0.000419      Lusk, Jayson L.  Fullname     USA  Purdue Univ
787    0.000313  Delgado, Michael S.  Fullname     USA  Purdue Univ
835    0.000312    Bauchet, Jonathan  Fullname     USA  Purdue Univ
883    0.000310      Martin, Stephen  Fullname     USA  Purdue Univ
--------------------------------------------------
Univ Texas Austin
centrality = 0.007588939640548182
      centrality                name     group country       organization
811     0.000312         Bhaskar, V.  Fullname     USA  Univ Texas Austin
1197    0.000209   Xiaolan, Mindy Z.  Fullname     USA  Univ Texas Austin
1356    0.000208    Thomas, Caroline  Fullname     USA  Univ Texas Austin
1576    0.000207  Traphagan, John W.  Fullname     USA  Univ Texas Austin
1583    0.000207         Han, Sukjin  Fullname     USA  Univ Texas Austin
-----------

1631    0.000207    Langan, Andrew  Fullname     USA  Princeton Univ
--------------------------------------------------
Univ Calif Davis
centrality = 0.005176274589279422
      centrality                 name     group country      organization
859     0.000311       Woo, Wing Thye  Fullname     USA  Univ Calif Davis
932     0.000309     Carter, Colin A.  Fullname     USA  Univ Calif Davis
1931    0.000206         Smith, Aaron  Fullname     USA  Univ Calif Davis
1972    0.000206  Swenson, Deborah L.  Fullname     USA  Univ Calif Davis
2740    0.000105     Rudnick, Jessica  Fullname     USA  Univ Calif Davis
--------------------------------------------------
Washington Univ
centrality = 0.005088199248602959
      centrality                     name     group country     organization
1288    0.000209         Roll, Stephen P.  Fullname     USA  Washington Univ
1347    0.000208       Sherraden, Michael  Fullname     USA  Washington Univ
1373    0.000208  Grinstein-Weiss, Michal  Fullname  

### org ranking

In [13]:
topk = 10
orgs = df[df['group'] == 'FullOrgName']
orgs.head(topk)

Unnamed: 0,centrality,name,group,country,organization
1,0.050982,NBER,FullOrgName,USA,
6,0.021066,World Bank,FullOrgName,USA,
10,0.016819,Int Monetary Fund,FullOrgName,USA,
12,0.016457,Stanford Univ,FullOrgName,USA,
18,0.012755,Univ Illinois,FullOrgName,USA,
19,0.01253,Univ Wisconsin,FullOrgName,USA,
20,0.012524,Harvard Univ,FullOrgName,USA,
21,0.012429,MIT,FullOrgName,USA,
23,0.011997,Univ Chicago,FullOrgName,USA,
24,0.011901,Univ Calif Berkeley,FullOrgName,USA,


### org ranking by country

In [14]:
countries = df[df['group'] == 'Country']['name'].tolist()
for country in countries:
    print(country)
    print('centrality = ', df[df['name'] == country]['centrality'].iloc[0], sep='')
    orgs = df[(df['country'] == country) & (df['group'] == 'FullOrgName')]
    print(orgs.sort_values(by='centrality', ascending=False).head())
    print('-'*50)

USA
centrality = 0.9916481863234224
    centrality               name        group country organization
1     0.050982               NBER  FullOrgName     USA          NaN
6     0.021066         World Bank  FullOrgName     USA          NaN
10    0.016819  Int Monetary Fund  FullOrgName     USA          NaN
12    0.016457      Stanford Univ  FullOrgName     USA          NaN
18    0.012755      Univ Illinois  FullOrgName     USA          NaN
--------------------------------------------------
Peoples R China
centrality = 0.004464139862145678
      centrality                              name        group  \
2160    0.000176                       Peking Univ  FullOrgName   
2165    0.000164          Univ Int Business & Econ  FullOrgName   
2167    0.000154  Southwestern Univ Finance & Econ  FullOrgName   
2169    0.000147                 Renmin Univ China  FullOrgName   
2170    0.000135                     Zhejiang Univ  FullOrgName   

              country organization  
2160  Peoples R

centrality = 0.00017650285245771063
      centrality                         name        group  country  \
9322    0.000032               Univ Innsbruck  FullOrgName  Austria   
9373    0.000025                  Univ Vienna  FullOrgName  Austria   
9439    0.000021    Johannes Kepler Univ Linz  FullOrgName  Austria   
9478    0.000018      Karl Franzens Univ Graz  FullOrgName  Austria   
9734    0.000009  Vienna Univ Econ & Business  FullOrgName  Austria   

     organization  
9322          NaN  
9373          NaN  
9439          NaN  
9478          NaN  
9734          NaN  
--------------------------------------------------
Belgium
centrality = 0.0001742889660085881
      centrality                    name        group  country organization
9259    0.000046              Univ Ghent  FullOrgName  Belgium          NaN
9292    0.000038  Katholieke Univ Leuven  FullOrgName  Belgium          NaN
9456    0.000019    Univ Libre Bruxelles  FullOrgName  Belgium          NaN
9565    0.000014   

centrality = 7.199063441513328e-05
       centrality                                      name        group  \
9522     0.000016               Bucharest Univ Econ Studies  FullOrgName   
9560     0.000014                         Babes Bolyai Univ  FullOrgName   
9862     0.000007  Univ Med Pharm Sci & Technol Tirgu Mures  FullOrgName   
9924     0.000007                   Politehn Univ Timisoara  FullOrgName   
10265    0.000005                       West Univ Timisoara  FullOrgName   

       country organization  
9522   Romania          NaN  
9560   Romania          NaN  
9862   Romania          NaN  
9924   Romania          NaN  
10265  Romania          NaN  
--------------------------------------------------
Vietnam
centrality = 6.740598526022735e-05
       centrality                        name        group  country  \
9610     0.000012          Ton Duc Thang Univ  FullOrgName  Vietnam   
10175    0.000005           RMIT Univ Vietnam  FullOrgName  Vietnam   
10314    0.000005  Ho

17593    0.000001              Univ Uyo  FullOrgName  Nigeria          NaN
--------------------------------------------------
Indonesia
centrality = 2.306723182699403e-05
       centrality                name        group    country organization
10056    0.000006      Univ Indonesia  FullOrgName  Indonesia          NaN
10114    0.000005          Univ Jambi  FullOrgName  Indonesia          NaN
11824    0.000003  Univ Palangka Raya  FullOrgName  Indonesia          NaN
13079    0.000002      SMERU Res Inst  FullOrgName  Indonesia          NaN
17372    0.000001      Univ Airlangga  FullOrgName  Indonesia          NaN
--------------------------------------------------
Peru
centrality = 2.0766558403542946e-05
       centrality                           name        group country  \
9937     0.000007                     Univ Piura  FullOrgName    Peru   
10848    0.000003  Pontificia Univ Catolica Peru  FullOrgName    Peru   
11826    0.000003                            CIP  FullOrgName    Per

37565          NaN  
--------------------------------------------------
Ecuador
centrality = 8.059362198528824e-06
         centrality                             name        group  country  \
11844  2.686883e-06              ESPOL Polytech Univ  FullOrgName  Ecuador   
13097  2.289438e-06             Inst Nacl Invest Agr  FullOrgName  Ecuador   
17732  1.072100e-06               Banco Cent Ecuador  FullOrgName  Ecuador   
19818  7.854837e-07                   Univ Nacl Loja  FullOrgName  Ecuador   
22712  5.524860e-07  Corp Estudios Desarrollo CORDES  FullOrgName  Ecuador   

      organization  
11844          NaN  
13097          NaN  
17732          NaN  
19818          NaN  
22712          NaN  
--------------------------------------------------
Cote Ivoire
centrality = 7.961168262223033e-06
         centrality                                     name        group  \
11846  2.686876e-06                     African Dev Bank Grp  FullOrgName   
12906  2.404939e-06                   

36104          NaN  
--------------------------------------------------
Bosnia & Herceg
centrality = 2.5244528336948813e-06
         centrality                name        group          country  \
13148  2.242409e-06      Int Burch Univ  FullOrgName  Bosnia & Herceg   
31893  1.081930e-07  Pavlov Int Bank Ad  FullOrgName  Bosnia & Herceg   
32497  9.816160e-08  Univ East Sarajevo  FullOrgName  Bosnia & Herceg   
33139  7.568896e-08       Univ Sarajevo  FullOrgName  Bosnia & Herceg   

      organization  
13148          NaN  
31893          NaN  
32497          NaN  
33139          NaN  
--------------------------------------------------
Monaco
centrality = 2.404365327202029e-06
       centrality             name        group country organization
12907    0.000002  Int Univ Monaco  FullOrgName  Monaco          NaN
--------------------------------------------------
Mauritius
centrality = 2.28608102368851e-06
         centrality                 name        group    country organization
1

centrality = 1.9269624092292836e-07
         centrality  name        group       country organization
29277  1.926962e-07  WAMA  FullOrgName  Sierra Leone          NaN
--------------------------------------------------
Senegal
centrality = 1.926962409215882e-07
         centrality                   name        group  country organization
29276  1.926962e-07  Univ Cheikh Anta Diop  FullOrgName  Senegal          NaN
--------------------------------------------------
Bahrain
centrality = 1.7281326556598597e-07
         centrality                     name        group  country  \
32508  9.791919e-08             Univ Bahrain  FullOrgName  Bahrain   
33166  7.489408e-08  Islamic Int Rating Agcy  FullOrgName  Bahrain   

      organization  
32508          NaN  
33166          NaN  
--------------------------------------------------
Rep of Georgia
centrality = 1.6121155106861545e-07
         centrality                        name        group         country  \
30205  1.612116e-07  Tbilisi St

## publication classified by country

In [15]:
raw_df = pd.read_csv(f'../../data/2021-12-16/csv/{year}.csv')[['Fullname', 'PaperTitle', 'journaltitle']]

In [16]:
sub_df = df.loc[df['group'] == 'Fullname', ['name', 'country']]
author_country_mapping = dict(zip(sub_df['name'], sub_df['country']))

In [17]:
stat = defaultdict(lambda: defaultdict(lambda: []))
for g in tqdm(raw_df.groupby('PaperTitle')):
    r = g[1].iloc[0]
    country = author_country_mapping[r['Fullname']]
    title = r['PaperTitle']
    journal = r['journaltitle']
    stat[country][journal].append(title)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16649.0), HTML(value='')))




In [20]:
country = 'USA'
res_df = pd.DataFrame([stat[country]])
print(f'journal title : number of paper in {country} in year {year}')
print('-'*20)
for k, v in sorted(stat[country].items(), key=lambda x: len(x[1]), reverse=True):
    print(k, len(v), sep=' : ')

journal title : number of paper in USA in year 2019
--------------------
APPLIED ECONOMICS LETTERS : 99
AMERICAN ECONOMIC REVIEW : 94
JOURNAL OF ECONOMIC BEHAVIOR & ORGANIZATION : 93
WORLD DEVELOPMENT : 90
APPLIED ECONOMICS : 89
ECONOMICS LETTERS : 83
JOURNAL OF FINANCIAL ECONOMICS : 83
REVIEW OF FINANCIAL STUDIES : 73
REVIEW OF ECONOMIC STUDIES : 58
GAMES AND ECONOMIC BEHAVIOR : 57
ENERGY ECONOMICS : 57
ENVIRONMENTAL & RESOURCE ECONOMICS : 55
JOURNAL OF FINANCE : 54
PUBLIC CHOICE : 52
ECONOMIC INQUIRY : 51
JOURNAL OF CONSUMER AFFAIRS : 51
JOURNAL OF POLITICAL ECONOMY : 49
REVIEW OF ECONOMICS AND STATISTICS : 45
JOURNAL OF PUBLIC ECONOMICS : 43
JOURNAL OF DEVELOPMENT STUDIES : 43
JOURNAL OF FINANCIAL AND QUANTITATIVE ANALYSIS : 42
JOURNAL OF DEVELOPMENT ECONOMICS : 42
ECONOMICS OF EDUCATION REVIEW : 42
JOURNAL OF ECONOMIC THEORY : 41
JOURNAL OF ENVIRONMENTAL ECONOMICS AND MANAGEMENT : 40
AMERICAN ECONOMIC JOURNAL-ECONOMIC POLICY : 40
JOURNAL OF ECONOMIC PERSPECTIVES : 37
SOUTHERN ECONO

In [21]:
data = [[k, len(v)] for k, v in stat[country].items()]
res_df = pd.DataFrame(data, columns=['journal title', 'count'])
res_df['centrality_rank'] = res_df['journal title'].apply(lambda x: journal_rk.loc[journal_rk['name'] == x, ['centrality']].index[0]+1)
res_df['impact-factor_rank'] = res_df['journal title'].apply(lambda x: journal_rk.loc[journal_rk['name'] == x, ['impact_factor_optimal_rank']].values[0][0])
res_df['h-index_rank'] = res_df['journal title'].apply(lambda x: journal_rk.loc[journal_rk['name'] == x, ['h_index_optimal_rank']].values[0][0])
res_df.sort_values(by=['count'], ascending=False)

Unnamed: 0,journal title,count,centrality_rank,impact-factor_rank,h-index_rank
43,APPLIED ECONOMICS LETTERS,99,7,470.0,210.0
2,AMERICAN ECONOMIC REVIEW,94,5,8.0,1.0
62,JOURNAL OF ECONOMIC BEHAVIOR & ORGANIZATION,93,3,117.0,44.0
45,WORLD DEVELOPMENT,90,1,112.0,34.0
49,APPLIED ECONOMICS,89,2,18.0,49.0
...,...,...,...,...,...
247,ESTUDIOS DE ECONOMIA,1,286,819.0,726.0
267,CESIFO ECONOMIC STUDIES,1,312,188.0,285.0
264,FISCAL STUDIES,1,255,205.0,255.0
262,JOURNAL OF NEUROSCIENCE PSYCHOLOGY AND ECONOMICS,1,221,,
