In [1]:
import pandas as pd
import numpy as np
import panel as pn
import holoviews as hv
from holoviews import opts
import hvplot.pandas
pn.extension('tabulator')
hv.extension('bokeh')

In [2]:
df = pd.read_csv('Derived/All-Journals-Cleaned.csv')

In [3]:
# Data Manipulations for Later Use
# create a year column
df["Year"] = (df['Issue'].str[-4:]).astype(int)

# identify author and JEL columns
author_columns = [col for col in df.columns if col.startswith("Author")]
jel_columns = [col for col in df.columns if col.startswith("JEL")]

## Widgets Creation

In [4]:
# Define Panel Widgets

# 1. Journal Dropdown
journal_names = df['Journal'].unique()
journal_names = list(journal_names)
journal_select = pn.widgets.Select(name='Choose Journal', options=journal_names)

# 2. Year Slider
journal_start_yr = {
    'AMERICAN ECONOMIC REVIEW' : 1999,
    'AMERICAN ECONOMIC REVIEW: INSIGHTS' : 2019,
    'AMERICAN ECONOMIC JOURNAL: APPLIED ECONOMICS' : 2009,
    'AMERICAN ECONOMIC JOURNAL: ECONOMIC POLICY' : 2009,
    'AMERICAN ECONOMIC JOURNAL: MACROECONOMICS' : 2009,
    'AMERICAN ECONOMIC JOURNAL: MICROECONOMICS' : 2009
}
    
year_slider = pn.widgets.IntSlider(name = "Year", start = 1999, end = 2024, step = 1)

# Function to update the year slider range based on the journal selected
def update_year_range(event):
    journal_name = event.new
    year_slider.start = journal_start_yr[journal_name]
    year_slider.value = journal_start_yr[journal_name]

# Watch for changes to the choice of journal
journal_select.param.watch(update_year_range, 'value')

Watcher(inst=Select(name='Choose Journal', options=['AMERICAN ECONOMIC REVIEW...], value='AMERICAN ECONOMIC R...), cls=<class 'panel.widgets.select.Select'>, fn=<function update_year_range at 0x168265260>, mode='args', onlychanged=True, parameter_names=('value',), what='value', queued=False, precedence=0)

## Top Economists Panel

In [5]:
# Top Economists(old)
def get_top_authors(start_year, journal):
    """
    function to get top 10 authors in the selected journal from the selected start year
    """
    # keep paper in the selected journal from the selected start year
    filtered_data = df[(df['Year'] >= start_year) & (df['Journal'] == journal)]
    # transform the dataset so that each row corresponds to a single author of a paper
    filtered_melted_data = filtered_data.melt(id_vars='Title', value_vars=author_columns, var_name='AuthorCol', value_name='Author').dropna(subset=['Author'])
    # get the top 10 authors with most works published
    author_counts_top10 = filtered_melted_data['Author'].value_counts().head(10)
    return author_counts_top10.to_frame('Number of Papers')

In [6]:
# Top Economists
# construct a sub dataset which contains annual counts of works of each author in each journal
    # transform the dataset so that each row corresponds to a single author of a paper
melted_data_by_author = df.melt(id_vars=['Title','Year','Journal'], value_vars=author_columns, var_name='AuthorCol', value_name='Author').dropna(subset=['Author'])
    # group by year, journal and author, then count the occurrences
grouped_author = melted_data_by_author.groupby(['Year','Journal', 'Author']).size().reset_index(name='counts')
    # rename columns for clarity
grouped_author.columns = ['Year', 'Journal', 'Author', 'Count']
    # make dataframe pipline interactive
igrouped_author = grouped_author.interactive()

In [7]:
# create author works count pipline
author_count_pipline = (
    igrouped_author[
    (igrouped_author.Year == year_slider) &
    (igrouped_author.Journal == journal_select)
    ].reset_index(drop=True)
)

In [8]:
# author works count table
author_table = author_count_pipline.pipe(pn.widgets.Tabulator, pagination = 'remote', page_size = 10, sizing_mode = 'stretch_width')
author_table

## Research Field Trend Panel

In [9]:
# Popular Research Fields
# code book for subfields
field_code = {
    "A" : "General Economics and Teaching",
    "B" : "History of Economic Thought, Methodology, and Heterodox Approaches",
    "C" : "Mathematical and Quantitative Methods",
    "D" : "Microeconomics",
    "E" : "Macroeconomics and Monetary Economics",
    "F" : "International Economics",
    "G" : "Financial Economics",
    "H" : "Public Economics",
    "I" : "Health, Education, and Welfare",
    "J" : "Labor and Demographic Economics",
    "K" : "Law and Economics",
    "L" : "Industrial Organization",
    "M" : "Business Administration and Business Economics • Marketing • Accounting • Personnel Economics",
    "N" : "Economic History",
    "O" : "Economic Development, Innovation, Technological Change, and Growth",
    "P" : "Political Economy and Comparative Economic Systems",
    "Q" : "Agricultural and Natural Resource Economics • Environmental and Ecological Economics",
    "R" : "Urban, Rural, Regional, Real Estate, and Transportation Economics",
    "Y" : "Miscellaneous Categories",
    "Z" : "Other Special Topics"
}

# construct a sub dataset which contains annual counts of works in each subfiled of a journal
    # transform the dataset so that each row corresponds to a single JEL code of a paper
melted_data_by_JEL = df.melt(id_vars=['Title','Year','Journal'], value_vars=jel_columns, var_name='JELCol', value_name='JEL').dropna(subset=['JEL'])
    # create a column of corresponding subfields
melted_data_by_JEL['Subfield'] = melted_data_by_JEL['JEL'].str[0].map(field_code)
    # group by year, journal and subfield, then count the occurrences
grouped_subfield = melted_data_by_JEL.groupby(['Year','Journal', 'Subfield']).size().reset_index(name='counts')
    # rename columns for clarity
grouped_subfield.columns = ['Year', 'Journal', 'Subfield', 'Count']
    # make dataframe pipline interactive
igrouped_subfield = grouped_subfield.interactive()

In [10]:
# create subfiled works count pipline
subfiled_count_pipline = (
    igrouped_subfield[
    (igrouped_subfield.Year >= year_slider) &
    (igrouped_subfield.Journal == journal_select)
    ].reset_index(drop=True)
)

In [11]:
# subfield trend plot
subfiled_trend_plot = subfiled_count_pipline.hvplot(x='Year', y='Count', by='Subfield', title='Research Field Trend',height=300, width=1200)
subfiled_trend_plot

In [12]:
panel_layout = pn.Row(subfiled_trend_plot)
panel_layout.show()

Launching server at http://localhost:62811


<panel.io.server.Server at 0x1692167d0>

## Social Network

In [13]:
import networkx as nx

In [14]:
# create a network graph
G = nx.Graph()

# add weighted edges by the number of collaborations
for index, row in df.iterrows():
    authors = [row[f'{a}'] for a in author_columns if pd.notna(row[f'{a}'])]
    for i in range(len(authors)):
        for j in range(i + 1, len(authors)):
            if G.has_edge(authors[i], authors[j]):
                G[authors[i]][authors[j]]['weight'] += 1  # Increment weight by 1 for each co-authored paper
            else:
                G.add_edge(authors[i], authors[j], weight=1)

In [15]:
import hvplot.networkx as hvnx
pn.extension()

In [16]:
def plot_network(author):
    if author in G:
        subgraph = nx.ego_graph(G, author, radius=1)  # radius=1 gives direct collaborators
        # apply a layout algorithm
        positions = nx.spring_layout(subgraph, weight='weight')
        # draw the graph
        return hvnx.draw(subgraph, positions, edge_color='grey', 
                         with_labels=True, label_position='top', font_size='10pt', 
                         node_color='darkred', node_line_width=0)
    else:
        return ""

# Search widget
author_input = pn.widgets.AutocompleteInput(name='Which author you are interested to look at?', options=list(G.nodes), placeholder='Enter author name')

# Dynamic map to update the plot based on the search
network_plot = pn.bind(plot_network, author_input)
# Layout
network_search = pn.Row(pn.Row(author_input), network_plot)
network_search

In [17]:
network_search.show()

Launching server at http://localhost:62812


<panel.io.server.Server at 0x16b902d50>

## Geographic Trend

In [18]:
# Load journal data
df_lemmatized = pd.read_csv('Derived/Processed-Journals-Temp.csv')

# Extract and add year from 'Issue'
df_lemmatized["Year"] = df_lemmatized['Issue'].str[-4:].astype(int)

In [19]:
import pycountry

# function to find countries mentioned in an abstract
def find_countries_in_abstract(abstract, country_list):
    mentioned_countries = set()  # Using a set to avoid duplicates
    for country in country_list:
        if country.lower() in abstract.lower():
            mentioned_countries.add(country)
    return mentioned_countries

# get list of all countries
country_list = [country.name for country in pycountry.countries]

In [20]:
# creating an empty list to store results
mentions = []

# process each record in the dataframe
for index, row in df_lemmatized.iterrows():
    year = row['Year']
    abstract = row['Lemmatized_Abstracts']
    countries_mentioned = find_countries_in_abstract(abstract, country_list)
    
    # For each country mentioned, add to the results list
    for country in countries_mentioned:
        mentions.append({'Year': year, 'Country': country})

# create a new dataframe from the results list
mentions_df = pd.DataFrame(mentions)

# count mentions per year and country
geo_df = mentions_df.groupby(['Year', 'Country']).size().reset_index(name='Mentions')

# save the new dataset to a CSV file
geo_df.to_csv('Derived/Year_Country_Mentions.csv', index=False)

In [21]:
# make country dataframe pipline interactive
igeo_df = geo_df.interactive()

# create country count pipline
country_count_pipline = (
    igeo_df[
    (igeo_df.Year == year_slider) &
    (igeo_df.Mentions > 1) # keep countries mentioned more than once
    ].reset_index(drop=True)
)
country_count_pipline = country_count_pipline[['Country', 'Mentions']].sort_values(by='Mentions', ascending=False)

# country count table
country_table = country_count_pipline.pipe(pn.widgets.Tabulator, 
                                           pagination = 'remote', 
                                           page_size = 10, 
                                           sizing_mode = 'stretch_width',
                                           show_index=False)
country_table