# Descriptive Visualizations

In [1]:
# import libraries
import pandas as pd
import sqlite3
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

In [2]:
# Open the SQLite database queried by the cells below. The path is relative
# to the notebook's working directory.
DB_PATH = "econtop.db"
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()  # plain cursor on the same connection

In [49]:
# Article-level extract: DOI, journal, title and date for articles dated from
# 2012-01-01 up to (but excluding) 2023-01-01. The inner joins through
# author_article / author / affiliation mean an article is only returned when
# it has at least one author row with a matching affiliation row.
q = """
    SELECT art.doi, art.journal, art.title, art.date
    FROM author_article AS aa JOIN author AS au JOIN article AS art JOIN affiliation AS af
    ON aa.authorid = au.authorid AND
    aa.doi = art.doi AND
    au.affiliationid = af.affiliationid
    WHERE art.date >= '2012-01-01' AND
    art.date < '2023-01-01'
    """

# The join can return the same article several times (once per matching
# author), so keep one row per DOI. Every column is cast to str, after which
# the date column is parsed into datetimes.
df = (
    pd.read_sql_query(q, conn)
    .drop_duplicates(subset=["doi"])
    .astype("str")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)
df.head()

Unnamed: 0,doi,journal,title,date
0,10.1093/rfs/hhr069,Review of Financial Studies,The Inventory Growth Spread,2012-01-15
2,10.1093/rfs/hhr109,Review of Financial Studies,Takeovers and Divergence of Investor Opinion,2012-01-15
3,10.1093/rfs/hhr081,Review of Financial Studies,Corporate Governance Objectives of Labor Union...,2012-01-15
4,10.1093/rfs/hhr076,Review of Financial Studies,Managerial Attributes and Executive Compensation,2012-01-15
7,10.1093/rfs/hhr092,Review of Financial Studies,The Road Less Traveled: Strategy Distinctivene...,2012-01-15


In [50]:
# Publications per journal: `cnt` is the number of DOIs in each journal group.
df_count_by_journal = df.groupby("journal")["doi"].count().to_frame("cnt")

# Horizontal bar chart with one bar per journal; each bar is labelled with and
# shaded by its publication count.
journal_bars = go.Bar(
    x=df_count_by_journal["cnt"],
    y=df_count_by_journal.index,
    text=df_count_by_journal["cnt"],
    orientation="h",
    marker=dict(color=df_count_by_journal["cnt"], colorscale="Peach"),
)

fig = go.Figure(data=[journal_bars])
fig.update_layout(
    title_text="Number of Collected Publications from 2012 to 2022 by Journal",
    yaxis=dict(categoryorder="total ascending"),  # order bars by their total
    width=800,
    height=600,
)
fig.show()

In [51]:
# Author-level extract: for articles dated from 2012-01-01 up to (but
# excluding) 2023-01-01, return the DOI, author name, affiliation name,
# author id, title and date -- one row per author of each article.
q = """
    SELECT art.doi, au.authorname, af.name, au.authorid, art.title, art.date
    FROM author_article AS aa JOIN author AS au JOIN article AS art JOIN affiliation AS af
    ON aa.authorid = au.authorid AND
    aa.doi = art.doi AND
    au.affiliationid = af.affiliationid
    WHERE art.date >= '2012-01-01' AND
    art.date < '2023-01-01'
    """

# Keep exactly one row per (article, author) pair so that a repeated link row
# cannot inflate the per-author publication counts computed from this frame.
# Co-authors of the same article are kept (they differ in authorid).
# Every column is then cast to str.
df = (
    pd.read_sql_query(q, conn)
    .drop_duplicates(subset=["doi", "authorid"])
    .astype("str")
)
df["date"] = pd.to_datetime(df["date"])   # convert date to datetime
df.head()

Unnamed: 0,doi,authorname,name,authorid,title,date
0,10.1093/rfs/hhr069,Frederico Belo,INSEAD,4Weq9mEAAAAJ,The Inventory Growth Spread,2012-01-15
1,10.1093/rfs/hhr069,Xiaoji Lin,University of Minnesota,7QwB9o8AAAAJ,The Inventory Growth Spread,2012-01-15
2,10.1093/rfs/hhr109,Kose John,New York University,dJlFSXkAAAAJ,Takeovers and Divergence of Investor Opinion,2012-01-15
3,10.1093/rfs/hhr081,Ashwini Agrawal,London School of Economics,apUAKLgAAAAJ,Corporate Governance Objectives of Labor Union...,2012-01-15
4,10.1093/rfs/hhr076,Si Li,Wilfrid Laurier University,lmzraDAAAAAJ,Managerial Attributes and Executive Compensation,2012-01-15


In [55]:
# Top 20 Authors

# Display label for each author: "<author name>, <affiliation name>", taken
# from the first row in which that authorid appears in `df`. Built once as a
# Series indexed by authorid instead of scanning `df` row by row per author.
author_labels = (
    df.drop_duplicates(subset="authorid")
    .set_index("authorid")
    .pipe(lambda first_rows: first_rows["authorname"] + ", " + first_rows["name"])
)

# Count rows (DOIs) per authorid, keep the 20 largest counts, and attach the
# label. `assign` aligns `author_labels` on the authorid index and returns a
# new frame, so nothing is written onto a slice of another frame (which can
# trigger pandas' SettingWithCopyWarning).
df_count_by_author = (
    df.groupby("authorid")["doi"].count()
    .to_frame("cnt")
    .sort_values("cnt", ascending=False)
    .head(20)
    .assign(name=author_labels)
)

# Horizontal bar chart: one bar per author label, labelled with and shaded by
# the publication count.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        y = df_count_by_author.name,
        x = df_count_by_author.cnt,
        text = df_count_by_author.cnt,
        orientation = "h",
        marker={'color': df_count_by_author.cnt,
        'colorscale': 'Tealgrn'}
    )
)
fig.update_layout(title_text="Number of Collected Publications from 2012 to 2022 by Authors",
                  yaxis={'categoryorder':'total ascending'},
                  width=1200,
                  height=700
                  )
fig.show()

In [56]:
# Institution-level extract: DOI, affiliation name, title and date for
# articles dated from 2012-01-01 up to (but excluding) 2023-01-01.
q = """
    SELECT art.doi, af.name, art.title, art.date
    FROM author_article AS aa JOIN author AS au JOIN article AS art JOIN affiliation AS af
    ON aa.authorid = au.authorid AND
    aa.doi = art.doi AND
    au.affiliationid = af.affiliationid
    WHERE art.date >= '2012-01-01' AND
    art.date < '2023-01-01'
    """

# Keep one row per (article, affiliation name) pair, so an institution that
# appears on several rows for the same article is kept only once for it.
# Every column is cast to str, then the date column is parsed into datetimes.
df = (
    pd.read_sql_query(q, conn)
    .drop_duplicates(subset=["doi", "name"])
    .astype("str")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)
df.head()

Unnamed: 0,doi,name,title,date
0,10.1093/rfs/hhr069,INSEAD,The Inventory Growth Spread,2012-01-15
1,10.1093/rfs/hhr069,University of Minnesota,The Inventory Growth Spread,2012-01-15
2,10.1093/rfs/hhr109,New York University,Takeovers and Divergence of Investor Opinion,2012-01-15
3,10.1093/rfs/hhr081,London School of Economics,Corporate Governance Objectives of Labor Union...,2012-01-15
4,10.1093/rfs/hhr076,Wilfrid Laurier University,Managerial Attributes and Executive Compensation,2012-01-15


In [60]:
# Top 20 Institutions
# `cnt` is the number of DOIs per affiliation name; only the 20 largest are kept.
df_count_by_aff = (
    df.groupby("name")["doi"].count()
    .to_frame("cnt")
    .sort_values("cnt", ascending=False)
    .head(20)
)

# Horizontal bar chart with one bar per institution; each bar is labelled with
# and shaded by its publication count.
institution_bars = go.Bar(
    x=df_count_by_aff["cnt"],
    y=df_count_by_aff.index,
    text=df_count_by_aff["cnt"],
    orientation="h",
    marker=dict(color=df_count_by_aff["cnt"], colorscale="Purpor"),
)

fig = go.Figure(data=[institution_bars])
fig.update_layout(
    title_text="Number of Collected Publications from 2012 to 2022 by Institutions",
    yaxis=dict(categoryorder="total ascending"),  # order bars by their total
    width=1000,
    height=700,
)
fig.show()