In [1]:
import altair as alt
import numpy as np
import pandas as pd
import requests
import yaml

In [2]:
# Base endpoint used for every search request issued in this notebook
# (presumably the ERIC search API, judging by the URL path — confirm).
url = "https://api.ies.ed.gov/eric/"


def load_data(query, start_year=1960, end_year=2022):
    """Count matching records per publication year.

    One request is sent to ``url`` for every year in
    ``range(start_year, end_year)``; the year is appended to ``query``
    to form the final search string, so ``query`` is expected to end with
    a field prefix such as ``publicationdateyear:``.

    Parameters
    ----------
    query : str
        Search expression to which the year is appended.
    start_year : int, optional
        First year queried (inclusive). Defaults to 1960.
    end_year : int, optional
        Year at which to stop (exclusive, as in ``range``). Defaults to
        2022, i.e. the last year queried is 2021.

    Returns
    -------
    pandas.DataFrame
        Columns ``Year`` and ``Articles`` (the ``numFound`` value reported
        by the service), one row per year in ascending order.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    requests.Timeout
        If a request does not complete within the timeout.
    """
    records = []
    # A single session reuses the underlying connection across the yearly requests.
    with requests.Session() as session:
        for year in range(start_year, end_year):
            params = {
                "search": query + str(year),
                "format": "json",
            }
            # Without a timeout a stalled connection would block the notebook forever.
            response = session.get(url, params=params, timeout=30)
            # Fail loudly on HTTP errors instead of a confusing KeyError further down.
            response.raise_for_status()
            # The service is asked for JSON, so decode it as JSON rather than YAML.
            payload = response.json()
            records.append((year, int(payload["response"]["numFound"])))

    return pd.DataFrame(records, columns=["Year", "Articles"])

In [3]:
# Test individual queries
querystring = {
    # Alternative free-text form of the same check:
    # "search": "'Add Health' AND peerreviewed:T AND publicationdateyear:2018",
    "search": '(title:"Add Health" OR description:"Add Health") AND peerreviewed:T AND publicationdateyear:2021',
    "format": "json",
}
# A timeout keeps the cell from hanging if the service stalls.
response = requests.get(url, params=querystring, timeout=30)
# Surface HTTP errors directly rather than as a KeyError on the payload.
response.raise_for_status()
# The response is requested as JSON, so parse it as JSON instead of YAML.
text = response.json()
int(text["response"]["numFound"])

3

### Absolute Counts

In [4]:
query_old = "network"
# Phrase match on title or description, peer-reviewed only; load_data appends the year.
query = f'(title:"{query_old}" OR description:"{query_old}") AND peerreviewed:T AND publicationdateyear:'
# Alternative free-text form:
# query = "'" + query_old + "' AND peerreviewed:T AND publicationdateyear:"
print(query)
# Tag every row with the search term so the frames can be stacked later.
df1 = load_data(query).assign(Domain=query_old)

(title:"network" OR description:"network") AND peerreviewed:T AND publicationdateyear:


In [5]:
query_old = "social network"
# Phrase match on title or description, peer-reviewed only; load_data appends the year.
query = f'(title:"{query_old}" OR description:"{query_old}") AND peerreviewed:T AND publicationdateyear:'
# Alternative free-text form:
# query = "'" + query_old + "' AND peerreviewed:T AND publicationdateyear:"
print(query)
# Tag every row with the search term so the frames can be stacked later.
df2 = load_data(query).assign(Domain=query_old)

(title:"social network" OR description:"social network") AND peerreviewed:T AND publicationdateyear:


In [6]:
query_old = "social network analysis"
# Phrase match on title or description, peer-reviewed only; load_data appends the year.
query = f'(title:"{query_old}" OR description:"{query_old}") AND peerreviewed:T AND publicationdateyear:'
# Alternative free-text form:
# query = "'" + query_old + "' AND peerreviewed:T AND publicationdateyear:"
print(query)
# Tag every row with the search term so the frames can be stacked later.
df3 = load_data(query).assign(Domain=query_old)

(title:"social network analysis" OR description:"social network analysis") AND peerreviewed:T AND publicationdateyear:


In [7]:
query_old = "Add Health"
# Phrase match on title or description, peer-reviewed only; load_data appends the year.
query = f'(title:"{query_old}" OR description:"{query_old}") AND peerreviewed:T AND publicationdateyear:'
# Alternative free-text form:
# query = "'" + query_old + "' AND peerreviewed:T AND publicationdateyear:"
print(query)
# Tag every row with the search term so the frames can be stacked later.
df4 = load_data(query).assign(Domain=query_old)

(title:"Add Health" OR description:"Add Health") AND peerreviewed:T AND publicationdateyear:


In [8]:
query_old = "ERGM"
# Phrase match on title or description, peer-reviewed only; load_data appends the year.
query = f'(title:"{query_old}" OR description:"{query_old}") AND peerreviewed:T AND publicationdateyear:'
# Alternative free-text form:
# query = "'" + query_old + "' AND peerreviewed:T AND publicationdateyear:"
print(query)
# Tag every row with the search term so the frames can be stacked later.
df5 = load_data(query).assign(Domain=query_old)

(title:"ERGM" OR description:"ERGM") AND peerreviewed:T AND publicationdateyear:


In [9]:
query_old = "SIENA"
# Phrase match on title or description, peer-reviewed only; load_data appends the year.
query = f'(title:"{query_old}" OR description:"{query_old}") AND peerreviewed:T AND publicationdateyear:'
# Alternative free-text form:
# query = "'" + query_old + "' AND peerreviewed:T AND publicationdateyear:"
print(query)
# Tag every row with the search term so the frames can be stacked later.
df6 = load_data(query).assign(Domain=query_old)

(title:"SIENA" OR description:"SIENA") AND peerreviewed:T AND publicationdateyear:


In [10]:
# Stack the per-term yearly counts into one long-format frame for plotting.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; like append, it keeps each frame's original index.
# NOTE(review): df1 ("network") and df3 ("social network analysis") are not
# included here — confirm that this is intentional.
df_append = pd.concat([df2, df4, df5, df6])
#df.to_csv("jl_output.csv", encoding='utf-8', index=False)

In [11]:
# Yearly article counts, one line per search term (distinguished by colour and dash).
(
    alt.Chart(df_append)
    .mark_line()
    .encode(
        x=alt.X("Year:Q"),
        y=alt.Y("Articles:Q"),
        color=alt.Color("Domain"),
        strokeDash=alt.StrokeDash("Domain"),
        tooltip=["Articles"],
    )
    .properties(width=800, height=600)
)

### Share of the Total Corpus (%)

In [None]:
### All Corpus Size ###
query_old = ""
# Field-restricted alternative:
# query = '(title:"' + query_old + '" OR description:"' + query_old + '") AND peerreviewed:T AND publicationdateyear:'
# With an empty term only the peer-review and year filters carry information.
query = f"'{query_old}' AND peerreviewed:T AND publicationdateyear:"
# Yearly counts for the whole corpus; the count column is renamed so it can
# sit alongside the per-term "Articles" column after a merge.
df_all = load_data(query).rename(columns={"Articles": "Total"})

In [None]:
# Attach the yearly corpus totals and compute each term's share of the corpus.
# Any "Total" column left over from a previous run is dropped first so the cell
# can be re-executed safely: merging twice would otherwise produce
# Total_x/Total_y and the division below would raise a KeyError.
df_append = (
    df_append
    .drop(columns=["Total"], errors="ignore")
    .merge(df_all, on="Year", how="inner")
)
df_append["prop_articles"] = df_append["Articles"] / df_append["Total"]

In [17]:
# Each term's yearly share of the whole corpus, one line per search term.
(
    alt.Chart(df_append)
    .mark_line()
    .encode(
        x=alt.X("Year:Q"),
        y=alt.Y(
            "prop_articles:Q",
            # Render the fractions as percentages on the axis.
            axis=alt.Axis(format="%", title="Proportion of total corpus"),
        ),
        color=alt.Color("Domain"),
        strokeDash=alt.StrokeDash("Domain"),
        tooltip=["prop_articles"],
    )
    .properties(width=800, height=600)
)