In [None]:
import numpy as np 
import pandas as pd 
import requests 
import json
import time
import matplotlib.pyplot as plt 
import seaborn as sns 
sns.set()

In [None]:
def getEricRecords(search, fields = None, start=0, rows=200):
    """Fetch one page of ERIC API results and return the decoded JSON as a DataFrame.

    Parameters
    ----------
    search : str
        ERIC query string, e.g. 'subject:autism AND publicationdateyear:2019'.
    fields : list of str, optional
        Names of the record fields to request. When omitted (or empty) no
        `fields` parameter is sent.
    start : int, default 0
        Zero-based offset of the first record to return.
    rows : int, default 200
        Number of records requested for this page.

    Returns
    -------
    pandas.DataFrame
        `pd.DataFrame(...)` built directly from the JSON payload. Callers in
        this notebook read the 'numFound' and 'docs' rows of its first column.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.Timeout
        If the API does not answer within the timeout.
    """
    url = 'https://api.ies.ed.gov/eric/'
    # Let requests build the query string so that spaces, quotes, '&', '#', '+'
    # and non-ASCII characters in `search` are percent-encoded instead of
    # silently corrupting the URL.
    params = {'search': search, 'rows': rows, 'format': 'json', 'start': start}
    if fields:
        params['fields'] = ', '.join(fields)
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, params=params, timeout=60)
    # Fail with the HTTP status rather than a confusing JSON decoding error.
    response.raise_for_status()
    return pd.DataFrame(response.json())

In [None]:
def getRecordCount(search):
    """Return how many ERIC records match `search`, printing a one-line summary.

    Parameters
    ----------
    search : str
        ERIC query string passed through to `getEricRecords`.

    Returns
    -------
    The value stored under 'numFound' in the first column of the API response
    frame (presumably an integer count; confirm against the API payload).
    """
    # Only the counter is needed, so ask for a single row instead of
    # downloading a full 200-record page.
    dataFrame = getEricRecords(search, rows=1)
    # .iloc[0] takes the first column explicitly by position; plain `[0]` on a
    # label-indexed Series is deprecated in recent pandas releases.
    totalRecords = dataFrame.loc['numFound'].iloc[0]
    print('Search', search, 'returned', '{:,}'.format(totalRecords), 'records')
    return totalRecords

In [None]:
def cleanElementsUsingList(x):
    """Flatten a list-valued cell into a single comma-separated string.

    - Anything that is not a list is handed back unchanged.
    - An empty list, or a list holding only one empty string, becomes None.
    - Any other list is joined with ', '.
    """
    if isinstance(x, list):
        holdsNothing = len(x) == 0 or (len(x) == 1 and x[0] == '')
        return None if holdsNothing else ', '.join(x)
    return x

In [None]:
def getAllEricRecords(search, fields = None, cleanElements = True):
    """Download every ERIC record matching `search`, one page at a time.

    Parameters
    ----------
    search : str
        ERIC query string.
    fields : list of str, optional
        Record fields to request; forwarded to `getEricRecords`.
    cleanElements : bool, default True
        When True, every cell is passed through `cleanElementsUsingList`
        so list values become comma-separated strings.

    Returns
    -------
    pandas.DataFrame
        One row per record, pages concatenated with a fresh 0..n-1 index.
    list
        An empty list when the search matches nothing (kept for backward
        compatibility with existing callers).

    Prints the record count (via `getRecordCount`) and the elapsed time.
    """
    startTime = time.time()
    numRecordsReturnedEachApiCall = 200
    totalRecords = getRecordCount(search)
    if(totalRecords == 0):
        print ('Search', search, 'has no results')
        return []
    # Collect the pages first and concatenate once: concatenating inside the
    # loop re-copies all previously fetched rows on every iteration.
    pages = []
    for nextFirstRecord in range(0, int(totalRecords), numRecordsReturnedEachApiCall):
        # Pass the page size explicitly so the loop step and the request size
        # cannot drift apart if getEricRecords' default ever changes.
        dataFrame = getEricRecords(search, fields, nextFirstRecord, numRecordsReturnedEachApiCall)
        # .iloc[0] = first column by position (plain `[0]` is deprecated).
        pages.append(pd.DataFrame(dataFrame.loc['docs'].iloc[0]))
    records = pd.concat(pages, sort=False, ignore_index=True)
    print('took', '{:,.1f}'.format(time.time() - startTime), 'seconds')
    if not cleanElements:
        return records
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; check the
    # class (not the instance) so a column named 'map' cannot be mistaken for it.
    elementwise = records.map if hasattr(pd.DataFrame, 'map') else records.applymap
    return elementwise(cleanElementsUsingList)

In [None]:
# Phrase terms must be wrapped in plain ASCII double quotes; the original used
# typographic quotes (U+201D), presumably a copy/paste artifact, so the phrase
# was not sent to the API as a quoted phrase.
search = 'subject:autism AND subject:"teaching methods" AND publicationdateyear:2019'
# No field list is given here, so the API decides which fields come back.
records = getAllEricRecords(search)
records

In [None]:
# Summarize the columns, non-null counts and dtypes of the result fetched without a field list.
records.info()

In [None]:
# Field names to request explicitly from the ERIC API; the message printed
# below reports how many there are.
allEricFields = [
    # core bibliographic fields
    'id', 'title', 'author', 'source', 'publicationdateyear', 'description',
    'subject', 'peerreviewed', 'abstractor', 'audience', 'authorxlink',
    # ERIC bookkeeping
    'e_datemodified', 'e_fulltextauth', 'e_yearadded', 'educationlevel',
    # identifiers
    'identifiersgeo', 'identifierslaw', 'identifierstest',
    # IES-related fields
    'iescited', 'iesfunded', 'iesgrantcontractnum', 'iesgrantcontractnumxlink',
    'ieslinkpublication', 'ieslinkwwcreviewguide', 'ieswwcreviewed',
    # publication details
    'institution', 'isbn', 'issn', 'language', 'publicationtype',
    'publisher', 'sourceid', 'sponsor', 'url',
]
print('There are', len(allEricFields), 'possible fields in the ERIC API response')

In [None]:
# Phrase terms must be wrapped in plain ASCII double quotes; the original used
# typographic quotes (U+201D), presumably a copy/paste artifact, so the phrase
# was not sent to the API as a quoted phrase.
search = 'subject:autism AND subject:"teaching methods" AND publicationdateyear:2019'
# Same query as before, this time explicitly requesting every field in allEricFields.
records = getAllEricRecords(search, allEricFields)

In [None]:
# Preview the first rows of the result fetched with the explicit field list.
records.head()

In [None]:
# Summarize the columns, non-null counts and dtypes of the result fetched with the explicit field list.
records.info()

In [None]:
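# Optional: uncomment the next line to save the records to an Excel workbook
# (pandas needs an Excel writer engine such as openpyxl installed for .xlsx files).
# records.to_excel('ERIC records for autism and teaching methods in 2019.xlsx')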