Experimenting with the CORE API
----

# Setup

In [4]:
from dotenv import load_dotenv
from os import getenv
import requests
import pandas as pd
import json

if load_dotenv():
    print("Loaded environment variables")
else:
    print("Failed to load environemnt variables")
CORE_API_KEY = getenv('CORE_API_KEY')

Loaded environment variables


In [96]:
url = 'https://api.core.ac.uk/v3/search/works'
query = {
    'q': '(sociology OR anthrpology OR political science) AND yearPublished:2022 AND _exists_:abstract',
    'limit': 250
}
headers = {"Authorization": f"Bearer {CORE_API_KEY}"}

res = requests.post(url, json.dumps(query), headers=headers)
res.status_code

200

In [97]:
raw = json.loads(res.text)
results = raw['results']

In [98]:
print(raw['totalHits'])

87538


In [103]:
results[-1]

{'acceptedDate': None,
 'arxivId': None,
 'authors': [{'name': 'Magee, Caroline Alexandra'}],
 'citationCount': None,
 'contributors': [],
 'outputs': ['https://api.core.ac.uk/v3/outputs/519836734'],
 'createdDate': '2022-06-10T08:06:13',
 'dataProviders': [{'id': 3615,
   'name': '',
   'url': 'https://api.core.ac.uk/v3/data-providers/3615',
   'logo': 'https://api.core.ac.uk/data-providers/3615/logo'}],
 'depositedDate': None,
 'abstract': 'This thesis applies social science research, historical and cultural analysis, and legislative review to understand the relationship between Texas’s school-based sexuality education policies and public health and, in so doing, lay the groundwork for policy change that will better promote sexual and reproductive health, health equity, and health justice in Texas. The thesis first situates Texas state-level sexuality education policy within a larger US political, historical, and legal context, highlighting the key roles of race, class, gender, and s

In [128]:
df = pd.DataFrame([
    {
        'title': result['title'],
        'len_fullText': len(result['fullText']), 
        'len_abstract': len(result['abstract']),
        'fieldOfStudy':result['fieldOfStudy'], 
        'documentType':result['documentType'],
        'publisher': result['publisher']
    } for result in results])

# documentType doesn't have much useful information in these results
print(df.documentType.value_counts(dropna=False))

# many of these documents seem to be theses, so they are quite long: median is ~75,000
# but shortest are <500
print(df.len_fullText.describe())
print(df.len_abstract.describe())

documentType
    248
Name: count, dtype: int64
count    2.480000e+02
mean     2.456155e+05
std      3.316800e+05
min      4.260000e+02
25%      4.823925e+04
50%      7.610800e+04
75%      3.608900e+05
max      1.567517e+06
Name: len_fullText, dtype: float64
count     248.000000
mean     1274.326613
std       888.760695
min        12.000000
25%       903.250000
50%      1196.000000
75%      1487.250000
max      9824.000000
Name: len_abstract, dtype: float64


In [123]:
# abstract can be searched for in text to remove, but NB small differences can prevent match
# in this example, inclusion of line breaks in full text
print(results[-1]['fullText'].find(results[-1]['abstract']))
print(results[-1]['fullText'].find(results[-1]['abstract'][:100]))
print(results[-1]['fullText'].find(results[-1]['abstract'][-100:]))

-1
1327
3542


In [124]:
# maybe one way select docs is to filter by abstract length: below ~300 is probably not interesting
# the long ones are probably for books and theses, which are also interesting -- but still multi-thousand word is perhaps not ideal
df.sort_values('len_fullText')

Unnamed: 0,title,len_fullText,len_abstract,fieldOfStudy,documentType,publisher
39,Dynamics of Difference: Inequality and Transfo...,426,1103,,,Routeldge
116,Private security in Africa,2830,909,,,'Bloomsbury Academic'
81,Book Review: \u3cem\u3eFat Matters: From Socio...,5837,140,,,Open PRAIRIE: Open Public Research Access Inst...
2,Dr. Kevin C. Casey Motl named dean of Ouachi...,6038,232,,,Scholarly Commons @ Ouachita
1,Entangled Legalities Beyond the State,6391,327,,,'Cambridge University Press (CUP)'
...,...,...,...,...,...,...
183,Social and Political Issues on Sustainable Dev...,1213316,979,,,'Informa UK Limited'
221,Dynamics in the History of Religions between A...,1278558,424,,,'Brill'
238,Irony as a Method of Neoclassical Sociology : ...,1313335,9824,,,Åbo Akademis förlag - Åbo Akademi University P...
135,La sociologia di Luciano Cavalli,1442746,2086,,,'Firenze University Press'


In [125]:
df.publisher.value_counts()

publisher
'Informa UK Limited'                                29
                                                    19
'Lifescience Global'                                14
'Springer Science and Business Media LLC'           12
'SAGE Publications'                                  8
                                                    ..
Routeldge                                            1
'School of Advanced Study'                           1
Encompass                                            1
London School of Economics and Political Science     1
UNI ScholarWorks                                     1
Name: count, Length: 119, dtype: int64

In [126]:
# there are some odd results: e.g. this is a press release
results[2]

{'acceptedDate': None,
 'arxivId': None,
 'authors': [{'name': 'Lawson, Felley'}, {'name': 'Ouachita News Bureau,'}],
 'citationCount': None,
 'contributors': [],
 'outputs': ['https://api.core.ac.uk/v3/outputs/534809425'],
 'createdDate': '2022-10-10T21:50:37',
 'dataProviders': [{'id': 3629,
   'name': '',
   'url': 'https://api.core.ac.uk/v3/data-providers/3629',
   'logo': 'https://api.core.ac.uk/data-providers/3629/logo'}],
 'depositedDate': None,
 'abstract': 'Dr. Kevin C. “Casey” Motl has been named dean of the William H. Sutton School of Social Sciences at Ouachita Baptist University. The Sutton School houses Ouachita’s departments of History, Political Science, Psychology and Sociology',
 'documentType': '',
 'doi': None,
 'downloadUrl': 'https://core.ac.uk/download/534809425.pdf',
 'fieldOfStudy': None,
 'fullText': 'Ouachita Baptist University Scholarly Commons @ Ouachita Press Releases Office of Communication 6-27-2022 Dr. Kevin C. "Casey" Motl named dean of Ouachita\'s Sut

In [131]:
# a book review: the abstract is not actually informative
results[81]

{'acceptedDate': None,
 'arxivId': None,
 'authors': [{'name': 'Hansen, Brittney'}],
 'citationCount': None,
 'contributors': [],
 'outputs': ['https://api.core.ac.uk/v3/outputs/519731483'],
 'createdDate': '2022-06-08T17:37:00',
 'dataProviders': [{'id': 2573,
   'name': '',
   'url': 'https://api.core.ac.uk/v3/data-providers/2573',
   'logo': 'https://api.core.ac.uk/data-providers/2573/logo'}],
 'depositedDate': None,
 'abstract': 'Tsichlia, Gina and Alexandra M. Johnstone (Eds.). Fat Matters: From Sociology to Science. M & K Update Ltd., 2010, 144 pp., $45.00 paperback',
 'documentType': '',
 'doi': None,
 'downloadUrl': 'https://core.ac.uk/download/519731483.pdf',
 'fieldOfStudy': None,
 'fullText': 'Great Plains Sociologist Volume 21 Issue 1 Article 4 2010 Book Review: Fat Matters: From Sociology to Science Edited by Gina Tsichlia and Alexandra M. Johnstone Brittney Hansen University of North Dakota Follow this and additional works at: https://openprairie.sdstate.edu/greatplainsso

In [132]:
# results with too-short text often have problems: e.g. this is just garbage
results[116]

{'acceptedDate': None,
 'arxivId': None,
 'authors': [],
 'citationCount': None,
 'contributors': [],
 'outputs': ['https://api.core.ac.uk/v3/outputs/548537633'],
 'createdDate': '2022-12-10T02:00:32',
 'dataProviders': [{'id': 4636,
   'name': '',
   'url': 'https://api.core.ac.uk/v3/data-providers/4636',
   'logo': 'https://api.core.ac.uk/data-providers/4636/logo'}],
 'depositedDate': None,
 'abstract': 'Across Africa, growing economic inequality, instability and urbanization have led to the rapid spread of private security providers. While these PSPs have already had a significant impact on African societies, their impact has so far received little in the way of comprehensive analysis. Drawing on a wide range of disciplinary approaches, and encompassing anthropology, sociology and political science, Private Security in Africa offers unique insight into the lives and experiences of security providers and those affected by them, as well as into the fragile state context which has allo