# Python basics week 7 - APIs

## Example 1
Retrieve a single piece of data from an API endpoint

Read the docs for Crossref Commons: https://gitlab.com/crossref/crossref_commons_py

In [None]:
# install whichever packages you don't have yet
pip install crossref-commons pandas

In [1]:
import crossref_commons.retrieval
import pandas as pd

In [None]:
study = crossref_commons.retrieval.get_publication_as_json('10.5621/sciefictstud.40.2.0382')
study

In [None]:
study
# This looks an awful lot like a dictionary

In [None]:
study['publisher']  # you can call keys directly on the JSON object

In [None]:
# normally, we could do something like this, but it won't work with this json structure
df = pd.DataFrame(study)

In [None]:
# Instead, we use json_normalize to flatten the JSON structure
df = pd.json_normalize(study)
df

In [None]:
df['title'][0]

In [None]:
type(df['title'][0])
# This is being read as a list, but we want it as a string

In [None]:
# You might think we could just do this, but it won't work in upcoming versions of pandas
# df['title'][0] = str(df['title'][0])
# https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# We do this instead
# .loc allows us to access a cell or a group of rows and columns by labels (like coordinates for the dataframe)
# .loc[row_indexer, col_indexer]
df.loc[0, "title"] = str(df.loc[0, 'title'])
type(df['title'][0])

In [None]:
df.loc[0, "title"] = df['title'][0].replace('\'', '')
df =   # Get rid of the single quotes and brackets
df

## Example 2
Generate a list of identifiers from one API endpoint and use them to retrieve data from a second API endpoint.

In [25]:
retracted_df = pd.json_normalize(crossref_commons.retrieval.get_publication_as_json("10.1007/978-981-15-9916-3_8"))
retracted_df


Unnamed: 0,publisher-location,update-to,reference-count,publisher,isbn-type,license,short-container-title,DOI,type,page,...,created.date-parts,created.date-time,created.timestamp,published-online.date-parts,deposited.date-parts,deposited.date-time,deposited.timestamp,resource.primary.URL,issued.date-parts,published.date-parts
0,Singapore,"[{'updated': {'date-parts': [[2021, 3, 23]], '...",58,Springer Nature Singapore,"[{'type': 'print', 'value': '9789811599156'}, ...","[{'start': {'date-parts': [[2021, 1, 1]], 'dat...",[],10.1007/978-981-15-9916-3_8,book-chapter,207-207,...,"[[2021, 3, 22]]",2021-03-22T21:18:13Z,1616447893000,"[[2021, 3, 23]]","[[2023, 10, 23]]",2023-10-23T18:27:23Z,1698085643000,https://link.springer.com/10.1007/978-981-15-9...,[[2021]],[[2021]]


In [None]:
from crossref_commons.iteration import iterate_publications_as_json
dois = []
filter = {'update-type':'retraction', 'type':'journal-article'}
query = {'query.affiliation': 'university'}
for pub in iterate_publications_as_json(max_results=100, filter=filter, queries=query):
  dois.append(pub['DOI'])

dois



['10.1103/physrevlett.126.117003',
 '10.4049/jimmunol.1290021',
 '10.1364/oe.484081',
 '10.4049/jimmunol.2200762',
 '10.4040/jkan.2015.45.1.157',
 '10.3390/gels11080640',
 '10.1042/bsr20193893',
 '10.1364/oe.487520',
 '10.4028/www.scientific.net/amr.760-762.45',
 '10.4028/www.scientific.net/amr.1155.1',
 '10.4028/www.scientific.net/amm.341-342.204',
 '10.2308/isys-10429',
 '10.4028/www.scientific.net/amr.1107.301',
 '10.1039/d4nr90151g',
 '10.1080/21655979.2021.2016046',
 '10.1042/bsr20201260',
 '10.1103/physrevlett.127.016401',
 '10.4028/www.scientific.net/amm.543-547.3382',
 '10.4070/kcj.2023.0999',
 '10.2308/ajpt-10448',
 '10.2308/bria-10430',
 '10.1364/ao.423154',
 '10.3390/polym16111512',
 '10.3846/13926292.2015.1024973',
 '10.1111/ceo.2862',
 '10.3390/v13112154',
 '10.4028/www.scientific.net/msf.984.224',
 '10.4028/www.scientific.net/amm.556-562.5243',
 '10.3839/jabc.2021.063',
 '10.1042/bsr20192842',
 '10.1364/ao.492842',
 '10.1364/oe.518972',
 '10.1248/bpb.32.1135a',
 '10.3892/

In [27]:
for doi in dois:
  response = pd.json_normalize(crossref_commons.retrieval.get_publication_as_json(doi))
  retracted_df = pd.concat([retracted_df, response], ignore_index=True)

retracted_df
# The Data Wrangler extension is really helpful for exploring larger dataframes

Unnamed: 0,publisher-location,update-to,reference-count,publisher,isbn-type,license,short-container-title,DOI,type,page,...,article-number,accepted.date-parts,journal-issue.issue,journal-issue.published-print.date-parts,published-other.date-parts,abstract,journal-issue.published-online.date-parts,alternative-id,aliases,archive
0,Singapore,"[{'updated': {'date-parts': [[2021, 3, 23]], '...",58,Springer Nature Singapore,"[{'type': 'print', 'value': '9789811599156'}, ...","[{'start': {'date-parts': [[2021, 1, 1]], 'dat...",[],10.1007/978-981-15-9916-3_8,book-chapter,207-207,...,,,,,,,,,,
1,,"[{'updated': {'date-parts': [[2023, 12, 7]], '...",37,American Physical Society (APS),,"[{'start': {'date-parts': [[2021, 3, 19]], 'da...",[Phys. Rev. Lett.],10.1103/physrevlett.126.117003,journal-article,,...,117003,"[[2021, 1, 19]]",11,"[[2021, 3]]",,,,,,
2,,"[{'record-id': '4490', 'source': 'retraction-w...",0,Oxford University Press (OUP),,"[{'start': {'date-parts': [[2012, 6, 1]], 'dat...",[],10.4049/jimmunol.1290021,journal-article,5801-5801,...,,,11,"[[2012, 6, 1]]","[[2012, 6]]",,,,,
3,,"[{'updated': {'date-parts': [[2023, 9, 11]], '...",1,Optica Publishing Group,,"[{'start': {'date-parts': [[2023, 9, 11]], 'da...",[Opt. Express],10.1364/oe.484081,journal-article,31670,...,,,20,[[2023]],,<jats:p>The referenced article [<jats:mixed-ci...,[[2023]],,,
4,,"[{'record-id': '41043', 'source': 'retraction-...",0,Oxford University Press (OUP),,"[{'start': {'date-parts': [[2022, 12, 1]], 'da...",[],10.4049/jimmunol.2200762,journal-article,2362-2362,...,,,12,"[[2022, 12, 15]]","[[2022, 12]]",,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,,"[{'record-id': '179', 'source': 'retraction-wa...",0,American Thoracic Society,,,[Am J Respir Crit Care Med],10.1164/ajrccm.162.3.retraction_a,journal-article,788-788,...,,,3,"[[2000, 9, 1]]",,,,[10.1164/ajrccm.162.3.retraction_a],,
97,,"[{'updated': {'date-parts': [[2023, 11, 28]], ...",2,MDPI AG,,"[{'start': {'date-parts': [[2024, 8, 23]], 'da...",[Cryptography],10.3390/cryptography8030037,journal-article,37,...,,,3,,,"<jats:p>The journal retracts the article, “A P...","[[2024, 9]]",[cryptography8030037],,
98,,"[{'record-id': '22257', 'source': 'retraction-...",0,Oxford University Press (OUP),,"[{'start': {'date-parts': [[2020, 1, 31]], 'da...",[],10.1093/geront/gnaa013,journal-article,e101-e101,...,,,3,"[[2021, 4, 3]]","[[2021, 4, 1]]",,"[[2020, 1, 31]]",,,
99,,"[{'record-id': '23565', 'source': 'retraction-...",2,AIP Publishing,,,[],10.1063/5.0013366,journal-article,,...,069901,,6,"[[2020, 6, 1]]","[[2020, 6]]",,,,,


In [28]:
retracted_df.to_csv('retracted.csv', index=False)

## Example 2a

In [42]:
darwin_df = pd.json_normalize(crossref_commons.retrieval.get_publication_as_json("10.5962/bhl.title.74"))
darwin_df

Unnamed: 0,publisher-location,reference-count,publisher,short-container-title,DOI,type,source,is-referenced-by-count,title,prefix,...,published-print.date-parts,created.date-parts,created.date-time,created.timestamp,deposited.date-parts,deposited.date-time,deposited.timestamp,resource.primary.URL,issued.date-parts,published.date-parts
0,London :,0,"J. Murray,",[],10.5962/bhl.title.74,monograph,Crossref,3,[Insectivorous plants /by Charles Darwin.],10.5962,...,[[1888]],"[[2011, 11, 10]]",2011-11-10T20:10:22Z,1320955822000,"[[2011, 11, 10]]",2011-11-10T20:10:31Z,1320955831000,http://www.biodiversitylibrary.org/bibliograph...,[[1888]],[[1888]]


In [41]:
from crossref_commons.iteration import iterate_publications_as_json
darwin_dois = []
query = {'query.author': 'Charles Darwin',}
filter = {'type':'monograph'}
for pub in iterate_publications_as_json(max_results=100, filter=filter, queries=query):
  darwin_dois.append(pub['DOI'])

darwin_dois

['10.5962/bhl.title.56969',
 '10.5962/bhl.title.52034',
 '10.5962/bhl.title.23455',
 '10.5962/bhl.title.74',
 '10.5962/bhl.title.1416',
 '10.5962/bhl.title.56070',
 '10.5962/bhl.title.23707',
 '10.5962/bhl.title.168964',
 '10.5962/bhl.title.23454',
 '10.5962/bhl.title.1600',
 '10.5962/bhl.title.24483',
 '10.5962/bhl.title.57208',
 '10.5962/bhl.title.1368',
 '10.5962/bhl.title.164114',
 '10.5962/bhl.title.26231',
 '10.5962/bhl.title.167318',
 '10.5962/bhl.title.50683',
 '10.5962/bhl.title.1417',
 '10.5962/bhl.title.49491',
 '10.5962/bhl.title.33365',
 '10.5962/bhl.title.17709',
 '10.5962/bhl.title.56998',
 '10.1017/cbo9780511693670',
 '10.5962/bhl.title.163749',
 '10.5962/bhl.title.56422',
 '10.5962/bhl.title.2075',
 '10.5962/bhl.title.160128',
 '10.5962/bhl.title.28353',
 '10.1017/cbo9780511694202',
 '10.5962/bhl.title.46249',
 '10.5962/bhl.title.114905',
 '10.15304/pu.2022.32',
 '10.5962/bhl.title.43845',
 '10.5962/bhl.title.24162',
 '10.5962/bhl.title.48549',
 '10.5962/bhl.title.5606

In [43]:
for doi in darwin_dois:
  response = pd.json_normalize(crossref_commons.retrieval.get_publication_as_json(doi))
  darwin_df = pd.concat([darwin_df, response], ignore_index=True)

darwin_df
# The Data Wrangler extension is really helpful for exploring larger dataframes

Unnamed: 0,publisher-location,reference-count,publisher,short-container-title,DOI,type,source,is-referenced-by-count,title,prefix,...,isbn-type,license,abstract,ISBN,published-online.date-parts,relation.is-identical-to,translator,indexed.version,language,editor
0,London :,0,"J. Murray,",[],10.5962/bhl.title.74,monograph,Crossref,3,[Insectivorous plants /by Charles Darwin.],10.5962,...,,,,,,,,,,
1,New York :,0,"A.L. Burt,",[],10.5962/bhl.title.56969,monograph,Crossref,4,[The origin of species by means of natural sel...,10.5962,...,,,,,,,,,,
2,London :,0,"J. Murray,",[],10.5962/bhl.title.52034,monograph,Crossref,3,[Journal of researches into the natural histor...,10.5962,...,,,,,,,,,,
3,London :,0,"J. Unray,",[],10.5962/bhl.title.23455,monograph,Crossref,1,"[The life and letters of Charles Darwin, inclu...",10.5962,...,,,,,,,,,,
4,London :,0,"J. Murray,",[],10.5962/bhl.title.74,monograph,Crossref,3,[Insectivorous plants /by Charles Darwin.],10.5962,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,New York :,0,"D. Appleton,",[],10.5962/bhl.title.26055,monograph,Crossref,1,[Insectivorous plants / by Charles Darwin.],10.5962,...,,,,,,,,,,
97,London :,0,"J. Murray,",[],10.5962/bhl.title.22342,monograph,Crossref,2,[Journal of researches into the natural histor...,10.5962,...,,,,,,,,,,
98,New York :,0,"P. F. Collier,",[],10.5962/bhl.title.27114,monograph,Crossref,4,[Origin of species by means of natural selecti...,10.5962,...,,,,,,,,,,
99,New York,0,D. Appleton,[],10.5962/bhl.title.56364,monograph,Crossref,3,[Journal of researches into the natural histor...,10.5962,...,,,,,,,,,,


In [61]:
darwin_df['DOI'].nunique()

100