# Python basics week 7 - APIs

In [None]:
# install whichever packages you don't have yet
pip install crossref-commons pandas

Collecting crossref-commons
  Downloading crossref_commons-0.0.7-py3-none-any.whl.metadata (3.2 kB)
Collecting ratelimit>=2.2.1 (from crossref-commons)
  Downloading ratelimit-2.2.1.tar.gz (5.3 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Downloading crossref_commons-0.0.7-py3-none-any.whl (14 kB)
Building wheels for collected packages: ratelimit
  Building wheel for ratelimit (pyproject.toml): started
  Building wheel for ratelimit (pyproject.toml): finished with status 'done'
  Created wheel for ratelimit: filename=ratelimit-2.2.1-py3-none-any.whl size=6007 sha256=8be7ca6b18be12d68b4b1130375f76bcd676fe13608347173b7661e16a3b11a1
  Stored in directory: c:\users\swatson\appdata\local\pip\cache\wheels\bc

In [2]:
import crossref_commons.retrieval
import pandas as pd

In [3]:
study = crossref_commons.retrieval.get_publication_as_json('10.5621/sciefictstud.40.2.0382')
study

{'indexed': {'date-parts': [[2025, 8, 2]],
  'date-time': '2025-08-02T18:35:31Z',
  'timestamp': 1754159731358,
  'version': '3.41.2'},
 'reference-count': 0,
 'publisher': 'University of California Press',
 'issue': '2',
 'content-domain': {'domain': [], 'crossmark-restriction': False},
 'short-container-title': ['Science Fiction Studies'],
 'published-print': {'date-parts': [[2013]]},
 'DOI': '10.5621/sciefictstud.40.2.0382',
 'type': 'journal-article',
 'created': {'date-parts': [[2013, 7, 2]],
  'date-time': '2013-07-02T08:51:49Z',
  'timestamp': 1372755109000},
 'page': '382',
 'source': 'Crossref',
 'is-referenced-by-count': 0,
 'title': ['Humanism on Gallifrey'],
 'prefix': '10.1525',
 'volume': '40',
 'author': [{'family': 'Elizabeth Lundberg',
   'sequence': 'first',
   'affiliation': []}],
 'member': '408',
 'container-title': ['Science Fiction Studies'],
 'original-title': [],
 'deposited': {'date-parts': [[2025, 7, 25]],
  'date-time': '2025-07-25T20:47:51Z',
  'timestamp':

In [5]:
study
# This looks an awful lot like a dictionary

{'indexed': {'date-parts': [[2025, 8, 2]],
  'date-time': '2025-08-02T18:35:31Z',
  'timestamp': 1754159731358,
  'version': '3.41.2'},
 'reference-count': 0,
 'publisher': 'University of California Press',
 'issue': '2',
 'content-domain': {'domain': [], 'crossmark-restriction': False},
 'short-container-title': ['Science Fiction Studies'],
 'published-print': {'date-parts': [[2013]]},
 'DOI': '10.5621/sciefictstud.40.2.0382',
 'type': 'journal-article',
 'created': {'date-parts': [[2013, 7, 2]],
  'date-time': '2013-07-02T08:51:49Z',
  'timestamp': 1372755109000},
 'page': '382',
 'source': 'Crossref',
 'is-referenced-by-count': 0,
 'title': ['Humanism on Gallifrey'],
 'prefix': '10.1525',
 'volume': '40',
 'author': [{'family': 'Elizabeth Lundberg',
   'sequence': 'first',
   'affiliation': []}],
 'member': '408',
 'container-title': ['Science Fiction Studies'],
 'original-title': [],
 'deposited': {'date-parts': [[2025, 7, 25]],
  'date-time': '2025-07-25T20:47:51Z',
  'timestamp':

In [6]:
study['publisher']  # you can call keys directly on the JSON object

'University of California Press'

In [7]:
# normally, we could do something like this, but it won't work with this json structure
df = pd.DataFrame(study)

ValueError: All arrays must be of the same length

In [8]:
# Instead, we use json_normalize to flatten the JSON structure
df = pd.json_normalize(study)
df

Unnamed: 0,reference-count,publisher,issue,short-container-title,DOI,type,page,source,is-referenced-by-count,title,...,created.date-time,created.timestamp,deposited.date-parts,deposited.date-time,deposited.timestamp,resource.primary.URL,issued.date-parts,journal-issue.issue,journal-issue.published-print.date-parts,published.date-parts
0,0,University of California Press,2,[Science Fiction Studies],10.5621/sciefictstud.40.2.0382,journal-article,382,Crossref,0,[Humanism on Gallifrey],...,2013-07-02T08:51:49Z,1372755109000,"[[2025, 7, 25]]",2025-07-25T20:47:51Z,1753476471000,https://online.ucpress.edu/sfs/article/40/Part...,[[2013]],2,[[2013]],[[2013]]


In [9]:
df['title'][0]

['Humanism on Gallifrey']

In [10]:
type(df['title'][0])

list

In [11]:
# You might think we could just do this, but it won't work in upcoming versions of pandas
# https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

df['title'][0] = str(df['title'][0])

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['title'][0] = str(df['title'][0])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['title'][0] = str(df['t

In [12]:
# We do this instead
df.loc[0, "title"] = str(df['title'][0]).replace('\'', '')  # Get rid of the single quotes around the title while we're at it
df

Unnamed: 0,reference-count,publisher,issue,short-container-title,DOI,type,page,source,is-referenced-by-count,title,...,created.date-time,created.timestamp,deposited.date-parts,deposited.date-time,deposited.timestamp,resource.primary.URL,issued.date-parts,journal-issue.issue,journal-issue.published-print.date-parts,published.date-parts
0,0,University of California Press,2,[Science Fiction Studies],10.5621/sciefictstud.40.2.0382,journal-article,382,Crossref,0,[Humanism on Gallifrey],...,2013-07-02T08:51:49Z,1372755109000,"[[2025, 7, 25]]",2025-07-25T20:47:51Z,1753476471000,https://online.ucpress.edu/sfs/article/40/Part...,[[2013]],2,[[2013]],[[2013]]


In [13]:
from crossref_commons.iteration import iterate_publications_as_json
DOIS = []
filter = {'member': '3444',}
for pub in iterate_publications_as_json(max_results=100, filter=filter):
  DOIS.append(pub['DOI'])

DOIS



['10.5962/bhl.title.49941',
 '10.5962/bhl.title.108915',
 '10.5962/bhl.title.94153',
 '10.5962/bhl.part.23369',
 '10.5479/si.00963801.30-1462.695',
 '10.5962/bhl.title.33142',
 '10.5962/p.313802',
 '10.5962/bhl.title.142240',
 '10.5962/bhl.title.42454',
 '10.5962/bhl.title.65831',
 '10.5479/si.00775630.29.1',
 '10.5479/si.00963801.1126.457',
 '10.5962/bhl.title.109197',
 '10.5962/bhl.title.63910',
 '10.5479/si.00810282.250',
 '10.5962/bhl.part.1065',
 '10.5479/si.00775630.93.1',
 '10.5962/bhl.title.33014',
 '10.5962/bhl.title.28774',
 '10.5962/bhl.title.140653',
 '10.5962/bhl.title.26910',
 '10.5962/bhl.part.4855',
 '10.5962/bhl.title.117805',
 '10.5962/bhl.title.14483',
 '10.5962/bhl.title.61362',
 '10.5962/bhl.title.147308',
 '10.5962/bhl.title.138321',
 '10.5962/bhl.title.104186',
 '10.5962/bhl.title.101143',
 '10.5962/bhl.title.55910',
 '10.5962/bhl.title.147959',
 '10.5962/bhl.title.130362',
 '10.5962/bhl.title.50454',
 '10.5962/bhl.title.146247',
 '10.5962/p.320447',
 '10.5962/bh

In [14]:
df = pd.json_normalize(crossref_commons.retrieval.get_publication_as_json('10.5962/bhl.title.49941'))
df


Unnamed: 0,publisher-location,reference-count,publisher,short-container-title,DOI,type,source,is-referenced-by-count,title,prefix,...,published-print.date-parts,created.date-parts,created.date-time,created.timestamp,deposited.date-parts,deposited.date-time,deposited.timestamp,resource.primary.URL,issued.date-parts,published.date-parts
0,London :,0,"H.G. Bohn,",[],10.5962/bhl.title.49941,monograph,Crossref,1,[A selection of the birds of Brazil and Mexico...,10.5962,...,[[1841]],"[[2011, 11, 23]]",2011-11-23T21:51:04Z,1322085064000,"[[2011, 11, 23]]",2011-11-23T21:51:19Z,1322085079000,http://www.biodiversitylibrary.org/bibliograph...,[[1841]],[[1841]]


In [17]:
for DOI in DOIS:
  response = crossref_commons.retrieval.get_publication_as_json(DOI)
  df = pd.concat([df, pd.json_normalize(response)], ignore_index=True)

df

Unnamed: 0,publisher-location,reference-count,publisher,short-container-title,DOI,type,source,is-referenced-by-count,title,prefix,...,journal-issue.published-print.date-parts,institution,license,abstract,subtype,posted.date-parts,editor,standards-body.name,standards-body.acronym,approved.date-parts
0,London :,0,"H.G. Bohn,",[],10.5962/bhl.title.49941,monograph,Crossref,1,[A selection of the birds of Brazil and Mexico...,10.5962,...,,,,,,,,,,
1,London :,0,"H.G. Bohn,",[],10.5962/bhl.title.49941,monograph,Crossref,1,[A selection of the birds of Brazil and Mexico...,10.5962,...,,,,,,,,,,
2,"Washington, D.C. :",0,"U.S. Dept. of Agriculture,",[],10.5962/bhl.title.108915,monograph,Crossref,1,[Land reclamation policies in the United State...,10.5962,...,,,,,,,,,,
3,"Portland, Or. :",0,Pacific Northwest Forest and Range Experiment ...,[],10.5962/bhl.title.94153,monograph,Crossref,2,[Pruning of ponderosa pine : effect on growth /],10.5962,...,,,,,,,,,,
4,,0,Smithsonian Institution,[],10.5962/bhl.part.23369,journal-article,Crossref,1,[Zwei neue südamerikanische Microdon-Arten],10.5962,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,"Torino,",0,"Giuseppe Cassone,",[],10.5962/bhl.title.6118,monograph,Crossref,0,"[Flora medico-farmaceutica, compilata dal dott...",10.5962,...,,,,,,,,,,
97,London :,0,"Chapman and Hall,",[],10.5962/bhl.title.25933,monograph,Crossref,0,[Sport / by W. Bromley-Davenport ; illustrated...,10.5962,...,,,,,,,,,,
98,"Upper Darby, Pa. :",0,"Northeastern Forest Experiment Station,",[],10.5962/bhl.title.84489,monograph,Crossref,0,[Trees-- helping to clean our air??],10.5962,...,,,,,,,,,,
99,"Marysville, California :",0,"Donald Morse Nursery,",[],10.5962/bhl.title.133707,monograph,Crossref,0,"[Price list : January 1, 1925 /]",10.5962,...,,,,,,,,,,


In [18]:
# Last bit of cleanup of duplicated first row:
df.drop(index=0, inplace=True)
df



Unnamed: 0,publisher-location,reference-count,publisher,short-container-title,DOI,type,source,is-referenced-by-count,title,prefix,...,journal-issue.published-print.date-parts,institution,license,abstract,subtype,posted.date-parts,editor,standards-body.name,standards-body.acronym,approved.date-parts
1,London :,0,"H.G. Bohn,",[],10.5962/bhl.title.49941,monograph,Crossref,1,[A selection of the birds of Brazil and Mexico...,10.5962,...,,,,,,,,,,
2,"Washington, D.C. :",0,"U.S. Dept. of Agriculture,",[],10.5962/bhl.title.108915,monograph,Crossref,1,[Land reclamation policies in the United State...,10.5962,...,,,,,,,,,,
3,"Portland, Or. :",0,Pacific Northwest Forest and Range Experiment ...,[],10.5962/bhl.title.94153,monograph,Crossref,2,[Pruning of ponderosa pine : effect on growth /],10.5962,...,,,,,,,,,,
4,,0,Smithsonian Institution,[],10.5962/bhl.part.23369,journal-article,Crossref,1,[Zwei neue südamerikanische Microdon-Arten],10.5962,...,,,,,,,,,,
5,,0,Smithsonian Institution,[],10.5479/si.00963801.30-1462.695,journal-article,Crossref,2,"[List of fishes collected on Tanega and Yaku, ...",10.5479,...,[[1906]],,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,"Torino,",0,"Giuseppe Cassone,",[],10.5962/bhl.title.6118,monograph,Crossref,0,"[Flora medico-farmaceutica, compilata dal dott...",10.5962,...,,,,,,,,,,
97,London :,0,"Chapman and Hall,",[],10.5962/bhl.title.25933,monograph,Crossref,0,[Sport / by W. Bromley-Davenport ; illustrated...,10.5962,...,,,,,,,,,,
98,"Upper Darby, Pa. :",0,"Northeastern Forest Experiment Station,",[],10.5962/bhl.title.84489,monograph,Crossref,0,[Trees-- helping to clean our air??],10.5962,...,,,,,,,,,,
99,"Marysville, California :",0,"Donald Morse Nursery,",[],10.5962/bhl.title.133707,monograph,Crossref,0,"[Price list : January 1, 1925 /]",10.5962,...,,,,,,,,,,


In [21]:
df.to_csv('darwin.csv', index=False)