# Retrieve published versions of articles on arXiv with `pyzotero`

## Load entire Zotero library

In [194]:
from pyzotero import zotero

# Client for a 'user' library. `library_id` and `api_key` are not defined in
# this cell and must already exist in the kernel.
# NOTE(review): confirm where they are set so Restart & Run All works.
zot = zotero.Zotero(library_id, 'user', api_key)

# Query the top-level items, then let everything() gather the complete result
# (presumably by following the API's pagination; see the pyzotero docs).
top_level_query = zot.top()
items = zot.everything(top_level_query)

## Find arXiv preprints in library

In [195]:
# Collect the library items that look like arXiv preprints: items that have a
# publication title and mention "arxiv" in either that title or their URL.
preprints = []

for item in items:
    data = item['data']
    if 'publicationTitle' not in data:
        continue

    # Use .get() so an item without a 'url' field does not raise KeyError.
    url = data.get('url', '')

    # An item with no URL cannot be looked up later, and a URL containing a
    # space is skipped as well (same rule as before).
    if not url or ' ' in url:
        continue

    # `text` (not `str`) so the builtin type name is not shadowed.
    searchable = (data['publicationTitle'].lower(), url.lower())
    if any('arxiv' in text for text in searchable):
        preprints.append(item)

## Scrape URLs for DOIs

In [204]:
import requests
import re
import textwrap

# Each preprint page is searched for a `citation_doi` attribute; the capture
# group holds everything between `content=` and the closing `/>`.
regex = 'name="citation_doi" content=(.*?)/>'
pattern = re.compile(regex)

# Seconds to wait for a page before giving up. requests has no default
# timeout, so without this a single unresponsive server hangs the cell.
REQUEST_TIMEOUT = 10

new = {}  # Zotero item key -> DOI found on the item's page
keys, titles, DOIs = [], [], []  # parallel lists used for the summary table
for item in preprints:
    data = item['data']
    try:
        r = requests.get(data['url'], timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Covers connection errors, timeouts and malformed URLs. Skip this
        # item instead of aborting and losing the DOIs collected so far.
        print(f"Skipping {data['key']}: request failed ({exc})")
        continue

    match = pattern.search(r.text)
    if match:
        # The captured text still carries the attribute quotes and padding.
        DOI = match[1].replace('"', '').strip()
        new[data['key']] = DOI
        keys.append(data['key'])
        titles.append(data['title'])
        DOIs.append(DOI)

In [205]:
def _format_row(key, title, DOI):
    """Return one fixed-width table row: item key, shortened title, DOI."""
    return f'{key:<9} {textwrap.shorten(title, width=60):<60} {DOI:<30}'


# Headline count, a blank gap, then one row per item for which a DOI was found.
print(f'{len(new)} DOIs have been found among {len(preprints)} preprints.')
print('\n')
table_rows = [_format_row(*entry) for entry in zip(keys, titles, DOIs)]
print('\n'.join(table_rows))

18 DOIs have been found among 33 preprints.


PH2Z6HN3  Crossover from Fabry-Pérot to charging oscillations in [...] 10.1103/PhysRevLett.125.187701
DBX4GU48  Cavity Quantum Electrodynamics with Second-Order [...]       10.1002/lpor.201900425        
CIJHD52N  Four-wave mixing dynamics of a strongly coupled [...]        10.1103/PhysRevB.101.245301   
WV7YVYQA  Flux-mediated optomechanics with a transmon qubit in [...]   10.1103/PhysRevResearch.2.023335
5HQGSXL6  Strong coupling between a single quantum dot and an [...]    10.35848/1882-0786/aba7a8     
YPFNY6J4  Quantum jump approach to microscopic heat engines            10.1103/PhysRevResearch.2.033449
HPVJIM5D  A Double Quantum Dot Spin Valve                              10.1038/s42005-020-00405-2    
AM37R9KQ  Waveguide Quantum Electrodynamics with Giant [...]           10.1038/s41586-020-2529-9     
9UTDNZWQ  Machine learning enables completely automatic tuning [...]   10.1038/s41467-020-17835-9    
44NST65M  Hybrid superconductor-

## Update items in library

In [198]:
# Write each DOI found above back to the corresponding Zotero item.
success = []  # one update_item() result per entry in `new`, in the same order
for key, DOI in new.items():
    to_update = zot.item(key)
    to_update['data']['DOI'] = DOI
    success.append(zot.update_item(to_update))

# Report once, after the loop has finished. The check used to sit inside the
# loop, so the message was printed after every successful item.
if success and all(success):
    print('All preprints have been updated with new DOI.')
elif success:
    # Name the items whose update returned a falsy result rather than
    # silently ignoring them.
    failed = [key for key, ok in zip(new, success) if not ok]
    print(f'Update failed for: {", ".join(failed)}')
    

{'PH2Z6HN3': '10.1103/PhysRevLett.125.187701',
 'DBX4GU48': '10.1002/lpor.201900425',
 'CIJHD52N': '10.1103/PhysRevB.101.245301',
 'WV7YVYQA': '10.1103/PhysRevResearch.2.023335',
 '5HQGSXL6': '10.35848/1882-0786/aba7a8',
 'YPFNY6J4': '10.1103/PhysRevResearch.2.033449',
 'HPVJIM5D': '10.1038/s42005-020-00405-2',
 'AM37R9KQ': '10.1038/s41586-020-2529-9',
 '9UTDNZWQ': '10.1038/s41467-020-17835-9',
 '44NST65M': '10.1063/5.0004777',
 'M39R38QF': '10.1016/j.scib.2020.10.005',
 'ZCD3D237': '10.1103/PhysRevB.102.104503',
 'LVED9S8B': '10.1103/PhysRevLett.125.237701',
 'NLFCSQ4A': '10.1103/PhysRevResearch.2.033040',
 'D7IJT7BU': '10.1103/PhysRevResearch.2.043075',
 'UXEJ3YWN': '10.22331/q-2020-09-28-335',
 'U5XKSL9T': '10.1126/science.abb9811',
 'NC93WPG9': '10.22331/q-2020-10-09-340'}