In [6]:
import bibtexparser
import requests
from bs4 import BeautifulSoup

def get_clickable_link(arxiv_id):
    base_url = "http://export.arxiv.org/api/query"
    params = {"id_list": arxiv_id}
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        # Search for the journal_ref tag in the Atom feed
        journal_ref = soup.find('arxiv:doi')
        if journal_ref:
            doi_link = f"https://doi.org/{journal_ref.text.strip()}"
            return f'<a href="{doi_link}" target="_blank">{doi_link}</a>'
        else:
            arxiv_link = f"https://arxiv.org/abs/{arxiv_id}"
            return f'<a href="{arxiv_link}" target="_blank">{arxiv_link}</a>'
    else:
        print("Failed to retrieve data from arXiv")
        return None

def get_inspirehep_entry(doi):
    url = f'https://inspirehep.net/api/literature?q=doi:{doi}'
    response = requests.get(url)

    if response.status_code == 200:
        data = response.json()
        # Simplified: Assume first record is the correct one and extract BibTeX
        # You might need to adjust based on the actual structure of INSPIRE-HEP's response
        # print(data)
        search_data = response.json()
        if len(search_data['hits']['hits'])>0:
            bibtex_url = search_data['hits']['hits'][0]['links']['bibtex']
            bibtex_response = requests.get(bibtex_url)

            return bibtex_response.text

    else:
        print("Failed to retrieve data from INSPIRE-HEP")
        print(response.status_code)
    return None

def get_arxiv_journal_version(arxiv_id):
    base_url = "http://export.arxiv.org/api/query"
    params = {"id_list": arxiv_id}
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        doi = soup.find('arxiv:doi')
        if doi:
            doi_text = doi.text.strip()
            doi_link = f"https://doi.org/{doi_text}"
            return {'link': doi_link, 'type': 'DOI'}
        else:
            arxiv_link = f"https://arxiv.org/abs/{arxiv_id}"
            return {'link': arxiv_link, 'type': 'arXiv'}
    else:
        print(response)
        print("Failed to retrieve data")
        return None
def add_arxiv_url_to_entry(entry):
    if 'eprint' in entry and entry.get('archivePrefix', '').lower() == 'arxiv':
        arxiv_id = entry['eprint']
        arxiv_url = f'https://arxiv.org/abs/{arxiv_id}'
        entry['url'] = arxiv_url  # Add or update the URL field with the arXiv link

def clean_entry(entry):
    # This function ensures that only the 'doi' field is set if available, otherwise, set the URL to the arXiv link
    if 'doi' in entry:
        entry['url'] = f"https://doi.org/{entry['doi']}"
    elif 'eprint' in entry and entry.get('archivePrefix', '').lower() == 'arxiv':
        arxiv_info = get_arxiv_journal_version(entry['eprint'])
        if arxiv_info:
            entry['url'] = arxiv_info['link']
            if arxiv_info['type'] == 'DOI':
                entry['doi'] = arxiv_info['link'].split('/')[-1]
    for key in ['bdsk-url-1', 'eprint']:
        if key in entry:
            del entry[key]

with open("bibtex.bib") as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)
i=0
j=0
inspireheps = []
arxivs=[]
for entry in bib_database.entries:
    original_id = entry.get('ID', None)
    if 'doi' in entry:
        updated_entry = get_inspirehep_entry(entry['doi'])

        if updated_entry:
            i+=1
            print("success doi")
            entry.update(bibtexparser.loads(updated_entry).entries[0])

            inspireheps.append(entry)
    elif 'eprint' in entry:

        if entry.get('archiveprefix', '').lower() == 'arxiv':
            updated_entry = get_arxiv_journal_version(entry['eprint'])
            entry.update(updated_entry)

            if updated_entry:
                j+=1
                print("success eprint")
                arxivs.append(entry)
            else:
                print("no updated eprint")
        else:
            print("no eprint")
    print("no eprint nor doi")
    if original_id:
        entry['ID'] = original_id  # Ensure original ID is preserved
   # clean_entry(entry)  # Clean
    if i%5==0:
        import time
        time.sleep(2)

    # Write updated BibTeX
with open('updated_arxiv_references.bib', 'w') as bibtex_file:
    bibtexparser.dump(bib_database, bibtex_file)



Entry type software not standard. Not considered.
Entry type online not standard. Not considered.


success doi
no eprint nor doi


  k = self.parse_starttag(i)


success eprint
no eprint nor doi
no eprint nor doi
no eprint nor doi
no eprint nor doi
no eprint nor doi
no eprint nor doi
success doi
no eprint nor doi
no eprint nor doi
no eprint nor doi
no eprint
no eprint nor doi
success doi
no eprint nor doi
no eprint
no eprint nor doi
no eprint nor doi
no eprint
no eprint nor doi
success doi
no eprint nor doi
success doi
no eprint nor doi
success doi
no eprint nor doi
no eprint nor doi
success eprint
no eprint nor doi
success doi
no eprint nor doi
success doi
no eprint nor doi
success doi
no eprint nor doi
success doi
no eprint nor doi
success eprint
no eprint nor doi
success doi
no eprint nor doi
success eprint
no eprint nor doi
success eprint
no eprint nor doi
success eprint
no eprint nor doi
success eprint
no eprint nor doi
no eprint nor doi
success eprint
no eprint nor doi
no eprint nor doi
success eprint
no eprint nor doi
success eprint
no eprint nor doi
no eprint nor doi
no eprint nor doi
no eprint nor doi
no eprint nor doi
no eprint nor do

In [4]:
inspireheps[0]

'@article{Feickert:2021ajf,\n    author = "Feickert, Matthew and Nachman, Benjamin",\n    title = "{A Living Review of Machine Learning for Particle Physics}",\n    eprint = "2102.02770",\n    archivePrefix = "arXiv",\n    primaryClass = "hep-ph",\n    month = "2",\n    year = "2021"\n}\n'

In [9]:
arxivs

[{'link': 'https://arxiv.org/abs/1502.03509', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1312.6114', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1701.07875', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1410.5401', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1508.04025', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1409.0473', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1606.00709', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1502.03044', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1901.09006', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1703.06114', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/2305.15254', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1912.02292', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1312.6114', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1802.05957', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1602.07868', 'type': 'arXiv'},
 {'link': 'https://arxiv.org/abs/1704.03971'

In [7]:
from bibtexparser.bibdatabase import BibDatabase
bib_database = BibDatabase()
# Populate the database with your entries
bib_database.entries = inspireheps

bibtex_str = bibtexparser.dumps(bib_database)

# # Save the BibTeX string to a file
with open('updated_inspirehep.bib', 'w') as bibtex_file:
    bibtex_file.write(bibtex_str)

# bib_database = BibDatabase()
# # Populate the database with your entries
# bib_database.entries = arxivs
# bibtex_str = bibtexparser.dumps(bib_database)

# # Save the BibTeX string to a file
# with open('updated_arrxiv.bib', 'w') as bibtex_file:
#     bibtex_file.write(bibtex_str)

In [5]:
import pandas as pd
import numpy as np

# test data
data = [ [ 13471,  "2090.2091.2139.2169", 	[2090, 2091, 2139, 2169]],
         [ 27898,  "2090.2091.2141.2181",	[2090, 2091, 2141, 2181, ]],
         [ 107921, "2090.2091.2139.2169",	[2090, 2091, 2139, 2169, ]],
         [ 120472, "2090.2091.2139.2169",	[2090, 2091, 2140, 2180, ]]
       ]

df = pd.DataFrame(data, columns=['ArtikelNr', 'WGrStr', "WGrList"])
# suche alle Zeilen die den gesuchten Wert  ein Wert
x = 2181

x = 2139
print( "Suche 2139" ,  df[["ArtikelNr","WGrStr","WGrList"]][df['WGrList'].map( lambda z:x in z)  ])

x = 2090
print( "Suche 2090" ,  df[["ArtikelNr","WGrStr","WGrList"]][df['WGrList'].map( lambda z:x in z)  ])
# erweitere die Suche mit einer Liste  - suche alle Zeilen die mindestens einen Wert aus der Liste enthalten
x = [2181, 2180,2090]
print( "Suche 2181" ,  df[["ArtikelNr","WGrStr","WGrList"]][df['WGrList'].apply( lambda z: any( [temp in z for temp in x] ) )  ])




Suche 2139    ArtikelNr               WGrStr                   WGrList
0      13471  2090.2091.2139.2169  [2090, 2091, 2139, 2169]
2     107921  2090.2091.2139.2169  [2090, 2091, 2139, 2169]
Suche 2090    ArtikelNr               WGrStr                   WGrList
0      13471  2090.2091.2139.2169  [2090, 2091, 2139, 2169]
1      27898  2090.2091.2141.2181  [2090, 2091, 2141, 2181]
2     107921  2090.2091.2139.2169  [2090, 2091, 2139, 2169]
3     120472  2090.2091.2139.2169  [2090, 2091, 2140, 2180]
Suche 2181    ArtikelNr               WGrStr                   WGrList
0      13471  2090.2091.2139.2169  [2090, 2091, 2139, 2169]
1      27898  2090.2091.2141.2181  [2090, 2091, 2141, 2181]
2     107921  2090.2091.2139.2169  [2090, 2091, 2139, 2169]
3     120472  2090.2091.2139.2169  [2090, 2091, 2140, 2180]


In [None]:
print( "Suche 2181 oder 2080" ,  df[["ArtikelNr","WGrStr","WGrList"]][df['WGrList'].map( lambda ??????????  ])


In [29]:
with open('updated_references.bib', 'w') as bibtex_file:
    bibtexparser.dump(bib_database, bibtex_file)