In [2]:
import requests
import pathlib
import yaml
import re
import os
import sys

from json import load, dump
from typing import Set
from os.path import join, exists, basename
from tqdm import tqdm

In [18]:
# Load the catalogue from YAML. Later cells iterate over `data` and read the
# 'bibtexKey' and 'name' fields of each item.
# The encoding is pinned to UTF-8 so that non-ASCII text is decoded the same
# way on every platform instead of depending on the locale's default encoding.
with open('./resources/supernova.yaml', 'r', encoding='utf-8') as fp:
    data = yaml.safe_load(fp)

In [26]:
# Lookup table from each catalogue item's BibTeX key to its name.
# If two items share a key, the later item wins.
bibtex_key_to_name = {item['bibtexKey']: item['name'] for item in data}

In [3]:
# Read the raw BibTeX file. Entries are expected to be separated by blank lines.
with open('./notebook-va.bib', 'r') as fp:
    lines = fp.readlines()

# Group consecutive non-blank lines into one string per entry.
bibtex_entries = []
cur_entry_lines = []
for line in lines:
    if line == '\n':
        # Only flush when something was collected, so that leading, trailing
        # or repeated blank lines do not produce empty entries (an empty
        # entry has no key and would make the year parsing below fail).
        if cur_entry_lines:
            bibtex_entries.append(''.join(cur_entry_lines))
            cur_entry_lines = []
    else:
        cur_entry_lines.append(line)

# Flush the last entry when the file does not end with a blank line.
if cur_entry_lines:
    bibtex_entries.append(''.join(cur_entry_lines))

# Create a dictionary that maps bibtex key to the entry
bibtex_dict = {}

for entry in bibtex_entries:
    # The first line of an entry looks like "@type{key," -- keep only the key.
    first_line = entry.split('\n', 1)[0]
    key = re.sub(r'^@.+{(.+),.*$', r'\1', first_line)

    # Also parse the year: the key must end with four digits, optionally
    # followed by a single lowercase letter (e.g. "smith2020" or "smith2020b").
    year_match = re.match(r'^.*(\d{4})[a-z]?$', key)
    if year_match is None:
        # Same exception type as the previous int() failure, but the message
        # now names the offending key.
        raise ValueError(f'Cannot parse a year from BibTeX key {key!r}')
    year = int(year_match.group(1))

    bibtex_dict[key] = {
        'bibtex': entry,
        'year': year
    }

len(bibtex_entries)

193

In [60]:
# Build two lookups keyed by catalogue name:
#   doi_map  -> paper identifier (a DOI, or 'arXiv:<id>' taken from the URL)
#   year_map -> publication year parsed from the BibTeX key
doi_map, year_map = {}, {}

for entry in bibtex_entries:
    header = entry.split('\n')[0]
    key = re.sub(r'^@.+{(.+),.*$', r'\1', header)
    # The year is parsed for every entry, before the catalogue check below.
    year = int(re.sub(r'^.*(\d{4})[a-z]?$', r'\1', key))

    # Only entries whose key appears in the catalogue are of interest.
    if key in bibtex_key_to_name:
        name = bibtex_key_to_name[key]
        year_map[name] = year

        doi_match = re.search(r'.*doi = {(.*?)}.*', entry)
        if doi_match is not None:
            # Prefer the DOI field when the entry has one.
            doi_map[name] = doi_match.group(1)
        else:
            # Otherwise fall back to an arXiv id taken from the URL field.
            url_match = re.search(r'.*url = {(.*?)}.*', entry)
            if url_match is not None:
                link = url_match.group(1)
                if 'arxiv' in link:
                    # The id is whatever follows the last '/' of the URL.
                    doi_map[name] = 'arXiv:' + link.rsplit('/', 1)[-1]

In [30]:
# Number of catalogue names for which a DOI or arXiv identifier was found
len(doi_map)

81

## Query Citation Count

In [48]:
# Base endpoint of the Semantic Scholar Graph API paper lookup. A paper
# identifier is appended to it to form each request URL.
url = "https://api.semanticscholar.org/graph/v1/paper/"

In [54]:
# Query citation count from semantic scholar
#   citation_map -> name to citation count, for every successful lookup
#   failed_dois  -> identifiers whose lookup failed or returned no count
citation_map = {}
failed_dois = []

# Only the citation count is requested for each paper.
parameter = {
    'fields': 'citationCount'
}

for name, doi in doi_map.items():
    cur_url = url + doi

    try:
        # The timeout (in seconds) keeps a single stalled connection from
        # hanging the whole cell.
        response = requests.get(cur_url, params=parameter, timeout=30).json()
    except (requests.RequestException, ValueError):
        # A network error or a non-JSON body is recorded like any other
        # failed lookup, so one bad request does not discard the results
        # already collected.
        failed_dois.append(doi)
        continue

    # Error payloads carry no 'citationCount' field; the isinstance check
    # also guards against a JSON body that is not an object.
    if isinstance(response, dict) and 'citationCount' in response:
        citation_map[name] = response['citationCount']
    else:
        failed_dois.append(doi)

In [58]:
# Report every identifier whose lookup failed, next to the catalogue name it
# belongs to.
for doi in failed_dois:
    # First name in doi_map stored under this identifier ('' if there is none).
    cur_name = next(
        (name for name, identifier in doi_map.items() if identifier == doi),
        ''
    )
    print(cur_name, doi)
    # print(f"citation_map['{cur_name}'] = 0")

networkit 10.1109/IPDPSW55747.2022.00055
mols2grid 10.5281/zenodo.6591473
ivpy 10.11588/DAH.2019.4.66401
cytoscapejs 10.5281/ZENODO.6828253
geopandas 10.5281/ZENODO.7422493
dea-tools 10.26186/145234
shap 10.48550/arXiv.1705.07874
itkwidgets 10.5281/ZENODO.7489693
data+shift 10.2312/EVS.20221097
tissuumaps 10.1101/2022.01.28.478131


In [59]:
# Manually add citation for failed papers
# NOTE(review): these counts are hard-coded and their source is not recorded
# in the notebook -- confirm them before re-using.
citation_map.update({
    'networkit': 1,
    'mols2grid': 0,
    'ivpy': 6,
    'cytoscapejs': 0,
    'geopandas': 0,
    'dea-tools': 0,
    'shap': 11652,
    'itkwidgets': 0,
    'data+shift': 3,
    'tissuumaps': 0,
})

In [61]:
# Combine the citation count and the publication year into one record per name.
citation_info = {
    tool: {'citation': count, 'year': year_map[tool]}
    for tool, count in citation_map.items()
}

In [63]:
# Write the combined citation records to disk. The `with` block closes the
# file as soon as the JSON is written, so the output is flushed immediately.
with open('./resources/citation_info.json', 'w') as fp:
    dump(citation_info, fp)