# Sample code for interacting with the `scholarly` package

In [None]:
# Check whether the scholarly module is installed; if it is not, install the
# latest version from GitHub. This is equivalent to running, by hand:
#   pip3 install -U git+https://github.com/scholarly-python-package/scholarly.git

# `import importlib` alone does not guarantee that the `importlib.util`
# submodule is loaded, so import the submodule explicitly.
import importlib.util
import subprocess
import sys

schoarly_loader = importlib.util.find_spec('scholarly')
found = schoarly_loader is not None
if not found:
    # `pip.main` is no longer part of pip's public API (removed in pip 10).
    # Running pip as a module of the current interpreter is the supported way
    # and installs into the same environment this kernel is running in.
    subprocess.check_call([
        sys.executable, '-m', 'pip', 'install', '-U',
        'git+https://github.com/scholarly-python-package/scholarly.git',
    ])
    # Let the import system notice the freshly installed package.
    importlib.invalidate_caches()

from scholarly import scholarly
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Read the header-less file of dataset names and keep the first column of
# every row as a plain Python list.
names_frame = pd.read_csv("directories_list.txt", header=None)
dataset_names = names_frame[0].tolist()

In [None]:
# Using scholarly, search for every dataset name and store the reported number
# of matching results in a Series indexed by dataset name.
#
# The Series is filled one entry at a time (rather than built in one go at the
# end) so that the counts collected so far are still available if a query
# raises part-way through the loop.
#
# An explicit dtype is given because a bare `pd.Series()` emits a
# DeprecationWarning on pandas 1.x and its default dtype differs between
# pandas versions.
citations = pd.Series(dtype="int64")
for dataset in dataset_names:
    search_query = scholarly.search_pubs(dataset)
    citations[dataset] = search_query.total_results
    print(dataset, citations[dataset])

In [None]:
# In case the search fails, continue with the datasets that have no entry in
# `citations` yet.
# Note: The query will fail every three minutes, need to change the IP address
# before continuing.
#
# `dataset_names` is deliberately left untouched: the remaining work is derived
# from what is already stored in `citations`. This keeps the cell safe to
# re-run any number of times, including when `citations` is still empty or
# when the previous attempt failed on its very first query.
already_fetched = set(citations.index)
remaining_datasets = [name for name in dataset_names if name not in already_fetched]

# Using scholarly, get the number of results for each remaining dataset and
# add it to the Series.
for dataset in remaining_datasets:
    search_query = scholarly.search_pubs(dataset)
    citations[dataset] = search_query.total_results
    print(dataset, citations[dataset])

In [None]:
# Save the citation counts to a CSV file with the columns `dataset_id` (the
# Series index) and `number_of_citations` (the values).
# The header previously carried a trailing space ('number_of_citations '),
# which ended up in the column name of the written file.
citations.to_csv("citations.csv", header=['number_of_citations'], index_label='dataset_id')

## Sample script using the `getCitations` module to retrieve the number of citations and the citation details

In [1]:
import pandas as pd

import getCitations

dataset = "ds003374"

# Number of citations reported for the dataset
citation_nums = getCitations.get_citation_numbers(dataset)

# Citation details
if citation_nums != 0:
    citations = getCitations.get_citations(dataset, citation_nums)
else:
    # Without this branch `citations` would be undefined on a fresh kernel (or
    # silently keep a stale value from an earlier cell) when nothing is found.
    # The column names mirror the ones shown in this cell's recorded output;
    # presumably get_citations returns a DataFrame with these columns - confirm
    # against the getCitations module.
    citations = pd.DataFrame(
        columns=['title', 'author', 'venue', 'year', 'url', 'cited_by', 'bib']
    )

print(citations.keys())

  citations = citations.append({'title': entry['bib']['title'],
  citations = citations.append({'title': entry['bib']['title'],
  citations = citations.append({'title': entry['bib']['title'],


Index(['title', 'author', 'venue', 'year', 'url', 'cited_by', 'bib'], dtype='object')


  citations = citations.append({'title': entry['bib']['title'],


In [2]:
# Show the citation details retrieved in the previous cell; a bare expression
# on the last line of a cell is rendered as the cell's output.
citations

Unnamed: 0,title,author,venue,year,url,cited_by,bib
0,Dataset of spiking and LFP activity invasively...,"[T Fedele, E Boran, V Chirkov, P Hilfiker, T G...",Scientific data,2021.0,https://www.nature.com/articles/s41597-020-007...,6,{'title': 'Dataset of spiking and LFP activity...
1,Open multimodal iEEG-fMRI dataset from natural...,"[J Berezutskaya, MJ Vansteensel, EJ Aarnoutse]",Scientific Data,2022.0,https://www.nature.com/articles/s41597-022-011...,11,{'title': 'Open multimodal iEEG-fMRI dataset f...
2,Epileptic high-frequency oscillations in intra...,"[E Boran, L Stieglitz, J Sarnthein]",Frontiers in Human Neuroscience,2021.0,https://www.frontiersin.org/articles/10.3389/f...,8,{'title': 'Epileptic high-frequency oscillatio...
3,Dataset of spiking and LFP activity invasively...,"[L Stieglitz, H Jokeit, J Sarnthein]",,,,0,{'title': 'Dataset of spiking and LFP activity...


## Sample commands to save the citations to a pickle file, load them back, and export them to Excel

In [None]:
from pathlib import Path

# Pickle the citation details of the current dataset to citations/<dataset>.pkl.
# The output folder is created first, because to_pickle fails when the target
# directory does not exist.
citations_dir = Path("citations")
citations_dir.mkdir(parents=True, exist_ok=True)
citations.to_pickle(citations_dir / f"{dataset}.pkl")

In [None]:
import pandas as pd

# Load a previously pickled citation table back from disk.
# NOTE: unpickling executes code embedded in the file - only load pickle files
# that come from a trusted source.
pickle_path = "citations/ds000117.pkl"
loaded_cites = pd.read_pickle(pickle_path)

In [None]:
# Export the loaded citation table to an Excel workbook.
excel_path = "citations/ds000117.xlsx"
loaded_cites.to_excel(excel_path)