In [1]:
import os
import requests
import json
import csv
import pandas as pd
from time import sleep
from dotenv import load_dotenv
load_dotenv()
import numpy as np

In [2]:
from pybliometrics.scopus import CitationOverview, AuthorRetrieval, AbstractRetrieval
from pybliometrics.scopus import config

print(config['Authentication']['APIKey'])

2e5922c95a0e4f1fb73cede25eca779a


In [87]:
PATH_TO_INPUT_CSV = "output.csv"
PUBLICATION_CUTOFF_YEAR = 2013 #only publications DURING and AFTER this year will be counted
START_YEAR = 2018
END_YEAR = 2023
YEARS_INCLUDED = END_YEAR - START_YEAR + 1 #year range count, inclusive

In [4]:
member_arr = []
with open(PATH_TO_INPUT_CSV, "r") as f:
    r = csv.reader(f)
    for row in r:
        if row[0] != "":
            member_arr.append(row)

# skip header rows
member_arr = member_arr[1:]

In [88]:
# prepare columns 
irp_dict = {
    "Mental Health & Addictions": 1, 
    "Brain Development & Neurodevelopmental Disorders": 2, 
    "Learning/Memory & Dementias": 3,
    "Sensory/Motor Systems & Movement Disorders": 4,
    "Brain Injury & Repair": 5,
}

titles = []
eids = []
publication_years = []

member_lastnames = []
member_firstnames = []
member_ids = []
member_irps = []

all_citations = []

In [104]:
def fill_article_rows(lastname, firstname, mem_id, mem_irp, documents):
    for document in documents:
        titles.append(document.title)
        eids.append(document.eid)
        articles_authors.append(document.author_names)
        publication_years.append(document.coverDate.split("-")[0])
        member_lastnames.append(lastname)
        member_firstnames.append(firstname)
        member_ids.append(mem_id)
        member_irps.append(mem_irp)

In [105]:
def get_all_citations(documents, all_citations):
    yearly_citations_data = []

    #scopus ID is everything after 2nd hyphen for the eid
    scopus_id_list = [document.eid.split("-",2)[2] for document in documents]
    print(scopus_id_list)

    pages = int(len(documents)/25)
    
    if pages == 0:
        print(f"gathering citations")
        co = CitationOverview(scopus_id_list, start=START_YEAR, end=END_YEAR)
        yearly_citations_data = co.cc
        api_urls.append(co.url)
    elif len(documents)%25 == 0:
        urls = []
        for page in range(pages):
            print(f"gathering citations for page {page}/{pages-1}")
            list_per_page = scopus_id_list[(25*page):(25*(page+1))]
            co = CitationOverview(list_per_page, start=START_YEAR, end=END_YEAR)
            yearly_citations_data = yearly_citations_data + co.cc
            urls = urls + co.url
        api_urls.append(urls)
    else:
        urls = []
        for page in range(pages+1):
            print(f"gathering citations for page {page}/{pages}")
            list_per_page = scopus_id_list[(25*page):(25*(page+1))]
            co = CitationOverview(list_per_page, start=START_YEAR, end=END_YEAR)
            yearly_citations_data = yearly_citations_data + co.cc
            urls = urls + co.url
        api_urls.append(urls)
        

    return yearly_citations_data

In [106]:
titles = []
eids = []
# scopus_urls = []
api_urls = []
publication_years = []
articles_authors = []

member_lastnames = []
member_firstnames = []
member_ids = []
member_irps = []

all_citations = []


for member in member_arr:
    sleep(1)
    lastname = member[0]
    firstname = member[1]
    mem_id = member[3]
    mem_irp = member[5]

    print(f"looking up {firstname} {lastname} ({mem_id}) in {mem_irp} IRP...")

    documents = AuthorRetrieval(mem_id).get_documents()

    #get documents that are older than PUBLICATION CUTOFF YEAR
    adjusted_docs = [document for document in documents if int(document.coverDate.split("-")[0]) >= PUBLICATION_CUTOFF_YEAR]

    print(f"found {len(adjusted_docs)} publications between {PUBLICATION_CUTOFF_YEAR} and now")
    
    if len(adjusted_docs) == 0:
        print("no documents found in year range")
        pass
    else:
        citations_data = get_all_citations(adjusted_docs, all_citations)
        author_citations = [[citation[year][1] for year in range(YEARS_INCLUDED)] for citation in citations_data]
        all_citations = all_citations + author_citations

        fill_article_rows(lastname, firstname, mem_id, mem_irp, adjusted_docs)
        print("done\n")

looking up Khaled Abd-Elrahman (36653563400) in Learning Memory & Dementias IRP...
found 30 publications between 2013 and now
['85163237613', '85147216812', '85141459709', '85138088523', '85141347606', '85130001876', '85128779382', '85124094229', '85124732889', '85123390051', '85118903573', '85100384698', '85105564385', '85098131416', '85090179072', '85090178131', '85088850670', '85075346550', '85070517518', '85069845225', '85063075055', '85064132864', '85050820157', '85045196406', '85039172923', '85014735935', '85006511789', '84991712767', '84929705321', '84924130733']
gathering citations for page 0/1
gathering citations for page 1/1
done

looking up Douglas Allan (7201584872) in Brain Development & Neurodevelopmental Disorders IRP...
found 22 publications between 2013 and now
['85152266156', '85139375385', '85126148554', '85114784524', '85083991959', '85095703281', '85060597878', '85058922286', '85045248206', '85040763404', '85036587539', '85014039981', '85013869617', '85017040282', 

In [108]:
print(api_urls)
#  https://api.elsevier.com/content/abstract/scopus_id/

[['https://api.elsevier.com/content/abstract/scopus_id/85163237613', 'https://api.elsevier.com/content/abstract/scopus_id/85147216812', 'https://api.elsevier.com/content/abstract/scopus_id/85141459709', 'https://api.elsevier.com/content/abstract/scopus_id/85138088523', 'https://api.elsevier.com/content/abstract/scopus_id/85141347606', 'https://api.elsevier.com/content/abstract/scopus_id/85130001876', 'https://api.elsevier.com/content/abstract/scopus_id/85128779382', 'https://api.elsevier.com/content/abstract/scopus_id/85124094229', 'https://api.elsevier.com/content/abstract/scopus_id/85124732889', 'https://api.elsevier.com/content/abstract/scopus_id/85123390051', 'https://api.elsevier.com/content/abstract/scopus_id/85118903573', 'https://api.elsevier.com/content/abstract/scopus_id/85100384698', 'https://api.elsevier.com/content/abstract/scopus_id/85105564385', 'https://api.elsevier.com/content/abstract/scopus_id/85098131416', 'https://api.elsevier.com/content/abstract/scopus_id/8509017

In [109]:
csv_dict = {
    "title": titles,
    "eid": eids,
    "publication year": publication_years,
    "authors": articles_authors,
    
    "last name": member_lastnames,
    "first name": member_firstnames,
    "member id": member_ids, 
    "irp": member_irps,
}

citations_array = np.array(all_citations)
citations_array = citations_array.T

for key in csv_dict:
    print(f"{key}: {len(csv_dict[key])}")

for year in range(YEARS_INCLUDED):
    year_col = START_YEAR + year
    print(year_col)
    csv_dict[str(year_col)] = citations_array[year].tolist()

for key in csv_dict:
    print(f"{key}: {len(csv_dict[key])}")

df = pd.DataFrame(csv_dict)
df.to_csv("citations_per_year_authorsURL.csv")

title: 7833
eid: 7833
publication year: 7833
authors: 7833
last name: 7833
first name: 7833
member id: 7833
irp: 7833
2018
2019
2020
2021
2022
2023
title: 7833
eid: 7833
publication year: 7833
authors: 7833
last name: 7833
first name: 7833
member id: 7833
irp: 7833
2018: 7833
2019: 7833
2020: 7833
2021: 7833
2022: 7833
2023: 7833


In [None]:
documents = AuthorRetrieval(36653563400).get_documents()
docs_eids = AuthorRetrieval(36653563400).get_document_eids()
# for document in documents:
    

In [None]:
#print(docs_eids)
for document in documents:
    print(document.coverDate)

adjusted_docs = [document for document in documents if int(document.coverDate.split("-")[0]) >= PUBLICATION_CUTOFF_YEAR]
print(adjusted_docs)
scopus_id_list = [eid.split("-",2)[2] for eid in docs_eids]
print(scopus_id_list)

In [None]:
print()

In [None]:
co = CitationOverview(scopus_id_list[0:25], start=2018, end=2023)

In [None]:
article_yeardata = co.cc

In [None]:
co2 = CitationOverview(scopus_id_list[25:], start=2018, end=2023)
print(co2.cc)

In [None]:
all_citations = co.cc + co2.cc
#print(len(documents))
#print(documents[25:50])
#print(len(documents[25:50]))
print(all_citations[0])
print(all_citations[0][5][1])
print(YEARS_INCLUDED)
print([[citation[year][1] for year in range(YEARS_INCLUDED)] for citation in all_citations])

print(len([[[citation[year][1]] for year in range(YEARS_INCLUDED)] for citation in all_citations]))

In [None]:
citation_test = np.array([[citation[year][1] for year in range(YEARS_INCLUDED)] for citation in all_citations])
print(citation_test.T)
dict = {"a":0}
dict[f"{START_YEAR}"] = citation_test[0].tolist()
print(dict)

In [83]:
API_KEY = os.getenv("OFFICIAL_API_KEY")
API_COUNT = 25
header = {
    "X-ELS-APIKey": API_KEY,
}
api_url = "https://api.elsevier.com/content/search/scopus?"

def get_api_json(query: str, start=0):
    params = {
        "query":query,
    }
    try:
        response = requests.get(url=api_url, params=params, headers=header)
        response.raise_for_status
        data = json.loads(response.text)
    except requests.exceptions.HTTPError as e:
        print("HTTPS error: ", e)
    except requests.exceptions.RetryError as e:
        print("Max retries exceeded: ", e)
    return data

In [None]:
query = f"au-id(36653563400)"
data = get_api_json(query, start=0)
print(data)

In [84]:
query = f"au-id(36653563400)"
data = get_api_json(query, start=0)
print(data)

{'search-results': {'opensearch:totalResults': '34', 'opensearch:startIndex': '0', 'opensearch:itemsPerPage': '25', 'opensearch:Query': {'@role': 'request', '@searchTerms': 'au-id(36653563400)', '@startPage': '0'}, 'link': [{'@_fa': 'true', '@ref': 'self', '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=au-id%2836653563400%29', '@type': 'application/json'}, {'@_fa': 'true', '@ref': 'first', '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=au-id%2836653563400%29', '@type': 'application/json'}, {'@_fa': 'true', '@ref': 'next', '@href': 'https://api.elsevier.com/content/search/scopus?start=25&count=25&query=au-id%2836653563400%29', '@type': 'application/json'}, {'@_fa': 'true', '@ref': 'last', '@href': 'https://api.elsevier.com/content/search/scopus?start=9&count=25&query=au-id%2836653563400%29', '@type': 'application/json'}], 'entry': [{'@_fa': 'true', 'link': [{'@_fa': 'true', '@ref': 'self', '@href': 'https://api.elsevier.c

In [85]:
data_str = json.dumps(data["search-results"]["entry"][0], indent=4)
print(data_str)

{
    "@_fa": "true",
    "link": [
        {
            "@_fa": "true",
            "@ref": "self",
            "@href": "https://api.elsevier.com/content/abstract/scopus_id/85163237613"
        },
        {
            "@_fa": "true",
            "@ref": "author-affiliation",
            "@href": "https://api.elsevier.com/content/abstract/scopus_id/85163237613?field=author,affiliation"
        },
        {
            "@_fa": "true",
            "@ref": "scopus",
            "@href": "https://www.scopus.com/inward/record.uri?partnerID=HzOxMe3b&scp=85163237613&origin=inward"
        },
        {
            "@_fa": "true",
            "@ref": "scopus-citedby",
            "@href": "https://www.scopus.com/inward/citedby.uri?partnerID=HzOxMe3b&scp=85163237613&origin=inward"
        }
    ],
    "prism:url": "https://api.elsevier.com/content/abstract/scopus_id/85163237613",
    "dc:identifier": "SCOPUS_ID:85163237613",
    "eid": "2-s2.0-85163237613",
    "dc:title": "VGLUT3 Deletion Re