In [None]:
import os
import requests
import json
import csv
import pandas as pd
from time import sleep
from dotenv import load_dotenv
load_dotenv()
import numpy as np

In [None]:
from pybliometrics.scopus import CitationOverview, AuthorRetrieval, AbstractRetrieval


#to confirm API key used
from pybliometrics.scopus import config
#print(config['Authentication']['APIKey']) 

In [None]:
# --- Run configuration -------------------------------------------------------

# CSV file the member roster is read from.
PATH_TO_INPUT_CSV = "output.csv"

# Publications from this year onwards (inclusive) are counted; earlier ones are dropped.
PUBLICATION_CUTOFF_YEAR = 2013

# First and last year of the citation window requested from the API.
START_YEAR, END_YEAR = 2018, 2023

# Number of years in the citation window, both endpoints included.
YEARS_INCLUDED = (END_YEAR + 1) - START_YEAR

In [None]:
# Load the member roster. Only rows whose first column is non-empty are kept.
# newline="" is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly.
with open(PATH_TO_INPUT_CSV, "r", newline="") as f:
    # `row and ...` guards against blank lines: csv.reader yields them as [],
    # and indexing row[0] on those would raise IndexError.
    member_arr = [row for row in csv.reader(f) if row and row[0] != ""]

# skip the header row (the first row that survived the filter above)
member_arr = member_arr[1:]

In [None]:
# prepare columns
output_cols = ["title", "eid", "publication year", "authors", "last name", "first name", "member id", "citations"]

# Column name -> its own, initially empty, list of values.
# (The previous expression indexed `output_cols` with its own string elements,
# which raises TypeError, and used set-comprehension syntax instead of a dict.)
output_dict = {col: [] for col in output_cols}

# Per-article accumulators; index k across all of these lists describes the
# same article row.
titles = []
eids = []
publication_years = []
articles_authors = []

# Per-article copy of the member the article was found under.
member_lastnames = []
member_firstnames = []
member_ids = []
member_irps = []

# One list of yearly citation counts per article.
all_citations = []

In [None]:
def fill_article_rows(lastname, firstname, mem_id, documents):
    '''Append one row per document to the module-level column lists.

    For every item in `documents` the article fields (title, eid, author
    names, publication year) and the given member fields (last name, first
    name, id) are appended to their respective global lists, so all lists
    grow by the same amount.

    The publication year is the text before the first "-" of `coverDate`.
    Returns None.
    '''
    for doc in documents:
        # article columns
        titles.append(doc.title)
        eids.append(doc.eid)
        articles_authors.append(doc.author_names)
        cover_year = doc.coverDate.partition("-")[0]
        publication_years.append(cover_year)

        # member columns, repeated for every article of this member
        member_lastnames.append(lastname)
        member_firstnames.append(firstname)
        member_ids.append(mem_id)

In [None]:
def get_all_citations(documents, page_size=25):
    '''Return yearly citation data for the documents.

    The Scopus IDs of `documents` are sent to `CitationOverview` in batches of
    at most `page_size` identifiers, for the START_YEAR..END_YEAR window, and
    the `cc` value of every batch is concatenated in input order.

    Parameters
    ----------
    documents : iterable of objects with an `eid` attribute
        Documents to look up.
    page_size : int, optional
        Maximum number of identifiers per `CitationOverview` call. Defaults to
        25, the batch size this notebook has always used.
        NOTE(review): presumably the API's per-request limit -- confirm
        before raising it.

    Returns
    -------
    list
        One `cc` entry per document, in the order of `documents`. Empty when
        `documents` is empty (no API call is made in that case).

    Raises
    ------
    ValueError
        If `page_size` is smaller than 1.
    '''
    if page_size < 1:
        raise ValueError("page_size must be a positive integer")

    # scopus ID is everything after the 2nd hyphen of the eid
    scopus_id_list = [document.eid.split("-", 2)[2] for document in documents]
    if not scopus_id_list:
        # Nothing to ask the API for.
        return []
    print(scopus_id_list)

    yearly_citations_data = []
    # Ceiling division: a partially filled last batch still counts as a page.
    total_pages = -(-len(scopus_id_list) // page_size)
    for page, offset in enumerate(range(0, len(scopus_id_list), page_size), start=1):
        print(f"gathering citations for page {page}/{total_pages}")
        ids_on_page = scopus_id_list[offset:offset + page_size]
        co = CitationOverview(ids_on_page, start=START_YEAR, end=END_YEAR)
        yearly_citations_data.extend(co.cc)

    return yearly_citations_data

In [None]:
# Reset every accumulator so that re-running this cell starts from a clean
# slate instead of appending to the results of a previous run.
titles = []
eids = []
# scopus_urls = []
api_urls = []
publication_years = []
articles_authors = []

member_lastnames = []
member_firstnames = []
member_ids = []

all_citations = []

for member in member_arr:
    # Pause one second before each member lookup
    # (presumably to stay under the API rate limit -- TODO confirm).
    sleep(1)
    # Roster columns used: 0 = last name, 1 = first name, 3 = author/member id.
    lastname = member[0]
    firstname = member[1]
    mem_id = member[3]

    print(f"looking up {firstname} {lastname} ({mem_id})...")

    # NOTE(review): get_documents() appears to return None rather than an empty
    # list when the author has no documents; `or []` keeps the filter below safe.
    documents = AuthorRetrieval(mem_id).get_documents() or []

    # keep documents published in or after PUBLICATION_CUTOFF_YEAR
    adjusted_docs = [document for document in documents if int(document.coverDate.split("-")[0]) >= PUBLICATION_CUTOFF_YEAR]

    print(f"found {len(adjusted_docs)} publications between {PUBLICATION_CUTOFF_YEAR} and now")

    if len(adjusted_docs) == 0:
        print("no documents found in year range")
        continue

    # get_all_citations takes the documents only; passing `all_citations` as a
    # second positional argument raised TypeError.
    citations_data = get_all_citations(adjusted_docs)
    # Per article: the second field of each of the first YEARS_INCLUDED entries
    # (presumably (year, count) pairs -- verify against CitationOverview.cc).
    author_citations = [[citation[year][1] for year in range(YEARS_INCLUDED)] for citation in citations_data]
    # In-place extend avoids re-copying the whole list for every member.
    all_citations.extend(author_citations)

    fill_article_rows(lastname, firstname, mem_id, adjusted_docs)
    print("done\n")

In [None]:
# Show the collected API URLs.
# NOTE(review): `api_urls` is only initialised to an empty list in the
# collection cell and nothing visible in this notebook appends to it, so this
# is expected to print [].
print(api_urls)
# Base URL of the abstract-by-Scopus-ID endpoint, kept for reference:
#  https://api.elsevier.com/content/abstract/scopus_id/

In [None]:
# Column name -> list of values; every list holds one entry per article.
csv_dict = {
    "title": titles,
    "eid": eids,
    "publication year": publication_years,
    "authors": articles_authors,

    "last name": member_lastnames,
    "first name": member_firstnames,
    "member id": member_ids,
}

# Sanity check: all article/member columns should report the same length.
for key in csv_dict:
    print(f"{key}: {len(csv_dict[key])}")

# Add one column per year of the citation window. Each row of `all_citations`
# holds one article's counts ordered by year, so column `year` is simply the
# year-th element of every row. Building the columns this way (instead of
# transposing a NumPy array) also works when no citations were collected,
# where indexing the transposed empty array raised IndexError.
for year in range(YEARS_INCLUDED):
    year_col = START_YEAR + year
    print(year_col)
    csv_dict[str(year_col)] = [article_counts[year] for article_counts in all_citations]

# Same check again, now including the per-year columns.
for key in csv_dict:
    print(f"{key}: {len(csv_dict[key])}")

df = pd.DataFrame(csv_dict)
df.to_csv("citations_per_year_psychiatry.csv")

In [None]:
# Scratch lookup of a single author: fetch the document list and the matching
# EIDs from ONE retrieval object, so the author ID is written only once and the
# author is not retrieved twice.
sample_author = AuthorRetrieval(36653563400)
documents = sample_author.get_documents()
docs_eids = sample_author.get_document_eids()
    

In [None]:
# Show the cover date of every retrieved document.
for document in documents:
    print(document.coverDate)

# Documents whose cover-date year is PUBLICATION_CUTOFF_YEAR or later.
adjusted_docs = [
    doc
    for doc in documents
    if int(doc.coverDate.split("-")[0]) >= PUBLICATION_CUTOFF_YEAR
]
print(adjusted_docs)

# Scopus ID = everything after the second hyphen of each EID.
# Note that this list is built from ALL of the author's EIDs, not from adjusted_docs.
scopus_id_list = [full_eid.split("-", 2)[2] for full_eid in docs_eids]
print(scopus_id_list)

In [None]:
# Prints a single empty line; this cell has no other effect.
print()

In [None]:
# First batch: citation overview for the first 25 Scopus IDs. The window comes
# from START_YEAR/END_YEAR rather than literal years, because later cells index
# the result with range(YEARS_INCLUDED) and would break if the two drifted apart.
co = CitationOverview(scopus_id_list[0:25], start=START_YEAR, end=END_YEAR)

In [None]:
# Keep a handle on the `cc` value of the first CitationOverview batch (`co`).
article_yeardata = co.cc

In [None]:
# Second batch: every Scopus ID after the first 25, over the same configured
# START_YEAR..END_YEAR window as the first batch, so both batches stay
# consistent with YEARS_INCLUDED.
co2 = CitationOverview(scopus_id_list[25:], start=START_YEAR, end=END_YEAR)
print(co2.cc)

In [None]:
# Join the `cc` values of both batches into one list.
# Note: this rebinds `all_citations`, replacing whatever the collection cell stored there.
all_citations = co.cc + co2.cc

# Spot checks: the first article's entry, one field inside it, and the window size.
print(all_citations[0])
print(all_citations[0][5][1])
print(YEARS_INCLUDED)

# Per article: the second field of each of its first YEARS_INCLUDED entries.
print([
    [per_article[offset][1] for offset in range(YEARS_INCLUDED)]
    for per_article in all_citations
])

# Number of articles (outer length of the same structure).
print(len([
    [[per_article[offset][1]] for offset in range(YEARS_INCLUDED)]
    for per_article in all_citations
]))

In [None]:
# Rows = articles, columns = years of the citation window.
citation_test = np.array([[citation[year][1] for year in range(YEARS_INCLUDED)] for citation in all_citations])
# Transposed view: rows = years, columns = articles.
citation_by_year = citation_test.T
print(citation_by_year)

# Named `year_columns` instead of `dict`: rebinding the built-in name `dict`
# would break every later dict(...) call in the kernel session.
year_columns = {"a": 0}
# Row 0 of the TRANSPOSED array is the START_YEAR count of every article, which
# is what a column keyed by that year should hold (the export cell builds its
# year columns the same way). The untransposed row 0 would be one article's
# counts across all years.
year_columns[f"{START_YEAR}"] = citation_by_year[0].tolist()
print(year_columns)

In [None]:
# API key is read from the environment (populated by load_dotenv() at the top).
API_KEY = os.getenv("OFFICIAL_API_KEY")
# Number of results requested per call.
API_COUNT = 25
header = {
    "X-ELS-APIKey": API_KEY,
}
api_url = "https://api.elsevier.com/content/search/scopus?"

def get_api_json(query: str, start: int = 0):
    """Run one Scopus Search request and return the decoded JSON body.

    Parameters
    ----------
    query : str
        Search expression sent as the `query` parameter.
    start : int, optional
        Offset of the first result to return, sent as the `start` parameter
        (previously accepted but never forwarded).

    Returns
    -------
    dict or None
        The parsed JSON response, or None when the request failed with an HTTP
        error status or exhausted its retries (the error is printed).
        Other `requests` exceptions and JSON decoding errors propagate.
    """
    params = {
        "query": query,
        "start": start,
        "count": API_COUNT,
    }
    try:
        response = requests.get(url=api_url, params=params, headers=header)
        # Must be CALLED: the bare attribute reference never raised, so 4xx/5xx
        # responses were silently treated as success.
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        print("HTTPS error: ", e)
        # Explicit "no data" result; falling through used to hit an unbound `data`.
        return None
    except requests.exceptions.RetryError as e:
        print("Max retries exceeded: ", e)
        return None
    return json.loads(response.text)

In [None]:
# Ask the search endpoint for the documents of one author ID, starting at offset 0.
query = "au-id(36653563400)"
data = get_api_json(query, start=0)
print(data)

In [None]:
# Same request as the preceding cell, issued again: author-ID search from offset 0.
query = "au-id(36653563400)"
data = get_api_json(query, start=0)
print(data)

In [None]:
# Pretty-print (4-space indent) the first entry of the search results.
data_str = json.dumps(
    data["search-results"]["entry"][0],
    indent=4,
)
print(data_str)