In [1]:
# Dependencies
import requests
import json
import pandas as pd
import numpy as np
import re
import io
from config import api_key_inCites
from config import api_key_WOS
from collections import OrderedDict
from pandas.io.json import json_normalize  
import base64
import xmltodict

In [2]:
##WOS Resources:
#https://github.com/Jask-Code/Research-Analytics/blob/master/WOS_API_Automation.ipynb
#http://help.incites.clarivate.com/wosWebServicesExpanded/WebServicesExpandedOverviewGroup/Introduction/ServiceEndpointAddresses.html
#http://search.webofknowledge.com/esti/wokmws/ws/WokSearch
#http://help.incites.clarivate.com/wosWebServicesExpanded/appendix1Group/wosfieldNameTable.html
#https://github.com/langner/wok_search

##InCites Resources: 
#https://clarivate.com/webofsciencegroup/solutions/xml-and-apis/
#https://developer.clarivate.com/help
#https://api.clarivate.com/api/incites/DocumentLevelMetricsByUT/json
#https://github.com/Clarivate-SAR/incites-retrieve
#https://api.clarivate.com/swagger-ui/?url=https%3A%2F%2Fdeveloper.clarivate.com%2Fapis%2Fincites%2Fswagger%3FforUser%3D9c13dcee882598956b564212f82c2236a51e3f56

#Author-name Bold Request
#https://pandas.pydata.org/pandas-docs/stable/user_guide/style.html
#https://stackoverflow.com/questions/49961211/python-pandas-highlight-matching-text-and-row
#https://stackoverflow.com/questions/51938245/display-dataframe-values-in-bold-font-in-one-row-only
#https://stackoverflow.com/questions/20035518/insert-a-link-inside-a-pandas-table

In [3]:
## Basic Clarivate Analytics InCites API Request
# Looks up the InCites document-level metrics for one Web of Science UT and
# prints the request URL, the HTTP status code and the JSON payload.

url = "https://api.clarivate.com/api/incites/DocumentLevelMetricsByUT/json"

# The InCites key comes from the local config module imported at the top.
headers = {
    'X-ApiKey': api_key_inCites,
    'Accept': 'application/json'
}

parameters = {
    "UT": "000492801600048"
}

# Without a timeout requests waits indefinitely on an unresponsive server,
# which would leave the kernel stuck on this cell.
response = requests.get(url, headers=headers, params=parameters, timeout=30)

print(response.url)
print(response.status_code)

# Error responses (e.g. "API rate limit exceeded") are JSON as well, so the
# payload is printed whatever the status code is.
response_json = response.json()
print(json.dumps(response_json, indent=4, sort_keys=True))

https://api.clarivate.com/api/incites/DocumentLevelMetricsByUT/json?UT=000492801600048
429
{
    "message": "API rate limit exceeded"
}


In [None]:
# Path of the .csv file that lists the UTs to look up (one row per UT)
file_path = "WOS_UT_LIST.csv"

def load_csv(file_path):
    """Read the UT list CSV into a DataFrame with normalised column names.

    Parameters
    ----------
    file_path : str or file-like
        Passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The file contents. Column names are stripped of surrounding
        whitespace, lower-cased, have spaces replaced by underscores and have
        parentheses removed (e.g. ``" Times Cited (All)"`` becomes
        ``"times_cited_all"``).
    """
    # A column named "UT" is read as text so the identifier is kept verbatim
    # (a numeric dtype would drop leading zeros).
    UT_list_df = pd.read_csv(file_path, encoding="utf-8", dtype={'UT': str})

    # regex=False makes every replacement a literal one. "(" on its own is not
    # a valid regular expression, and the default value of `regex` differs
    # between pandas versions, so being explicit keeps this portable.
    UT_list_df.columns = (
        UT_list_df.columns
        .str.strip()
        .str.lower()
        .str.replace(" ", "_", regex=False)
        .str.replace("(", "", regex=False)
        .str.replace(")", "", regex=False)
    )
    return UT_list_df

# Read the file once; ending the cell on the variable displays the frame
# without parsing the CSV a second time.
UT_list_df = load_csv(file_path)
UT_list_df

In [None]:
#The function "prep_UT_list" takes in the UT_list_df dataframe, converts the values of the "id" column
#to strings, strips the "WOS:" prefix from each value, drops the "nan" entries (rows that don't
#have an ID), and finally returns the result as a list called "cleaned_UT_list".

def prep_UT_list(UT_list_df):
    """Build a clean list of UT strings from the ``id`` column.

    Parameters
    ----------
    UT_list_df : pandas.DataFrame
        Frame with an ``id`` column holding the UTs, optionally prefixed with
        ``"WOS:"``. The frame is not modified.

    Returns
    -------
    list of str
        The ids as strings, with every ``"WOS:"`` occurrence removed and rows
        without an id left out.

    Raises
    ------
    KeyError
        If the frame has no ``id`` column.
    """
    # Drop missing ids before the string conversion: relying on NaN turning
    # into the text "nan" depends on the column dtype. Working on a derived
    # Series also leaves the caller's DataFrame untouched.
    id_strings = UT_list_df['id'].dropna().astype(str)

    # Remove the WOS: characters from each item
    stripped_ids = (ut.replace("WOS:", "") for ut in id_strings)

    # The literal "nan" check is kept so values such as "WOS:nan" are still
    # discarded.
    cleaned_UT_list = [ut for ut in stripped_ids if ut != 'nan']

    return cleaned_UT_list

# Build the list once; ending the cell on the variable displays it without
# running the clean-up a second time.
cleaned_UT_list = prep_UT_list(UT_list_df)
cleaned_UT_list

In [None]:
#The function "get_incites_metrics" takes in the "cleaned_UT_list" and builds the request needed
#to query the InCites API. The InCites API key is passed in through the "headers" (see the Dependencies cell above:
#"from config import api_key_inCites"); the config file is also referenced in the git ignore so it won't be exposed
#on GitHub. The API is called once for each UT in the list and returns one response per UT. Each response is
#saved in a "single_UT_metric_dict". Each "single_UT_metric_dict" is then appended to the
#"multiple_UT_metric_list". The function returns a list of dictionaries called
#the "multiple_UT_metric_list".

# Sent to the API as the "batch" request parameter
BATCH_SIZE = 100
# One entry is appended here for every UT that get_incites_metrics retrieves
multiple_UT_metric_list = []
# Module-level placeholder for a single API response
single_UT_metric_dict = {}

def get_incites_metrics(cleaned_UT_list):
    """Query the InCites API once per UT and collect the JSON responses.

    Each successful response is appended to the module-level
    ``multiple_UT_metric_list``. A UT whose request fails (network error,
    timeout, HTTP error status such as 429, or a body that is not JSON) is
    reported and skipped, so one bad UT does not abort the whole run.

    Parameters
    ----------
    cleaned_UT_list : list of str
        UTs without the "WOS:" prefix.

    Returns
    -------
    list
        ``multiple_UT_metric_list``, i.e. the same module-level list that the
        responses were appended to.
    """
    # Nothing below changes between iterations, so it is built once.
    url = "https://api.clarivate.com/api/incites/DocumentLevelMetricsByUT/json"
    fields = ["ACCESSION_NUMBER", "DOCUMENT_TYPE", "TIMES_CITED", "JOURNAL_EXPECTED_CITATIONS", "JNCI", "IMPACT_FACTOR",
              "HARMEAN_CAT_EXP_CITATION", "AVG_CNCI", "ESI_HIGHLY_CITED_PAPER", "ESI_HOT_PAPER", "IS_INTERNATIONAL_COLLAB",
              "JOURNAL_ACT_EXP_CITATIONS", "IS_INSTITUTION_COLLAB", "IS_INDUSTRY_COLLAB", "OA_FLAG", "OA_TYPE"]
    requested_fields = ",".join(fields)

    headers = {
        'X-ApiKey': api_key_inCites,
        'Accept': 'application/json'
    }

    for item in cleaned_UT_list:
        parameters = {
            'UT': item,
            # Was the bare name `n`, which is undefined and raised NameError.
            # NOTE(review): 'n' is assumed to mean "do not include ESCI" -
            # confirm the accepted values against the InCites API docs.
            'esci': 'n',
            'batch': BATCH_SIZE,
            'field': requested_fields
        }

        print(f"Retrieving Metrics for UT:{item}.")
        try:
            # Make the API request; the timeout stops a stalled connection
            # from blocking the loop forever.
            single_UT_response = requests.get(url, headers=headers, params=parameters, timeout=30)
            # Turn 4xx/5xx answers into an exception so an error payload
            # (e.g. "API rate limit exceeded") is not stored as a metric.
            single_UT_response.raise_for_status()
            single_UT_metric_dict = single_UT_response.json()
        except (requests.exceptions.RequestException, ValueError) as error:
            # ValueError covers a response body that is not valid JSON.
            print(f"Request failed for UT:{item} ({error})... skipping.")
        else:
            # Append each response to build up the list of dictionaries
            multiple_UT_metric_list.append(single_UT_metric_dict.copy())

        print("------------")

    return multiple_UT_metric_list
       
# Run the retrieval for every UT in cleaned_UT_list. The return value is not
# bound here: the function returns the module-level multiple_UT_metric_list,
# which is printed on the next line.
get_incites_metrics(cleaned_UT_list)
print(multiple_UT_metric_list)

## Basic Clarivate Analytics InCites API Request with Many IDs


# Set ESCI to True to include ESCI in results
# Number of UTs to send to InCites at once - 100 is limit set by API.


# url = "https://api.clarivate.com/api/incites/DocumentLevelMetricsByUT/json"

# headers = {
#      'X-ApiKey': api_key_inCites,
#      'Accept':'application/json'
# }



# parameters = {
#      'ESCI': False,
#      'count': BATCH_SIZE,
#      "UT": "000492801600048"       
   
# }

# response = requests.get(url, headers=headers, params=parameters)


# print(response.url)
# print(response.status_code)

# response_json = response.json()
# print(json.dumps(response_json, indent=4, sort_keys=True))



In [None]:
##Basic Clarivate Analytics Web of SCience (WOS) API Request
# Fetches one record from the WOS API by UT and prints the first record of the
# response.

# NOTE: this rebinds the notebook-wide BATCH_SIZE, so any cell run after this
# one sees 50.
BATCH_SIZE = 50
query = "UT=000492801600048"
databasename = 'WOS'

headers = {
    'X-ApiKey': api_key_WOS,
    'Accept': 'application/json'
}

parameters = {
    "databaseId": databasename,
    "usrQuery": query,
    "count": BATCH_SIZE,
    'firstRecord': 1
}

# wos returns the most recent paper on top so it's alright to just grab 1 record
url = 'https://api.clarivate.com/api/wos/'
# The timeout keeps the cell from hanging on an unresponsive server.
response = requests.get(url, headers=headers, params=parameters, timeout=30)

print(response.url)
print(response.status_code)

response_json = response.json()
try:
    # Other parts of the record worth inspecting:
    # ['static_data']['fullrecord_metadata']['addresses']['address_name']
    # ['dynamic_data']['cluster_related']
    response_json = response_json['Data']['Records']['records']['REC'][0]
except (KeyError, IndexError, TypeError):
    # Error payloads (rate limit, bad key, no hits) do not have the record
    # structure; show the payload as-is instead of raising.
    print("No record found in the response; showing the full payload instead.")
print(json.dumps(response_json, indent=4, sort_keys=True))



In [None]:
## Unique Identifier Matching Clarivate Analytics Web of SCience (WOS) API Request
# Start with a list of publications that have DOIs or PMIDs, and need to find the WoS UT to use for InCites.
# NOTE(review): the query below still searches by UT; it has not been switched
# to a DOI/PMID search yet - confirm the intended field tag before relying on it.

# NOTE: this rebinds the notebook-wide BATCH_SIZE, so any cell run after this
# one sees 50.
BATCH_SIZE = 50
query = "UT=000492801600048"
databasename = 'WOS'

headers = {
    'X-ApiKey': api_key_WOS,
    'Accept': 'application/json'
}

parameters = {
    "databaseId": databasename,
    "usrQuery": query,
    "count": BATCH_SIZE,
    'firstRecord': 1
}

# wos returns the most recent paper on top so it's alright to just grab 1 record
url = 'https://api.clarivate.com/api/wos/'
# The timeout keeps the cell from hanging on an unresponsive server.
response = requests.get(url, headers=headers, params=parameters, timeout=30)

print(response.url)
print(response.status_code)

# The complete payload is printed here (not just the first record).
response_json = response.json()
print(json.dumps(response_json, indent=4, sort_keys=True))