In [42]:
# Dependencies
import requests
import json
import pandas as pd
import numpy as np
import re
import io
from config import api_key
from collections import OrderedDict
from pandas.io.json import json_normalize  

## Get Scopus Author Information using Multiple Scopus Author IDs

In [2]:
# File to Load
radiation_oncology = "radiation_oncology.csv"

# Read the CSV file and store into Pandas DataFrame with the column Scopus Author ID as a string
radiation_oncology_df = pd.read_csv(radiation_oncology, encoding="utf-8", dtype ={'scopus_author_id': str})

#Change the column names to lower case with underscore for spaces
radiation_oncology_df.columns =  radiation_oncology_df.columns.str.strip().str.lower().str.replace(" ", "_").str.replace("(","").str.replace(")","")
radiation_oncology_df.head()

Unnamed: 0,last_name,first_name,mi,department,netid,position,division,career_track,in_elements?,scopus_author_id,scopus_search,unnamed:_11
0,Donnelly,Eric,D,Radiation Oncology,edo615,Associate Professor,,Clinician-Educator,Yes,21233377200,AU-ID(21233377200),AU-ID(21233377200) OR
1,Gentile,Michelle,S,Radiation Oncology,msg842,Assistant Professor,,Clinician-Educator,Yes,56018970700,AU-ID(56018970700),AU-ID(56018970700) OR
2,Gius,David,R,Radiation Oncology,drg815,Professor,,Investigator,Yes,7003610066,AU-ID(7003610066),AU-ID(7003610066) OR
3,Hayes,John,P,Radiation Oncology,jph125,Assistant Professor,,Clinician-Educator,Yes,55313350000,AU-ID(55313350000),AU-ID(55313350000) OR
4,Kalapurakal,John,A,Radiation Oncology,jak801,Professor,,Clinician-Educator,Yes,7003993738,AU-ID(7003993738),AU-ID(7003993738) OR


In [3]:
#List the column names in the dataframe
radiation_oncology_df.columns

Index(['last_name', 'first_name', 'mi', 'department', 'netid', 'position',
       'division', 'career_track', 'in_elements?', 'scopus_author_id',
       'scopus_search', 'unnamed:_11'],
      dtype='object')

In [4]:
#Check the data type in the dataframe columns called scopus_author_id and scopus_search
radiation_oncology_df.scopus_author_id.dtype
#radiation_oncology_df.scopus_search.dtype

dtype('int64')

In [5]:
#Change the data type in the dataframe column called "scopus_author_id" from int64 to a string. 
radiation_oncology_df['scopus_author_id'] = radiation_oncology_df['scopus_author_id'].astype(str)

In [6]:
#Check the data type in the dataframe columns called scopus_author_id and scopus_search
radiation_oncology_df.scopus_author_id.dtype
#radiation_oncology_df.scopus_search.dtype

dtype('O')

In [7]:
#Save the column called scopus_author_id to a list called Author_ID_List
author_ID_List = radiation_oncology_df['scopus_author_id'].tolist()
print(author_ID_List)

['21233377200', '56018970700', '7003610066', '55313350000', '7003993738', '24448583300', '7102661470', '57207807061', '56443683800', '6701754514', '36143455500', '24336584500', '7005165328', '453']


In [8]:
#Clean the Author_ID_List to remove nan
cleaned_Author_ID_List = [x for x in author_ID_List if str(x) != 'nan']
print(cleaned_Author_ID_List)

['21233377200', '56018970700', '7003610066', '55313350000', '7003993738', '24448583300', '7102661470', '57207807061', '56443683800', '6701754514', '36143455500', '24336584500', '7005165328', '453']


In [9]:
#Add the necessary syntax to the cleaned_Author_ID_List
scopus_Mulitple_AuthorID_Query = []
for x in cleaned_Author_ID_List:
    authorID_string = "".join(("AU-ID(", x,")"))
    print(authorID_string)
    scopus_Mulitple_AuthorID_Query.append(authorID_string)
    
print(scopus_Mulitple_AuthorID_Query)




AU-ID(21233377200)
AU-ID(56018970700)
AU-ID(7003610066)
AU-ID(55313350000)
AU-ID(7003993738)
AU-ID(24448583300)
AU-ID(7102661470)
AU-ID(57207807061)
AU-ID(56443683800)
AU-ID(6701754514)
AU-ID(36143455500)
AU-ID(24336584500)
AU-ID(7005165328)
AU-ID(453)
['AU-ID(21233377200)', 'AU-ID(56018970700)', 'AU-ID(7003610066)', 'AU-ID(55313350000)', 'AU-ID(7003993738)', 'AU-ID(24448583300)', 'AU-ID(7102661470)', 'AU-ID(57207807061)', 'AU-ID(56443683800)', 'AU-ID(6701754514)', 'AU-ID(36143455500)', 'AU-ID(24336584500)', 'AU-ID(7005165328)', 'AU-ID(453)']


In [10]:
# scopus_Search_List = radiation_oncology_df['scopus_search'].tolist()
# print(scopus_Search_List)

In [11]:
# cleaned_Scopus_Search_List = [x for x in scopus_Search_List if str(x) != 'nan']
# print(cleaned_Scopus_Search_List)

In [12]:
#https://dev.elsevier.com/guides/ScopusSearchViews.htm
#https://stackoverflow.com/questions/53558837/python-loop-to-pull-api-data-for-iterating-urls
#https://stackoverflow.com/questions/36410800/python-3-parse-json-from-multiple-api-requests-into-a-list-and-output-to-a-fil
#https://www.pluralsight.com/guides/web-scraping-with-request-python

multiple_author_list = []
multiple_author_dict = {}
# outfilepath = "multiple_author_json.json"
# keys = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"]

def get_scopus_articles(scopus_Mulitple_AuthorID_Query):
    
    for authorid in scopus_Mulitple_AuthorID_Query:
        url = "http://api.elsevier.com/content/search/scopus?"
        fieldList = ["dc:identifier", "eid", "dc:title","prism:aggregationType", "subtype", "citedby-count",
                     "prism:publicationName","prism:volume","prism:issueIdentifier", "prism:pageRange", 
                     "prism:coverDate", "prism:doi","pubmed-id", "authid", "authname"]
                    
        headers = {
             "X-ELS-APIKey": api_key,
             'Accept':'application/json'
        }
        parameters = {
            "query": authorid,
            "field": ",".join(fieldList),
            "date": "2002-2003"
        }
        
        #Make the API request 
        single_author_response = requests.get(url, headers=headers, params=parameters)
        #print(single_author_response.url)
        #print(single_author_response.status_code)
        
              
        #Append each single_author_dict response to multiple_author_list to create a list of dictionaries
        single_author_dict = single_author_response.json()
        #print(type(single_author_dict)) 
        #print(single_author_dict)
        multiple_author_list.append(single_author_dict.copy())
    
    return multiple_author_list
       
get_scopus_articles(scopus_Mulitple_AuthorID_Query)



[{'search-results': {'opensearch:totalResults': '0',
   'opensearch:startIndex': '0',
   'opensearch:itemsPerPage': '0',
   'opensearch:Query': {'@role': 'request',
    '@searchTerms': 'AU-ID(21233377200)',
    '@startPage': '0'},
   'link': [{'@_fa': 'true',
     '@ref': 'self',
     '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=AU-ID%2821233377200%29&field=dc%3Aidentifier%2Ceid%2Cdc%3Atitle%2Cprism%3AaggregationType%2Csubtype%2Ccitedby-count%2Cprism%3ApublicationName%2Cprism%3Avolume%2Cprism%3AissueIdentifier%2Cprism%3ApageRange%2Cprism%3AcoverDate%2Cprism%3Adoi%2Cpubmed-id%2Cauthid%2Cauthname&date=2002-2003',
     '@type': 'application/json'}],
   'entry': [{'@_fa': 'true', 'error': 'Result set was empty'}]}},
 {'search-results': {'opensearch:totalResults': '1',
   'opensearch:startIndex': '0',
   'opensearch:itemsPerPage': '1',
   'opensearch:Query': {'@role': 'request',
    '@searchTerms': 'AU-ID(56018970700)',
    '@startPage': '0'},
   'link': [

In [13]:
#https://stackoverflow.com/questions/48177934/flatten-or-unpack-list-of-nested-dicts-in-dataframe
#https://stackoverflow.com/questions/50161070/convert-list-of-dicts-of-dict-into-dataframe
#https://stackoverflow.com/questions/43984865/python-having-trouble-returning-a-pandas-data-frame-from-a-user-defined-functio
#https://stackoverflow.com/questions/37668291/flatten-double-nested-json

def make_scopus_articles_df(multiple_author_list):
        #final_list = json_normalize(multiple_author_list, meta=["search-results"], record_path=["search-results", "entry"])
    scopus_articles_df = pd.DataFrame.from_dict(json_normalize(multiple_author_list, meta=["search-results"], record_path=["search-results", "entry"]),orient="columns")
    
    return scopus_articles_df

scopus_articles_df = make_scopus_articles_df(multiple_author_list)
make_scopus_articles_df(multiple_author_list)

Unnamed: 0,@_fa,author,citedby-count,dc:identifier,dc:title,eid,error,prism:aggregationType,prism:coverDate,prism:doi,prism:issueIdentifier,prism:pageRange,prism:publicationName,prism:url,prism:volume,pubmed-id,subtype,subtypeDescription,search-results
0,True,,,,,,Result set was empty,,,,,,,,,,,,"{'opensearch:totalResults': '0', 'opensearch:s..."
1,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0042128695,Targeting colon cancer cells with genistein-17...,2-s2.0-0042128695,,Journal,2003-01-01,,5,955-959,International journal of oncology,https://api.elsevier.com/content/abstract/scop...,22.0,12684659.0,ar,Article,"{'opensearch:totalResults': '1', 'opensearch:s..."
2,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",157.0,SCOPUS_ID:9144261127,Geldanamycin and 17-Allylamino-17-demethoxygel...,2-s2.0-9144261127,,Journal,2003-12-15,,24,8984-8995,Cancer Research,https://api.elsevier.com/content/abstract/scop...,63.0,14695217.0,ar,Article,"{'opensearch:totalResults': '15', 'opensearch:..."
3,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0347949519,Inhibition of cyclooxygenase-2 with NS-398 and...,2-s2.0-0347949519,,Journal,2003-11-01,10.1080/09553000310001621400,11,879-888,International Journal of Radiation Biology,https://api.elsevier.com/content/abstract/scop...,79.0,14698956.0,ar,Article,"{'opensearch:totalResults': '15', 'opensearch:..."
4,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",26.0,SCOPUS_ID:0042383099,Those in gene therapy should pay closer attent...,2-s2.0-0042383099,,Journal,2003-10-01,10.1016/S0360-3016(03)00421-8,2,597-599,International Journal of Radiation Oncology Bi...,https://api.elsevier.com/content/abstract/scop...,57.0,12957278.0,le,Letter,"{'opensearch:totalResults': '15', 'opensearch:..."
5,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",113.0,SCOPUS_ID:0038407274,2-Deoxy-D-glucose-induced cytotoxicity and rad...,2-s2.0-0038407274,,Journal,2003-06-15,,12,3413-3417,Cancer Research,https://api.elsevier.com/content/abstract/scop...,63.0,12810678.0,ar,Article,"{'opensearch:totalResults': '15', 'opensearch:..."
6,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037215656,The holy grail of radiation oncology: Lessons ...,2-s2.0-0037215656,,Journal,2003-01-01,10.1016/S0360-3016(02)03861-0,1,3-4,International Journal of Radiation Oncology Bi...,https://api.elsevier.com/content/abstract/scop...,55.0,12504029.0,ed,Editorial,"{'opensearch:totalResults': '15', 'opensearch:..."
7,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",0.0,SCOPUS_ID:0037262512,Clinical Trials Referral Resource. Clinical tr...,2-s2.0-0037262512,,Journal,2003-01-01,,1,84-89,"Oncology (Williston Park, N.Y.)",https://api.elsevier.com/content/abstract/scop...,17.0,12599933.0,ar,Article,"{'opensearch:totalResults': '15', 'opensearch:..."
8,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",27.0,SCOPUS_ID:0037216745,Inhibition of stress-inducible kinase pathways...,2-s2.0-0037216745,,Journal,2003-01-01,10.1128/MCB.23.1.322-334.2003,1,322-334,Molecular and Cellular Biology,https://api.elsevier.com/content/abstract/scop...,23.0,12482984.0,ar,Article,"{'opensearch:totalResults': '15', 'opensearch:..."
9,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037032514,Treatment of nasopharyngeal cancer: Raising th...,2-s2.0-0037032514,,Journal,2002-11-06,,21,1594-1595,Journal of the National Cancer Institute,https://api.elsevier.com/content/abstract/scop...,94.0,12419779.0,ed,Editorial,"{'opensearch:totalResults': '15', 'opensearch:..."


In [28]:
#https://stackoverflow.com/questions/29325458/dictionary-column-in-pandas-dataframe/29330853
def flatten_search_results(scopus_articles_df):
    remove_searchresults_nest = pd.concat([scopus_articles_df.drop(['search-results'], axis=1), scopus_articles_df['search-results'].apply(pd.Series)], axis=1, join="outer")
    remove_opensearchQuery_nest = pd.concat([remove_searchresults_nest.drop(['opensearch:Query'], axis=1), remove_searchresults_nest['opensearch:Query'].apply(pd.Series)], axis=1, join="outer")
    remove_opensearchQuery_nest = pd.concat([remove_searchresults_nest.drop(['opensearch:Query'], axis=1), remove_searchresults_nest['opensearch:Query'].apply(pd.Series)], axis=1, join="outer")
    return remove_opensearchQuery_nest

scopus_flatten_search_results_df = flatten_search_results(scopus_articles_df)
flatten_search_results(scopus_articles_df)


Unnamed: 0,@_fa,author,citedby-count,dc:identifier,dc:title,eid,error,prism:aggregationType,prism:coverDate,prism:doi,...,subtype,subtypeDescription,opensearch:totalResults,opensearch:startIndex,opensearch:itemsPerPage,link,entry,@role,@searchTerms,@startPage
0,True,,,,,,Result set was empty,,,,...,,,0,0,0,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'error': 'Result set was emp...",request,AU-ID(21233377200),0
1,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0042128695,Targeting colon cancer cells with genistein-17...,2-s2.0-0042128695,,Journal,2003-01-01,,...,ar,Article,1,0,1,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(56018970700),0
2,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",157.0,SCOPUS_ID:9144261127,Geldanamycin and 17-Allylamino-17-demethoxygel...,2-s2.0-9144261127,,Journal,2003-12-15,,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
3,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0347949519,Inhibition of cyclooxygenase-2 with NS-398 and...,2-s2.0-0347949519,,Journal,2003-11-01,10.1080/09553000310001621400,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
4,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",26.0,SCOPUS_ID:0042383099,Those in gene therapy should pay closer attent...,2-s2.0-0042383099,,Journal,2003-10-01,10.1016/S0360-3016(03)00421-8,...,le,Letter,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
5,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",113.0,SCOPUS_ID:0038407274,2-Deoxy-D-glucose-induced cytotoxicity and rad...,2-s2.0-0038407274,,Journal,2003-06-15,,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
6,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037215656,The holy grail of radiation oncology: Lessons ...,2-s2.0-0037215656,,Journal,2003-01-01,10.1016/S0360-3016(02)03861-0,...,ed,Editorial,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
7,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",0.0,SCOPUS_ID:0037262512,Clinical Trials Referral Resource. Clinical tr...,2-s2.0-0037262512,,Journal,2003-01-01,,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
8,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",27.0,SCOPUS_ID:0037216745,Inhibition of stress-inducible kinase pathways...,2-s2.0-0037216745,,Journal,2003-01-01,10.1128/MCB.23.1.322-334.2003,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
9,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037032514,Treatment of nasopharyngeal cancer: Raising th...,2-s2.0-0037032514,,Journal,2002-11-06,,...,ed,Editorial,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0


In [15]:
#remove_opensearchQuery_nest.to_csv (r'C:\Users\keg827\Documents\10. WorkStuff_KEG\scopusAPIrequests\export_dataframe.csv', index = None, header=True)

In [16]:
#https://stackoverflow.com/questions/48637219/iterate-over-a-dictionary-of-list-of-dictionaries-in-pandas-dataframe
# authorids = []
# authornames = []


# for item in remove_opensearchQuery_nest["author"][1]:
#     #print(item.keys())
#     #print(item.values())
#     #print(item["authid"])
#     authorids.append(item["authid"])
#     authornames.append(item["authname"])
# print(authornames)
# print(authorids)  


In [17]:
def fix_empty_author(scopus_flatten_search_results_df):

    for row in scopus_flatten_search_results_df.loc[scopus_flatten_search_results_df.author.isnull(), 'author'].index:
        scopus_flatten_search_results_df.at[row, 'author'] = []
    
    return scopus_flatten_search_results_df

scopus_fix_empty_author_df = fix_empty_author(scopus_flatten_search_results_df)
fix_empty_author(scopus_flatten_search_results_df)

Unnamed: 0,@_fa,author,citedby-count,dc:identifier,dc:title,eid,error,prism:aggregationType,prism:coverDate,prism:doi,...,subtype,subtypeDescription,opensearch:totalResults,opensearch:startIndex,opensearch:itemsPerPage,link,entry,@role,@searchTerms,@startPage
0,True,[],,,,,Result set was empty,,,,...,,,0,0,0,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'error': 'Result set was emp...",request,AU-ID(21233377200),0
1,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0042128695,Targeting colon cancer cells with genistein-17...,2-s2.0-0042128695,,Journal,2003-01-01,,...,ar,Article,1,0,1,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(56018970700),0
2,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",157.0,SCOPUS_ID:9144261127,Geldanamycin and 17-Allylamino-17-demethoxygel...,2-s2.0-9144261127,,Journal,2003-12-15,,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
3,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0347949519,Inhibition of cyclooxygenase-2 with NS-398 and...,2-s2.0-0347949519,,Journal,2003-11-01,10.1080/09553000310001621400,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
4,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",26.0,SCOPUS_ID:0042383099,Those in gene therapy should pay closer attent...,2-s2.0-0042383099,,Journal,2003-10-01,10.1016/S0360-3016(03)00421-8,...,le,Letter,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
5,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",113.0,SCOPUS_ID:0038407274,2-Deoxy-D-glucose-induced cytotoxicity and rad...,2-s2.0-0038407274,,Journal,2003-06-15,,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
6,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037215656,The holy grail of radiation oncology: Lessons ...,2-s2.0-0037215656,,Journal,2003-01-01,10.1016/S0360-3016(02)03861-0,...,ed,Editorial,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
7,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",0.0,SCOPUS_ID:0037262512,Clinical Trials Referral Resource. Clinical tr...,2-s2.0-0037262512,,Journal,2003-01-01,,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
8,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",27.0,SCOPUS_ID:0037216745,Inhibition of stress-inducible kinase pathways...,2-s2.0-0037216745,,Journal,2003-01-01,10.1128/MCB.23.1.322-334.2003,...,ar,Article,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0
9,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037032514,Treatment of nasopharyngeal cancer: Raising th...,2-s2.0-0037032514,,Journal,2002-11-06,,...,ed,Editorial,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0


In [18]:
# new_df.reindex(columns=[*new_df.columns.tolist(), 'author_id', 'author_name'])


In [19]:
# for row in new_df.loc[new_df.author_id.isnull(), 'author_id'].index:
#     new_df.at[row, 'author_id'] = []
    
# new_df

#scopus_flatten_search_results_df.head()
#scopus_flatten_search_results_df.index

In [20]:
authorid_dict = OrderedDict()
authorname_dict= OrderedDict() 



def flatten_author(scopus_fix_empty_author_df):
    authorids = []
    #authorid_dict = OrderedDict()
    authornames = []
    #authorname_dict= OrderedDict() 
    for i in range(len(scopus_fix_empty_author_df)):
        #print(scopus_fix_empty_author_df.loc[i, "author"]) 
        column = scopus_fix_empty_author_df.loc[i, "author"]
        #print(i)
        #print(column)

        for item in column:
            #print(item)
            #print(item["authid"])
            #print(column)
            authorids.append(item["authid"])
            authornames.append(item["authname"])
            #print(authorids)
         
        #print(i)
        authorid_dict[i] = authorids
        authorname_dict[i]= authornames
        authorids=[]
        authornames=[]
        
    return authorid_dict, authorname_dict

#print(authorid_dict)
#print(authorname_dict)
flatten_author(scopus_fix_empty_author_df)
    


(OrderedDict([(0, []),
              (1,
               ['56018970700',
                '6701662630',
                '7404024068',
                '6701854664',
                '7102541014',
                '7005140598',
                '7102955789',
                '6701449622']),
              (2,
               ['7005370416',
                '57207801922',
                '7102211768',
                '6603982052',
                '7003524707',
                '8610676300',
                '35830961200',
                '7003569813',
                '7202169291',
                '7005392044',
                '55615581300',
                '35400609400',
                '7003610066']),
              (3,
               ['7005370416',
                '57207801922',
                '57193119887',
                '7004715706',
                '6603982052',
                '7005433388',
                '7003610066']),
              (4, ['36046300600', '35408019300', '6506436385', '700361

In [21]:
# for key, value in authorname_dict.items():
#     print(key, value)

In [22]:
def add_author_info(scopus_fix_empty_author_df):
    scopus_fix_empty_author_df["author_names"] = pd.Series(authorname_dict)
    scopus_fix_empty_author_df["author_ids"] = pd.Series(authorid_dict)
    return scopus_fix_empty_author_df

scopus_add_author_info_df = add_author_info(scopus_fix_empty_author_df)
add_author_info(scopus_fix_empty_author_df)

Unnamed: 0,@_fa,author,citedby-count,dc:identifier,dc:title,eid,error,prism:aggregationType,prism:coverDate,prism:doi,...,opensearch:totalResults,opensearch:startIndex,opensearch:itemsPerPage,link,entry,@role,@searchTerms,@startPage,author_names,author_ids
0,True,[],,,,,Result set was empty,,,,...,0,0,0,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'error': 'Result set was emp...",request,AU-ID(21233377200),0,[],[]
1,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0042128695,Targeting colon cancer cells with genistein-17...,2-s2.0-0042128695,,Journal,2003-01-01,,...,1,0,1,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(56018970700),0,"[Gentile M., Vasu C., Green A., Murillo G., Da...","[56018970700, 6701662630, 7404024068, 67018546..."
2,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",157.0,SCOPUS_ID:9144261127,Geldanamycin and 17-Allylamino-17-demethoxygel...,2-s2.0-9144261127,,Journal,2003-12-15,,...,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Bisht K., Bradbury C., Mattson D., Kaushal A....","[7005370416, 57207801922, 7102211768, 66039820..."
3,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0347949519,Inhibition of cyclooxygenase-2 with NS-398 and...,2-s2.0-0347949519,,Journal,2003-11-01,10.1080/09553000310001621400,...,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Bisht K., Bradbury C., Zoberi I., Curry H., K...","[7005370416, 57207801922, 57193119887, 7004715..."
4,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",26.0,SCOPUS_ID:0042383099,Those in gene therapy should pay closer attent...,2-s2.0-0042383099,,Journal,2003-10-01,10.1016/S0360-3016(03)00421-8,...,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Dewhirst M., Sneed P., Karimpour S., Gius D.]","[36046300600, 35408019300, 6506436385, 7003610..."
5,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",113.0,SCOPUS_ID:0038407274,2-Deoxy-D-glucose-induced cytotoxicity and rad...,2-s2.0-0038407274,,Journal,2003-06-15,,...,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Lin X., Zhang F., Bradbury C., Kaushal A., Li...","[7404513477, 57199242888, 57207801922, 6603982..."
6,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037215656,The holy grail of radiation oncology: Lessons ...,2-s2.0-0037215656,,Journal,2003-01-01,10.1016/S0360-3016(02)03861-0,...,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Karimpour S., Gius D.]","[6506436385, 7003610066]"
7,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",0.0,SCOPUS_ID:0037262512,Clinical Trials Referral Resource. Clinical tr...,2-s2.0-0037262512,,Journal,2003-01-01,,...,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Trimble E., Schoenfeldt M., Streicher H., Giu...","[7005267919, 6603616084, 7005313461, 700361006..."
8,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",27.0,SCOPUS_ID:0037216745,Inhibition of stress-inducible kinase pathways...,2-s2.0-0037216745,,Journal,2003-01-01,10.1128/MCB.23.1.322-334.2003,...,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Ohiro Y., Usheva A., Kobayashi S., Duffy S., ...","[8660958800, 56259611200, 57199809113, 5719656..."
9,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037032514,Treatment of nasopharyngeal cancer: Raising th...,2-s2.0-0037032514,,Journal,2002-11-06,,...,15,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Gius D., Coleman C.]","[7003610066, 7201507251]"


In [23]:
#https://stackoverflow.com/questions/32913960/python-regex-remove-a-pattern-at-the-end-of-string
#https://stackoverflow.com/questions/16842001/copy-text-between-parentheses-in-pandas-dataframe-column-into-another-column
#https://stackoverflow.com/questions/37593550/pandas-replacing-elements-not-working

def fix_search_term(scopus_add_author_info_df):
    scopus_add_author_info_df['scopus_author_id_api'] = scopus_add_author_info_df['@searchTerms']
    scopus_fix_empty_author_df["scopus_author_id_api"].replace(r'[^(]*\(|\)[^)]*', '', inplace=True,regex=True)
    return scopus_fix_empty_author_df

scopus_fix_search_term_df = fix_search_term(scopus_add_author_info_df)
fix_search_term(scopus_add_author_info_df)

Unnamed: 0,@_fa,author,citedby-count,dc:identifier,dc:title,eid,error,prism:aggregationType,prism:coverDate,prism:doi,...,opensearch:startIndex,opensearch:itemsPerPage,link,entry,@role,@searchTerms,@startPage,author_names,author_ids,scopus_author_id_api
0,True,[],,,,,Result set was empty,,,,...,0,0,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'error': 'Result set was emp...",request,AU-ID(21233377200),0,[],[],21233377200
1,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0042128695,Targeting colon cancer cells with genistein-17...,2-s2.0-0042128695,,Journal,2003-01-01,,...,0,1,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(56018970700),0,"[Gentile M., Vasu C., Green A., Murillo G., Da...","[56018970700, 6701662630, 7404024068, 67018546...",56018970700
2,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",157.0,SCOPUS_ID:9144261127,Geldanamycin and 17-Allylamino-17-demethoxygel...,2-s2.0-9144261127,,Journal,2003-12-15,,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Bisht K., Bradbury C., Mattson D., Kaushal A....","[7005370416, 57207801922, 7102211768, 66039820...",7003610066
3,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",6.0,SCOPUS_ID:0347949519,Inhibition of cyclooxygenase-2 with NS-398 and...,2-s2.0-0347949519,,Journal,2003-11-01,10.1080/09553000310001621400,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Bisht K., Bradbury C., Zoberi I., Curry H., K...","[7005370416, 57207801922, 57193119887, 7004715...",7003610066
4,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",26.0,SCOPUS_ID:0042383099,Those in gene therapy should pay closer attent...,2-s2.0-0042383099,,Journal,2003-10-01,10.1016/S0360-3016(03)00421-8,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Dewhirst M., Sneed P., Karimpour S., Gius D.]","[36046300600, 35408019300, 6506436385, 7003610...",7003610066
5,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",113.0,SCOPUS_ID:0038407274,2-Deoxy-D-glucose-induced cytotoxicity and rad...,2-s2.0-0038407274,,Journal,2003-06-15,,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Lin X., Zhang F., Bradbury C., Kaushal A., Li...","[7404513477, 57199242888, 57207801922, 6603982...",7003610066
6,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037215656,The holy grail of radiation oncology: Lessons ...,2-s2.0-0037215656,,Journal,2003-01-01,10.1016/S0360-3016(02)03861-0,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Karimpour S., Gius D.]","[6506436385, 7003610066]",7003610066
7,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",0.0,SCOPUS_ID:0037262512,Clinical Trials Referral Resource. Clinical tr...,2-s2.0-0037262512,,Journal,2003-01-01,,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Trimble E., Schoenfeldt M., Streicher H., Giu...","[7005267919, 6603616084, 7005313461, 700361006...",7003610066
8,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",27.0,SCOPUS_ID:0037216745,Inhibition of stress-inducible kinase pathways...,2-s2.0-0037216745,,Journal,2003-01-01,10.1128/MCB.23.1.322-334.2003,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Ohiro Y., Usheva A., Kobayashi S., Duffy S., ...","[8660958800, 56259611200, 57199809113, 5719656...",7003610066
9,True,"[{'@_fa': 'true', 'author-url': 'https://api.e...",2.0,SCOPUS_ID:0037032514,Treatment of nasopharyngeal cancer: Raising th...,2-s2.0-0037032514,,Journal,2002-11-06,,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Gius D., Coleman C.]","[7003610066, 7201507251]",7003610066


In [27]:
#https://stackoverflow.com/questions/20375561/joining-pandas-dataframes-by-column-names
merged_df = pd.merge(radiation_oncology_df, scopus_fix_search_term_df, how='inner', left_on='scopus_author_id', right_on='scopus_author_id_api',  )    
merged_df

Unnamed: 0,last_name,first_name,mi,department,netid,position,division,career_track,in_elements?,scopus_author_id,...,opensearch:startIndex,opensearch:itemsPerPage,link,entry,@role,@searchTerms,@startPage,author_names,author_ids,scopus_author_id_api
0,Donnelly,Eric,D,Radiation Oncology,edo615,Associate Professor,,Clinician-Educator,Yes,21233377200,...,0,0,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'error': 'Result set was emp...",request,AU-ID(21233377200),0,[],[],21233377200
1,Gentile,Michelle,S,Radiation Oncology,msg842,Assistant Professor,,Clinician-Educator,Yes,56018970700,...,0,1,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(56018970700),0,"[Gentile M., Vasu C., Green A., Murillo G., Da...","[56018970700, 6701662630, 7404024068, 67018546...",56018970700
2,Gius,David,R,Radiation Oncology,drg815,Professor,,Investigator,Yes,7003610066,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Bisht K., Bradbury C., Mattson D., Kaushal A....","[7005370416, 57207801922, 7102211768, 66039820...",7003610066
3,Gius,David,R,Radiation Oncology,drg815,Professor,,Investigator,Yes,7003610066,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Bisht K., Bradbury C., Zoberi I., Curry H., K...","[7005370416, 57207801922, 57193119887, 7004715...",7003610066
4,Gius,David,R,Radiation Oncology,drg815,Professor,,Investigator,Yes,7003610066,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Dewhirst M., Sneed P., Karimpour S., Gius D.]","[36046300600, 35408019300, 6506436385, 7003610...",7003610066
5,Gius,David,R,Radiation Oncology,drg815,Professor,,Investigator,Yes,7003610066,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Lin X., Zhang F., Bradbury C., Kaushal A., Li...","[7404513477, 57199242888, 57207801922, 6603982...",7003610066
6,Gius,David,R,Radiation Oncology,drg815,Professor,,Investigator,Yes,7003610066,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Karimpour S., Gius D.]","[6506436385, 7003610066]",7003610066
7,Gius,David,R,Radiation Oncology,drg815,Professor,,Investigator,Yes,7003610066,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Trimble E., Schoenfeldt M., Streicher H., Giu...","[7005267919, 6603616084, 7005313461, 700361006...",7003610066
8,Gius,David,R,Radiation Oncology,drg815,Professor,,Investigator,Yes,7003610066,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Ohiro Y., Usheva A., Kobayashi S., Duffy S., ...","[8660958800, 56259611200, 57199809113, 5719656...",7003610066
9,Gius,David,R,Radiation Oncology,drg815,Professor,,Investigator,Yes,7003610066,...,0,15,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...","[{'@_fa': 'true', 'prism:url': 'https://api.el...",request,AU-ID(7003610066),0,"[Gius D., Coleman C.]","[7003610066, 7201507251]",7003610066


In [29]:
#merged_df.to_csv (r'C:\Users\keg827\Documents\10. WorkStuff_KEG\scopusAPIrequests\merged_dataframe.csv', index = None, header=True)

In [None]:
#NLM CITATION FORMAT
#THIS FEATURE IS NOT YET DONE
#Brantingham JW, Bonnefin D, Perle SM, Cassa TK, Globe G, Pribicevic M, Hicks M, Korporaal C. Manipulative therapy for lower extremity conditions: update of a literature review. J Manipulative Physiol Ther. 2012 Feb;35(2):127-66.



In [43]:
#FLAG FIRST AND LAST AUTHORS
#BOLD AUTHOR NAME ON EXPORT
#THIS DOES IS NOT YET DONE

#https://stackoverflow.com/questions/52819114/pandas-to-csv-with-some-words-as-bold
#https://stackoverflow.com/questions/41212273/pandaspython-fill-empty-cells-with-with-previous-row-value
#https://stackoverflow.com/questions/52651074/python-pandas-equivalent-to-the-excel-fill-handle
#https://stackoverflow.com/questions/51938245/display-dataframe-values-in-bold-font-in-one-row-only
#https://stackoverflow.com/questions/54512133/string-matching-of-two-pandas-series

# author_index=[]

# for i in range(len(merged_df)):
#     #print(merged_df.loc[i, "scopus_author_id_api"]) 
#     author_id = merged_df.loc[i, "scopus_author_id"]
#     #print(author_id)
#     for id_list in merged_df["author_ids"]:
#         #print(id_list)
#         if author_id == matchID:
#              print(matchID.index)
#         else:
#              print("did not match")

# def CheckDF(df1,df2):
#     for (item, Value),(item1, Value1) in 
#     zip(df1['account'].iteritems(),df2['account'].iteritems()):
#         if len(str(Value).strip()) == len(str(Value1).strip()):
#             print(True)
#         else:
#             print(False)

# CheckDF(df1,df2)