# API - IEEE

# Imports

In [345]:
import pandas as pd
import numpy as np
import os
import glob
import sqlite3
import yaml
import json
import requests
from datetime import datetime

import warnings
warnings.simplefilter(action = 'ignore', category = FutureWarning)

from functions import read_bib
from functions import load_bib
from functions import write_yaml
from functions import write_json
from functions import write_csv
from functions import read_yaml

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

# Params

In [337]:
#API_Params
config_api = read_yaml("../05_Config/config_api.yaml")
print("config_api: \n", config_api)

#API Elsevier
config_query = read_yaml("../05_Config/config_query.yaml")
print("config_query: \n", config_query)

url = f"https://api.elsevier.com/content/search/sciencedirect?query={config_query['api_elsevier_string_search']}&apiKey={config_api['apikey_elsevier']}"
print(url)



../05_Config/config_api.yaml read successfully

config_api: 
 {'apikey_ieee': '3jk2p7za5jtd7a39gwknygvk', 'format': 'json', 'sort_order': 'asc', 'sort_field': 'article', 'apikey_elsevier': '7f59af901d2d86f78a1fd60c1bf9426a'}


../05_Config/config_query.yaml read successfully

config_query: 
 {'call_api_ieee': False, 'call_api_eslsevier': False, 'max_api_ieee_pagination': 2, 'api_ieee_field_search': 'article_title', 'api_ieee_string_search': 'big+data', 'api_elsevier_string_search': 'big+data', 'path_db': '../03_OutputFiles/doi.db', 'use_query_sql_bib_csv': True, 'use_query_sql_ieee': True, 'query_bib_csv': "SELECT * FROM bib_csv WHERE abstract LIKE '%big data%' OR abstract LIKE '%quality%'", 'query_ieee': "SELECT * FROM api_ieee WHERE  abstract LIKE '%big data%' OR abstract LIKE '%quality%'", 'output_extensions': ['json', 'csv', 'yaml']}
https://api.elsevier.com/content/search/sciencedirect?query=big+data&apiKey=7f59af901d2d86f78a1fd60c1bf9426a


In [338]:
req_elsevier = requests.get(url)

if req_elsevier.status_code >= 400:
    print('Status code: ',req_elsevier.status_code, '\nProvavelmente foi atingido o limite de consultas diario!')
else:
    dict_api_elsevier = req_elsevier.json()

dict_api_elsevier = req_elsevier.json()

for i, j in enumerate (dict_api_elsevier['search-results']['entry']):
    print(i,j,'\n\n')

0 {'@_fa': 'true', 'load-date': '2022-11-11T00:00:00.000Z', 'link': [{'@_fa': 'true', '@ref': 'self', '@href': 'https://api.elsevier.com/content/article/pii/S2214579622000521'}, {'@_fa': 'true', '@ref': 'scidir', '@href': 'https://www.sciencedirect.com/science/article/pii/S2214579622000521?dgcid=api_sd_search-api-endpoint'}], 'dc:identifier': 'DOI:10.1016/j.bdr.2022.100358', 'prism:url': 'https://api.elsevier.com/content/article/pii/S2214579622000521', 'dc:title': 'Data-Efficient Performance Modeling for Configurable Big Data Frameworks by Reducing Information Overlap Between Training Examples', 'dc:creator': 'Zhiqiang Liu', 'prism:publicationName': 'Big Data Research', 'prism:volume': '30', 'prism:coverDate': '2022-11-28', 'prism:startingPage': '100358', 'prism:doi': '10.1016/j.bdr.2022.100358', 'openaccess': False, 'pii': 'S2214579622000521', 'authors': {'author': [{'$': 'Zhiqiang Liu'}, {'$': 'Xuanhua Shi'}, {'$': 'Hai Jin'}]}} 


1 {'@_fa': 'true', 'load-date': '2022-11-17T00:00:00

In [364]:
#Parsing json to df
doi = [i['prism:doi'] for i in dict_api_elsevier['search-results']['entry']]
title = [i['dc:title'] for i in dict_api_elsevier['search-results']['entry']]
publisher = [i['prism:publicationName'] for i in dict_api_elsevier['search-results']['entry']]
publication_year = [i['prism:coverDate'][:4] for i in dict_api_elsevier['search-results']['entry']]

#Adjusting Authors feature
authors = [i.get('authors') for i in dict_api_elsevier['search-results']['entry']]
list_index = []
list_authors = []
for i, x in enumerate(authors):
    if type(x.get('author')) == str:
        list_index.append(i)
        list_authors.append(x.get('author'))
    
    else:
        for y in x.get('author'):
            list_index.append(i)
            list_authors.append(y.get('$'))
df_author = pd.DataFrame({'index':list_index, 'authors': list_authors})
df_author = df_author.groupby('index')['authors'].apply(list)
authors = df_author.tolist()

#Creating DataFrame
df_api_elsevier = pd.DataFrame({'doi': doi,
                                'title': title,
                                'publisher': publisher,
                                'publication_year': publication_year,
                                'authors': authors})

#Convert list to string
df_api_elsevier['authors'] = df_api_elsevier['authors'].map(lambda x: ', '.join(x))

#Insert id_search [elsevier_YYYYMMDD_HH_MM_SS]
search_id = datetime.now().strftime("elsevier_%Y%m%d_%H_%M_%S")
df_api_elsevier.insert(0, 'search_id', search_id)
print(df_api_elsevier.dtypes)
df_api_elsevier.head()

search_id           object
doi                 object
title               object
publisher           object
publication_year    object
authors             object
dtype: object


Unnamed: 0,search_id,doi,title,publisher,publication_year,authors
0,elsevier_20221204_19_43_57,10.1016/j.bdr.2022.100358,Data-Efficient Performance Modeling for Config...,Big Data Research,2022,"Zhiqiang Liu, Xuanhua Shi, Hai Jin"
1,elsevier_20221204_19_43_57,10.1016/j.techfore.2022.122154,Examining the role of virtue ethics and big da...,Technological Forecasting and Social Change,2023,"Surajit Bag, Muhammad Sabbir Rahman, Pratibha Ram"
2,elsevier_20221204_19_43_57,10.1016/j.brs.2020.07.018,Large-scale analysis of interindividual variab...,Brain Stimulation,2020,"Daniel T. Corp, Hannah G. K. Bereznicki, Peter..."
3,elsevier_20221204_19_43_57,10.1016/j.apenergy.2022.119986,Digital twin and big data-driven sustainable s...,Applied Energy,2022,"Shuaiyin Ma, Wei Ding, Haidong Yang"
4,elsevier_20221204_19_43_57,10.1016/j.scib.2022.07.015,Measuring and evaluating SDG indicators with B...,Science Bulletin,2022,"Huadong Guo, Dong Liang, Zeeshan Shirazi"


# Export to Database

In [368]:
#Exporting to SQlite database
dbfile = config_query['path_db']
tabela ='api_elsevier'
db = sqlite3.connect(dbfile)
sqlDataTypes={}
for c in df_api_elsevier.columns:
    if df_api_elsevier[c].dtype.kind == 'i':  
        sqlDataTypes[c]='INTEGER'
    elif df_api_elsevier[c].dtype.kind == 'f':
        sqlDataTypes[c]='REAL'
    else:
        sqlDataTypes[c]='TEXT'
df_api_elsevier.to_sql(tabela, index=False, if_exists='append', dtype = sqlDataTypes, con = db)   
db.commit()
db.close() 

In [369]:
#Check db content

db = sqlite3.connect(dbfile)

tabela = 'api_elsevier'

query_api = pd.read_sql_query(f'select * from {tabela} LIMIT 10', db)
display(pd.read_sql_query(f'select count(*) from {tabela}', db))
display(query_api)

Unnamed: 0,count(*)
0,50


Unnamed: 0,search_id,doi,title,publisher,publication_year,authors
0,elsevier_20221204_19_43_57,10.1016/j.bdr.2022.100358,Data-Efficient Performance Modeling for Config...,Big Data Research,2022,"Zhiqiang Liu, Xuanhua Shi, Hai Jin"
1,elsevier_20221204_19_43_57,10.1016/j.techfore.2022.122154,Examining the role of virtue ethics and big da...,Technological Forecasting and Social Change,2023,"Surajit Bag, Muhammad Sabbir Rahman, Pratibha Ram"
2,elsevier_20221204_19_43_57,10.1016/j.brs.2020.07.018,Large-scale analysis of interindividual variab...,Brain Stimulation,2020,"Daniel T. Corp, Hannah G. K. Bereznicki, Peter..."
3,elsevier_20221204_19_43_57,10.1016/j.apenergy.2022.119986,Digital twin and big data-driven sustainable s...,Applied Energy,2022,"Shuaiyin Ma, Wei Ding, Haidong Yang"
4,elsevier_20221204_19_43_57,10.1016/j.scib.2022.07.015,Measuring and evaluating SDG indicators with B...,Science Bulletin,2022,"Huadong Guo, Dong Liang, Zeeshan Shirazi"
5,elsevier_20221204_19_43_57,10.1016/j.heliyon.2022.e11834,"Connotation, characteristics and framework of ...",Heliyon,2022,"Wanguan Qiao, Xue Chen"
6,elsevier_20221204_19_43_57,10.1016/j.urbmob.2022.100027,Simulating micro-level attributes of railway p...,Journal of Urban Mobility,2022,"Eusebio Odiari, Mark Birkin"
7,elsevier_20221204_19_43_57,10.1016/j.jclepro.2022.134261,Digital and intelligent empowerment: Can big d...,Journal of Cleaner Production,2022,"Hongna Tian, Yunfang Li, Yan Zhang"
8,elsevier_20221204_19_43_57,10.1016/j.heliyon.2022.e10312,Impact of big data resources on clinicians’ ac...,Heliyon,2022,"Sufen Wang, Junyi Yuan, Changqing Pan"
9,elsevier_20221204_19_43_57,10.1016/j.techsoc.2022.102114,A csQCA study of value creation in logistics c...,Technology in Society,2022,"Qiaohong Pan, Wenping Luo, Yi Fu"
