# CONFIGURAÇÕES

»» Documentação de interesse:

- Markdown: https://www.markdownguide.org/


## Ativar as bibliotecas de interesse

In [1]:
import os
import sys

In [3]:
import requests
import json

import pickle

import math 
import pandas as pd

## Diretorias


In [3]:
os.getcwd()

'c:\\Users\\paulo\\OneDrive\\ONEDRIVE_CLOUD_DISK\\TRABALHO_AULAS\\AL20242025\\1SEM\\ICD\\projICD2425\\notebooks'

In [4]:
# When this jupyter notebook is opened, the current working directory is the directory where the notebook is saved.
# However, for a better organization of code, we recomend to change the working directory to the root directory of the project.
# Here a windows terminal command to change the working directory is executed. 
# Please, consider to use the right command if you are using another operative system (UNIX).
# Note that the root path is located one level on the top of the folders tree. Thus, we use the relative path symbol "../" to go up one level.

%cd ../

c:\Users\paulo\OneDrive\ONEDRIVE_CLOUD_DISK\TRABALHO_AULAS\AL20242025\1SEM\ICD\projICD2425


# Adicionar os módulos auxiliares

A pasta scripts contém um conjunto de módulos (ficheiros) auxiliares que englobam várias ferramentas (funções) de interesse ao longo do projeto.

In [5]:
# This block of code is used to add the path to the auxiliar modules to the system path.
# Only when the path is added to the system path, the modules can be imported in the notebook as with other
# libraries such as pandas or numpy.


scripts_path = os.path.abspath(os.path.join(r'./scripts'))
scripts_path


'c:\\Users\\paulo\\OneDrive\\ONEDRIVE_CLOUD_DISK\\TRABALHO_AULAS\\AL20242025\\1SEM\\ICD\\projICD2425\\scripts'

In [6]:
# Add the scripts directory to the path
if scripts_path not in sys.path:
    sys.path.append(scripts_path)

In [7]:
print(sys.path)

['c:\\Users\\paulo\\.conda\\envs\\projICD2425\\python313.zip', 'c:\\Users\\paulo\\.conda\\envs\\projICD2425\\DLLs', 'c:\\Users\\paulo\\.conda\\envs\\projICD2425\\Lib', 'c:\\Users\\paulo\\.conda\\envs\\projICD2425', '', 'c:\\Users\\paulo\\.conda\\envs\\projICD2425\\Lib\\site-packages', 'c:\\Users\\paulo\\.conda\\envs\\projICD2425\\Lib\\site-packages\\win32', 'c:\\Users\\paulo\\.conda\\envs\\projICD2425\\Lib\\site-packages\\win32\\lib', 'c:\\Users\\paulo\\.conda\\envs\\projICD2425\\Lib\\site-packages\\Pythonwin', 'c:\\Users\\paulo\\OneDrive\\ONEDRIVE_CLOUD_DISK\\TRABALHO_AULAS\\AL20242025\\1SEM\\ICD\\projICD2425\\scripts']


### Exemplo de uso dos módulos auxiliares


In [8]:
from auxiliar_tools import *


In [9]:
print(print_hello_world())

Hello World!


## Configuração Chave API

In [10]:
from decouple import config

# https://medium.com/@alensabu12xtz/configure-environment-variables-with-python-decouple-b5f5446381a3

MY_SCOPUS_API_KEY = config('MY_SCOPUS_API_KEY')

print('my scopus api key is :', MY_SCOPUS_API_KEY) 

my scopus api key is : d0178a452f02df51b64ba796b0b9b4da


In [11]:
from decouple import Config, RepositoryEnv

DOTENV_FILE = './.env'
env_config = Config(RepositoryEnv(DOTENV_FILE))



# use the Config().get() method as you normally would since 
# decouple.config uses that internally. 
# i.e. config('SECRET_KEY') = env_config.get('SECRET_KEY')
MY_SCOPUS_API_KEY = env_config.get('MY_SCOPUS_API_KEY')

print('my scopus api key is :', MY_SCOPUS_API_KEY) 

my scopus api key is : d0178a452f02df51b64ba796b0b9b4da


# RECOLHA DE DADOS - Parte 1
Pesquisa e recolha de resultados com a API Scoupus API

## Análise exploratória dos resultados de pesquisa

### Exploratório: 1. Pesquisa API

In [12]:
# IMPORTANTE: https://dev.elsevier.com/sc_search_tips.html

user_query = "data mining housing automated valuation model"

response = requests.get("https://api.elsevier.com/content/search/scopus",
                    headers={'Accept':'application/json',
                             'X-ELS-APIKey': MY_SCOPUS_API_KEY},
                         
                    params={    'query' : user_query,
                                #'start' : "1"
                        }

                        )

print(response.url)



https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model


### Exploratório: 2. Analisar resultados / resposta API

In [13]:
print(response.status_code)

200


In [14]:
# A simples invocação da resposta da API (em formato json) imprime o contéudo obtido com a respetiva syntaxe json
# No entanto, como facilmente se depreende desta forma é dificil discernir a estrutura hierárquica dos dados e, desta forma, extrair os diferentes dados em cada nível
# por forma a transformar os dados numa estrutura de dados mais amigável (por exemplo, um data.frame da livraria "pandas", que nos é uma estrutura de dados mais familiar
# para manipulação)
results = response.json()
results


{'search-results': {'opensearch:totalResults': '413',
  'opensearch:startIndex': '0',
  'opensearch:itemsPerPage': '25',
  'opensearch:Query': {'@role': 'request',
   '@searchTerms': 'data mining housing automated valuation model',
   '@startPage': '0'},
  'link': [{'@_fa': 'true',
    '@ref': 'self',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=data+mining+housing+automated+valuation+model',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'first',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=data+mining+housing+automated+valuation+model',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'next',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=25&count=25&query=data+mining+housing+automated+valuation+model',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'last',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=388&count=

In [15]:
# Podemos assim invocar a função "jprint" que definimos inicialmente para obtermos uma visualização mais amigável da estrutura de dados codificada no json
# Esta visualização permite-nos ainda identificar a "localização" (na hierarquia) dos dados de interesse e, portanto, as respetivas chaves (que nos permitem
# aceder a esses mesmos dados)
jprint(response.json())

'{\n      "search-results": {\n            "entry": [\n                  {\n                        "@_fa": "true",\n                        "affiliation": [\n                              {\n                                    "@_fa": "true",\n                                    "affiliation-city": "Zurich",\n                                    "affiliation-country": "Switzerland",\n                                    "affilname": "ETH Z\\u00fcrich"\n                              }\n                        ],\n                        "article-number": "125640",\n                        "citedby-count": "0",\n                        "dc:creator": "Bergmann S.",\n                        "dc:identifier": "SCOPUS_ID:85208664307",\n                        "dc:title": "Machine learning for predicting used car resale prices using granular vehicle equipment information",\n                        "eid": "2-s2.0-85208664307",\n                        "link": [\n                              {\n

In [16]:
results["search-results"]['entry'][0]["affiliation"]


[{'@_fa': 'true',
  'affilname': 'ETH Zürich',
  'affiliation-city': 'Zurich',
  'affiliation-country': 'Switzerland'}]

In [17]:
'affiliation' in results["search-results"]['entry'][0]

True

### Exploratório: 3. Número total de resultados obtidos na pesquisa

In [18]:
number_of_articles_retrieved = results["search-results"]["opensearch:totalResults"]
number_of_articles_retrieved

'413'

In [19]:
number_of_articles_perResultPage = results["search-results"]["opensearch:itemsPerPage"]
number_of_articles_perResultPage

'25'

### Exploratório: 4. Obter dados de cada um dos resultados de pesquisa e transformar numa estrutura de dados amigável (pandas -» data.frame )

#### Opção 1

In [20]:
df = pd.DataFrame.from_records(results["search-results"]['entry'] )
df[:3]

Unnamed: 0,@_fa,link,prism:url,dc:identifier,eid,dc:title,dc:creator,prism:publicationName,prism:issn,prism:volume,...,subtypeDescription,article-number,source-id,openaccess,openaccessFlag,prism:eIssn,prism:issueIdentifier,freetoread,freetoreadLabel,prism:isbn
0,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85208664307,2-s2.0-85208664307,Machine learning for predicting used car resal...,Bergmann S.,Expert Systems with Applications,9574174.0,263,...,Article,125640,24201,1,True,,,,,
1,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85210158477,2-s2.0-85210158477,A GIS-based tool for dynamic assessment of com...,Wilkho R.S.,Journal of Flood Risk Management,,18,...,Article,e13049,18700156719,1,True,1753318X,1.0,{'value': 'all'},{'value': 'All Open Access'},
2,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85207884320,2-s2.0-85207884320,What is the “best” way to measure the relative...,Ruggeri A.,Land Use Policy,2648377.0,148,...,Article,107405,14500,1,True,,,,,


#### Opção 2

In [21]:
df = pd.json_normalize(results["search-results"],['entry'] )
df[:3]

Unnamed: 0,@_fa,link,prism:url,dc:identifier,eid,dc:title,dc:creator,prism:publicationName,prism:issn,prism:volume,...,subtypeDescription,article-number,source-id,openaccess,openaccessFlag,prism:eIssn,prism:issueIdentifier,freetoread.value,freetoreadLabel.value,prism:isbn
0,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85208664307,2-s2.0-85208664307,Machine learning for predicting used car resal...,Bergmann S.,Expert Systems with Applications,9574174.0,263,...,Article,125640,24201,1,True,,,,,
1,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85210158477,2-s2.0-85210158477,A GIS-based tool for dynamic assessment of com...,Wilkho R.S.,Journal of Flood Risk Management,,18,...,Article,e13049,18700156719,1,True,1753318X,1.0,all,All Open Access,
2,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85207884320,2-s2.0-85207884320,What is the “best” way to measure the relative...,Ruggeri A.,Land Use Policy,2648377.0,148,...,Article,107405,14500,1,True,,,,,


In [22]:
df.columns

Index(['@_fa', 'link', 'prism:url', 'dc:identifier', 'eid', 'dc:title',
       'dc:creator', 'prism:publicationName', 'prism:issn', 'prism:volume',
       'prism:pageRange', 'prism:coverDate', 'prism:coverDisplayDate',
       'prism:doi', 'pii', 'citedby-count', 'affiliation',
       'prism:aggregationType', 'subtype', 'subtypeDescription',
       'article-number', 'source-id', 'openaccess', 'openaccessFlag',
       'prism:eIssn', 'prism:issueIdentifier', 'freetoread.value',
       'freetoreadLabel.value', 'prism:isbn'],
      dtype='object')

### Exploratório: 5. Aceder a dados em diferentes níveis hierárquicos

In [23]:
df_affiliation = pd.json_normalize(results["search-results"],['entry', 'affiliation'] )
df_affiliation.columns
#df['prism:doi']
#df_affiliation



Index(['@_fa', 'affilname', 'affiliation-city', 'affiliation-country'], dtype='object')

In [24]:
df_affiliation[:5]



Unnamed: 0,@_fa,affilname,affiliation-city,affiliation-country
0,True,ETH Zürich,Zurich,Switzerland
1,True,College of Engineering,College Station,United States
2,True,Università degli Studi di Padova,Padua,Italy
3,True,Université Abou Bekr Belkaid Tlemcen,Tlemcen,Algeria
4,True,Govt.V.Y.T.PG. Autonomous College,Durg,India


### Exploratório: 5.1 Aceder a dados em diferentes níveis hierárquicos »» POSSÍVEIS DIFICULDADES

Por vezes, nos resultados de pesquisa alguns items podem não conter certos elementos / informação codificada em níveis hierárquicos
inferiores do json.
Por exemplo, a informação sobre a afiliação dos autores pode estar omissa. 
Nestes casos, ao usar a função "pd.json_normalize" para navegar para níveis inferiores pode devolver um erro e inviabilizar o processo automatizado (isto porque a função
"json_normalize" não permite uma forma expedita de solucionar este problema).

Uma estratégia "naife" para ultrapassar esta questão passa por  transformar os dados para um data.frame considerando
apenas o nível inicial da hierarquia do json (ou seja, utilizar a função json_normalize como fizemos para obter o objeto "df" anteriormente).
Como verificamos, certos elementos do data.frame "df" armazenam dados em formato json / dicionário nas suas células.
Ao invés de utilizarmos a função "json_normalize" para obtermos dados nos níveis hierárquicos inferiores, podemos desenvolver o nosso próprio algoritmo
por forma a solucionarmos os problemas de dados omissos.

Assim, uma implementação alternativa do ponto 5, para os dados da coluna "affiliation" seria algo como o que se ilustra no código seguinte.

NOTA: reparem que se optarem por esta estratégia deverão alterar o código da seccção "Automatização global" em conformidade!

In [25]:
import numpy as np 

i=0 #devem utilizar um índice (de linha) para o qual existam dados na coluna "affiliation" do vosso data.frame «df»
AFF_COL_NAMES = pd.json_normalize(df.affiliation[0]).columns

df_affiliation_v2 = pd.DataFrame(columns=AFF_COL_NAMES)

for i in range(len(df.affiliation)) :
    if  type(df.affiliation.iloc[i]) == list :
        df_aux = pd.json_normalize(df.affiliation.iloc[i])
        df_affiliation_v2 = pd.concat([df_affiliation_v2, df_aux], ignore_index=True)

    else:
        emptydf = pd.DataFrame(np.nan, index=[0],columns=AFF_COL_NAMES) 
        df_affiliation_v2 = pd.concat([df_affiliation_v2,emptydf], ignore_index=True)

In [26]:
df_affiliation_v2[:5]

Unnamed: 0,@_fa,affilname,affiliation-city,affiliation-country
0,True,ETH Zürich,Zurich,Switzerland
1,True,College of Engineering,College Station,United States
2,True,Università degli Studi di Padova,Padua,Italy
3,True,Université Abou Bekr Belkaid Tlemcen,Tlemcen,Algeria
4,True,Govt.V.Y.T.PG. Autonomous College,Durg,India


### Exploratório: 6. Combinar dados de diferentes hierarquias num único "data.frame"
https://pandas.pydata.org/docs/getting_started/intro_tutorials/08_combine_dataframes.html 

In [27]:
FIELDS = ['dc:title', "prism:doi", 'dc:creator', 'citedby-count', 'openaccess']
df[FIELDS][:3]

Unnamed: 0,dc:title,prism:doi,dc:creator,citedby-count,openaccess
0,Machine learning for predicting used car resal...,10.1016/j.eswa.2024.125640,Bergmann S.,0,1
1,A GIS-based tool for dynamic assessment of com...,10.1111/jfr3.13049,Wilkho R.S.,0,1
2,What is the “best” way to measure the relative...,10.1016/j.landusepol.2024.107405,Ruggeri A.,0,1


In [28]:
df_all = pd.merge(df[FIELDS], df_affiliation_v2, how='left', left_index=True, right_index=True)
df_all[:3]


Unnamed: 0,dc:title,prism:doi,dc:creator,citedby-count,openaccess,@_fa,affilname,affiliation-city,affiliation-country
0,Machine learning for predicting used car resal...,10.1016/j.eswa.2024.125640,Bergmann S.,0,1,True,ETH Zürich,Zurich,Switzerland
1,A GIS-based tool for dynamic assessment of com...,10.1111/jfr3.13049,Wilkho R.S.,0,1,True,College of Engineering,College Station,United States
2,What is the “best” way to measure the relative...,10.1016/j.landusepol.2024.107405,Ruggeri A.,0,1,True,Università degli Studi di Padova,Padua,Italy


## Automatização global
Recolha de resultados individuais da API, seleção de dados de interesse, criação de estrutura de dados amigável

In [29]:
user_query = "data mining housing automated valuation model"
LEVEL0_FIELDS_OF_INTEREST = ['dc:title', "prism:doi", 'dc:creator']
AFFILIATION_FIELDS_OF_INTEREST = ['affilname', 'affiliation-city', 'affiliation-country']

df_results_list = pd.DataFrame(columns=LEVEL0_FIELDS_OF_INTEREST+AFFILIATION_FIELDS_OF_INTEREST)

cursor = "*"

for i in range(0,math.ceil(int(number_of_articles_retrieved)/int(number_of_articles_perResultPage))) :
#for i in range(0,1) :
    
    
    response = requests.get("https://api.elsevier.com/content/search/scopus",
                    headers={'Accept':'application/json',
                             'X-ELS-APIKey': MY_SCOPUS_API_KEY},
                         
                    params={    'query' : user_query,
                                'cursor' : cursor
                        }

                        )
    print(i)
    print(response.url)
    print(response.status_code)

    if(response.status_code == 200):
        results_aux = response.json()

        df_level1_aux = pd.DataFrame.from_records(results_aux["search-results"]['entry'] )
        df_level1_aux = df_level1_aux[LEVEL0_FIELDS_OF_INTEREST]
        
        df_affil_aux_out =pd.DataFrame(columns=AFF_COL_NAMES)

        for i in range(len(results_aux["search-results"]['entry'])) :

            if 'affiliation' in results["search-results"]['entry'][i]:
                
                df_affil_aux_in = pd.json_normalize(results["search-results"]['entry'][i]['affiliation'])
                df_affil_aux_in = df_affil_aux_in[AFFILIATION_FIELDS_OF_INTEREST]
                df_affil_aux_out = pd.concat([df_affil_aux_in, df_affil_aux_out], axis=0, ignore_index=True)
               

            else:

                df_affil_aux_in = pd.DataFrame(np.nan, index=[0],columns=AFF_COL_NAMES)
                
        
        df_all_aux = pd.merge(df_level1_aux, df_affil_aux_out, how='left', left_index=True, right_index=True)
        df_results_list = pd.concat([df_results_list, df_all_aux], ignore_index=True)
        cursor = results_aux["search-results"]["cursor"]['@next']
        




        
    

0
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&cursor=%2A
200
1
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&cursor=AoJZjZtNMjItczIuMC04NTE5Mjg0MjM5MA%3D%3D
200
2
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&cursor=AoJV7ppNMjItczIuMC04NTIwOTgxMTMxNg%3D%3D
200
3
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&cursor=AoJV7ppNMjItczIuMC04NTE3NTY1NzczMw%3D%3D
200
4
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&cursor=AoJZnJZNMjItczIuMC04NTE0MzQyMDExMQ%3D%3D
200
5
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&cursor=AoJV%2FZVNMjItczIuMC04NTE3MzkzODk5Mw%3D%3D
200
6
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&cursor=AoJdy5FNM

In [30]:
len(results_aux["search-results"]['entry'])

13

In [31]:
results["search-results"]['entry'][20]['affiliation']

[{'@_fa': 'true',
  'affilname': 'University of Ottawa',
  'affiliation-city': 'Ottawa',
  'affiliation-country': 'Canada'}]

In [32]:
pd.json_normalize(results["search-results"]['entry'][20]['affiliation'])

Unnamed: 0,@_fa,affilname,affiliation-city,affiliation-country
0,True,University of Ottawa,Ottawa,Canada


In [33]:
"affiliation" in results["search-results"]['entry'][20]

True

In [None]:
jprint(response.json())

In [34]:
df_results_list[:200]

Unnamed: 0,dc:title,prism:doi,dc:creator,affilname,affiliation-city,affiliation-country,@_fa
0,Machine learning for predicting used car resal...,10.1016/j.eswa.2024.125640,Bergmann S.,Dr. Balasaheb Sawant Konkan Krishi Vidyapeeth,Ratnagiri,India,
1,A GIS-based tool for dynamic assessment of com...,10.1111/jfr3.13049,Wilkho R.S.,"Central Agriculture University, India",Imphal,India,
2,What is the “best” way to measure the relative...,10.1016/j.landusepol.2024.107405,Ruggeri A.,FAMU-FSU College of Engineering,Tallahassee,United States,
3,Controlling stormwater at the source: dawn of ...,10.1007/s13201-024-02324-x,Siphambe T.V.,Faculty of Engineering Ain Shams University,Cairo,Egypt,
4,Feature reduction in multiple linear regressio...,10.1007/s42044-024-00206-8,Thakur P.S.,University of Oradea,Oradea,Romania,
...,...,...,...,...,...,...,...
195,Using Geolocated Text to Quantify Location in ...,,Heuwinkel T.N.,University of Hertfordshire,Hatfield,United Kingdom,
196,Logic-Driven Traffic Big Data Analytics: Metho...,10.1007/978-981-16-8016-8,Zhong S.,Govt.V.Y.T.PG. Autonomous College,Durg,India,
197,Rapid Modelling of Machine Learning in Predict...,10.14569/IJACSA.2022.0131266,Mohd T.,Université Abou Bekr Belkaid Tlemcen,Tlemcen,Algeria,
198,Estate Price Predictor for Multan City Townshi...,10.1109/ICCR56254.2022.9996072,Kurdi B.A.,Università degli Studi di Padova,Padua,Italy,


In [36]:
df_results_list.to_csv(r'./data/df_results_list_records.csv', index=False)

# RECOLHA DE DADOS - Parte 2
Recolha do contéudo dos artigos selecionados com a API Scopus

NOTA: Como facilmente se verifica na [página](https://dev.elsevier.com/api_docs.html) os artigos completos 
estão disponíveis no serviço «ScienceDirect APIs», especificamente na API «Article Retrieval» (documentação [aqui](https://dev.elsevier.com/documentation/ArticleRetrievalAPI.wadl) )

## Recolha de artigos completos
Utilizando o identificador DOI

### Análise exploratória

In [37]:
dois = df_results_list['prism:doi']
dois[:3]

0          10.1016/j.eswa.2024.125640
1                  10.1111/jfr3.13049
2    10.1016/j.landusepol.2024.107405
Name: prism:doi, dtype: object

In [39]:
response_article = requests.get("https://api.elsevier.com/content/article/doi/"+dois[2],
                    headers={ 'Accept':'application/json',
                             'X-ELS-APIKey': MY_SCOPUS_API_KEY},
                         
                    params={   }

                        )
print(response_article.url)
print(response_article.status_code)

https://api.elsevier.com/content/article/doi/10.1016/j.landusepol.2024.107405
200


In [None]:
#jprint(response_article.json())

In [40]:
type(dois)

pandas.core.series.Series

### Implementação global

In [None]:
# - a definir -


## Recolha de resumos (dos artigos) 

Recolha do contéudo dos resumos dos artigos selecionados com a API Scopus (usando o identificador DOI guardado anteriormente)

NOTA: Como facilmente se verifica na [página](https://dev.elsevier.com/api_docs.html) os resumos podem ser obtidos de duas formas:
* Através do serviço «ScienceDirect APIs» e especificamente da API «Article Retrieval» (documentação [aqui](https://dev.elsevier.com/documentation/ArticleRetrievalAPI.wadl) ) descrita anteriormente
* Através do serviço «Scopus APIs» (usado anteriormente para fazer a pesquisa) e especificamente da API «Abstract Retrieval» (documentação [aqui](https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl))

### Análise Exploratória

In [41]:
dois = df_results_list['prism:doi']

In [43]:
response_abst = requests.get("https://api.elsevier.com/content/abstract/doi/"+dois[114],
                    headers={#'Accept': 'application/json',
                    'Accept': 'application/json',
                    'X-ELS-APIKey': MY_SCOPUS_API_KEY},
                    
                    params={   }
                )
print(response_abst.url)
print(response_abst.status_code)   

https://api.elsevier.com/content/abstract/doi/10.3390/buildings13020441
200


In [44]:
results_abstr = response_abst.json()
results_abstr

{'abstracts-retrieval-response': {'item': {'ait:process-info': {'ait:status': {'@state': 'update',
     '@type': 'core',
     '@stage': 'S300'},
    'ait:date-delivered': {'@day': '25',
     '@year': '2024',
     '@timestamp': '2024-08-25T20:51:31.000031-04:00',
     '@month': '08'},
    'ait:date-sort': {'@day': '01', '@year': '2023', '@month': '02'}},
   'bibrecord': {'head': {'author-group': [{'affiliation': {'country': 'Australia',
        'postal-code': '2052',
        '@afid': '60028333',
        '@country': 'aus',
        'city': 'Sydney',
        'organization': [{'$': 'School of Built Environment'},
         {'$': 'The University of New South Wales'}],
        'affiliation-id': {'@afid': '60028333', '@dptid': '104435148'},
        'state': 'NSW',
        'ce:source-text': 'School of Built Environment, The University of New South Wales, Sydney, NSW 2052, Australia',
        '@dptid': '104435148'},
       'author': [{'ce:given-name': 'Samad M. E.',
         'preferred-name': {'c

In [45]:
jprint(response_abst.json())

'{\n      "abstracts-retrieval-response": {\n            "affiliation": [\n                  {\n                        "@href": "https://api.elsevier.com/content/affiliation/affiliation_id/60090755",\n                        "@id": "60090755",\n                        "affiliation-city": "Sydney",\n                        "affiliation-country": "Australia",\n                        "affilname": "Faculty of Engineering"\n                  },\n                  {\n                        "@href": "https://api.elsevier.com/content/affiliation/affiliation_id/60031846",\n                        "@id": "60031846",\n                        "affiliation-city": "Adelaide",\n                        "affiliation-country": "Australia",\n                        "affilname": "University of South Australia"\n                  },\n                  {\n                        "@href": "https://api.elsevier.com/content/affiliation/affiliation_id/60028333",\n                        "@id": "60028333",\n 

In [46]:
resumo = results_abstr["abstracts-retrieval-response"][ "coredata"]['dc:description']
resumo

'The construction industry is slow to adopt new technologies. The implementation of digital technologies and remote operations using robots were considered farfetched affairs and unbelievable approaches. However, the effect of COVID-19 on clients and construction companies put high pressure on construction managers to seek digital solutions and justified the need for remote operating or distant controlling technologies. This paper aims to investigate the state of play in construction technology implementation and presents a roadmap for developing and implementing required technologies for the construction industry. The COVID-19 disruption required new methods of working safely and remotely and coincided with the advent of advanced automation and autonomous technologies. This paper aims to identify gaps and 11 disruptive technologies that may lead to upheaval and transformation of the construction sector, perhaps in this decade. A road map for technology implementation can be helpful in

### Implementação Global

In [47]:
dois = df_results_list['prism:doi']
dois

0              10.1016/j.eswa.2024.125640
1                      10.1111/jfr3.13049
2        10.1016/j.landusepol.2024.107405
3              10.1007/s13201-024-02324-x
4              10.1007/s42044-024-00206-8
                      ...                
408             10.1108/02637470510631474
409              10.1144/1470-9236/04-072
410    10.1016/B978-0-7506-7555-0.X5081-6
411         10.1016/S0377-2217(02)00178-9
412               10.1111/1540-6229.00048
Name: prism:doi, Length: 413, dtype: object

In [49]:
abstract_list = []

i=0
for x in dois :
    print(x)
    response_abst = requests.get("https://api.elsevier.com/content/abstract/doi/"+str(x),
                    headers={
                    'Accept': 'application/json',
                    'X-ELS-APIKey': MY_SCOPUS_API_KEY},
                    
                    params={   }
                )
    
    print(response_abst.url)
    print(response_abst.status_code)
    print(i)
    i=i+1

    if(response_abst.status_code == 200):
        results_abst = response_abst.json()
        if 'dc:description' in results_abst["abstracts-retrieval-response"][ "coredata"]:
            abstract_list.append( results_abst["abstracts-retrieval-response"][ "coredata"]['dc:description'] )
        else:
            abstract_list.append("NULL")
    else:
        abstract_list.append("NULL")




10.1016/j.eswa.2024.125640
https://api.elsevier.com/content/abstract/doi/10.1016/j.eswa.2024.125640
200
0
10.1111/jfr3.13049
https://api.elsevier.com/content/abstract/doi/10.1111/jfr3.13049
200
1
10.1016/j.landusepol.2024.107405
https://api.elsevier.com/content/abstract/doi/10.1016/j.landusepol.2024.107405
200
2
10.1007/s13201-024-02324-x
https://api.elsevier.com/content/abstract/doi/10.1007/s13201-024-02324-x
200
3
10.1007/s42044-024-00206-8
https://api.elsevier.com/content/abstract/doi/10.1007/s42044-024-00206-8
200
4
10.1108/SASBE-06-2022-0113
https://api.elsevier.com/content/abstract/doi/10.1108/SASBE-06-2022-0113
200
5
10.3390/land13111881
https://api.elsevier.com/content/abstract/doi/10.3390/land13111881
200
6
10.1007/s10618-024-01046-7
https://api.elsevier.com/content/abstract/doi/10.1007/s10618-024-01046-7
200
7
10.1007/s11156-024-01306-z
https://api.elsevier.com/content/abstract/doi/10.1007/s11156-024-01306-z
200
8
10.3390/buildings14103068
https://api.elsevier.com/content/abs

In [None]:
abstract_list[0]

In [None]:
type(abstract_list[0])

# GUARDAR DADOS RECOLHIDOS E PROCESSADOS

In [None]:
df_results_list.to_csv('../data/input/'+'search_results.csv',  index=False, encoding = "utf-8")

pd.DataFrame(abstract_list,  columns =['Abstract']).to_csv('../data/input/'+'search_results_abstracts.csv', index=False, encoding = "utf-8")

In [None]:
# Save the file
#pickle.dump( df_results_list, file = open(path_dados+"ICD2122_TextMining_DataStructures.pickle", "wb"))

# Reload the file
#test_grouped_df_reloaded = pickle.load(open(path_dados+"ICD2122_TextMining_DataStructures.pickle", "rb"))