# CONFIGURAÇÕES INICIAIS

»» Recursos de interesse:

- Markdown: https://www.markdownguide.org/


## Diretorias


In [1]:
#Colocar o path correto
#C:\Users\Paulo B\OneDrive - Universidade de Aveiro\TRABALHO_AULAS\AL20222023\ICD\projeto_ICD2223
path = "C:/Users/Paulo B/OneDrive - Universidade de Aveiro/"

path_projeto_ICD2021 = "TRABALHO_AULAS/AL20212022/ICD_202120221/"

path_dados = path + path_projeto_ICD2021 + "projeto_ICD2021/dados/" 

## Ativar os módulos de interesse

In [2]:
import requests
import json

import pickle

import math 
import pandas as pd

## Funções auxiliares

In [3]:
# A função "jprint()" tem como objetivo "imprimir" o json (formato de resposta escolhido / solicitado à API) 
# O formato ".json" é uma linguagem utilizada para representação de dados num formato do tipo "chave-valor" - numa estrutura de dados similar a um "dicionário", 
# disponibilizada na linguagem Python. 
# Os dados, tal como numa base de dados usual, podem ter uma estrutura relacional; é usual que estas relações possam ser representadas num formato
# hierárquico. O formato json é assim compatível com esta representação relacional hierárquica, sendo a navegação entre os diferentes níveis definida pelas chaves, 
# que correspondem aos nós da representação hierárquica em rede dos referidos dados codificados no formato json
# Esta função permite representar a estrutura hierárquica do json, através da "impressão" em formato textual, utilizando tabulações por forma a alinhar as chaves que se encontram
# em cada nível hierárquico

def jprint(obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(obj, sort_keys=True, indent=6)
    print(text)



## Configuração Chave API

In [4]:
MY_API_KEY = "d0178a452f02df51b64ba796b0b9b4da"
MY_API_KEY

'd0178a452f02df51b64ba796b0b9b4da'

# RECOLHA DE DADOS - Parte 1
Pesquisa e recolha de resultados com a API Scoupus API

## Análise exploratória dos resultados de pesquisa

### Exploratório: 1. Pesquisa API

In [6]:
# IMPORTANTE: https://dev.elsevier.com/sc_search_tips.html

user_query = "data mining housing automated valuation model"
cursor = "*"

response = requests.get("https://api.elsevier.com/content/search/scopus",
                    headers={'Accept':'application/json',
                             'X-ELS-APIKey': MY_API_KEY},
                         
                    params={    'query' : user_query,
                                #'count' : 1000,
                                #'start' : "25",
                                #'cursor' : '*',
                        }

                        )

print(response.url)



https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model


### Exploratório: 2. Analisar resultados / resposta API

In [7]:
print(response.status_code)

200


In [8]:
# A simples invocação da resposta da API (em formato json) imprime o contéudo obtido com a respetiva syntaxe json
# No entanto, como facilmente se depreende desta forma é dificil discernir a estrutura hierárquica dos dados e, desta forma, extrair os diferentes dados em cada nível
# por forma a transformar os dados numa estrutura de dados mais amigável (por exemplo, um data.frame da livraria "pandas", que nos é uma estrutura de dados mais familiar
# para manipulação)
results = response.json()
results


{'search-results': {'opensearch:totalResults': '256',
  'opensearch:startIndex': '0',
  'opensearch:itemsPerPage': '25',
  'opensearch:Query': {'@role': 'request',
   '@searchTerms': 'data mining housing automated valuation model',
   '@startPage': '0'},
  'link': [{'@_fa': 'true',
    '@ref': 'self',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=data+mining+housing+automated+valuation+model',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'first',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=data+mining+housing+automated+valuation+model',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'next',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=25&count=25&query=data+mining+housing+automated+valuation+model',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'last',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=231&count=

In [9]:
# Podemos assim invocar a função "jprint" que definimos inicialmente para obtermos uma visualização mais amigável da estrutura de dados codificada no json
# Esta visualização permite-nos ainda identificar a "localização" (na hierarquia) dos dados de interesse e, portanto, as respetivas chaves (que nos permitem
# aceder a esses mesmos dados)
jprint(response.json())

{
      "search-results": {
            "entry": [
                  {
                        "@_fa": "true",
                        "affiliation": [
                              {
                                    "@_fa": "true",
                                    "affiliation-city": "Freiburg im Breisgau",
                                    "affiliation-country": "Germany",
                                    "affilname": "Universit\u00e4t Freiburg"
                              }
                        ],
                        "article-number": "119147",
                        "citedby-count": "0",
                        "dc:creator": "Baur K.",
                        "dc:identifier": "SCOPUS_ID:85141519670",
                        "dc:title": "Automated real estate valuation with machine learning models using property descriptions",
                        "eid": "2-s2.0-85141519670",
                        "link": [
                              {
                  

In [10]:
results["search-results"]['entry'][0]["affiliation"]


[{'@_fa': 'true',
  'affilname': 'Universität Freiburg',
  'affiliation-city': 'Freiburg im Breisgau',
  'affiliation-country': 'Germany'}]

In [11]:
'affiliation' in results["search-results"]['entry'][0]

True

### Exploratório: 3. Número total de resultados obtidos na pesquisa

In [12]:
number_of_articles_retrieved = results["search-results"]["opensearch:totalResults"]
number_of_articles_retrieved

'256'

In [13]:
number_of_articles_perResultPage = results["search-results"]["opensearch:itemsPerPage"]
number_of_articles_perResultPage

'25'

### Exploratório: 4. Obter dados de cada um dos resultados de pesquisa e transformar numa estrutura de dados amigável (pandas -» data.frame )

#### Opção 1

In [14]:
df = pd.DataFrame.from_records(results["search-results"]['entry'] )
df[:3]

Unnamed: 0,@_fa,link,prism:url,dc:identifier,eid,dc:title,dc:creator,prism:publicationName,prism:issn,prism:volume,...,article-number,source-id,openaccess,openaccessFlag,prism:eIssn,prism:issueIdentifier,freetoread,freetoreadLabel,pubmed-id,prism:isbn
0,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85141519670,2-s2.0-85141519670,Automated real estate valuation with machine l...,Baur K.,Expert Systems with Applications,9574174,213,...,119147,24201,0,False,,,,,,
1,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85138774719,2-s2.0-85138774719,Predicting Carpark Prices Indices in Hong Kong...,Li R.Y.M.,CMES - Computer Modeling in Engineering and Sc...,15261492,134,...,,28642,1,True,15261506.0,3.0,"{'value': [{'$': 'all'}, {'$': 'publisherfree2...","{'value': [{'$': 'All Open Access'}, {'$': 'Br...",,
2,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85138536850,2-s2.0-85138536850,House price prediction using hedonic pricing m...,Zaki J.,Concurrency and Computation: Practice and Expe...,15320626,34,...,e7342,27871,0,False,15320634.0,27.0,,,,


#### Opção 2

In [15]:
df = pd.json_normalize(results["search-results"],['entry'] )
df[:3]

Unnamed: 0,@_fa,link,prism:url,dc:identifier,eid,dc:title,dc:creator,prism:publicationName,prism:issn,prism:volume,...,article-number,source-id,openaccess,openaccessFlag,prism:eIssn,prism:issueIdentifier,freetoread.value,freetoreadLabel.value,pubmed-id,prism:isbn
0,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85141519670,2-s2.0-85141519670,Automated real estate valuation with machine l...,Baur K.,Expert Systems with Applications,9574174,213,...,119147,24201,0,False,,,,,,
1,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85138774719,2-s2.0-85138774719,Predicting Carpark Prices Indices in Hong Kong...,Li R.Y.M.,CMES - Computer Modeling in Engineering and Sc...,15261492,134,...,,28642,1,True,15261506.0,3.0,"[{'$': 'all'}, {'$': 'publisherfree2read'}, {'...","[{'$': 'All Open Access'}, {'$': 'Bronze'}, {'...",,
2,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85138536850,2-s2.0-85138536850,House price prediction using hedonic pricing m...,Zaki J.,Concurrency and Computation: Practice and Expe...,15320626,34,...,e7342,27871,0,False,15320634.0,27.0,,,,


### Exploratório: 5. Aceder a dados em diferentes níveis hierárquicos

In [16]:
df_affiliation = pd.json_normalize(results["search-results"],['entry', 'affiliation'] )
df_affiliation.columns
#df['prism:doi']
#df_affiliation



Index(['@_fa', 'affilname', 'affiliation-city', 'affiliation-country'], dtype='object')

In [17]:
df_affiliation[:5]



Unnamed: 0,@_fa,affilname,affiliation-city,affiliation-country
0,True,Universität Freiburg,Freiburg im Breisgau,Germany
1,True,Hong Kong Shue Yan University,Hong Kong,China
2,True,Faculty of Engineering,Mansoura,Egypt
3,True,University of Colorado Boulder,Boulder,United States
4,True,Hanyang University ERICA Campus,Ansan,South Korea


### Exploratório: 5.1 Aceder a dados em diferentes níveis hierárquicos »» POSSÍVEIS DIFICULDADES

Por vezes, nos resultados de pesquisa alguns items podem não conter certos elementos / informação codificada em níveis hierárquicos
inferiores do json.
Por exemplo, a informação sobre a afiliação dos autores pode estar omissa. 
Nestes casos, ao usar a função "pd.json_normalize" para navegar para níveis inferiores pode devolver um erro e inviabilizar o processo automatizado (isto porque a função
"json_normalize" não permite uma forma expedita de solucionar este problema).

Uma estratégia "naife" para ultrapassar esta questão passa por  transformar os dados para um data.frame considerando
apenas o nível inicial da hierarquia do json (ou seja, utilizar a função json_normalize como fizemos para obter o objeto "df" anteriormente).
Como verificamos, certos elementos do data.frame "df" armazenam dados em formato json / dicionário nas suas células.
Ao invés de utilizarmos a função "json_normalize" para obtermos dados nos níveis hierárquicos inferiores, podemos desenvolver o nosso próprio algoritmo
por forma a solucionarmos os problemas de dados omissos.

Assim, uma implementação alternativa do ponto 5, para os dados da coluna "affiliation" seria algo como o que se ilustra no código seguinte.

NOTA: reparem que se optarem por esta estratégia deverão alterar o código da seccção "Automatização global" em conformidade!

In [18]:

i=0 #devem utilizar um índice (de linha) para o qual existam dados na coluna "affiliation" do vosso data.frame «df»
AFF_COL_NAMES = pd.json_normalize(df.affiliation[0]).columns

df_affiliation_v2 = pd.DataFrame(columns=AFF_COL_NAMES)

for i in range(len(df.affiliation)) :
    if  type(df.affiliation.iloc[i]) == list :
        df_aux = pd.json_normalize(df.affiliation.iloc[i])
        df_affiliation_v2 = df_affiliation_v2.append(df_aux, ignore_index=True)

    else:
        emptydf = pd.DataFrame(np.nan, index=[0],columns=AFF_COL_NAMES) 
        df_affiliation_v2 = df_affiliation_v2.append(emptydf, ignore_index=True)

In [19]:
df_affiliation_v2[:5]

Unnamed: 0,@_fa,affilname,affiliation-city,affiliation-country
0,True,Universität Freiburg,Freiburg im Breisgau,Germany
1,True,Hong Kong Shue Yan University,Hong Kong,China
2,True,Faculty of Engineering,Mansoura,Egypt
3,True,University of Colorado Boulder,Boulder,United States
4,True,Hanyang University ERICA Campus,Ansan,South Korea


### Exploratório: 6. Combinar dados de diferentes hierarquias num único "data.frame"
https://pandas.pydata.org/docs/getting_started/intro_tutorials/08_combine_dataframes.html 

In [None]:
FIELDS = ['dc:title', "prism:doi", 'dc:creator', 'citedby-count', 'openaccess']
df[FIELDS][:3]

Unnamed: 0,dc:title,prism:doi,dc:creator,citedby-count,openaccess
0,Nonlinear public transit accessibility effects...,10.1016/j.tranpol.2022.01.004,Jin T.,0,0
1,Forecasting: theory and practice,10.1016/j.ijforecast.2021.11.001,Petropoulos F.,1,1
2,Rapid Earthquake Loss Estimation Model for Alg...,10.1080/15583058.2021.1958394,Boukri M.,0,0


In [None]:
df_all = pd.merge(df[FIELDS], df_affiliation, how='left', left_index=True, right_index=True)
df_all[:3]


## Automatização global
Recolha de resultados individuais da API, seleção de dados de interesse, criação de estrutura de dados amigável

In [20]:
'entry' in results["search-results"]

True

In [36]:
user_query = "data mining housing automated valuation model"
LEVEL0_FIELDS_OF_INTEREST = ['dc:title', "prism:doi", 'dc:creator']
AFFILIATION_FIELDS_OF_INTEREST = ['affilname', 'affiliation-city', 'affiliation-country']

df_results_list = pd.DataFrame(columns=LEVEL0_FIELDS_OF_INTEREST+AFFILIATION_FIELDS_OF_INTEREST)

cursor = "*"
start = 0

while start <= int(number_of_articles_retrieved):
    
    
    response = requests.get("https://api.elsevier.com/content/search/scopus",
                    headers={'Accept':'application/json',
                             'X-ELS-APIKey': MY_API_KEY},
                         
                    params={    'query' : user_query,
                                'start' : start
                                #'cursor' : cursor,
                        }

                        )
    print(i)
    print(response.url)
    print(response.status_code)



    if(response.status_code == 200):

        results_aux = response.json()

        df_level1_aux = pd.DataFrame.from_records(results_aux["search-results"]['entry'] )
        df_level1_aux = df_level1_aux[LEVEL0_FIELDS_OF_INTEREST]

        if 'affiliation' in results["search-results"]['entry'][0]:
            
            df_affil_aux = pd.json_normalize(results["search-results"],['entry', 'affiliation'] )
            df_affil_aux = df_affil_aux[AFFILIATION_FIELDS_OF_INTEREST]
            df_all_aux = pd.merge(df_level1_aux, df_affil_aux, how='left', left_index=True, right_index=True)
            df_results_list = df_results_list.append(df_all_aux, ignore_index=True)
            #cursor = results_aux["search-results"]["cursor"]['@next']

        else:

            df_affil_aux = pd.DataFrame(np.nan, index=[0],columns=AFF_COL_NAMES)
            df_all_aux = pd.merge(df_level1_aux, df_affil_aux, how='left', left_index=True, right_index=True)
            df_results_list = df_results_list.append(df_all_aux, ignore_index=True)
            #cursor = results_aux["search-results"]["cursor"]['@next']
    start = start+int(number_of_articles_perResultPage)+1

    
    

24
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&start=0
200
24
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&start=26
200
24
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&start=52
200
24
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&start=78
200
24
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&start=104
200
24
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&start=130
200
24
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&start=156
200
24
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+automated+valuation+model&start=182
200
24
https://api.elsevier.com/content/search/scopus?query=data+mining+housing+a

In [38]:
df_results_list[:300]

Unnamed: 0,dc:title,prism:doi,dc:creator,affilname,affiliation-city,affiliation-country
0,Automated real estate valuation with machine l...,10.1016/j.eswa.2022.119147,Baur K.,Universität Freiburg,Freiburg im Breisgau,Germany
1,Predicting Carpark Prices Indices in Hong Kong...,10.32604/cmes.2022.020930,Li R.Y.M.,Hong Kong Shue Yan University,Hong Kong,China
2,House price prediction using hedonic pricing m...,10.1002/cpe.7342,Zaki J.,Faculty of Engineering,Mansoura,Egypt
3,Gridded land use data for the conterminous Uni...,10.1038/s41597-022-01591-0,Mc Shane C.,University of Colorado Boulder,Boulder,United States
4,DOES MACHINE LEARNING PREDICTION DAMPEN THE IN...,10.3846/ijspm.2022.17590,Jung J.,Hanyang University ERICA Campus,Ansan,South Korea
...,...,...,...,...,...,...
242,A new approach to spatial analysis in CAMA,10.1108/02637470510631474,González M.A.S.,Tsinghua University,Beijing,China
243,From concept towards reality: Developing the a...,10.1144/1470-9236/04-072,Culshaw M.G.,Saint Petersburg Mining University,Saint Petersburg,Russian Federation
244,Lees' Loss Prevention in the Process Industrie...,10.1016/B978-0-7506-7555-0.X5081-6,Mannan M.S.,Uniwersytet Warminsko-Mazurski w Olsztynie,Olsztyn WM,Poland
245,Quality function deployment: A literature review,10.1016/S0377-2217(02)00178-9,Chan L.K.,Gebze Teknik Üniversitesi,Kocaeli,Turkey


In [39]:
df_results_list["prism:doi"]

0              10.1016/j.eswa.2022.119147
1               10.32604/cmes.2022.020930
2                        10.1002/cpe.7342
3              10.1038/s41597-022-01591-0
4                10.3846/ijspm.2022.17590
                      ...                
242             10.1108/02637470510631474
243              10.1144/1470-9236/04-072
244    10.1016/B978-0-7506-7555-0.X5081-6
245         10.1016/S0377-2217(02)00178-9
246               10.1111/1540-6229.00048
Name: prism:doi, Length: 247, dtype: object

# RECOLHA DE DADOS - Parte 2
Recolha do contéudo dos artigos selecionados com a API Scopus

NOTA: Como facilmente se verifica na [página](https://dev.elsevier.com/api_docs.html) os artigos completos 
estão disponíveis no serviço «ScienceDirect APIs», especificamente na API «Article Retrieval» (documentação [aqui](https://dev.elsevier.com/documentation/ArticleRetrievalAPI.wadl) )

## Recolha de artigos completos
Utilizando o identificador DOI

### Análise exploratória

In [40]:
dois = df_results_list['prism:doi']
dois[:3]

0    10.1016/j.eswa.2022.119147
1     10.32604/cmes.2022.020930
2              10.1002/cpe.7342
Name: prism:doi, dtype: object

In [120]:
response_article = requests.get("https://api.elsevier.com/content/article/doi/"+dois[2],
                    headers={ 'Accept':'application/json',
                             'X-ELS-APIKey': MY_API_KEY},
                         
                    params={   }

                        )
print(response_article.url)
print(response_article.status_code)

https://api.elsevier.com/content/article/doi/10.1016/j.jobe.2021.102636
200


In [114]:
#jprint(response_article.json())

In [116]:
type(dois)

pandas.core.series.Series

### Implementação global

In [117]:
# - a definir -


## Recolha de resumos (dos artigos) 

Recolha do contéudo dos resumos dos artigos selecionados com a API Scopus (usando o identificador DOI guardado anteriormente)

NOTA: Como facilmente se verifica na [página](https://dev.elsevier.com/api_docs.html) os resumos podem ser obtidos de duas formas:
* Através do serviço «ScienceDirect APIs» e especificamente da API «Article Retrieval» (documentação [aqui](https://dev.elsevier.com/documentation/ArticleRetrievalAPI.wadl) ) descrita anteriormente
* Através do serviço «Scopus APIs» (usado anteriormente para fazer a pesquisa) e especificamente da API «Abstract Retrieval» (documentação [aqui](https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl))

### Análise Exploratória

In [163]:
dois = df_results_list['prism:doi']

In [138]:
response_abst = requests.get("https://api.elsevier.com/content/abstract/doi/"+dois[114],
                    headers={#'Accept': 'application/json',
                    'Accept': 'application/json',
                    'X-ELS-APIKey': MY_API_KEY},
                    
                    params={   }
                )
print(response_abst.url)
print(response_abst.status_code)   

https://api.elsevier.com/content/abstract/doi/10.1002/9781118280249
200


In [139]:
results_abstr = response_abst.json()
results_abstr

{'abstracts-retrieval-response': {'item': {'ait:process-info': {'ait:status': {'@state': 'update',
     '@type': 'core',
     '@stage': 'S300'},
    'ait:date-delivered': {'@day': '06',
     '@year': '2020',
     '@timestamp': '2020-02-06T06:35:05.000005-05:00',
     '@month': '02'},
    'ait:date-sort': {'@day': '27', '@year': '2017', '@month': '11'}},
   'bibrecord': {'head': {'citation-title': 'From the preface to the first edition',
     'abstracts': None,
     'citation-info': {'citation-type': {'@code': 'ed'},
      'citation-language': {'@language': 'English', '@xml:lang': 'eng'}},
     'source': {'sourcetitle-abbrev': 'Vet. Epidemiol.: Fourth Ed.',
      'website': {'ce:e-address': {'$': 'http://onlinelibrary.wiley.com/book/10.1002/9781118280249',
        '@type': 'email'}},
      '@country': 'usa',
      'translated-sourcetitle': {'@xml:lang': 'eng'},
      'volisspag': {'pagerange': {'@first': 'xix'}},
      '@type': 'b',
      'publicationyear': {'@first': '2017'},
      'is

In [140]:
jprint(response_abst.json())

{
      "abstracts-retrieval-response": {
            "authkeywords": null,
            "authors": null,
            "coredata": {
                  "citedby-count": "0",
                  "dc:identifier": "SCOPUS_ID:85052962587",
                  "dc:publisher": "Wiley Blackwell",
                  "dc:title": "From the preface to the first edition",
                  "eid": "2-s2.0-85052962587",
                  "link": [
                        {
                              "@_fa": "true",
                              "@href": "https://api.elsevier.com/content/abstract/scopus_id/85052962587",
                              "@rel": "self"
                        },
                        {
                              "@_fa": "true",
                              "@href": "https://www.scopus.com/inward/record.uri?partnerID=HzOxMe3b&scp=85052962587&origin=inward",
                              "@rel": "scopus"
                        },
                        {
                

In [135]:
resumo = results_abstr["abstracts-retrieval-response"][ "coredata"]['dc:description']
resumo

'© 2021, Emerald Publishing Limited.Purpose: Real estate appraisals are becoming an increasingly important means of backing up financial operations based on the values of these kinds of assets. However, in very large databases, there is a reduction in the predictive capacity when traditional methods, such as multiple linear regression (MLR), are used. This paper aims to determine whether in these cases the application of data mining algorithms can achieve superior statistical results. First, real estate appraisal databases from five towns and cities in the State of Paraná, Brazil, were obtained from Caixa Econômica Federal bank. Design/methodology/approach: After initial validations, additional databases were generated with both real, transformed and nominal values, in clean and raw data. Each was assisted by the application of a wide range of data mining algorithms (multilayer perceptron, support vector regression, K-star, M5Rules and random forest), either isolated or combined (regre

### Implementação Global

In [41]:
dois = df_results_list['prism:doi']
dois

0              10.1016/j.eswa.2022.119147
1               10.32604/cmes.2022.020930
2                        10.1002/cpe.7342
3              10.1038/s41597-022-01591-0
4                10.3846/ijspm.2022.17590
                      ...                
242             10.1108/02637470510631474
243              10.1144/1470-9236/04-072
244    10.1016/B978-0-7506-7555-0.X5081-6
245         10.1016/S0377-2217(02)00178-9
246               10.1111/1540-6229.00048
Name: prism:doi, Length: 247, dtype: object

In [191]:
abstract_list = []

i=0
for x in dois :
    print(x)
    response_abst = requests.get("https://api.elsevier.com/content/abstract/doi/"+str(x),
                    headers={
                    'Accept': 'application/json',
                    'X-ELS-APIKey': MY_API_KEY},
                    
                    params={   }
                )
    
    print(response_abst.url)
    print(response_abst.status_code)
    print(i)
    i=i+1

    if(response_abst.status_code == 200):
        results_abst = response_abst.json()
        if 'dc:description' in results_abst["abstracts-retrieval-response"][ "coredata"]:
            abstract_list.append( results_abst["abstracts-retrieval-response"][ "coredata"]['dc:description'] )
        else:
            abstract_list.append("NULL")
    else:
        abstract_list.append("NULL")




10.3390/su132313088
https://api.elsevier.com/content/abstract/doi/10.3390/su132313088
200
0
here
10.1186/s40537-021-00476-0
https://api.elsevier.com/content/abstract/doi/10.1186/s40537-021-00476-0
200
1
here
10.1016/j.jobe.2021.102636
https://api.elsevier.com/content/abstract/doi/10.1016/j.jobe.2021.102636
200
2
here
10.1108/IJHMA-07-2020-0080
https://api.elsevier.com/content/abstract/doi/10.1108/IJHMA-07-2020-0080
200
3
here
10.3390/geosciences11110480
https://api.elsevier.com/content/abstract/doi/10.3390/geosciences11110480
200
4
here
10.1016/j.landurbplan.2021.104217
https://api.elsevier.com/content/abstract/doi/10.1016/j.landurbplan.2021.104217
200
5
here
10.1016/j.jclepro.2021.128824
https://api.elsevier.com/content/abstract/doi/10.1016/j.jclepro.2021.128824
200
6
here
10.3390/su132011450
https://api.elsevier.com/content/abstract/doi/10.3390/su132011450
200
7
here
10.3390/su132011259
https://api.elsevier.com/content/abstract/doi/10.3390/su132011259
200
8
here
10.1007/s12599-021-00

In [199]:
abstract_list[0]

'© 2021 by the authors. Licensee MDPI, Basel, Switzerland.The accurate estimation of real estate value helps the development of real estate policies that can respond to the complexities and instability of the real estate market. Previously, statistical methods were used to estimate real estate value, but machine learning methods have gained popularity because their predictions are more accurate. In contrast to existing studies that use various machine learning methods to estimate the transactions or list prices of real estate properties without separating the building and land prices, this study estimates land price using a large amount of land-use information obtained from various land-and building-related datasets. The random forest and XGBoost methods were used to estimate 52,900 land prices in Seoul, South Korea, from January 2017 to December 2020. The models were also separately trained for different land uses and different time periods. Overall, the results revealed that XGBoost 

In [194]:
type(abstract_list[0])

str

# GUARDAR DADOS RECOLHIDOS E PROCESSADOS

In [201]:
df_results_list.to_csv(path_dados+'search_results.csv',  index=False, encoding = "utf-8")

pd.DataFrame(abstract_list,  columns =['Abstract']).to_csv(path_dados+'search_results_abstracts.csv', index=False, encoding = "utf-8")

In [None]:
# Save the file
#pickle.dump( df_results_list, file = open(path_dados+"ICD2122_TextMining_DataStructures.pickle", "wb"))

# Reload the file
#test_grouped_df_reloaded = pickle.load(open(path_dados+"ICD2122_TextMining_DataStructures.pickle", "rb"))