In [11]:
# Importação de bibliotecas necessárias
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium import webdriver
from bs4 import BeautifulSoup
from lxml import html
import requests
import time
import re

def search_google_scholar(keyword):
    url = f"https://scholar.google.com/scholar?q={keyword}"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    results = soup.find('div', {'id': 'gs_ab_md'})
    if results:
        return results.text.split()[1]
    else:
        return "Data not available"

def search_pubmed(keyword):
    url = f"https://pubmed.ncbi.nlm.nih.gov/?term={keyword}"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    results = soup.find('div', {'class': 'results-amount'})
    if results:
        return results.text.strip().split()[0]
    else:
        return "Data not available"

def search_science_direct_selenium(keyword):
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    
    url = f"https://www.sciencedirect.com/search?qs={keyword}&articleTypes=REV%2CFLA&lastSelectedFacet=articleTypes"
    driver.get(url)
    
    try:
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "span.search-body-results-text"))
        )
        text = element.text
        results = int(re.search(r'\d+', text).group())
        return str(results)
    except Exception as e:
        return f"Exception encountered: {e}"
    finally:
        driver.quit()


    
if __name__ == "__main__":
    keywords = ["DoWhy", "CausalML"]

    for keyword in keywords:
        print(f"Fetching data for {keyword}...")
        
        # Google Scholar
        gs_count = search_google_scholar(keyword)
        print(f"Number of articles mentioning {keyword} in Google Scholar: {gs_count:5}")

        # PubMed
        pm_count = search_pubmed(keyword)
        print(f"Number of articles mentioning {keyword} in         PubMed: {pm_count:5}")

        # Science Direct
        sd_count = search_science_direct(keyword)
        print(f"Number of articles mentioning {keyword} in Science Direct: {sd_count:5}")

        print("-" * 75)
        
        # Adding sleep to avoid rate-limiting
        time.sleep(5)

Fetching data for DoWhy...
Number of articles mentioning DoWhy in Google Scholar: 633  
Number of articles mentioning DoWhy in         PubMed: 14   
Number of articles mentioning DoWhy in Science Direct: Failed to retrieve data, HTTP Status Code: 403
---------------------------------------------------------------------------
Fetching data for CausalML...
Number of articles mentioning CausalML in Google Scholar: 504  
Number of articles mentioning CausalML in         PubMed: 10,630,105
Number of articles mentioning CausalML in Science Direct: Failed to retrieve data, HTTP Status Code: 403
---------------------------------------------------------------------------


In [8]:
url = f"https://www.sciencedirect.com/search?qs={keyword}&articleTypes=REV%2CFLA&lastSelectedFacet=articleTypes"
url

'https://www.sciencedirect.com/search?qs=CausalML&articleTypes=REV%2CFLA&lastSelectedFacet=articleTypes'

In [20]:
import requests
import re
import json
import time

def search_science_direct_api(keyword, api_key):
    """
    Searches the ScienceDirect API for articles related to a specific keyword while adhering to API limits.
    
    Parameters:
        keyword (str): The search keyword.
        api_key (str): Your ScienceDirect API key.
        
    Returns:
        int: The number of articles that match the query or an error message.
    """
    
    base_url = "https://api.elsevier.com/content/search/scidir"
    headers = {
        "X-ELS-APIKey": api_key
    }
    params = {
        "query": keyword,
        "count": 200  # Max number of results allowed per request as per API specs
    }
    
    try:
        # Respect the API rate limit (2 requests per second for ScienceDirect Search v2)
        time.sleep(0.5)
        
        # Make the API request
        response = requests.get(base_url, headers=headers, params=params)
        
        # Validate the response
        if response.status_code == 200:
            # Parse the JSON response
            data = json.loads(response.text)
            
            # Retrieve the total number of search results
            total_results = int(data['search-results']['opensearch:totalResults'])
            
            return total_results
        
        else:
            return f"Failed to retrieve data, HTTP Status Code: {response.status_code}"
    
    except Exception as e:
        return f"An error occurred: {e}"

# Example usage
api_key = "57f20882ed177ae828ae2c1c6d1db58f"
keyword = "DoWhy"
results = search_science_direct_api(keyword, api_key)
print(f"Number of articles related to '{keyword}' in ScienceDirect: {results}")


Number of articles related to 'DoWhy' in ScienceDirect: Failed to retrieve data, HTTP Status Code: 410


In [22]:
# !pip install elsapy

In [27]:
from elsapy.elsclient import ElsClient
from elsapy.elssearch import ElsSearch
import json

def search_science_direct_elsapy(api_key, keyword, article_type=['review', 'research-article']):
    """Search the number of articles of a specific type containing the keyword in ScienceDirect using elsapy.
    
    Parameters:
    - api_key (str): The API key for accessing Elsevier APIs.
    - keyword (str): The search keyword.
    - article_type (list): List of types of articles to be considered, default is ['review', 'research-article'].
    
    Returns:
    int: Number of articles found.
    """

    # Initialize the client
    client = ElsClient(api_key)
    
    # Initialize search object and execute search, specifying the index to search the query in ('scidir' for ScienceDirect)
    search_obj = ElsSearch(f"{keyword} AND ({' OR '.join(['DOCTYPE('+x+')' for x in article_type])})",'scidir')
    search_obj.execute(client, get_all = False)
    
    # Extract and return the number of results
    return search_obj.resultsFound

# Example usage
api_key = '57f20882ed177ae828ae2c1c6d1db58f'
keyword = "DoWhy"
result_count = search_science_direct_elsapy(api_key, keyword)
print(f"Number of articles related to '{keyword}' in ScienceDirect: {result_count}")


HTTPError: HTTP 410 Error from https://api.elsevier.com/content/search/scidir?query=DoWhy+AND+%28DOCTYPE%28review%29+OR+DOCTYPE%28research-article%29%29
and using headers {'X-ELS-APIKey': '57f20882ed177ae828ae2c1c6d1db58f', 'User-Agent': 'elsapy-v0.5.1', 'Accept': 'application/json'}:


In [28]:
myCl = ElsClient('[57f20882ed177ae828ae2c1c6d1db58f]')
myAuth.read(myCl)
myAuth.read()

NameError: name 'myAuth' is not defined