In [1]:
import requests
import pandas as pd

In [2]:
# Configurar la URL base de OpenAlex
BASE_URL = "https://api.openalex.org/works"

# Función para buscar papers
def search_papers(query, limit=50):
    params = {
        "filter": "abstract.search:landslides,publication_year:2020-2025",
        "sort": "citation_count:desc",
        "per_page": 50
    }

    response = requests.get(BASE_URL, params=params)

    if response.status_code == 200:
        return response.json()["results"]
    else:
        print(f"Error {response.status_code}: {response.text}")
        return []

# Buscar papers sobre el tema "machine learning and wildfires"
query = "machine learning and lanslides"
papers = search_papers(query, limit=50)

# Procesar los resultados
if papers:
    data = []
    for paper in papers:
        data.append({
            "Title": paper.get("title"),
            "Authors": ", ".join([author["author"]["display_name"] for author in paper.get("authorships", [])]),
            "Year": paper.get("publication_year"),
            "DOI": paper.get("doi"),
            "URL": paper.get("id"),
            "Abstract": paper.get("abstract_inverted_index"),
        })

    # Crear un DataFrame y guardar los datos en un archivo CSV
    df = pd.DataFrame(data)
    df.to_csv("openalex_papers.csv", index=False)
    print("Base de datos generada exitosamente.")
else:
    print("No se encontraron resultados.")

Error 403: {"error":"Invalid query parameters error.","message":"citation_count is not a valid field. Valid fields are underscore or hyphenated versions of: abstract.search, abstract.search.no_stem, apc_list.currency, apc_list.provenance, apc_list.value, apc_list.value_usd, apc_paid.currency, apc_paid.provenance, apc_paid.value, apc_paid.value_usd, author.id, author.orcid, authors_count, authorships.affiliations.institution_ids, authorships.author.id, authorships.author.orcid, authorships.countries, authorships.institutions.continent, authorships.institutions.country_code, authorships.institutions.id, authorships.institutions.is_global_south, authorships.institutions.lineage, authorships.institutions.ror, authorships.institutions.type, authorships.is_corresponding, best_oa_location.is_accepted, best_oa_location.is_oa, best_oa_location.is_published, best_oa_location.landing_page_url, best_oa_location.license, best_oa_location.license_id, best_oa_location.source.host_organization, best_o

In [3]:
import requests
import pandas as pd

# Configurar la URL base de OpenAlex
BASE_URL = "https://api.openalex.org/works"

# Función para reconstruir abstracts desde el índice invertido
def reconstruct_abstract(inverted_index):
    if not inverted_index:
        return ""
    abstract = [""] * max([max(pos_list) for pos_list in inverted_index.values()])
    for word, positions in inverted_index.items():
        for pos in positions:
            abstract[pos] = word
    return " ".join(abstract)

# Función para buscar papers
def search_papers(query, limit=50):
    params = {
        "filter": f"abstract.search:{query},publication_year:2020-2025",
        "sort": "cited_by_count:desc",  # Cambiado "citation_count" por "cited_by_count"
        "per_page": limit
    }

    response = requests.get(BASE_URL, params=params)

    if response.status_code == 200:
        return response.json().get("results", [])
    else:
        print(f"Error {response.status_code}: {response.text}")
        return []

# Buscar papers sobre "machine learning and landslides"
query = "machine learning and landslides"
papers = search_papers(query, limit=50)

# Procesar los resultados
if papers:
    data = []
    for paper in papers:
        data.append({
            "Title": paper.get("title"),
            "Authors": ", ".join([author["author"]["display_name"] for author in paper.get("authorships", []) if author.get("author")]),
            "Year": paper.get("publication_year"),
            "DOI": paper.get("doi"),
            "URL": paper.get("id"),
            "Abstract": reconstruct_abstract(paper.get("abstract_inverted_index"))
        })

    # Crear un DataFrame y guardar los datos en un archivo CSV
    df = pd.DataFrame(data)
    df.to_csv("openalex_papers.csv", index=False)
    print("Base de datos generada exitosamente.")
else:
    print("No se encontraron resultados.")


IndexError: list assignment index out of range

In [4]:
import requests
import pandas as pd

# Configurar la URL base de OpenAlex
BASE_URL = "https://api.openalex.org/works"

# Función para reconstruir abstracts desde el índice invertido
def reconstruct_abstract(inverted_index):
    if not inverted_index:
        return ""  # Si el abstract está vacío, devolver una cadena vacía
    
    positions = []
    for word, pos_list in inverted_index.items():
        positions.extend(pos_list)
    
    if not positions:
        return ""  # Si no hay posiciones válidas, devolver una cadena vacía

    max_pos = max(positions)
    
    # Si max_pos es menor que 0, significa que no hay valores válidos
    if max_pos < 0:
        return ""

    abstract = [""] * (max_pos + 1)  # Crear una lista con el tamaño correcto
    
    for word, pos_list in inverted_index.items():
        for pos in pos_list:
            if 0 <= pos < len(abstract):  # Evitar IndexError
                abstract[pos] = word

    return " ".join(abstract).strip()

# Función para buscar papers
def search_papers(query, limit=50):
    params = {
        "filter": f"abstract.search:{query},publication_year:2020-2025",
        "sort": "cited_by_count:desc",  # Cambiado "citation_count" por "cited_by_count"
        "per_page": limit
    }

    response = requests.get(BASE_URL, params=params)

    if response.status_code == 200:
        return response.json().get("results", [])
    else:
        print(f"Error {response.status_code}: {response.text}")
        return []

# Buscar papers sobre "machine learning and landslides"
query = "machine learning and landslides"
papers = search_papers(query, limit=50)

# Procesar los resultados
if papers:
    data = []
    for paper in papers:
        data.append({
            "Title": paper.get("title"),
            "Authors": ", ".join([author["author"]["display_name"] for author in paper.get("authorships", []) if author.get("author")]),
            "Year": paper.get("publication_year"),
            "DOI": paper.get("doi"),
            "URL": paper.get("id"),
            "Abstract": reconstruct_abstract(paper.get("abstract_inverted_index"))
        })

    # Crear un DataFrame y guardar los datos en un archivo CSV
    df = pd.DataFrame(data)
    df.to_csv("openalex_papers.csv", index=False)
    print("Base de datos generada exitosamente.")
else:
    print("No se encontraron resultados.")


Base de datos generada exitosamente.
