<a href="https://colab.research.google.com/github/jorgechavez6816/Caseware_web/blob/master/Literature_Review_using_PyAlex_V3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Descarga de hasta 1000 papers usando PyAlex**
Elaborado por: Santiago Zevallos

In [None]:
# 👉 Edit only this line: these keywords are the search query passed to
# Works().search() in Step 3.
PALABRAS_CLAVE = "artificial intelligence higher education"

In [None]:
# Paso 1: Instalar PyAlex
!pip install pyalex


Collecting pyalex
  Downloading pyalex-0.18-py3-none-any.whl.metadata (14 kB)
Downloading pyalex-0.18-py3-none-any.whl (13 kB)
Installing collected packages: pyalex
Successfully installed pyalex-0.18


In [None]:
# Step 2: Import libraries and configure the polite pool
import pyalex
from pyalex import Works
import pandas as pd
from itertools import chain
from tqdm import tqdm

# Replace this with your own e-mail address before running.
# NOTE(review): presumably OpenAlex uses this address to route requests to
# its "polite pool" — confirm against the PyAlex documentation.
pyalex.config.email = "santiago.zevallosq@gmail.com"


In [None]:
# Step 3: Run the search with pagination (at most MAX_RESULTADOS results)
MAX_RESULTADOS = 1000  # adjust this cap as needed

query = Works().search(PALABRAS_CLAVE)

# paginate() yields one list of works per page; chain.from_iterable flattens
# the pages lazily instead of unpacking every page into call arguments.
pages = query.paginate(per_page=200, n_max=MAX_RESULTADOS)

# NOTE(review): PyAlex appears to test n_max only between pages, so a cap that
# is not a multiple of per_page would return extra works. Trimming guarantees
# the configured maximum is honoured (a cap of None keeps everything).
results_raw = list(chain.from_iterable(pages))[:MAX_RESULTADOS]

print(f"Se encontraron {len(results_raw)} resultados (máximo configurado: {MAX_RESULTADOS})")


Se encontraron 1000 resultados (máximo configurado: 1000)


In [None]:
# Step 4: Flatten every work into one row and build the DataFrame
def _work_to_row(work):
    """Flatten one OpenAlex work record into a dict of spreadsheet columns.

    Args:
        work: A mapping for a single work as returned by the search in
            Step 3 (a PyAlex ``Work``; a plain dict is also accepted).

    Returns:
        dict: One row keyed by the output column names ("id", "doi",
        "título", "abstract", "año", "revista", "autores", "temas",
        "citaciones", "open_access_status", "open_access_url").
    """
    # PyAlex rebuilds the plain-text abstract from "abstract_inverted_index"
    # only on item access (work["abstract"]). dict.get() bypasses that hook,
    # which is why the "abstract" column used to come out empty.
    try:
        resumen = work["abstract"]
    except KeyError:
        # The record carries no abstract data at all.
        resumen = None

    # `or {}` / `or []`: the API may send explicit nulls, and
    # .get(key, default) would pass those through as None and crash the
    # chained lookups below.
    source = (work.get("primary_location") or {}).get("source") or {}
    open_access = work.get("open_access") or {}

    autores = "; ".join(
        (a.get("author") or {}).get("display_name") or ""
        for a in work.get("authorships") or []
    )
    temas = "; ".join(
        c.get("display_name") or "" for c in work.get("concepts") or []
    )

    return {
        "id": work.get("id"),
        "doi": work.get("doi"),
        "título": work.get("display_name"),
        "abstract": resumen,
        "año": work.get("publication_year"),
        "revista": source.get("display_name") or "",
        "autores": autores,
        "temas": temas,
        "citaciones": work.get("cited_by_count"),
        "open_access_status": open_access.get("oa_status", ""),
        "open_access_url": open_access.get("oa_url", ""),
    }


rows = [_work_to_row(r) for r in tqdm(results_raw)]

df = pd.DataFrame(rows)
df.head()

100%|██████████| 1000/1000 [00:00<00:00, 76280.88it/s]


Unnamed: 0,id,doi,título,abstract,año,revista,autores,temas,citaciones,open_access_status,open_access_url
0,https://openalex.org/W2981863007,https://doi.org/10.1186/s41239-019-0171-0,Systematic review of research on artificial in...,,2019,International Journal of Educational Technolog...,Olaf Zawacki‐Richter; Victoria I. Marín; Melis...,Personalization; Profiling (computer programmi...,2856,diamond,https://educationaltechnologyjournal.springero...
1,https://openalex.org/W2770717476,https://doi.org/10.1186/s41039-017-0062-8,Exploring the impact of artificial intelligenc...,,2017,Research and Practice in Technology Enhanced L...,Ştefan Popenici; Sharon Kerr,Higher education; Educational technology; Orde...,1266,diamond,https://telrp.springeropen.com/track/pdf/10.11...
2,https://openalex.org/W4366783381,https://doi.org/10.1186/s41239-023-00392-8,Artificial intelligence in higher education: t...,,2023,International Journal of Educational Technolog...,Helen Crompton; Diane Burke,Vocabulary; Reading (process); Higher educatio...,691,diamond,https://educationaltechnologyjournal.springero...
3,https://openalex.org/W3035700103,https://doi.org/10.1186/s41239-020-00218-x,Can artificial intelligence transform higher e...,,2020,International Journal of Educational Technolog...,Tony Bates; Cristóbal Cobo; Olga Mariño; Steve...,Higher education; Artificial intelligence; Com...,342,diamond,https://educationaltechnologyjournal.springero...
4,https://openalex.org/W4313479734,https://doi.org/10.1007/s12195-022-00754-8,"A Conversation on Artificial Intelligence, Cha...",,2023,Cellular and Molecular Bioengineering,Michael R. King,Conversation; Chatbot; Computer science; Data ...,517,bronze,https://link.springer.com/content/pdf/10.1007/...


In [None]:
# Step 5: Write the table to an Excel workbook and download it from Colab
nombre_archivo = "literature_review.xlsx"

# index=False leaves the DataFrame's row index out of the sheet.
df.to_excel(nombre_archivo, index=False)

# google.colab only exists inside a Colab runtime, so it is imported here,
# after the workbook has already been written to disk.
from google.colab import files
files.download(nombre_archivo)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>