In [1]:
import requests
import json

In [3]:
def get_abstracts(query, size=30):
    url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
    params = {
        "query": query,
        "resultType": "core",
        "pageSize": 100,
        "format": "json",
    }

    r = requests.get(url, params=params)
    data = r.json()

    abstracts = []
    for result in data.get("resultList", {}).get("result", []):
        # Only keep papers with an actual abstract
        if "abstractText" in result and result["abstractText"].strip():
            abstracts.append({
                "pmid": result.get("pmid", ""),
                "doi": result.get("doi", ""),
                "title": result.get("title", ""),
                "abstract": result.get("abstractText").strip(),
                "journal": result.get("journalTitle", ""),
                "year": result.get("pubYear", "")
            })

    return abstracts

In [5]:
query = '"Fabry Disease" AND HAS_ABSTRACT:Y'
fabry_abstracts = get_abstracts(query, size=100)
print(f"Total abstracts found: {len(fabry_abstracts)}")

Total abstracts found: 100


In [15]:
print(json.dumps(fabry_abstracts[23], indent=2)) #verification

{
  "pmid": "39864340",
  "doi": "10.1016/j.scr.2025.103666",
  "title": "Generation of an induced pluripotent stem cell line (SMBCi022-A) from a patient with Fabry disease.",
  "abstract": "Fabry disease (FD) is a systemic disease in which globotriaosylceramide and other naturally occurring glycosphingolipid accumulate in various tissues throughout the body due to mutation of \u03b1-galactosidase A (GLA). These induced pluripotent stem cells (iPSCs) were generated from a 10-year-old male patient's urine carrying the GLA c.1080_1082del Fabry disease mutation. The iPSCs were validated by confirming the pluripotent markers expression, trilineage differentiation capability, normal karyotype and targeted mutation. This resource enables further assessment of the pathophysiological development of Fabry disease and serves as a model to develop drugs for treating Fabry disease.",
  "journal": "",
  "year": "2025"
}


In [13]:
with open("C:\\AISSMS-IOIT-Practicals\\MLOps\\data\\fabry_disease.json", "w", encoding="utf-8") as f:
    json.dump(fabry_abstracts, f, indent=2, ensure_ascii=False)