### 使用ライブラリ

In [30]:
import xml.etree.ElementTree as ET
from urllib.parse import urlencode

import pandas as pd
import requests

### 定数一覧
- esearch_url
    - PMIDを取得する際に使用する
- efetch_url
    - 論文の詳細情報を取得する際に使用する

In [38]:
# Shared request settings. The db/term query parameters are attached to the
# base URLs below at call time.
db = "pubmed"
retmode = "xml"
retmax = 100  # maximum is 10000

# Base endpoints: esearch is used to obtain PMIDs, efetch to obtain article details.
esearch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
efetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

# Search expression: a journal filter AND-ed with two topic terms.
searching_words = """
(New England Journal of Medicine[Journal] OR BMJ[Journal] OR The Lancet[Journal] OR JAMA[Journal] OR Annals of Internal Medicine[Journal] OR Kidney International[Journal] OR Journal of the American Society of Nephrology[Journal] OR American Journal of Kidney Diseases[Journal] OR Clinical Journal of the American Society of Nephrology[Journal] OR Nephrology Dialysis Transplantation[Journal])
AND (glomerular hyperfiltration) AND (cardiovascular disease)
"""

# Column order of the resulting evidence table.
columns = [
    "PMID",
    "Title",
    "Journal",
    "PubYear",
    "Abstract",
]

In [25]:
def make_request_url(base_url: str, params: dict[str, str | int]) -> str:
    base_url += "?"
    for k, v in params.items():
        base_url += f"{k}={v}&"
    # 最後の余分な "&" を除く
    url = base_url[: len(base_url) - 1]
    return url

In [21]:
def fetch_xml(base_url: str, params: dict[str, str | int]) -> ET.Element:
    """Send a GET request and parse the response body as XML.

    Args:
        base_url: Endpoint URL without a query string.
        params: Query parameters passed to ``make_request_url``.

    Returns:
        The root element of the parsed XML document.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    res = requests.get(make_request_url(base_url, params), timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page as XML.
    res.raise_for_status()
    # Parse the raw bytes so the XML parser applies the encoding declared in the
    # document rather than a charset guessed from the HTTP headers.
    return ET.fromstring(res.content)

In [31]:
def extract_pmids(base_url: str, params: dict[str, str | int]) -> list[str]:
    """Fetch an XML response and collect the text of every ``<Id>`` element.

    Args:
        base_url: Endpoint URL without a query string.
        params: Query parameters for the request.

    Returns:
        The non-empty ``<Id>`` texts in document order.
    """
    root = fetch_xml(base_url, params)
    # Read from the freshly fetched ``root`` (the previous code referenced an
    # undefined ``res_xml``), and skip empty <Id> nodes so the result is
    # really a list of strings.
    pmids = [node.text for node in root.findall(".//Id") if node.text]
    print(f"{len(pmids)}件のPubMedIDを取得")
    return pmids

PMIDのリストを取得

In [29]:
# Run the search: database, query term and result cap are sent to the esearch endpoint.
esearch_params = dict(db=db, term=searching_words, retmax=retmax)
pmids = extract_pmids(esearch_url, esearch_params)

64件のPubMedIDを取得


In [39]:
def _element_text(parent: ET.Element, path: str) -> str | None:
    """Return all text below the first element matching ``path``, or None if absent."""
    node = parent.find(path)
    if node is None:
        return None
    # itertext() also yields text inside and after nested inline tags, which
    # ``findtext`` / ``.text`` would drop.
    return "".join(node.itertext())


def gen_evid_tbl(base_url: str, params: dict[str, str | int]) -> pd.DataFrame:
    """Fetch article XML and tabulate one row per ``<PubmedArticle>`` element.

    Each row holds the PMID, article title, journal title, publication year and
    abstract, stored under the names listed in the module-level ``columns``.

    Args:
        base_url: Endpoint URL without a query string.
        params: Query parameters for the request.

    Returns:
        A DataFrame with the columns from ``columns``; it has those columns
        (and zero rows) even when the response contains no articles. Fields
        missing from an article are ``None``, except the abstract, which is
        an empty string.
    """
    root = fetch_xml(base_url, params)
    # Collect PMID, article title, journal title, year and abstract as a list of dicts.
    articles = []
    for article in root.findall(".//PubmedArticle"):
        pmid = article.findtext(".//PMID")
        title = _element_text(article, ".//ArticleTitle")
        journal = article.findtext(".//Journal/Title")
        pub_year = article.findtext(".//PubDate/Year")
        # An abstract may consist of several <AbstractText> sections; gather the
        # full text of each one. Using itertext() avoids a TypeError when a
        # section starts with a nested tag (its ``.text`` is then None).
        abstract = "".join(
            "".join(section.itertext())
            for section in article.findall(".//AbstractText")
        )
        values = [pmid, title, journal, pub_year, abstract]
        articles.append(dict(zip(columns, values)))
    # Passing ``columns`` keeps the schema stable for an empty result.
    return pd.DataFrame(articles, columns=columns)

In [40]:
# Join the PMIDs into one comma-separated value for the "id" parameter,
# build the evidence table, and preview its first rows.
pmids_csv = ",".join(pmids)
efetch_params = dict(db=db, id=pmids_csv, retmode=retmode)
evid_tbl = gen_evid_tbl(efetch_url, efetch_params)
evid_tbl.head(n=5)

Unnamed: 0,PMID,Title,Journal,PubYear,Abstract
0,38280517,Pediatric kidney dimensions and risk of persis...,Kidney international,2024,"In Mexico, chronic kidney disease of unknown o..."
1,35977806,Sex Differences in Age-Related Loss of Kidney ...,Journal of the American Society of Nephrology ...,2022,"CKD is more prevalent in women, but more men r..."
2,35351818,Brief Early Life Angiotensin-Converting Enzyme...,Journal of the American Society of Nephrology ...,2022,Children born with a solitary functioning kidn...
3,31901353,Do sodium-glucose cotransporter-2 inhibitors a...,Kidney international,2020,Cardiovascular and renal outcome trials demons...
4,30799029,Renal Hemodynamic Function and RAAS Activation...,American journal of kidney diseases : the offi...,2019,The renin-angiotensin-aldosterone system (RAAS...
