In [None]:
import pandas as pd
from Bio import Entrez  # なければ追加install
import openpyxl  # なければ追加install
import time

In [None]:
df = pd.DataFrame([[
    'Respiratory V25',
    'AtopicDermatitis',
    'BodyComposition Status',
    'Body Water Mass',
    'Creatinine',
    'DailyLife Feelings Enjoyable',
    'Eating Habit Butter',
    'Exercise Habit Mountain Hiking',
    'baPWV',
    'Health Status Doctor Dementia',
    'Health Status Doctor HTN',
    'Health Status Doctor Pancreas',
    'Heart Disease',
    'Hypertension',
    'Right-Left Arm LeanMass',
    'Right-Left Leg R_50kHz impedance',
    'Weed Allergy I'
],[
    "Obesity",
    "COPD",
    "COPD & Obesity",
    "Hypertension",
    "CKD",
    "Obesity",
    "Diabetes & Obesity",
    "Hypertension & Diabetes & Obesity",
    "Arteriosclerosis",
    "COPD & Obesity",
    "Hypertension",
    "Diabetes",
    "Diabetes",
    "Hypertension",
    "Hypertension",
    "Obesity",
    "COPD & Obesity"
]], index = ["check_item", "disease"]).T
df

Unnamed: 0,check_item,disease
0,Respiratory V25,Obesity
1,AtopicDermatitis,COPD
2,BodyComposition Status,COPD & Obesity
3,Body Water Mass,Hypertension
4,Creatinine,CKD
5,DailyLife Feelings Enjoyable,Obesity
6,Eating Habit Butter,Diabetes & Obesity
7,Exercise Habit Mountain Hiking,Hypertension & Diabetes & Obesity
8,baPWV,Arteriosclerosis
9,Health Status Doctor Dementia,COPD & Obesity


以下のセルでは、上記の check_item と disease の1対1ペアを PubMed で検索します。実行前に `Entrez.email` を自分のメールアドレスに変更してください（API キーを使う場合は、事前に NCBI アカウントの登録が必要です）。

In [None]:
import time
import pandas as pd
from Bio import Entrez

# ── Settings ──
# Contact address sent to NCBI with every request; replace the placeholder
# with your own e-mail address before running.
Entrez.email = "YOUR_MAIL_ADDRESS"
# Optional NCBI API key. Prefer loading it from an environment variable over
# hard-coding it in the notebook.
#Entrez.api_key = "YOUR_NCBI_API_KEY"

# Maximum number of PMIDs requested per query (passed to esearch as retmax).
MAX_PER_QUERY = 5
# Seconds to sleep after each query in batch_pubmed_search.
DELAY = 0.34
# NOTE(review): not read by any function in this cell; make_query sets its own
# publication-date window.
YEAR_RANGE = ("2015", "2024")
# Relation keywords; one query is issued per (pair, keyword).
# An earlier list, ["association", "risk", "correlation"], was assigned here and
# immediately overwritten, so only the list below was ever in effect.
RELATIONS = ["association", "stratification"]

def make_query(
    item: str,
    disease: str,
    relation: str,
    year_range: tuple[str, str] = ("2015", "2024"),
) -> str:
    """Build a PubMed search string for one check item / disease / relation.

    Each term is searched as a quoted phrase in title/abstract OR as a MeSH
    term. The relation keyword must appear in title/abstract, and results are
    limited to human studies published within ``year_range``.

    Args:
        item: Check-item name, used as a single phrase.
        disease: Disease name. Several conditions may be joined with "&"
            (e.g. "COPD & Obesity"); each one becomes its own clause and the
            clauses are combined with AND.
            NOTE(review): this assumes "&" means all conditions must co-occur;
            switch the joiner to OR if "any of" was intended.
        relation: Relation keyword, e.g. "association".
        year_range: (start, end) publication years as strings. Defaults to the
            window that was previously hard-coded.

    Returns:
        The query string to pass to esearch as ``term``.
    """
    def group(term: str) -> str:
        # An embedded double quote would end the phrase early, so drop it.
        phrase = term.replace('"', "").strip()
        return f'("{phrase}"[TIAB] OR "{phrase}"[MeSH])'

    # A compound label sent as one quoted phrase ("COPD & Obesity") is unlikely
    # to match any record, so each condition is searched on its own.
    conditions = [part.strip() for part in disease.split("&") if part.strip()]
    disease_clause = " AND ".join(group(c) for c in conditions) or group(disease)

    start, end = year_range
    return (
        f"{group(item)} AND {disease_clause} AND {relation}[TIAB] "
        f'AND ("{start}"[PDAT] : "{end}"[PDAT]) AND Humans[Mesh]'
    )

def _paper_from_record(rec) -> dict:
    """Flatten one parsed PubmedArticle record into the fields kept per paper."""
    citation = rec["MedlineCitation"]
    pmid = citation["PMID"]
    art = citation["Article"]
    journal = art.get("Journal", {})
    pub_date = journal.get("JournalIssue", {}).get("PubDate", {})

    # PubDate holds either a Year element or a free-text MedlineDate
    # (e.g. "2023 Nov-Dec"). Fall back to the latter so the column is not
    # blank; keep only the leading year when the text starts with one.
    year = pub_date.get("Year", "")
    if not year:
        medline_date = str(pub_date.get("MedlineDate", ""))
        year = medline_date[:4] if medline_date[:4].isdigit() else medline_date

    return {
        "PMID": pmid,
        "Link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
        "Title": art.get("ArticleTitle", ""),
        "Journal": journal.get("Title", ""),
        "PubDate": year,
        # The sections of a structured abstract are joined with spaces.
        "Abstract": " ".join(art.get("Abstract", {}).get("AbstractText", [])),
    }


def search_and_fetch(query: str) -> list[dict]:
    """Run one PubMed query and return metadata for the matching articles.

    Calls esearch for up to ``MAX_PER_QUERY`` PMIDs, then efetch for the full
    records of those PMIDs.

    Args:
        query: PubMed search string (see ``make_query``).

    Returns:
        One dict per article with keys PMID, Link, Title, Journal, PubDate and
        Abstract; an empty list when the search matches nothing.

    Raises:
        Whatever ``Entrez.esearch`` / ``Entrez.efetch`` / ``Entrez.read`` raise
        on network or parse errors; nothing is caught here.
    """
    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=MAX_PER_QUERY)
    try:
        ids = Entrez.read(search_handle)["IdList"]
    finally:
        search_handle.close()
    if not ids:
        return []

    # Close the efetch handle as well, including when parsing fails.
    fetch_handle = Entrez.efetch(
        db="pubmed", id=",".join(ids), rettype="medline", retmode="xml"
    )
    try:
        records = Entrez.read(fetch_handle)["PubmedArticle"]
    finally:
        fetch_handle.close()

    return [_paper_from_record(rec) for rec in records]

def batch_pubmed_search(df: pd.DataFrame) -> pd.DataFrame:
    """Search PubMed for every row of ``df`` combined with every relation keyword.

    For each row (columns ``check_item`` and ``disease``) and each entry in
    ``RELATIONS``, builds a query, fetches the matching papers, and records one
    output row per paper. Sleeps ``DELAY`` seconds after each query.

    Returns:
        A DataFrame with columns check_item, disease, relation, query, plus the
        per-paper fields returned by ``search_and_fetch``.
    """
    collected = []
    for _, pair in df.iterrows():
        item_name = pair["check_item"]
        disease_name = pair["disease"]
        for relation in RELATIONS:
            query = make_query(item_name, disease_name, relation)
            # Fields shared by every paper found for this query.
            shared = {
                "check_item": item_name,
                "disease": disease_name,
                "relation": relation,
                "query": query,
            }
            collected.extend(
                {**shared, **paper} for paper in search_and_fetch(query)
            )
            # Pause between queries.
            time.sleep(DELAY)
    return pd.DataFrame(collected)

In [10]:
# Run the PubMed search for every (check_item, disease) pair in df.
# This issues live requests to NCBI, one query per pair and relation keyword.
df_results = batch_pubmed_search(df)
# Display the collected papers (one row per paper found).
df_results

Unnamed: 0,check_item,disease,relation,query,PMID,Link,Title,Journal,PubDate,Abstract
0,Body Water Mass,Hypertension,association,"(""Body Water Mass""[TIAB] OR ""Body Water Mass""[...",38085646,https://pubmed.ncbi.nlm.nih.gov/38085646/,Association of modifiable risk factors with ob...,Aging,2023,The risk factors involved in obstructive sleep...
1,Creatinine,CKD,association,"(""Creatinine""[TIAB] OR ""Creatinine""[MeSH]) AND...",39977186,https://pubmed.ncbi.nlm.nih.gov/39977186/,[Not Available].,Salud publica de Mexico,2024,To estimate survival outcomes in a population ...
2,Creatinine,CKD,association,"(""Creatinine""[TIAB] OR ""Creatinine""[MeSH]) AND...",39831490,https://pubmed.ncbi.nlm.nih.gov/39831490/,Correlation between Serum Biomarkers and Disea...,"British journal of hospital medicine (London, ...",2024,<b>Aims/Background</b> The present study aimed...
3,Creatinine,CKD,association,"(""Creatinine""[TIAB] OR ""Creatinine""[MeSH]) AND...",39743167,https://pubmed.ncbi.nlm.nih.gov/39743167/,Association of Serum Afamin Concentrations Wit...,American journal of kidney diseases : the offi...,2025,Afamin is a vitamin E-binding glycoprotein pri...
4,Creatinine,CKD,association,"(""Creatinine""[TIAB] OR ""Creatinine""[MeSH]) AND...",39740300,https://pubmed.ncbi.nlm.nih.gov/39740300/,Association of creatinine-to-cystatin C ratio ...,"Clinical nutrition (Edinburgh, Scotland)",2025,Skeletal muscle (SM) health has significant pr...
5,Creatinine,CKD,association,"(""Creatinine""[TIAB] OR ""Creatinine""[MeSH]) AND...",39698033,https://pubmed.ncbi.nlm.nih.gov/39698033/,The association of RBP4 with chronic kidney di...,Frontiers in endocrinology,2024,"Retinol binding protein 4 (RBP4), as a novel a..."
6,Creatinine,CKD,stratification,"(""Creatinine""[TIAB] OR ""Creatinine""[MeSH]) AND...",39631491,https://pubmed.ncbi.nlm.nih.gov/39631491/,"Risk of Bleeding, Thrombosis and Death among A...",The American journal of cardiology,2025,There are limited data about the clinical bene...
7,Creatinine,CKD,stratification,"(""Creatinine""[TIAB] OR ""Creatinine""[MeSH]) AND...",39497801,https://pubmed.ncbi.nlm.nih.gov/39497801/,U-shaped association between dietary niacin in...,Frontiers in endocrinology,2024,"In addition to hypertension or diabetes, elder..."
8,Creatinine,CKD,stratification,"(""Creatinine""[TIAB] OR ""Creatinine""[MeSH]) AND...",39382257,https://pubmed.ncbi.nlm.nih.gov/39382257/,Albuminuria-based stratification of end-stage ...,Pharmacotherapy,2024,Clinical trials have shown the kidney-protecti...
9,Creatinine,CKD,stratification,"(""Creatinine""[TIAB] OR ""Creatinine""[MeSH]) AND...",39369692,https://pubmed.ncbi.nlm.nih.gov/39369692/,The Impact of Primary Renal Diagnosis on Progn...,American journal of nephrology,2025,"The definition of CKD is broad, which neglects..."


In [None]:
# Save the search results as an Excel workbook in the current directory.
# Writing .xlsx relies on the openpyxl package imported at the top of the notebook.
df_results.to_excel('./pubmed_search.xlsx')