In [24]:
# Import libraries
# ライブラリをインポートします
from datetime import datetime
import json
import time
from urllib.parse import quote

from lxml import etree
import numpy as np
import openpyxl
from openpyxl.styles import Border, Font, Side, PatternFill, Alignment
from openpyxl.utils.dataframe import dataframe_to_rows
import pandas as pd
import requests


# Print for the iterations:
# 1, 2, 3,..., 9,
# 10, 20, 30,..., 90,
# 100, 200, 300,..., 900,
# ...
def counter_print(i):
    """Print a timestamped progress line for "round" iteration numbers.

    A line is printed only when ``i`` is a single leading digit followed by
    zeros (1..9, 10, 20, ..., 90, 100, 200, ...), so the amount of log output
    grows logarithmically with the number of iterations.

    Args:
        i: 1-based iteration counter. Values below 1 are ignored silently
            instead of raising, since they never denote a processed entry.

    Returns:
        None. The progress line is written to stdout.
    """
    if i < 1:
        return
    # Derive the leading power of ten from the decimal digit count rather than
    # from a floating-point log10, which is undefined for 0 and inexact for
    # large values.
    magnitude = 10 ** (len(str(int(i))) - 1)
    if i % magnitude == 0:
        print(datetime.now().strftime('%Y/%m/%d %H:%M:%S'), ": processing entry #", i)
        

# Get the PubMed IDs with the search query `term`
# termをPubMedにクエリとして投げ、結果をPubMed IDのリストとして返します
def eSearch(term, retmax=10):
    """Run a PubMed ESearch query and return the matching PubMed IDs.

    Args:
        term: PubMed search expression. It may be given raw
            (``'deep learning'``) or already percent-encoded
            (``'deep%20learning'``): existing ``%XX`` escapes and ``+`` are
            passed through untouched, everything else that is unsafe in a
            query string is escaped.
        retmax: Maximum number of IDs to request from the API.

    Returns:
        The ``idlist`` entry of the ESearch JSON response (a list of IDs).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within 30 seconds.
        KeyError: If the response does not contain ``esearchresult.idlist``.
    """
    print("Fetching the list of PubMed IDs...")
    URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json'
    # Escape characters such as '&' or '#' that would otherwise terminate the
    # `term` parameter early; '%' and '+' stay as-is so pre-encoded terms
    # keep their meaning.
    option = '&retmax=' + str(retmax) + '&term=' + quote(term, safe='%+')
    query = URL + option
    response = requests.get(query, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    response_json = response.json()
    pmids = response_json['esearchresult']['idlist']
    return pmids


# Get summary statistics of the papers listed in `pmids` as pandas.DataFrame
# PubMed IDのリストから、それぞれの論文に関するサマリを取得し、pandas.DataFrameとして返します
def eSummary(pmids):
    """Fetch the ESummary record of each PubMed ID and tabulate key fields.

    One request is sent per ID, with a 0.2 s pause between requests.

    Args:
        pmids: Iterable of PubMed IDs as strings.

    Returns:
        pandas.DataFrame with one row per entry of ``pmids`` and the columns
        ``pmid``, ``Title``, ``Author``, ``Journal_full``, ``Journal_abbr``
        and ``Pubdate``. Fields missing from a record are left as empty
        strings. An empty ``pmids`` yields an empty frame that still carries
        these columns, so it can be merged on ``pmid``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    print("Fetching the summaries based on the ID list...")
    URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&id='
    # Output column -> key of the ESummary record it is read from.
    fields = {
        'Title': 'title',
        'Author': 'sortfirstauthor',
        'Journal_full': 'fulljournalname',
        'Journal_abbr': 'source',
        'Pubdate': 'epubdate',
    }
    pmids = list(pmids)
    responses = {}
    for i, pmid in enumerate(pmids, 1):
        counter_print(i)
        response = requests.get(URL + pmid, timeout=30)
        response.raise_for_status()
        # A payload without 'result' contributes nothing rather than crashing.
        responses.update(response.json().get('result', {}))
        time.sleep(0.2)

    Summaries = []
    for pmid in pmids:
        record = responses.get(pmid, {})
        row = {'pmid': pmid}
        for column, key in fields.items():
            # Records may lack some keys; keep the row and leave those
            # fields blank.
            row[column] = record.get(key, '')
        Summaries.append(row)
    # Explicit columns keep the schema stable even when there are no rows.
    return pd.DataFrame(Summaries, columns=['pmid', *fields])


# Get the abstracts of the papers listed in `pmids` as pandas.DataFrame
# PubMed IDのリストから、それぞれの論文のAbstractを取得し、pandas.DataFrameとして返します
def eFetch(pmids):
    """Fetch the abstract text of each PubMed ID via EFetch.

    One request is sent per ID, with a 0.2 s pause between requests.

    Args:
        pmids: Iterable of PubMed IDs as strings.

    Returns:
        pandas.DataFrame indexed by ``pmid`` with a single ``Abstract``
        column. Papers without an abstract get an empty string. An empty
        ``pmids`` yields an empty frame with the same index name and column.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    print("Fetching the abstracts based on the ID list...")
    URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id='
    responses_abst = {}
    for i, requested_pmid in enumerate(pmids, 1):
        counter_print(i)
        response = requests.get(URL + requested_pmid, timeout=30)
        response.raise_for_status()
        root = etree.fromstring(response.content)
        # Use the PMID reported in the XML; fall back to the requested ID when
        # the document carries no PMID element.
        pmid_node = root.find('.//PMID')
        pmid = pmid_node.text if pmid_node is not None else requested_pmid
        # Joining an empty node list already yields '', so no separate
        # "no abstract" branch is needed.
        abst_text = ''.join(root.xpath('//Abstract//*/text()'))
        responses_abst[pmid] = abst_text
        time.sleep(0.2)

    # Build the frame once, after the loop, so it also exists for empty input.
    abst_df = pd.DataFrame(
        {'Abstract': list(responses_abst.values())},
        index=pd.Index(list(responses_abst.keys()), name='pmid'),
    )
    return abst_df


# Export as .xlsx
# 結果を.xlsxファイルとして出力します
def to_excel(data, filename):
    """Write a DataFrame to a styled single-sheet .xlsx workbook.

    The frame is written without its index. Every cell uses a 9 pt
    "Century Gothic" font; every cell of the header row is additionally
    rendered bold white on a grey fill, centred, with a thin black border.

    Args:
        data: pandas.DataFrame to export.
        filename: Path of the .xlsx file to create or overwrite.

    Returns:
        None. The workbook is saved to ``filename`` and a confirmation line
        is printed.
    """
    wb = openpyxl.Workbook()  # create the workbook
    ws = wb.active  # use the sheet that is active by default
    ws.title = '論文'  # rename the sheet (Japanese for "papers")

    # Fonts
    normal_font = Font(name="Century Gothic", sz=9, b=False)
    header_font = Font(name="Century Gothic", sz=9, b=True, color='FFFFFFFF')
    # Header background fill
    header_fill = PatternFill(patternType="solid", fgColor="FF808080")
    # Header alignment: centred horizontally and vertically
    header_center = Alignment(horizontal='center', vertical='center')
    # Header border: thin black line on all four sides
    header_border = Border(
        outline=True,
        left=Side(style='thin', color='FF000000'),
        right=Side(style='thin', color='FF000000'),
        top=Side(style='thin', color='FF000000'),
        bottom=Side(style='thin', color='FF000000')
    )

    for r in dataframe_to_rows(data, index=False, header=True):
        ws.append(r)

    for row in ws:
        for cell in row:
            cell.font = normal_font

    # Style the whole first row rather than a fixed A1..E1 range, so that
    # frames with more (or fewer) than five columns get a complete header.
    for row in ws.iter_rows(min_row=1, max_row=1):
        for cell in row:
            cell.font = header_font
            cell.fill = header_fill
            cell.alignment = header_center
            cell.border = header_border

    wb.save(filename)
    print("Exported as:", filename)


# Export PubMed ID + Title + Abstract to .txt
# PubMed ID、タイトル、Abstractを結合したものを.txtファイルに出力します
def to_chat_AI(data, filename):
    """Write PubMed ID, title and abstract of every row to a UTF-8 text file.

    Each row becomes ``pmid:<id>, title:<title>, abstract:<abstract>`` and
    rows are separated by one blank line.

    Args:
        data: pandas.DataFrame with the columns ``pmid``, ``Title`` and
            ``Abstract``. Values need not be strings: they are converted with
            ``str()``, and missing values (None/NaN) are written as empty
            text.
        filename: Path of the text file to create or overwrite.

    Returns:
        None. The text is saved to ``filename`` and a confirmation line is
        printed.
    """
    def _as_text(value):
        # Plain '+' concatenation would raise TypeError for ints or NaN.
        return '' if pd.isna(value) else str(value)

    rows = data[["pmid", "Title", "Abstract"]].values.tolist()
    entries = [
        "pmid:" + _as_text(pmid) + ", title:" + _as_text(title) + ", abstract:" + _as_text(abstract)
        for pmid, title, abstract in rows
    ]
    str_data = "\n\n".join(entries)
    with open(filename, "w", encoding='UTF-8') as f:
        f.write(str_data)
    print("Exported as:", filename)

In [29]:
# Search expression sent to PubMed; "%20" is the URL-encoded form of a space.
term = 'deep%20learning'

# Look up the PubMed IDs that match `term`.
# Only the first 10 hits are requested by default; call
# eSearch(term, retmax=100), for example, to raise that limit.
pmids = eSearch(term)

# Download the summary record of every hit as a pandas.DataFrame.
summary_df = eSummary(pmids)

# Download the abstract of every hit as a pandas.DataFrame.
abst_df = eFetch(pmids)

# Combine summaries and abstracts into a single table, joined on the PubMed ID.
df = summary_df.merge(abst_df, on='pmid')

Fetching the list of PubMed IDs...
Fetching the summaries based on the ID list...
2023/02/21 14:55:56 : processing entry # 1
2023/02/21 14:55:57 : processing entry # 2
2023/02/21 14:55:58 : processing entry # 3
2023/02/21 14:55:59 : processing entry # 4
2023/02/21 14:56:00 : processing entry # 5
2023/02/21 14:56:01 : processing entry # 6
2023/02/21 14:56:02 : processing entry # 7
2023/02/21 14:56:03 : processing entry # 8
2023/02/21 14:56:04 : processing entry # 9
2023/02/21 14:56:05 : processing entry # 10
Fetching the abstracts based on the ID list...
2023/02/21 14:56:06 : processing entry # 1
2023/02/21 14:56:07 : processing entry # 2
2023/02/21 14:56:08 : processing entry # 3
2023/02/21 14:56:09 : processing entry # 4
2023/02/21 14:56:10 : processing entry # 5
2023/02/21 14:56:11 : processing entry # 6
2023/02/21 14:56:12 : processing entry # 7
2023/02/21 14:56:13 : processing entry # 8
2023/02/21 14:56:14 : processing entry # 9
2023/02/21 14:56:15 : processing entry # 10


In [30]:
# Display the merged table (summary columns plus the Abstract column).
df

Unnamed: 0,pmid,Title,Author,Journal_full,Journal_abbr,Pubdate,Abstract
0,36800255,A partial convolution generative adversarial n...,Liu Y,Journal of applied clinical medical physics,J Appl Clin Med Phys,2023 Feb 17,Lesion segmentation is critical for clinicians...
1,36800155,Few-shot learning using explainable Siamese tw...,Tummala S,Medical & biological engineering & computing,Med Biol Eng Comput,2023 Feb 17,Automated classification of blood cells from m...
2,36800143,Segmentation of the aorta in systolic phase fr...,Marin-Castrillon DM,"Magma (New York, N.Y.)",MAGMA,2023 Feb 17,"In the management of the aortic aneurysm, 4D f..."
3,36800112,Diagnostic performance of deep learning-based ...,Yang W,La Radiologia medica,Radiol Med,2023 Feb 17,Post-processing and interpretation of coronary...
4,36799660,Using Explainable Artificial Intelligence in t...,Jiménez-Mesa C,International journal of neural systems,Int J Neural Syst,2023 Feb 16,The prevalence of dementia is currently increa...
5,36799418,An innovative metal artifact reduction algorit...,Zhang Z,Current medical imaging,Curr Med Imaging,2023 Feb 17,"During X-ray computed tomography (CT) scans, t..."
6,36799417,Cancer detection based on Medical Image Analys...,Sood T,Current medical imaging,Curr Med Imaging,2023 Feb 17,Cancer is a deadly disease. It is crucial to d...
7,36799341,Implementable Deep Learning for Multi-sequence...,Astley JR,Journal of magnetic resonance imaging : JMRI,J Magn Reson Imaging,2023 Feb 17,"Recently, deep learning via convolutional neur..."
8,36799277,Micromirrors in Neurosurgery: Technical Overvi...,Ordóñez-Rubiano EG,Turkish neurosurgery,Turk Neurosurg,2022 Aug 26,Micromirrors are 45°-angled reflectors able to...
9,36799198,Construction of Exosome SORL1 Detection Platfo...,Li P,"Small (Weinheim an der Bergstrasse, Germany)",Small,2023 Feb 17,Exosomes are promising new biomarkers for colo...


In [31]:
# Export the merged table as an .xlsx workbook.
to_excel(df, "test3.xlsx")

Exported as: test3.xlsx


In [32]:
# Export PubMed ID, title and abstract of every paper, concatenated, to a .txt file.
to_chat_AI(df, "test3.txt")

Exported as: test3.txt
