# Function form

In [65]:
# https://lifesciencehack-ai.hatenablog.com/entry/2020/08/07/Python%E3%81%A7%E8%AB%96%E6%96%87%E6%83%85%E5%A0%B1%E3%82%92%E3%81%BE%E3%81%A8%E3%82%81%E3%81%A6%E3%82%B2%E3%83%83%E3%83%88%E3%81%99%E3%82%8B%E2%91%A5%EF%BD%9EEFetch%E3%82%92%E4%BD%BF

import json
import time
from urllib.parse import quote

import openpyxl
import pandas as pd
import requests
from lxml import etree
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
from openpyxl.utils.dataframe import dataframe_to_rows

def eSearch(term, retmax=10):
    """Search PubMed through the NCBI ESearch endpoint and return matching PMIDs.

    Parameters
    ----------
    term : str
        PubMed search expression. It may be raw text or already
        percent-encoded (e.g. ``'deep%20learning'``); existing ``%XX`` escapes
        and ``+`` separators are passed through unchanged.
    retmax : int, default 10
        Value sent as the ``retmax`` query parameter (maximum number of ids).

    Returns
    -------
    list
        The ``['esearchresult']['idlist']`` entry of the JSON response.
    """
    URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json'
    # Escape characters that would otherwise be read as URL syntax ('&', '#',
    # '=', ...), so a term such as 'R&D' cannot cut the query short or inject
    # extra parameters. '%' and '+' stay as-is to keep pre-encoded terms working.
    encoded_term = quote(term, safe='%+')
    query = URL + '&retmax=' + str(retmax) + '&term=' + encoded_term
    response = requests.get(query)
    return response.json()['esearchresult']['idlist']

def eSummary(pmids):
    """Fetch the ESummary record of each PMID and tabulate selected fields.

    One request is sent per PMID, with a 0.2 s pause after each request.

    Parameters
    ----------
    pmids : list of str
        PubMed ids; each one is appended to the ESummary URL.

    Returns
    -------
    pandas.DataFrame
        One row per PMID with the columns ``pmid``, ``Title``, ``Author``,
        ``Journal_full``, ``Journal_abbr`` and ``Pubdate`` (taken from the
        record's ``epubdate`` field).
    """
    URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&id='

    # Build every request URL up front, then download them one by one.
    request_urls = []
    for pmid in pmids:
        request_urls.append(URL + pmid)

    records = {}
    for request_url in request_urls:
        payload = requests.get(request_url).json()
        records.update(payload['result'])
        time.sleep(0.2)

    # Output column name -> key inside a single ESummary record.
    field_for_column = {
        'Title': 'title',
        'Author': 'sortfirstauthor',
        'Journal_full': 'fulljournalname',
        'Journal_abbr': 'source',
        'Pubdate': 'epubdate',
    }

    rows = []
    for pmid in pmids:
        row = {'pmid': pmid}
        for column, field in field_for_column.items():
            row[column] = records[pmid][field]
        rows.append(row)
    return pd.DataFrame(rows)

def eFetch(pmids):
    """Download the abstract of each PMID through the NCBI EFetch endpoint.

    One XML request is sent per PMID, with a 0.2 s pause after each request.

    Parameters
    ----------
    pmids : list of str
        PubMed ids; each one is appended to the EFetch URL.

    Returns
    -------
    pandas.DataFrame
        A single ``Abstract`` column indexed by ``pmid`` (the text of the
        first ``PMID`` element found in each response). An empty ``pmids``
        yields an empty frame with the same column and index name.
    """
    URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id='

    queries = [URL + pmid for pmid in pmids]

    responses_abst = {}
    for query in queries:
        response = requests.get(query)
        root = etree.fromstring(response.content)
        pmid = root.find('.//PMID').text  # first PMID element in the document
        # Concatenate every text node below <Abstract>. When there is no
        # abstract the XPath result is an empty list and the join yields '',
        # so no separate "missing abstract" branch is needed.
        responses_abst[pmid] = ''.join(root.xpath('//Abstract//*/text()'))
        time.sleep(0.2)

    # Build the frame once, after the loop, so it also exists (empty) when no
    # PMIDs were requested.
    abst_df = pd.DataFrame(
        {'Abstract': list(responses_abst.values())},
        index=pd.Index(list(responses_abst.keys()), name='pmid'),
    )
    return abst_df

def to_excel(data, filename):
    """Write a DataFrame to a single-sheet Excel workbook with a styled header row.

    Needs ``openpyxl``, ``Font``, ``PatternFill``, ``Alignment``, ``Border``,
    ``Side`` and ``dataframe_to_rows`` to be imported in the notebook.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to export; the index is not written, the column names become
        the first row.
    filename : str
        Path handed to ``Workbook.save``.
    """
    # https://lifesciencehack-ai.hatenablog.com/entry/2019/02/23/Python%E3%81%A7%E8%AB%96%E6%96%87%E6%83%85%E5%A0%B1%E3%82%92%E3%81%BE%E3%81%A8%E3%82%81%E3%81%A6%E3%82%B2%E3%83%83%E3%83%88%E3%81%99%E3%82%8B%E2%91%A4_~_Excel%E3%83%95%E3%82%A1%E3%82%A4

    wb = openpyxl.Workbook()  # create the workbook
    ws = wb.active  # use the sheet that is active by default
    ws.title = '論文'  # rename the sheet

    # Fonts
    normal_font = Font(name="Century Gothic", sz=9, b=False)
    header_font = Font(name="Century Gothic", sz=9, b=True, color='FFFFFFFF')
    # Header fill
    header_fill = PatternFill(patternType="solid", fgColor="FF808080")
    # Header alignment: centered both ways
    header_center = Alignment(horizontal='center', vertical='center')
    # Header border: thin black line on all four sides
    header_border = Border(
        outline=True,
        left=Side(style='thin', color='FF000000'),
        right=Side(style='thin', color='FF000000'),
        top=Side(style='thin', color='FF000000'),
        bottom=Side(style='thin', color='FF000000'),
    )

    for r in dataframe_to_rows(data, index=False, header=True):
        ws.append(r)

    for row in ws:
        for cell in row:
            cell.font = normal_font

    # Style every cell of the first row instead of a fixed A1..E1 list, so
    # frames with more than five columns get a fully formatted header.
    for cell in ws[1]:
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_center
        cell.border = header_border

    wb.save(filename)

def to_chat_AI(data, filename):
    """Write pmid, title and abstract of every row to a UTF-8 text file.

    Each row becomes one ``pmid:..., title:..., abstract:...`` entry; entries
    are separated by a blank line.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``pmid``, ``Title`` and ``Abstract``.
    filename : str
        Path of the text file to (over)write.
    """
    records = data[["pmid", "Title", "Abstract"]].values.tolist()
    # f-strings format any cell type, so integer PMIDs (e.g. from a frame
    # reloaded from a spreadsheet) no longer raise TypeError on concatenation.
    entries = [
        f"pmid:{pmid}, title:{title}, abstract:{abstract}"
        for pmid, title, abstract in records
    ]
    with open(filename, "w", encoding='UTF-8') as f:
        f.write("\n\n".join(entries))

In [45]:
# Search expression sent to PubMed; "%20" is a URL-encoded space.
term = 'deep%20learning'

# Step 1 - ESearch: collect the PMIDs. At most 10 are returned by default;
# pass e.g. retmax=100 to ask for more.
pmids = eSearch(term)

# Step 2 - ESummary: basic bibliographic fields as a pandas DataFrame.
summary_df = eSummary(pmids)

# Step 3 - EFetch: the abstracts, also as a pandas DataFrame.
abst_df = eFetch(pmids)

# Step 4 - join summaries and abstracts on the PMID into one DataFrame.
df = summary_df.merge(abst_df, on='pmid')

df

Unnamed: 0,pmid,Title,Author,Journal_full,Journal_abbr,Pubdate,Abstract
0,36800255,A partial convolution generative adversarial n...,Liu Y,Journal of applied clinical medical physics,J Appl Clin Med Phys,2023 Feb 17,Lesion segmentation is critical for clinicians...
1,36800155,Few-shot learning using explainable Siamese tw...,Tummala S,Medical & biological engineering & computing,Med Biol Eng Comput,2023 Feb 17,Automated classification of blood cells from m...
2,36800143,Segmentation of the aorta in systolic phase fr...,Marin-Castrillon DM,"Magma (New York, N.Y.)",MAGMA,2023 Feb 17,"In the management of the aortic aneurysm, 4D f..."
3,36800112,Diagnostic performance of deep learning-based ...,Yang W,La Radiologia medica,Radiol Med,2023 Feb 17,Post-processing and interpretation of coronary...
4,36799660,Using Explainable Artificial Intelligence in t...,Jiménez-Mesa C,International journal of neural systems,Int J Neural Syst,2023 Feb 16,The prevalence of dementia is currently increa...
5,36799418,An innovative metal artifact reduction algorit...,Zhang Z,Current medical imaging,Curr Med Imaging,2023 Feb 17,"During X-ray computed tomography (CT) scans, t..."
6,36799417,Cancer detection based on Medical Image Analys...,Sood T,Current medical imaging,Curr Med Imaging,2023 Feb 17,Cancer is a deadly disease. It is crucial to d...
7,36799341,Implementable Deep Learning for Multi-sequence...,Astley JR,Journal of magnetic resonance imaging : JMRI,J Magn Reson Imaging,2023 Feb 17,"Recently, deep learning via convolutional neur..."
8,36799277,Micromirrors in Neurosurgery: Technical Overvi...,Ordóñez-Rubiano EG,Turkish neurosurgery,Turk Neurosurg,2022 Aug 26,Micromirrors are 45°-angled reflectors able to...
9,36799198,Construction of Exosome SORL1 Detection Platfo...,Li P,"Small (Weinheim an der Bergstrasse, Germany)",Small,2023 Feb 17,Exosomes are promising new biomarkers for colo...


In [49]:
# Export the merged summary/abstract table to an Excel workbook.
to_excel(df, "test2.xlsx")

In [66]:
# Dump pmid, title and abstract of every row of the merged table to a text file.
to_chat_AI(df, "test2.txt")

# Non-function form

In [35]:
# https://lifesciencehack-ai.hatenablog.com/entry/Python%E3%81%A7%E8%AB%96%E6%96%87%E6%83%85%E5%A0%B1%E3%82%92%E3%81%BE%E3%81%A8%E3%82%81%E3%81%A6%E3%82%B2%E3%83%83%E3%83%88%E3%81%99%E3%82%8B%E2%91%A1%E4%B8%8B%E6%BA%96%E5%82%99
import pandas as pd
import openpyxl
from openpyxl.styles import Border, Font, Side, PatternFill, Alignment
from openpyxl.utils.dataframe import dataframe_to_rows
import json
import requests

In [8]:
# https://lifesciencehack-ai.hatenablog.com/entry/%E8%AB%96%E6%96%87%E6%83%85%E5%A0%B1%E3%82%92%E3%81%BE%E3%81%A8%E3%82%81%E3%81%A6%E3%82%B2%E3%83%83%E3%83%88%E3%81%99%E3%82%8B%E2%91%A2ESearch%E3%82%92%E4%BD%BF%E3%81%A3%E3%81%A6PMID%E3%82%92%E5%8F%96
# Step-by-step ESearch: request up to 10 PMIDs for "deep learning" as JSON.
query = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=deep+learning&retmax=10&retmode=json'

response = requests.get(query)

print(response)
# <Response [200]>  -- 200 means the request succeeded; any other status suggests the query is malformed.

print(response.headers['Content-Type'])
# 'application/json; charset=UTF-8'  -- confirms the data came back as JSON.

response_json = response.json()
print(response_json)

# Example of the printed structure (the values in the recorded cell output differ):
# {'header': {'type': 'esearch', 'version': '0.3'}, 
#'esearchresult': {'count': '6868', 'retmax': '10', 'retstart': '0', 'idlist': ['30481649', '30481453', '30481205', '30481176', '30481151', '30480818', '30480490', '30480079', '30478928', '30478810'], 
#'translationset': [{'from': 'learning', 'to': '"learning"[MeSH Terms] OR "learning"[All Fields]'}],
# 'translationstack': [{'term': 'deep[All Fields]', 'field': 'All Fields', 'count': '197588', 'explode': 'N'}, {'term': '"learning"[MeSH Terms]', 'field': 'MeSH Terms', 'count': '356271', 'explode': 'Y'}, {'term': '"learning"[All Fields]', 'field': 'All Fields', 'count': '344322', 'explode': 'N'}, 'OR', 'GROUP', 'AND', 'GROUP'], #'querytranslation': 'deep[All Fields] AND ("learning"[MeSH Terms] OR "learning"[All Fields])'}}

# The PMIDs are the 'idlist' entry under 'esearchresult'; later cells read `pmids`.
pmids = response_json['esearchresult']['idlist']
print(pmids)
# ['30481649', '30481453', '30481205', '30481176', '30481151', '30480818', '30480490', '30480079', '30478928', '30478810']

<Response [200]>
application/json; charset=UTF-8
{'header': {'type': 'esearch', 'version': '0.3'}, 'esearchresult': {'count': '53431', 'retmax': '10', 'retstart': '0', 'idlist': ['36800255', '36800155', '36800143', '36800112', '36799660', '36799418', '36799417', '36799341', '36799277', '36799198'], 'translationset': [{'from': 'deep learning', 'to': '"deep learning"[MeSH Terms] OR ("deep"[All Fields] AND "learning"[All Fields]) OR "deep learning"[All Fields]'}], 'querytranslation': '"deep learning"[MeSH Terms] OR ("deep"[All Fields] AND "learning"[All Fields]) OR "deep learning"[All Fields]'}}
['36800255', '36800155', '36800143', '36800112', '36799660', '36799418', '36799417', '36799341', '36799277', '36799198']


In [37]:
# https://lifesciencehack-ai.hatenablog.com/entry/2018/12/04/Python%E3%81%A7%E8%AB%96%E6%96%87%E6%83%85%E5%A0%B1%E3%82%92%E3%81%BE%E3%81%A8%E3%82%81%E3%81%A6%E3%82%B2%E3%83%83%E3%83%88%E3%81%99%E3%82%8B%E2%91%A3_~_ESummary%E3%82%92%E4%BD%BF%E3%81%A3

import time

import requests

URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&id='

# `pmids` is the PMID list obtained in the ESearch cell.
queries = [URL + pmid for pmid in pmids]

responses = {}  # collects the JSON record of every PMID

for query in queries:
    response = requests.get(query)
    res_json = response.json()['result']  # keep only the 'result' part of the JSON
    responses.update(res_json)  # merge this PMID's record into `responses`
    # Pause between requests, as the eSummary() function does, to stay below
    # the NCBI request-rate limit for clients without an API key.
    time.sleep(0.2)

# PMID whose record is inspected below.
key1 = '36799198'

print(responses[key1])
print()
print("keys")
print(responses[key1].keys())
print()
print("title")
print(responses[key1]['title'])  # article title of PMID `key1`
# Example noted for a different PMID (30478810):
# Deep convolutional neural network-based speech enhancement to improve speech intelligibility and quality for hearing-impaired listeners.

print()
print("fulljournalname")
print(responses[key1]['fulljournalname'])  # full journal name of PMID `key1`
# Example noted by the original author: "Medical &amp; biological engineering &amp; computing"
# -- the '&' appeared garbled as '&amp;'.

print()
print("source")
print(responses[key1]['source'])  # abbreviated journal name of PMID `key1`
# Example: Med Biol Eng Comput

# One dict per PMID with the fields that go into the summary table.
Summaries = [{'pmid': pmid,
              'Title': responses[pmid]['title'],
              'Author': responses[pmid]['sortfirstauthor'],
              'Journal_full': responses[pmid]['fulljournalname'],
              'Journal_abbr': responses[pmid]['source'],
              'Pubdate': responses[pmid]['epubdate']} for pmid in pmids]

{'uid': '36799198', 'pubdate': '2023 Feb 17', 'epubdate': '2023 Feb 17', 'source': 'Small', 'authors': [{'name': 'Li P', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Chen J', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Chen Y', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Song S', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Huang X', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Yang Y', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Li Y', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Tong Y', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Xie Y', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Li J', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Li S', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Wang J', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Qian K', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Wang C', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Du L', 'authtype': 'Author', 'clusterid': ''}], 'lastauthor': 'Du L'

In [38]:
# Turn the list of per-PMID dicts into a DataFrame and display it.
data = pd.DataFrame(Summaries)
data
# Earlier column-by-column construction, left commented out. NOTE(review): it reads a
# 'Journal' key, while the dicts in `Summaries` use 'Journal_full' / 'Journal_abbr',
# so it would need updating before being re-enabled.
# # https://lifesciencehack-ai.hatenablog.com/entry/2019/02/23/Python%E3%81%A7%E8%AB%96%E6%96%87%E6%83%85%E5%A0%B1%E3%82%92%E3%81%BE%E3%81%A8%E3%82%81%E3%81%A6%E3%82%B2%E3%83%83%E3%83%88%E3%81%99%E3%82%8B%E2%91%A4_~_Excel%E3%83%95%E3%82%A1%E3%82%A4
# data  = pd.DataFrame(columns = ['PMID','Title', 'Author', 'Journal', 'Pubdate'])
# # Create the DataFrame with explicit column names
# PMIDs = [ i['pmid'] for i in Summaries ]
# Titles = [ i['Title'] for i in Summaries ]
# Authors = [ i['Author'] for i in Summaries ]
# Journals = [ i['Journal'] for i in  Summaries ]
# Pubdates = [ i['Pubdate'] for i in Summaries ]
# data['PMID'] = PMIDs
# data['Title'] = Titles
# data['Author'] = Authors
# data['Journal'] = Journals
# data['Pubdate'] = Pubdates
# data

Unnamed: 0,pmid,Title,Author,Journal_full,Journal_abbr,Pubdate
0,36800255,A partial convolution generative adversarial n...,Liu Y,Journal of applied clinical medical physics,J Appl Clin Med Phys,2023 Feb 17
1,36800155,Few-shot learning using explainable Siamese tw...,Tummala S,Medical & biological engineering & computing,Med Biol Eng Comput,2023 Feb 17
2,36800143,Segmentation of the aorta in systolic phase fr...,Marin-Castrillon DM,"Magma (New York, N.Y.)",MAGMA,2023 Feb 17
3,36800112,Diagnostic performance of deep learning-based ...,Yang W,La Radiologia medica,Radiol Med,2023 Feb 17
4,36799660,Using Explainable Artificial Intelligence in t...,Jiménez-Mesa C,International journal of neural systems,Int J Neural Syst,2023 Feb 16
5,36799418,An innovative metal artifact reduction algorit...,Zhang Z,Current medical imaging,Curr Med Imaging,2023 Feb 17
6,36799417,Cancer detection based on Medical Image Analys...,Sood T,Current medical imaging,Curr Med Imaging,2023 Feb 17
7,36799341,Implementable Deep Learning for Multi-sequence...,Astley JR,Journal of magnetic resonance imaging : JMRI,J Magn Reson Imaging,2023 Feb 17
8,36799277,Micromirrors in Neurosurgery: Technical Overvi...,Ordóñez-Rubiano EG,Turkish neurosurgery,Turk Neurosurg,2022 Aug 26
9,36799198,Construction of Exosome SORL1 Detection Platfo...,Li P,"Small (Weinheim an der Bergstrasse, Germany)",Small,2023 Feb 17


In [39]:
# https://lifesciencehack-ai.hatenablog.com/entry/2019/02/23/Python%E3%81%A7%E8%AB%96%E6%96%87%E6%83%85%E5%A0%B1%E3%82%92%E3%81%BE%E3%81%A8%E3%82%81%E3%81%A6%E3%82%B2%E3%83%83%E3%83%88%E3%81%99%E3%82%8B%E2%91%A4_~_Excel%E3%83%95%E3%82%A1%E3%82%A4

wb = openpyxl.Workbook()  # create the workbook
ws = wb.active  # use the sheet that is active by default
ws.title = '論文'  # rename the sheet

# Fonts
normal_font = Font(name="Century Gothic", sz=9, b=False)
header_font = Font(name="Century Gothic", sz=9, b=True, color='FFFFFFFF')
# Header fill
header_fill = PatternFill(patternType="solid", fgColor="FF808080")
# Header alignment: centered both ways
header_center = Alignment(horizontal='center', vertical='center')
# Header border: thin black line on all four sides
header_border = Border(
    outline=True,
    left=Side(style='thin', color='FF000000'),
    right=Side(style='thin', color='FF000000'),
    top=Side(style='thin', color='FF000000'),
    bottom=Side(style='thin', color='FF000000'),
)

# Header row first, then one worksheet row per DataFrame row (index omitted).
for r in dataframe_to_rows(data, index=False, header=True):
    ws.append(r)

for row in ws:
    for cell in row:
        cell.font = normal_font

# Style every cell of the first row rather than a fixed A1..E1 list; `data`
# has six columns, so the fixed list left the last header unformatted.
for cell in ws[1]:
    cell.font = header_font
    cell.fill = header_fill
    cell.alignment = header_center
    cell.border = header_border

wb.save("test.xlsx")

In [41]:
# https://lifesciencehack-ai.hatenablog.com/entry/2020/08/07/Python%E3%81%A7%E8%AB%96%E6%96%87%E6%83%85%E5%A0%B1%E3%82%92%E3%81%BE%E3%81%A8%E3%82%81%E3%81%A6%E3%82%B2%E3%83%83%E3%83%88%E3%81%99%E3%82%8B%E2%91%A5%EF%BD%9EEFetch%E3%82%92%E4%BD%BF
import requests
from lxml import etree
import time

# Base EFetch URL; a PMID is appended to form each query. The stray space that
# used to sit inside "retmode=XML &id=" is removed so the parameter value is clean.
URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id='

# `pmids` is the PMID list obtained in the ESearch cell.
queries = [URL + pmid for pmid in pmids]

responses_abst = {}  # maps PMID -> abstract text

# Unlike the ESummary cell, which keeps the whole JSON record per PMID, only
# the abstract text is stored here to keep the result small.

for query in queries:
    response = requests.get(query)
    root = etree.fromstring(response.content)
    pmid = root.find('.//PMID').text  # first PMID element in the document
    # Concatenate every text node below <Abstract>. With no abstract the XPath
    # result is an empty list and the join yields '', so the former
    # `findall(...) is None` check (never true: findall returns a list) is gone.
    abst_text = ''.join(root.xpath('//Abstract//*/text()'))
    responses_abst[pmid] = abst_text
    time.sleep(0.2)

In [42]:
# Display the collected PMID -> abstract text mapping.
responses_abst

{'36800255': 'Lesion segmentation is critical for clinicians to accurately stage the disease and determine treatment strategy. Deep learning based automatic segmentation can improve both the segmentation efficiency and accuracy. However, training a robust deep learning segmentation model requires sufficient training examples with sufficient diversity in lesion location and lesion size. This study is to develop a deep learning framework for generation of synthetic lesions with various locations and sizes that can be included in the training dataset to enhance the lesion segmentation performance. The lesion synthesis network is a modified generative adversarial network (GAN). Specifically, we innovated a partial convolution strategy to construct a U-Net-like generator. The discriminator is designed using Wasserstein GAN with gradient penalty and spectral normalization. A mask generation method based on principal component analysis (PCA) was developed to model various lesion shapes. The g

# Trash (Failed)

In [2]:
from pymed import PubMed

AttributeError: partially initialized module 'charset_normalizer' has no attribute 'md__mypyc' (most likely due to a circular import)

In [None]:
# Leftover from the abandoned pymed attempt: relies on `from pymed import PubMed`,
# whose recorded run ended in an AttributeError, and this cell has no execution count.
# NOTE(review): tool name, e-mail address and query string look like placeholders - confirm before reuse.
pubmed = PubMed(tool="MyTool", email="my@email.address")
results = pubmed.query("Some query", max_results=500)