In [1]:
import pandas as pd
import json
from Bio import Entrez

In [2]:
# Register a contact email with Entrez up front to avoid potential issues with its services
Entrez.email = "rdc47@duke.edu"

In [3]:
# Search filters: author names and topic terms.
# Both lists are examples; adjust them as needed (an empty list disables that filter).
authors = list()
topics = ["RNAi", "siRNA", "ASO", "mRNA"]

In [4]:
# Creation-date window applied to every search, written in PubMed query syntax
date_range = (
    '("2020/01/01"[Date - Create] : '
    '"2024/09/18"[Date - Create])'
)

In [5]:
def parse_pub_date(pub_date):
    """Render a parsed PubMed ``PubDate`` mapping as a display string.

    Parameters
    ----------
    pub_date : mapping
        Mapping with optional ``'Year'``, ``'Month'``, ``'Day'`` and
        ``'MedlineDate'`` keys, e.g. ``{'Year': '2024', 'Month': 'Sep', 'Day': '18'}``.

    Returns
    -------
    str
        ``"<Year>-<Month>-<Day>"`` when ``'Year'`` is present (a missing month
        defaults to ``'Jan'`` and a missing day to ``'01'``); the raw
        ``'MedlineDate'`` text when only that key is present; otherwise
        ``"Not Available"``.
    """
    if 'Year' in pub_date:
        year = pub_date['Year']
        month = pub_date.get('Month', 'Jan')
        day = pub_date.get('Day', '01')
        return f"{year}-{month}-{day}"

    # Dates that do not fit Year/Month/Day are delivered as free text in
    # MedlineDate (e.g. "2020 Jan-Feb"); surface that text rather than
    # reporting the date as unavailable.
    medline_date = pub_date.get('MedlineDate')
    if medline_date:
        return str(medline_date)

    return "Not Available"

In [6]:
# Build the PubMed query from whichever filters are configured.
# Each non-empty filter list becomes one parenthesised OR-group.
queries = []

if authors:
    author_queries = [f'{author}[Author]' for author in authors]
    queries.append('(' + ' OR '.join(author_queries) + ')')

if topics:
    topic_queries = [f'{topic}[Title/Abstract]' for topic in topics]
    queries.append('(' + ' OR '.join(topic_queries) + ')')

# The date window is always applied. Joining it together with the other
# clauses (instead of appending ' AND ' + date_range to the joined string)
# keeps the query well-formed when both filter lists are empty: the result is
# then just the date window rather than a string starting with ' AND '.
full_query = ' AND '.join(queries + [date_range])

In [7]:
# Search PubMed for records matching the query (at most 100 IDs are requested)
handle = Entrez.esearch(db='pubmed', retmax=100, term=full_query)
try:
    record = Entrez.read(handle)
finally:
    # Release the connection even if parsing the response fails
    handle.close()
id_list = record['IdList']

In [8]:
# Empty results table with one column per extracted field
df = pd.DataFrame(
    columns=[
        "PMID",
        "Title",
        "Abstract",
        "Authors",
        "Journal",
        "Keywords",
        "URL",
        "Affiliations",
        "Pub Date",
    ]
)

In [9]:
%%capture
# Fetch the full record for every PMID in id_list and build the results table.
#
# All IDs are requested in one efetch call rather than one call per ID, and the
# rows are collected in a list so the DataFrame is built once at the end
# (re-running this cell therefore rebuilds `df` instead of appending to it).
result_columns = ['PMID', 'Title', 'Abstract', 'Authors', 'Journal',
                  'Keywords', 'URL', 'Affiliations', 'Pub Date']
rows = []

if id_list:  # efetch needs at least one ID; an empty search yields an empty table
    handle = Entrez.efetch(db='pubmed', id=','.join(id_list), retmode='xml')
    try:
        records = Entrez.read(handle)
    finally:
        # Release the connection even if parsing the response fails
        handle.close()

    # Process each PubMed article in the response
    for article in records['PubmedArticle']:
        citation = article['MedlineCitation']
        article_info = citation['Article']
        pmid = str(citation['PMID'])

        title = article_info['ArticleTitle']

        # Abstract is optional; join its sections into a single string
        abstract_parts = article_info.get('Abstract', {}).get('AbstractText', [])
        abstract = ' '.join(abstract_parts)

        # AuthorList is optional. Use distinct local names here so the
        # `authors` filter list used to build the query is not overwritten.
        author_names = []
        affiliation_texts = []
        for author in article_info.get('AuthorList', []):
            name_parts = (author.get('LastName', ''), author.get('ForeName', ''))
            name = ' '.join(part for part in name_parts if part)
            # Group authors carry a CollectiveName instead of personal names
            name = name or author.get('CollectiveName', '')
            if name:
                author_names.append(name)

            # Keep only the first listed affiliation of each author
            affiliation_info = author.get('AffiliationInfo')
            if affiliation_info:
                affiliation_texts.append(affiliation_info[0]['Affiliation'])

        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the joined string is the same on every run
        affiliations = '; '.join(dict.fromkeys(affiliation_texts))

        journal = article_info['Journal']['Title']

        # The "Keywords" column holds the MeSH descriptor names, when present
        keywords = ', '.join(
            heading['DescriptorName']
            for heading in citation.get('MeshHeadingList', [])
        )
        url = f"https://www.ncbi.nlm.nih.gov/pubmed/{pmid}"

        pub_date = parse_pub_date(article_info['Journal']['JournalIssue']['PubDate'])

        rows.append({
            'PMID': pmid,
            'Title': title,
            'Abstract': abstract,
            'Authors': ', '.join(author_names),
            'Journal': journal,
            'Pub Date': pub_date,
            'Keywords': keywords,
            'URL': url,
            'Affiliations': affiliations,
        })

# Build the table once, with a fixed column order
df = pd.DataFrame(rows, columns=result_columns)

In [10]:
# Show the results table as this cell's output
df

Unnamed: 0,PMID,Title,Abstract,Authors,Journal,Keywords,URL,Affiliations,Pub Date
0,39294861,Anabolic deficits and divergent unfolded prote...,Cancer cachexia manifests as whole body wastin...,"Belcher Daniel J, Kim Nina, Navarro-Llinas Bla...",Physiological reports,"Animals, Cachexia, Male, Muscle, Skeletal, Rat...",https://www.ncbi.nlm.nih.gov/pubmed/39294861,Department of Biochemistry and Molecular Medic...,2024-Sep-01
1,39294819,Cardioprotective microRNAs (protectomiRs) in a...,Cardioprotective miRNAs (protectomiRs) are pro...,"Nagy Regina N, Makkos András, Baranyai Tamás, ...",British journal of pharmacology,,https://www.ncbi.nlm.nih.gov/pubmed/39294819,"Institute of Physiology, Justus-Liebig Univers...",2024-Sep-18
2,39294751,ZFP64 drives glycolysis-mediated stem cell-lik...,Breast cancer (BC) is a great clinical challen...,"Sun Jiayi, Liu Jinquan, Hou Yudong, Bao Jianhe...",Biology direct,"Humans, Glycolysis, Breast Neoplasms, Female, ...",https://www.ncbi.nlm.nih.gov/pubmed/39294751,"Shanxi Datong University, Datong, Shanxi, Peop...",2024-Sep-18
3,39294742,Characterization of the angiomodulatory effect...,"The IL-6 cytokine family, with its crucial and...","Liang Paula, Ness Jan, Rapp Julian, Boneva Ste...",Journal of neuroinflammation,"Interleukin-11, Humans, Signal Transduction, A...",https://www.ncbi.nlm.nih.gov/pubmed/39294742,"Eye Center, Medical Center, Faculty of Medicin...",2024-Sep-18
4,39294631,Anti-inflammatory potential of aspergillus ung...,Inflammation serves as an intricate defense me...,"Nasr Soad, Dawood Abdelhameed S, Ibrahim Amal ...",BMC biotechnology,"Toll-Like Receptor 4, Mice, Animals, Anti-Infl...",https://www.ncbi.nlm.nih.gov/pubmed/39294631,"Drug Bioassay-Cell Culture Laboratory, Pharmac...",2024-Sep-18
...,...,...,...,...,...,...,...,...,...
95,39288768,Increased Platelet Size and Elevated P2Y12 mRN...,,"Nishikawa Masako, Nagura Yutaka, Okazaki Hitos...",International journal of laboratory hematology,,https://www.ncbi.nlm.nih.gov/pubmed/39288768,"Department of Transfusion Medicine, Graduate S...",2024-Sep-17
96,39288629,Current status of nucleic acid therapy and its...,Nucleic acid is an essential biopolymer in all...,"Lou Wenting, Zhang Leqi, Wang Jianwei",International immunopharmacology,,https://www.ncbi.nlm.nih.gov/pubmed/39288629,"Department of Surgery, The Fourth Affiliated H...",2024-Sep-16
97,39288627,Formononetin ameliorates dextran sulfate sodiu...,"Ulcerative colitis (UC) is a complex, refracto...","Xiao Qiuping, Huang Jiaqi, Zhu Xiyan, Shi Min,...",International immunopharmacology,,https://www.ncbi.nlm.nih.gov/pubmed/39288627,"College of Pharmacy, Jiangxi University of Chi...",2024-Sep-16
98,39288626,Activin A inhibits the migration of human lung...,"Activin A, a member of the transforming growth...","Zhang Fenglin, Cui Xueling, Yang Ke, Guo Rui, ...",International immunopharmacology,,https://www.ncbi.nlm.nih.gov/pubmed/39288626,"Department of Genetics, College of Basic Medic...",2024-Sep-16


In [11]:
# Save the results table as an Excel workbook, leaving out the row index
df.to_excel(
    excel_writer='PubMed_resultsx.xlsx',
    index=False,
)