In [1]:
import os
from dotenv import load_dotenv

load_dotenv()
_ = os.getenv('NCBI_API_KEY')

import pandas as pd
import numpy as np
import metapub

In [3]:
keyword='("prostate cancer" OR "adenocarcinoma of the prostate" OR "prostate sarcoma") AND ("molecular target" OR "therapeutic target")'
num_of_articles=250

In [4]:
from metapub import PubMedFetcher
fetch = PubMedFetcher()

# get the  PMID for first 250 articles with keyword 
pmids = fetch.pmids_for_query(keyword, retmax=num_of_articles)

# get  articles
articles = {}
for pmid in pmids:
    articles[pmid] = fetch.article_by_pmid(pmid)

In [5]:
list(articles)[0:10]

['37138326',
 '37119046',
 '37118847',
 '37113783',
 '37086156',
 '37075701',
 '37073437',
 '37069746',
 '37056926',
 '37047232']

In [6]:
titles = {}
for pmid in pmids:
    titles[pmid] = fetch.article_by_pmid(pmid).title
Title = pd.DataFrame(list(titles.items()),columns = ['pmid','Title'])

In [7]:
# get abstract for each article:
abstracts = {}
for pmid in pmids:
    abstracts[pmid] = fetch.article_by_pmid(pmid).abstract
Abstract = pd.DataFrame(list(abstracts.items()),columns = ['pmid','Abstract'])

In [8]:
authors = {}
for pmid in pmids:
    authors[pmid] = fetch.article_by_pmid(pmid).authors
Author = pd.DataFrame(list(authors.items()),columns = ['pmid','Author'])

In [9]:
years = {}
for pmid in pmids:
    years[pmid] = fetch.article_by_pmid(pmid).year
Year = pd.DataFrame(list(years.items()),columns = ['pmid','Year'])

In [10]:
volumes = {}
for pmid in pmids:
    volumes[pmid] = fetch.article_by_pmid(pmid).volume
Volume = pd.DataFrame(list(volumes.items()),columns = ['pmid','Issue'])

In [11]:
issues = {}
for pmid in pmids:
    issues[pmid] = fetch.article_by_pmid(pmid).issue
Issue = pd.DataFrame(list(issues.items()),columns = ['pmid','Issue'])

In [12]:
journals = {}
for pmid in pmids:
    journals[pmid] = fetch.article_by_pmid(pmid).journal
Journal = pd.DataFrame(list(journals.items()),columns = ['pmid','Journal'])

In [13]:
citations = {}
for pmid in pmids:
    citations[pmid] = fetch.article_by_pmid(pmid).citation
Citation = pd.DataFrame(list(citations.items()),columns = ['pmid','Citation'])

In [14]:
links={}
for pmid in pmids:
    links[pmid] = "https://pubmed.ncbi.nlm.nih.gov/"+pmid+"/"
Link = pd.DataFrame(list(links.items()),columns = ['pmid','Link'])

In [15]:
data_frames = [Title,Abstract,Author,Year,Volume,Issue,Journal,Citation,Link]
from functools import reduce
df_merged = reduce(lambda  left,right: pd.merge(left,right,on=['pmid'],
                                            how='outer'), data_frames)
df_merged

Unnamed: 0,pmid,Title,Abstract,Author,Year,Issue_x,Issue_y,Journal,Citation,Link
0,37138326,Myeloid-like tumor hybrid cells in bone marrow...,BACKGROUND: Bone metastasis is the leading cau...,"[Ye X, Huang X, Fu X, Zhang X, Lin R, Zhang W,...",2023,16,1,J Hematol Oncol,"Ye X, et al. Myeloid-like tumor hybrid cells i...",https://pubmed.ncbi.nlm.nih.gov/37138326/
1,37119046,"SALL4 correlates with proliferation, metastasi...",BACKGROUND: The mechanism involved in prostate...,"[Zhou J, Peng S, Fan H, Li J, Li Z, Wang G, Ze...",2023,,,Cancer Med,"Zhou J, et al. SALL4 correlates with prolifera...",https://pubmed.ncbi.nlm.nih.gov/37119046/
2,37118847,Circular RNA EPHA3 suppresses progression and ...,BACKGROUND: Circular RNAs (circRNAs) may regul...,"[Feng H, Deng Z, Peng W, Wei X, Liu J, Wang T]",2023,21,1,J Transl Med,"Feng H, et al. Circular RNA EPHA3 suppresses p...",https://pubmed.ncbi.nlm.nih.gov/37118847/
3,37113783,Novel roles for HMGA2 isoforms in regulating o...,Oxidative stress is increased in several cance...,"[Campbell T, Hawsawi O, Henderson V, Dike P, H...",2023,9,4,Heliyon,"Campbell T, et al. Novel roles for HMGA2 isofo...",https://pubmed.ncbi.nlm.nih.gov/37113783/
4,37086156,Isocitrate dehydrogenase 1 sustains a hybrid c...,The androgen receptor (AR) is an established o...,"[Gonthier K, Weidmann C, Berthiaume L, Jobin C...",2023,,,Mol Oncol,"Gonthier K, et al. Isocitrate dehydrogenase 1 ...",https://pubmed.ncbi.nlm.nih.gov/37086156/
...,...,...,...,...,...,...,...,...,...,...
245,35109825,RNA-seq of nine canine prostate cancer cell li...,BACKGROUND: Canine prostate adenocarcinoma (PA...,"[Packeiser EM, Taher L, Kong W, Ernst M, Beck ...",2022,22,1,Cancer Cell Int,"Packeiser EM, et al. RNA-seq of nine canine pr...",https://pubmed.ncbi.nlm.nih.gov/35109825/
246,35098656,Roles of Myc-associated zinc finger protein in...,As an important transcription factor that is w...,"[Zheng C, Wu H, Jin S, Li D, Tan S, Zhu X]",2022,18,6,Asia Pac J Clin Oncol,"Zheng C, et al. Roles of Myc-associated zinc f...",https://pubmed.ncbi.nlm.nih.gov/35098656/
247,35087756,Tumor- and Osteoblast-Derived Periostin in Pro...,Exploring the biological function of periostin...,"[Sun CY, Mi YY, Ge SY, Hu QF, Xu K, Guo YJ, Ta...",2021,11,,Front Oncol,"Sun CY, et al. Tumor- and Osteoblast-Derived P...",https://pubmed.ncbi.nlm.nih.gov/35087756/
248,35083818,ERG amplification is a secondary recurrent dri...,ERG is a transcription factor encoded on chrom...,"[Lee WY, Gutierrez-Lanz EA, Xiao H, McClintock...",2022,61,7,Genes Chromosomes Cancer,"Lee WY, et al. ERG amplification is a secondar...",https://pubmed.ncbi.nlm.nih.gov/35083818/
