In [1]:
# import libraries
from Bio import Entrez
import pandas as pd
import numpy as np
import calendar

In [2]:
# function for searching articles in PubMed
def search_function(query):
    """Run an ESearch against the 'pubmed' database and return the parsed result.

    Parameters
    ----------
    query : str
        Search expression, passed to ESearch as ``term``.

    Returns
    -------
    The object produced by ``Entrez.read`` for the ESearch XML response.
    Results are requested sorted by relevance.
    """
    # placeholders — replace with your own NCBI e-mail address and API key
    Entrez.email = 'your_email'
    Entrez.api_key = 'your_api_key'
    # NOTE(review): NCBI's ESearch documentation describes a 10,000-record ceiling
    # for PubMed, so a retmax above that is presumably not honoured — confirm
    handle = Entrez.esearch(db='pubmed', sort='relevance', retmax='250000', retmode='xml', term=query)
    try:
        results = Entrez.read(handle)
    finally:
        # release the response handle even if the XML cannot be parsed
        handle.close()

    return results

In [3]:
# Search algorithm used for PubMed (through the search function) and for the
# Cochrane Library (manual search): gut-microbiome synonyms AND kidney-stone terms.
# The literal is triple-quoted, so its line breaks and indentation are part of the
# string that is passed on as the search term.
query = """(Gut Microbiome OR Gut Microbiomes OR Gut Microflora OR Gut Microbiota OR Gastrointestinal Flora OR 
            Gut Flora OR Gastrointestinal Microbiota OR Gastrointestinal Microbial Community OR 
            Gastrointestinal Microbial Communities OR Intestinal Microbiome OR Intestinal Microflora OR 
            Intestinal Flora OR Gastrointestinal Microflora) AND 
            (Kidney stone disease OR Urolithiasis OR Nephrolithiasis OR gut-kidney axis)"""

In [4]:
# run the PubMed search and keep the list of PMIDs stored under 'IdList'
studies = search_function(query)
studies_id_list = studies['IdList']

In [5]:
# function for fetching the details of the articles
def fetch_details(id_list):
    """Download the 'pubmed' records for the given PMIDs via EFetch.

    Parameters
    ----------
    id_list : iterable of str
        PMIDs; they are joined with commas into a single ``id`` argument.

    Returns
    -------
    The object produced by ``Entrez.read`` for the EFetch XML response.
    """
    ids = ','.join(id_list)
    # placeholders — replace with your own NCBI e-mail address and API key
    Entrez.email = 'your_email'
    Entrez.api_key = 'your_api_key'
    handle = Entrez.efetch(db='pubmed', retmode='xml', id=ids)
    try:
        results = Entrez.read(handle)
    finally:
        # release the response handle even if the XML cannot be parsed
        handle.close()

    return results

In [6]:
# function for exporting the fetched details of the articles to a dataframe
def articles_info_to_df(id_list):
    """Fetch the PubMed records for ``id_list`` and tabulate selected fields.

    The PMIDs are requested through ``fetch_details`` in chunks, each chunk
    exactly once. One row is produced per entry of the response's
    ``'PubmedArticle'`` list; entries under other keys are not read.

    Parameters
    ----------
    id_list : list of str
        PMIDs to look up. An empty list yields an empty dataframe without
        any request being made.

    Returns
    -------
    pandas.DataFrame
        Columns: 'PMID', 'Title', 'Abstract', 'Journal',
        'Language of Publication', 'Year_of_Publication',
        'Month_of_Publication', 'Publication_Type', 'ELocationID'.
        An optional field that cannot be read holds 'No Abstract'
        (abstract) or 'No Data' (year, month, publication type, ELocationID).

    Raises
    ------
    Whatever the lookup of a mandatory field (PMID, title, journal title,
    first language) raises when that field is absent from a record.
    """
    # number of PMIDs requested per fetch_details call
    chunk_size = 10000
    columns = ['PMID', 'Title', 'Abstract', 'Journal', 'Language of Publication', 'Year_of_Publication',
               'Month_of_Publication', 'Publication_Type', 'ELocationID']

    def optional(getter, default='No Data'):
        """Return ``getter()``, or ``default`` when reading the field fails."""
        try:
            return getter()
        except Exception:
            return default

    rows = []
    # slicing of the PMIDs list into chunks
    for start in range(0, len(id_list), chunk_size):
        papers = fetch_details(id_list[start:start + chunk_size])

        # iterating over a chunk
        for paper in papers['PubmedArticle']:
            citation = paper['MedlineCitation']
            article = citation['Article']
            journal = article['Journal']

            rows.append((
                # PMID ([:] kept from the original code)
                citation['PMID'][:],
                # Title
                article['ArticleTitle'],
                # Abstract
                # NOTE(review): only the first 'AbstractText' element is kept; if an
                # abstract arrives as several elements the rest is dropped — confirm
                optional(lambda: article['Abstract']['AbstractText'][0], default='No Abstract'),
                # Journal
                journal['Title'],
                # Language of Publication (first listed language only)
                article['Language'][0],
                # Year of Publication
                optional(lambda: journal['JournalIssue']['PubDate']['Year']),
                # Month of Publication
                optional(lambda: journal['JournalIssue']['PubDate']['Month']),
                # Publication Type
                optional(lambda: ", ".join(article['PublicationTypeList'])),
                # ELocationID
                optional(lambda: ", ".join(article['ELocationID'])),
            ))

    # export the data to dataframe
    return pd.DataFrame(rows, columns=columns)

In [7]:
# fetch the details of the PubMed search hits and export them to a dataframe
df_info_articles = articles_info_to_df(studies_id_list)

In [8]:
# show the dataframe
df_info_articles.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,Year_of_Publication,Month_of_Publication,Publication_Type,ELocationID
0,29760448,The gut microbiota and the brain-gut-kidney ax...,Crosstalk between the gut microbiota and the h...,Nature reviews. Nephrology,eng,2018,Jul,"Journal Article, Review",10.1038/s41581-018-0018-2
1,34642783,The mucosal immune system and IgA nephropathy.,The precise pathogenesis of immunoglobulin A n...,Seminars in immunopathology,eng,2021,Oct,"Journal Article, Research Support, Non-U.S. Go...",10.1007/s00281-021-00871-y
2,34132361,Recent advances on the mechanisms of kidney st...,Kidney stone disease is one of the oldest dise...,International journal of molecular medicine,eng,2021,Aug,"Journal Article, Review","149, 10.3892/ijmm.2021.4982"
3,36127409,Mechanisms of the intestinal and urinary micro...,Kidney stone disease affects ~10% of the globa...,Nature reviews. Urology,eng,2022,Dec,"Journal Article, Review",10.1038/s41585-022-00647-5
4,27847292,The role of the microbiome in kidney stone for...,Nephrolithiasis is a complex disease of worldw...,"International journal of surgery (London, Engl...",eng,2016,Dec,"Journal Article, Review","10.1016/j.ijsu.2016.11.024, S1743-9191(16)31033-0"


In [9]:
# shape of the dataframe (rows, columns)
df_info_articles.shape

(248, 9)

In [10]:
# additional PMIDs found in the Cochrane Library, ClinicalTrials.gov (via the Cochrane Library)
# and the systematic review
additional_studies_id_list = ['29353409', '27115405', '33655334', '29705728', '33404827', '35460343', '27708409', '34427737',
                   '35325834', '27848096', '20736987', '28217694', '29748329', '35571974', '10959618', '15541456',
                   '24643131', '30586752', '29270002', '36126902']

In [11]:
# drop repeated PMIDs while keeping the order of first appearance
additional_studies_id_list = list(dict.fromkeys(additional_studies_id_list))

In [12]:
# fetch the details of the additional articles and export them to a dataframe
df_info_articles_additional = articles_info_to_df(additional_studies_id_list)

In [13]:
# show the dataframe
df_info_articles_additional.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,Year_of_Publication,Month_of_Publication,Publication_Type,ELocationID
0,29353409,16S rRNA gene sequencing reveals altered compo...,Nephrolithiasis is a common urological disease...,Urolithiasis,eng,2018,Nov,Journal Article,10.1007/s00240-018-1037-y
1,27115405,Evidence for a distinct gut microbiome in kidn...,The trillions of microbes that colonize our ad...,Urolithiasis,eng,2016,Oct,"Comparative Study, Journal Article",10.1007/s00240-016-0882-9
2,33655334,Intestinal dysbacteriosis leads to kidney ston...,The formation and physicochemical properties o...,Molecular medicine reports,eng,2021,Mar,Journal Article,"180, 10.3892/mmr.2020.11819"
3,29705728,Understanding the gut-kidney axis in nephrolit...,The involvement of the gut microbiota in the p...,Gut,eng,2018,Dec,Journal Article,10.1136/gutjnl-2017-315734
4,33404827,Gut microbiota affect the formation of calcium...,Kidney stones are a common and frequently occu...,Applied microbiology and biotechnology,eng,2021,Jan,Journal Article,10.1007/s00253-020-11086-w


In [14]:
# shape of the dataframe (rows, columns)
df_info_articles_additional.shape

(20, 9)

In [15]:
# join two dataframes: the PubMed search hits followed by the additional articles;
# ignore_index=True renumbers the rows of the combined table from 0
df_info_articles_concat = pd.concat([df_info_articles, df_info_articles_additional], ignore_index=True, sort=False)

In [16]:
# show the dataframe
df_info_articles_concat.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,Year_of_Publication,Month_of_Publication,Publication_Type,ELocationID
0,29760448,The gut microbiota and the brain-gut-kidney ax...,Crosstalk between the gut microbiota and the h...,Nature reviews. Nephrology,eng,2018,Jul,"Journal Article, Review",10.1038/s41581-018-0018-2
1,34642783,The mucosal immune system and IgA nephropathy.,The precise pathogenesis of immunoglobulin A n...,Seminars in immunopathology,eng,2021,Oct,"Journal Article, Research Support, Non-U.S. Go...",10.1007/s00281-021-00871-y
2,34132361,Recent advances on the mechanisms of kidney st...,Kidney stone disease is one of the oldest dise...,International journal of molecular medicine,eng,2021,Aug,"Journal Article, Review","149, 10.3892/ijmm.2021.4982"
3,36127409,Mechanisms of the intestinal and urinary micro...,Kidney stone disease affects ~10% of the globa...,Nature reviews. Urology,eng,2022,Dec,"Journal Article, Review",10.1038/s41585-022-00647-5
4,27847292,The role of the microbiome in kidney stone for...,Nephrolithiasis is a complex disease of worldw...,"International journal of surgery (London, Engl...",eng,2016,Dec,"Journal Article, Review","10.1016/j.ijsu.2016.11.024, S1743-9191(16)31033-0"


In [17]:
# shape of the dataframe (rows, columns)
df_info_articles_concat.shape

(268, 9)

In [18]:
# drop duplicates: keep only the first row seen for each PMID
df_info_articles_full = df_info_articles_concat[
    ~df_info_articles_concat.duplicated(subset='PMID')
]

In [19]:
# show the dataframe
df_info_articles_full.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,Year_of_Publication,Month_of_Publication,Publication_Type,ELocationID
0,29760448,The gut microbiota and the brain-gut-kidney ax...,Crosstalk between the gut microbiota and the h...,Nature reviews. Nephrology,eng,2018,Jul,"Journal Article, Review",10.1038/s41581-018-0018-2
1,34642783,The mucosal immune system and IgA nephropathy.,The precise pathogenesis of immunoglobulin A n...,Seminars in immunopathology,eng,2021,Oct,"Journal Article, Research Support, Non-U.S. Go...",10.1007/s00281-021-00871-y
2,34132361,Recent advances on the mechanisms of kidney st...,Kidney stone disease is one of the oldest dise...,International journal of molecular medicine,eng,2021,Aug,"Journal Article, Review","149, 10.3892/ijmm.2021.4982"
3,36127409,Mechanisms of the intestinal and urinary micro...,Kidney stone disease affects ~10% of the globa...,Nature reviews. Urology,eng,2022,Dec,"Journal Article, Review",10.1038/s41585-022-00647-5
4,27847292,The role of the microbiome in kidney stone for...,Nephrolithiasis is a complex disease of worldw...,"International journal of surgery (London, Engl...",eng,2016,Dec,"Journal Article, Review","10.1016/j.ijsu.2016.11.024, S1743-9191(16)31033-0"


In [20]:
# shape of the dataframe (rows, columns)
df_info_articles_full.shape

(255, 9)

In [21]:
# filter the dataframe by year (No Data)

In [22]:
# work on a copy so that df_info_articles_full keeps its original column names
df_filtered = df_info_articles_full.copy()

In [23]:
# shorten the two date columns to 'year' and 'month' (in place, in a single call)
df_filtered.rename(
    columns={
        'Year_of_Publication': 'year',
        'Month_of_Publication': 'month',
    },
    inplace=True,
)

In [24]:
# rows whose publication year is recorded as 'No Data'
df_year_nodata = df_filtered[df_filtered['year'].eq('No Data')]

In [25]:
# show the dataframe
df_year_nodata.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,year,month,Publication_Type,ELocationID
263,15541456,Delayed graft function in kidney transplantation.,Delayed graft function is a form of acute rena...,"Lancet (London, England)",eng,No Data,No Data,"Journal Article, Review",


In [26]:
# shape of the dataframe (rows, columns)
df_year_nodata.shape

(1, 9)

In [27]:
# filter the dataframe by month (No Data)

In [28]:
# rows that have a publication year but whose month is recorded as 'No Data'
df_month_nodata = df_filtered[
    df_filtered['month'].eq('No Data') & df_filtered['year'].ne('No Data')
]

In [29]:
# keep the rows that have a year value and store that year as an integer
df_month_nodata_1 = (
    df_month_nodata
    .dropna(subset=['year'])
    .astype({'year': int})
)

In [30]:
# month-less rows published in 2013 or later
df_month_nodata_2 = df_month_nodata_1.loc[df_month_nodata_1['year'].ge(2013)]

In [31]:
# show the dataframe
df_month_nodata_2.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,year,month,Publication_Type,ELocationID
14,37201114,Causal effects between gut microbiota and IgA ...,Therapeutic approaches that target the gut mic...,Frontiers in cellular and infection microbiology,eng,2023,No Data,"Journal Article, Research Support, Non-U.S. Gov't","1171517, 10.3389/fcimb.2023.1171517"
19,35370631,The Intestinal Microbiota and Metabolites in t...,Emerging evidences demonstrate the involvement...,Frontiers in pharmacology,eng,2022,No Data,"Journal Article, Review","837500, 10.3389/fphar.2022.837500"
20,35656030,Gut Microbiome Characteristics in IgA Nephropa...,Recent data indicate the importance of gut-kid...,Frontiers in cellular and infection microbiology,eng,2022,No Data,"Journal Article, Meta-Analysis, Review, System...","904401, 10.3389/fcimb.2022.904401"
22,36467425,Gut-kidney axis in IgA nephropathy: Role on me...,IgA Nephropathy (IgAN) is the commonest primar...,Frontiers in cell and developmental biology,eng,2022,No Data,"Journal Article, Review","993716, 10.3389/fcell.2022.993716"
27,36016798,The role and mechanism of butyrate in the prev...,Diabetic kidney disease (DKD) remains the lead...,Frontiers in microbiology,eng,2022,No Data,"Journal Article, Review","961536, 10.3389/fmicb.2022.961536"


In [32]:
# shape of the dataframe (rows, columns)
df_month_nodata_2.shape

(45, 9)

In [33]:
# filter the dataframe by year and month

In [34]:
# start from a copy of the renamed table; df_filtered itself stays unchanged
df_filtered_without_nodata = df_filtered.copy()

In [35]:
# keep the rows in which neither 'year' nor 'month' is recorded as 'No Data'
df_filtered_without_nodata = df_filtered_without_nodata[
    df_filtered_without_nodata[['year', 'month']].ne('No Data').all(axis=1)
]

In [36]:
# lookup table from month abbreviation to month number ('Jan' -> 1 ... 'Dec' -> 12);
# calendar.month_abbr starts with an empty string, so '' -> 0 is part of the table too
dict_map_month = {abbr: number for number, abbr in enumerate(calendar.month_abbr)}

In [37]:
# Translate month abbreviations to month numbers through dict_map_month.
# A value that is not a key of the table (a month that is already a number, or a
# numeric string) is kept as it is instead of becoming NaN, so re-running this
# cell no longer wipes the column. assign() builds a new frame rather than writing
# into the filtered slice produced by the previous cell.
df_filtered_without_nodata = df_filtered_without_nodata.assign(
    month=df_filtered_without_nodata['month'].map(lambda value: dict_map_month.get(value, value))
)

In [38]:
# copy before adding the 'date' column, so df_filtered_without_nodata is not modified
df_filtered_without_nodata_1 = df_filtered_without_nodata.copy()

In [39]:
# build a 'date' column from 'year' and 'month'; DAY=1 supplies a constant day,
# so every date falls on the first of its month
df_filtered_without_nodata_1['date'] = pd.to_datetime(df_filtered_without_nodata_1[['year', 'month']].assign(DAY=1))

In [40]:
# keep the rows dated from 2013-01-01 to 2023-10-01, both ends included
df_filtered_without_nodata_2 = df_filtered_without_nodata_1[
    df_filtered_without_nodata_1['date'].between('20130101', '20231001')
]

In [41]:
# show the dataframe
df_filtered_without_nodata_2.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,year,month,Publication_Type,ELocationID,date
0,29760448,The gut microbiota and the brain-gut-kidney ax...,Crosstalk between the gut microbiota and the h...,Nature reviews. Nephrology,eng,2018,7,"Journal Article, Review",10.1038/s41581-018-0018-2,2018-07-01
1,34642783,The mucosal immune system and IgA nephropathy.,The precise pathogenesis of immunoglobulin A n...,Seminars in immunopathology,eng,2021,10,"Journal Article, Research Support, Non-U.S. Go...",10.1007/s00281-021-00871-y,2021-10-01
2,34132361,Recent advances on the mechanisms of kidney st...,Kidney stone disease is one of the oldest dise...,International journal of molecular medicine,eng,2021,8,"Journal Article, Review","149, 10.3892/ijmm.2021.4982",2021-08-01
3,36127409,Mechanisms of the intestinal and urinary micro...,Kidney stone disease affects ~10% of the globa...,Nature reviews. Urology,eng,2022,12,"Journal Article, Review",10.1038/s41585-022-00647-5,2022-12-01
4,27847292,The role of the microbiome in kidney stone for...,Nephrolithiasis is a complex disease of worldw...,"International journal of surgery (London, Engl...",eng,2016,12,"Journal Article, Review","10.1016/j.ijsu.2016.11.024, S1743-9191(16)31033-0",2016-12-01


In [42]:
# shape of the dataframe (rows, columns)
df_filtered_without_nodata_2.shape

(185, 10)

In [43]:
# join three dataframes

In [44]:
# recombine the three subsets: rows dated inside the window, rows with a year (>= 2013)
# but no month, and rows with no year; the last two subsets have no 'date' column,
# so their 'date' is missing in the combined table
# NOTE(review): 'year' was cast to int only for df_month_nodata_2, so the combined column
# mixes ints with the unconverted values (e.g. 'No Data') — confirm this is acceptable downstream
df_filtered_by_date = pd.concat([df_filtered_without_nodata_2, df_month_nodata_2, df_year_nodata], ignore_index=True, sort=False)

In [45]:
# show the dataframe
df_filtered_by_date.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,year,month,Publication_Type,ELocationID,date
0,29760448,The gut microbiota and the brain-gut-kidney ax...,Crosstalk between the gut microbiota and the h...,Nature reviews. Nephrology,eng,2018,7,"Journal Article, Review",10.1038/s41581-018-0018-2,2018-07-01
1,34642783,The mucosal immune system and IgA nephropathy.,The precise pathogenesis of immunoglobulin A n...,Seminars in immunopathology,eng,2021,10,"Journal Article, Research Support, Non-U.S. Go...",10.1007/s00281-021-00871-y,2021-10-01
2,34132361,Recent advances on the mechanisms of kidney st...,Kidney stone disease is one of the oldest dise...,International journal of molecular medicine,eng,2021,8,"Journal Article, Review","149, 10.3892/ijmm.2021.4982",2021-08-01
3,36127409,Mechanisms of the intestinal and urinary micro...,Kidney stone disease affects ~10% of the globa...,Nature reviews. Urology,eng,2022,12,"Journal Article, Review",10.1038/s41585-022-00647-5,2022-12-01
4,27847292,The role of the microbiome in kidney stone for...,Nephrolithiasis is a complex disease of worldw...,"International journal of surgery (London, Engl...",eng,2016,12,"Journal Article, Review","10.1016/j.ijsu.2016.11.024, S1743-9191(16)31033-0",2016-12-01


In [46]:
# shape of the dataframe (rows, columns)
df_filtered_by_date.shape

(231, 10)

In [47]:
# filter the dataframe by study design

In [63]:
# copy of the date-filtered table used for the study-design filter
df_study_design = df_filtered_by_date.copy()

In [64]:
# list the distinct publication-type strings present before filtering
df_study_design['Publication_Type'].unique()

array(['Journal Article, Review',
       "Journal Article, Research Support, Non-U.S. Gov't, Review",
       "Journal Article, Research Support, Non-U.S. Gov't",
       'Journal Article, Research Support, N.I.H., Extramural, Review',
       "Journal Article, Review, Research Support, Non-U.S. Gov't",
       'Journal Article', 'Congress, Overall', 'Review, Journal Article',
       "Meta-Analysis, Systematic Review, Journal Article, Research Support, Non-U.S. Gov't",
       'Observational Study, Journal Article',
       'Review, Journal Article, Research Support, N.I.H., Extramural',
       "Journal Article, Research Support, N.I.H., Extramural, Research Support, Non-U.S. Gov't",
       'Editorial, Comment', 'Journal Article, Systematic Review',
       'Journal Article, Research Support, N.I.H., Extramural',
       'Journal Article, Comment',
       "Journal Article, Research Support, Non-U.S. Gov't, Research Support, N.I.H., Extramural",
       'Comparative Study, Journal Article',
    

In [65]:
# drop every row whose Publication_Type matches one of the excluded terms; the pattern is
# a regular-expression alternation, and na=True makes a missing value count as a match
# (i.e. the row is dropped)
# NOTE(review): 'Case' and 'Reports' are separate alternatives, so a type containing either
# word is excluded — presumably 'Case Reports' was meant; confirm
# NOTE(review): 'Abstract' matches any type containing that word — confirm this is intended
df_filtered_by_publication_type = df_study_design[
    ~df_study_design['Publication_Type'].str.contains(
        'Review|Comment|Congress|Veterinary|Editorial|Meta-Analysis|'
        'Letter|Preprint|Case|Reports|Erratum|Abstract',
        na=True,
    )
]

In [66]:
# list the distinct publication-type strings that remain after filtering
df_filtered_by_publication_type['Publication_Type'].unique()

array(["Journal Article, Research Support, Non-U.S. Gov't",
       'Journal Article', 'Observational Study, Journal Article',
       "Journal Article, Research Support, N.I.H., Extramural, Research Support, Non-U.S. Gov't",
       'Journal Article, Research Support, N.I.H., Extramural',
       "Journal Article, Research Support, Non-U.S. Gov't, Research Support, N.I.H., Extramural",
       'Comparative Study, Journal Article',
       'Comparative Study, Journal Article, Observational Study, Research Support, N.I.H., Extramural'],
      dtype=object)

In [67]:
# show the dataframe
df_filtered_by_publication_type.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,year,month,Publication_Type,ELocationID,date
8,35264456,Intestinal Bacterial Translocation Contributes...,"In recent years, many studies have focused on ...",Journal of the American Society of Nephrology ...,eng,2022,6,"Journal Article, Research Support, Non-U.S. Gov't",10.1681/ASN.2021060843,2022-06-01
14,35259481,Pharmacological targeting macrophage phenotype...,Renal fibrosis is a non-negligible pathologica...,Pharmacological research,eng,2022,4,"Journal Article, Research Support, Non-U.S. Gov't","10.1016/j.phrs.2022.106161, S1043-6618(22)00106-2",2022-04-01
18,36371056,Sulforaphane-driven reprogramming of gut micro...,"Currently, revealing how to prevent and contro...",Journal of advanced research,eng,2023,10,Journal Article,"10.1016/j.jare.2022.11.003, S2090-1232(22)00251-X",2023-10-01
19,35843568,Lacticaseibacillus rhamnosus Fmb14 prevents pu...,Hyperuricemia is a critical threat to human he...,Pharmacological research,eng,2022,8,"Journal Article, Research Support, Non-U.S. Gov't","10.1016/j.phrs.2022.106350, S1043-6618(22)00295-X",2022-08-01
21,34989745,Punicalagin alleviates renal injury <i>via</i>...,Diabetic renal injury was associated with dysb...,Food & function,eng,2022,1,Journal Article,10.1039/d1fo03343c,2022-01-01


In [68]:
# shape of the dataframe (rows, columns)
df_filtered_by_publication_type.shape

(114, 10)

In [69]:
# sort and format the dataframe

In [70]:
# copy of the filtered table that will be sorted
df_sorted = df_filtered_by_publication_type.copy()

In [71]:
# order the rows by 'date', then by 'year', and renumber them from 0
df_sorted = df_sorted.sort_values(by=['date', 'year'], ignore_index=True)

In [72]:
# copy of the sorted table whose columns will be renamed for the export
df_formatted = df_sorted.copy()

In [73]:
# give the three working columns their presentation names (in place, in a single call)
df_formatted.rename(
    columns={
        'year': 'Year of Publication',
        'month': 'Month of Publication',
        'Publication_Type': 'Publication Type',
    },
    inplace=True,
)

In [74]:
# full dataframe: an independent copy of the formatted table, used for the export
df_full = df_formatted.copy()

In [75]:
# show the dataframe
df_full.head()

Unnamed: 0,PMID,Title,Abstract,Journal,Language of Publication,Year of Publication,Month of Publication,Publication Type,ELocationID,date
0,23392537,Diet and renal stone formation.,The relationship between diet and the formatio...,Minerva medica,eng,2013,2,Journal Article,,2013-02-01
1,23408055,Melamine-induced renal toxicity is mediated by...,Melamine poisoning has become widely publicize...,Science translational medicine,eng,2013,2,"Journal Article, Research Support, Non-U.S. Gov't",10.1126/scitranslmed.3005114,2013-02-01
2,25459132,Helicobacter pylori and urinary system stones:...,Helicobacter pylori (H. pylori) is a atypical ...,Medical hypotheses,eng,2014,12,Journal Article,"10.1016/j.mehy.2014.09.016, S0306-9877(14)00341-7",2014-12-01
3,27115405,Evidence for a distinct gut microbiome in kidn...,The trillions of microbes that colonize our ad...,Urolithiasis,eng,2016,10,"Comparative Study, Journal Article",10.1007/s00240-016-0882-9,2016-10-01
4,27708409,Hyperoxaluria leads to dysbiosis and drives se...,Hyperoxaluria due to endogenously synthesized ...,Scientific reports,eng,2016,10,"Journal Article, Research Support, Non-U.S. Gov't","34712, 10.1038/srep34712",2016-10-01


In [76]:
# shape of the dataframe (rows, columns)
df_full.shape

(114, 10)

In [62]:
# write the dataframe to 'results.xlsx' without the row index; only the nine listed
# columns are written, so the helper 'date' column is left out
df_full.to_excel('results.xlsx', columns=['PMID', 'Title', 'Abstract', 'Journal', 'Language of Publication', 
                                                    'Year of Publication', 'Month of Publication', 'Publication Type', 'ELocationID'],
                 index=False)