In [1]:
# requires installing Biopython (using pip) to be able to ping PubMed
#MEDLINE data element fields: https://www.nlm.nih.gov/bsd/mms/medlineelements.html


In [1]:
from Bio import Entrez #use this library to access PubMed database and its Medline records
import re #to help with text searching mesh terms
import pandas as pd #to export PMID's or PubMed Queries as needed
import plotly.express as px

from jupyter_dash import JupyterDash #use plotly + Dash for building dashboard instead of using default
import dash
import dash_core_components as dcc
import dash_html_components as html

In [2]:
Entrez.email = "sarah.ngo@ucsf.edu" #to let NLM know who I am

In [4]:
def get_links_term(term):
    links = Entrez.esearch(db="pubmed", retmax = 1000, term=term)
    record = Entrez.read(links)
    link_list = record['IdList']
    
    return link_list
    

def get_medline(pubmed_list):
    handle = Entrez.efetch(db='pubmed',id=pubmed_list, retmode='text', rettype='medline')
    return handle.read()


def get_mesh_terms(link_list):
    record = Entrez.efetch(db="pubmed", retmax = 1000, term=term)
    

query = input("what do you want to search for?")


#links_term = get_links_term(query)
pubmed_record = get_links_term(query)

pubmed_dict = {}
for record in pubmed_record:
    medline_record = get_medline(record)

    #cleaning the medline record
    refined_text = medline_record.split('\n')

    # using re + search() to search within string for substring  
    mesh = "MH "
    mesh_terms = [line.replace(mesh,'') for line in refined_text if re.search(mesh, line)] 
    pubmed_dict[record] = mesh_terms
    

    


what do you want to search for?P30 AG044281


In [5]:
pubmed_dict

{'33694088': [],
 '33404634': [' - Adaptation, Psychological',
  ' - Aged',
  ' - *COVID-19',
  ' - *Cognitive Dysfunction/epidemiology',
  ' - Humans',
  ' - Pandemics',
  ' - SARS-CoV-2',
  ' - United States/epidemiology'],
 '33185602': [],
 '33105281': [' - Aged',
  ' - Electronic Health Records/*standards',
  ' - Emergence Delirium/etiology/*prevention & control',
  ' - Female',
  ' - Humans',
  ' - Male',
  ' - Perioperative Care/methods/*standards',
  ' - Practice Guidelines as Topic/*standards',
  ' - Treatment Outcome'],
 '33105280': [' - Adult',
  ' - Aged',
  ' - Cohort Studies',
  ' - Electronic Health Records/*standards/trends',
  ' - Emergence Delirium/diagnosis/*etiology/*prevention & control',
  ' - Female',
  ' - Humans',
  ' - Male',
  ' - Middle Aged',
  ' - Perioperative Care/*standards/trends',
  ' - Reproducibility of Results',
  ' - Treatment Outcome'],
 '33049232': [],
 '33048142': [' - Adult',
  ' - Chronic Disease',
  ' - Humans',
  ' - *Palliative Care',
  ' -

In [6]:
#need to transpose df so arrays are all same length
df = pd.DataFrame.from_dict(pubmed_dict, orient='index')
df = df.transpose()
df


Unnamed: 0,33694088,33404634,33185602,33105281,33105280,33049232,33048142,33047812,32989731,32988850,...,23836120,23752793,23752708,23712743,23602308,23560514,23521614,23445498,23200188,21357812
0,,"- Adaptation, Psychological",,- Aged,- Adult,,- Adult,- *COVID-19,,,...,- Activities of Daily Living,- Aged,- Aged,- Adult,- Age Factors,- Aged,- *Activities of Daily Living,- Adult,- Advance Care Planning/*organization & admin...,- Caregivers/*psychology
1,,- Aged,,- Electronic Health Records/*standards,- Aged,,- Chronic Disease,- Career Mobility,,,...,- Aged,- Drug Prescriptions/*statistics & numerical ...,"- Emergency Service, Hospital/*statistics & n...",- *Advance Directives,- Aged/psychology/statistics & numerical data,- Communication,- Aged,- Diabetes Mellitus/*therapy,- Aged,- *Delivery of Health Care
2,,- *COVID-19,,- Emergence Delirium/etiology/*prevention & c...,- Cohort Studies,,- Humans,"- Faculty, Medical/education/*trends",,,...,"- Aged, 80 and over",- Drug Substitution/statistics & numerical data,- Female,"- Aged, 80 and over","- Aged, 80 and over",- Homeless Persons,"- Aged, 80 and over",- Humans,- *Attitude to Death,- Dementia/*therapy
3,,- *Cognitive Dysfunction/epidemiology,,- Female,- Electronic Health Records/*standards/trends,,- *Palliative Care,- Fellowships and Scholarships/*trends,,,...,- Disability Evaluation,- Female,- Homeless Persons/*statistics & numerical data,- Alzheimer Disease/*complications,- *Anhedonia,"- Hospitals, Veterans/*statistics & numerical...",- Cohort Studies,- Hypoglycemia/etiology/*prevention & control,- *Attitude to Health,- Humans
4,,- Humans,,- Humans,- Emergence Delirium/diagnosis/*etiology/*pre...,,- Quality of Life,- Geriatrics/education/*trends,,,...,- Disabled Persons/*statistics & numerical data,- Humans,- Humans,"- Conflict, Psychological",- Depression/*complications/diagnosis/mortality,- Humans,- Disabled Persons/*statistics & numerical data,- Practice Guidelines as Topic/standards,- California/epidemiology,- *Social Support
5,,- Pandemics,,- Male,- Female,,- *Terminal Care,- Humans,,,...,- Educational Status,- Male,- Male,- *Decision Making,- Disabled Persons/*psychology/statistics & n...,- Middle Aged,- Female,"- *Quality Indicators, Health Care",- Consensus,
6,,- SARS-CoV-2,,- Perioperative Care/methods/*standards,- Humans,,,"- Medical Staff, Hospital/education/*trends",,,...,- Female,- Pharmaceutical Preparations/*administration...,- Middle Aged,- Female,- Female,- Patient Readmission/*statistics & numerical...,- Geriatric Assessment/*statistics & numerica...,- Quality of Health Care/standards,- *Decision Making,
7,,- United States/epidemiology,,- Practice Guidelines as Topic/*standards,- Male,,,- Research Personnel/education/*trends,,,...,- Frail Elderly,- United States,- Neuropsychological Tests,- Hip Fractures/complications/*therapy,- Humans,"- Power, Psychological",- *Health Status,- Risk Factors,- Female,
8,,,,- Treatment Outcome,- Middle Aged,,,- SARS-CoV-2,,,...,- Humans,- United States Department of Veterans Affairs,,- Humans,"- Interview, Psychological","- Process Assessment, Health Care",- Humans,,- Focus Groups,
9,,,,,- Perioperative Care/*standards/trends,,,,,,...,- Income,- Veterans,,- Lung Neoplasms/secondary/*therapy,- Longitudinal Studies,- Quality Improvement,- Life Style,,- Humans,


In [7]:
#print df to csv just to test
df.to_csv('pubmedtest.csv', index=False)