In [1]:
from Bio import Entrez
import pandas as pd
import json

In [2]:
# Scientists mapped to the year they started at IMHR.
# Insertion order is kept as-is: later cells pick the first key.
scientists = {
    'Lauri Tuominen': 2018,
    'Synthia Guimond': 2018,
    'Sara Tremblay': 2018,
    'Robyn McQuaid': 2018,
    'Clifford Cassidy': 2016,
    'Natalia Jaworska': 2018,
    'Jennifer L. Phillips': 2007,
    'Georg Northoff': 2009,
    'Michael Seto': 2009,
    'Michael Bodnar': 2018,
    'Stuart Fogel': 2017,
    'Rebecca Robillard': 2018,
    'Zachary Kaminsky': 2018,
    'Pierre Blier': 2004,
    'Reggie Taylor': 2018,
    'Avery Berman': 2022,
    'Andrew A. Nicholson': 2022,
}

In [3]:
# Contact address Biopython attaches to Entrez requests (NCBI asks E-utilities users to identify themselves).
# NOTE(review): this is a hard-coded personal address - consider loading it from configuration before sharing.
Entrez.email = 'tuominenlj@gmail.com'

In [68]:
def searchPubMed(scientists):
    """Run a PubMed author search for every scientist.

    Parameters
    ----------
    scientists : iterable of str
        Author names to search for. A dict keyed by name (such as the
        name -> start-year mapping used in this notebook) works as well,
        because iterating a dict yields its keys.

    Returns
    -------
    dict
        Maps each name to the parsed ``Entrez.esearch`` response for the
        query ``'<name>[Author]'``. Each request asks for at most 500 ids
        (``retmax='500'``), sorted by relevance.
    """
    search_results = {}

    for scientist in scientists:
        handle = Entrez.esearch(db='pubmed',
                                sort='relevance',
                                retmax='500',
                                retmode='xml',
                                term=f'{scientist}[Author]')
        try:
            search_results[scientist] = Entrez.read(handle)
        finally:
            # Always release the response handle, even if parsing raises.
            handle.close()
    return search_results

def fetch_publications(single_scientist):
    """Fetch the PubMed records listed in one scientist's search result.

    Parameters
    ----------
    single_scientist : mapping
        One value from the dict returned by ``searchPubMed``; only its
        ``'IdList'`` entry (a sequence of PubMed id strings) is read.

    Returns
    -------
    The parsed ``Entrez.efetch`` response. When ``'IdList'`` is empty no
    request is sent and a plain dict with empty ``'PubmedArticle'`` and
    ``'PubmedBookArticle'`` lists is returned, so callers can index
    ``['PubmedArticle']`` either way.
    """
    id_list = single_scientist['IdList']
    if not id_list:
        # A search with no hits leaves nothing to fetch; skip the request
        # rather than calling efetch with a blank id parameter.
        return {'PubmedArticle': [], 'PubmedBookArticle': []}

    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=','.join(id_list))
    try:
        return Entrez.read(handle)
    finally:
        # Always release the response handle, even if parsing raises.
        handle.close()

def flatten_publication(publication):
    """Collapse a nested publication record into a single-level dict.

    The record is first round-tripped through JSON so that only plain
    dicts, lists and scalars remain. Each leaf value is then stored under
    a key made of the path to it, joined with underscores; list positions
    appear in the path as their zero-based index
    (e.g. ``'AuthorList_0_LastName'``).

    Empty dicts and empty lists contribute no keys. A record that is
    itself a scalar is returned under the empty-string key.
    """

    def walk(node, prefix):
        # Yield (flattened_key, leaf_value) pairs in document order.
        if isinstance(node, dict):
            for key, child in node.items():
                yield from walk(child, f'{prefix}{key}_')
        elif isinstance(node, list):
            for position, child in enumerate(node):
                yield from walk(child, f'{prefix}{position}_')
        else:
            # Drop the trailing underscore added by the parent level.
            yield prefix[:-1], node

    plain = json.loads(json.dumps(publication))
    return dict(walk(plain, ''))

In [74]:
# Query PubMed once per scientist; each value is that scientist's parsed search result
search_results = searchPubMed(scientists)

# Fetch full records for a single scientist at a time, starting with the first entry
scientist, pids = list(search_results.items())[0]
publications_scientist1 = fetch_publications(pids)



In [76]:
# Take the first of that scientist's publications and flatten it into a single-level dict

first_article = publications_scientist1['PubmedArticle'][0]
f = flatten_publication(first_article)


In [79]:
# Display the name whose publications were fetched above
scientist

'Lauri Tuominen'

In [92]:
# One-row summary of the flattened publication.
# The row is named `record` rather than `dict` so the built-in `dict` type
# is not shadowed for the rest of the kernel session.
record = {
    'Scientist': scientist,
    'PMID': f['MedlineCitation_PMID'],
    'Title': f['MedlineCitation_Article_ArticleTitle'],
    'Year': f['MedlineCitation_Article_ArticleDate_0_Year'],
    'Journal': f['MedlineCitation_Article_Journal_Title'],
}
df = pd.DataFrame([record])
df

Unnamed: 0,Scientist,PMID,Title,Year,Journal
0,Lauri Tuominen,28394427,Opioid system and human emotions.,2017,British journal of pharmacology


In [91]:
# Author-related fields of interest (notes):
#   - n_authors
#   - list of authors
#   - author position
#   - affiliation

# List every flattened key that belongs to the article's author list
[name for name in f if 'MedlineCitation_Article_AuthorList' in name]

['MedlineCitation_Article_AuthorList_0_Identifier_0',
 'MedlineCitation_Article_AuthorList_0_AffiliationInfo_0_Affiliation',
 'MedlineCitation_Article_AuthorList_0_LastName',
 'MedlineCitation_Article_AuthorList_0_ForeName',
 'MedlineCitation_Article_AuthorList_0_Initials',
 'MedlineCitation_Article_AuthorList_1_AffiliationInfo_0_Affiliation',
 'MedlineCitation_Article_AuthorList_1_LastName',
 'MedlineCitation_Article_AuthorList_1_ForeName',
 'MedlineCitation_Article_AuthorList_1_Initials']

In [93]:
# Affiliation text stored under author index 0, affiliation index 0
f['MedlineCitation_Article_AuthorList_0_AffiliationInfo_0_Affiliation']

'Turku PET Centre and Department of Psychology, University of Turku, Turku, Finland.'