# PubMed PhD Supervisor Search

## Load Packages

In [1]:
import re
import ast
import pandas as pd
from functions import get_article_ids
from collections import Counter
from tqdm.notebook import tqdm
from geotext import GeoText


In [168]:
def get_locations(affiliations):
    """Collect the countries and cities mentioned in a list of affiliations.

    Every affiliation string is parsed with ``GeoText``. The countries and
    cities found in one affiliation are de-duplicated, but a place that shows
    up in several affiliations is kept once per affiliation so that callers
    can still count occurrences.

    Args:
        affiliations: Iterable of affiliation strings.

    Returns:
        list: Place names in input order (countries before cities for each
        affiliation). Empty when the input is empty or nothing is detected.
    """
    locations = []
    for affiliation in affiliations:
        places = GeoText(affiliation)
        # dict.fromkeys de-duplicates while keeping a stable order; a set
        # would reorder the names from run to run.
        found = dict.fromkeys(list(places.countries) + list(places.cities))
        # 'University' is treated as a false-positive place name and dropped.
        locations.extend(place for place in found if place != 'University')
    # Return only after *all* affiliations were scanned (the previous version
    # returned from inside the loop and so only ever saw the first one).
    return locations
    
def get_location(affiliation):
    """Summarise a single affiliation string as ``"<cities>, <country>"``.

    The affiliation is parsed with ``GeoText``. Detected cities are
    de-duplicated (keeping first-seen order) and joined with spaces, then the
    first detected country is appended after a comma. Parts that were not
    detected are simply left out, so the result never starts or ends with a
    dangling ", ".

    Args:
        affiliation: Affiliation string of one author.

    Returns:
        str: The location summary, e.g. ``'Toronto, Canada'``.
        list: An empty list when neither a city nor a country was detected
        (kept for backward compatibility with callers that test truthiness).
    """
    places = GeoText(affiliation)
    # 'University' is treated as a false-positive city name and dropped.
    cities = dict.fromkeys(city for city in places.cities if city != 'University')
    city_str = ' '.join(cities).strip()
    # Guard the lookup: not every affiliation mentions a recognisable country.
    country = places.countries[0] if places.countries else ''
    parts = [part for part in (city_str, country) if part]
    if not parts:
        return []
    return ', '.join(parts)

In [169]:
get_location('Department of Psychiatry, University of Toronto, Toronto, ON, Canada.')

'Toronto, Canada'

## Set Variables

In [170]:
# Search term sent to PubMed; it is also used as the prefix of the Excel file
# name in the "Save results" section.
query = 'Herscot Center'
# "Locations of interest": default value for the `locations_of_interest`
# parameters below. Entries are lower-case and mix country and city names;
# filter_affiliations_by_location() compares them as case-insensitive
# substrings of the affiliation text.
loi = ['united states', 'france', 'netherlands', 'denmark', 'sweden', 'germany', 'switzerland', 'norway', 
                   'finland', 'luxembourg', 'belgium', 'austria', 'cambridge', 'oxford', 'london']

## Fetch Entries from PubMed

In [173]:
# Fetch the PubMed entries for `query` through the project helper
# `get_article_ids` (imported from `functions`; its implementation is not
# visible in this notebook).
# NOTE(review): the API key is hard-coded in the notebook. Consider loading it
# from an environment variable / secrets store and rotating this key, since it
# is exposed to anyone who can read this file.
response = get_article_ids(query, sort = 'relevance', from_year = 2010, api_key="9f66a38099f29d882365afb5ea170b1ef608")
# response[0] supports .to_dict('records') (presumably a DataFrame of papers);
# it is turned into a list with one dict per paper.
papers_result = response[0].to_dict('records')
# response[1] holds the per-author affiliation entries shown further below.
affiliations_result = response[1]

Query:Herscot Center
Number of Results: 22


HBox(children=(FloatProgress(value=0.0, description='Downloading Herscot Center data in chunks : ', max=1.0, s…




In [174]:
papers_result

[{'title': 'Juvenile cataract in association with tuberous sclerosis complex.',
  'pmid': '32340510',
  'keywords': ['Tuberous sclerosis complex', 'cataract', 'juvenile'],
  'pub_type_list': ['Journal Article'],
  'journal_info_list': ['Ophthalmic genetics',
   '1744-5094',
   'Electronic',
   'Ophthalmic Genet.'],
  'author_list': [['A L', 'AL', 'Geffrey'],
   ['K R', 'KR', 'Geenen'],
   ['E', 'E', 'Abati'],
   ['S H', 'SH', 'Greenstein'],
   ['D K', 'DK', 'VanderVeen'],
   ['R L', 'RL', 'Levy'],
   ['S L', 'SL', 'Davidson'],
   ['M P', 'MP', 'McGarrey'],
   ['E A', 'EA', 'Thiele'],
   ['M E', 'ME', 'Aronow']],
  'affil_list': ['Massachusetts Eye and Ear, Harvard Medical School, Boston, Massachusetts, USA.'],
  'pubdate': '2020',
  'link': 'https://www.ncbi.nlm.nih.gov/pubmed/32340510',
  'abstract': [None]},
 {'title': 'A telehealth approach to improving clinical trial access for infants with tuberous sclerosis complex.',
  'pmid': '31969108',
  'keywords': ['Autism spectrum disorder

In [89]:
affiliations_result

[{'author': ['Annelise', 'A', 'Madison'],
  'affiliation': ['Institute for Behavioral Medicine Research, The Ohio State University College of Medicine, United States.',
   'Department of Psychology, The Ohio State University, United States.'],
  'pmid': '32395568'},
 {'author': ['Janice K', 'JK', 'Kiecolt-Glaser'],
  'affiliation': ['Institute for Behavioral Medicine Research, The Ohio State University College of Medicine, United States.',
   'Department of Psychiatry and Behavioral Health, The Ohio State University College of Medicine, United States.'],
  'pmid': '32395568'},
 {'author': ['Craig', 'C', 'Friesen'],
  'affiliation': ["Division of Gastroenterology, Hepatology, and Nutrition, Children's Mercy Kansas City, 2401 Gillham Road, Kansas City, MO, 64108, USA. cfriesen@cmh.edu."],
  'pmid': '32393272'},
 {'author': ['Meenal', 'M', 'Singh'],
  'affiliation': ["Division of Gastroenterology, Hepatology, and Nutrition, Children's Mercy Kansas City, 2401 Gillham Road, Kansas City, MO,

## Create Tables

In [120]:
import itertools
#Create Author List
def create_author_affil_list(papers_result):
    """Build one record per (author, affiliation) pair of every paper.

    Every author of a paper is combined with every entry of that paper's
    ``affil_list``. A pair whose record cannot be built is reported by
    printing the exception and is then skipped.

    Args:
        papers_result: List of paper dicts providing ``author_list``,
            ``affil_list``, ``title`` and ``pmid``. Each author entry is
            indexed at positions 0 and 2 (presumably forename and surname).

    Returns:
        list: Dicts with the keys ``author_list``, ``author_string``
        (element 2 + ", " + element 0 of the author entry), ``affiliation``,
        ``locations``, ``title`` and ``pmid``.
    """
    records = []
    for paper in papers_result:
        pairs = itertools.product(paper['author_list'], paper['affil_list'])
        for name_parts, affiliation_text in pairs:
            try:
                display_name = name_parts[2] + ", " + name_parts[0]
                place = get_location(affiliation_text)
                record = {
                    'author_list': name_parts,
                    'author_string': display_name,
                    'affiliation': affiliation_text,
                    'locations': place,
                    'title': paper['title'],
                    'pmid': paper['pmid'],
                }
            except Exception as problem:
                # Best effort: report the failure and carry on with the next pair.
                print(problem)
            else:
                records.append(record)
    return records

authors_affils = create_author_affil_list(papers_result)
authors_affils

[{'author_list': ['Annelise', 'A', 'Madison'],
  'author_string': 'Madison, Annelise',
  'affiliation': 'Institute for Behavioral Medicine Research, The Ohio State University College of Medicine, United States.',
  'locations': ['United States'],
  'title': 'Stress, depression, diet, and the gut microbiota: human-bacteria interactions at the core of psychoneuroimmunology and nutrition.',
  'pmid': '32395568'},
 {'author_list': ['Annelise', 'A', 'Madison'],
  'author_string': 'Madison, Annelise',
  'affiliation': 'Department of Psychiatry and Behavioral Health, The Ohio State University College of Medicine, United States.',
  'locations': ['United States'],
  'title': 'Stress, depression, diet, and the gut microbiota: human-bacteria interactions at the core of psychoneuroimmunology and nutrition.',
  'pmid': '32395568'},
 {'author_list': ['Janice K', 'JK', 'Kiecolt-Glaser'],
  'author_string': 'Kiecolt-Glaser, Janice K',
  'affiliation': 'Institute for Behavioral Medicine Research, The 

In [9]:
#Create Affiliations List
def create_affil_list(affiliations_result):
    """Flatten the per-author affiliation entries into plain records.

    Args:
        affiliations_result: List of dicts with the keys ``author`` (a
            sequence indexed at positions 0 and 2), ``affiliation`` (list of
            affiliation strings) and ``pmid``.

    Returns:
        list: Dicts with the keys ``author`` (element 2 + ', ' + element 0 of
        the author entry), ``affiliation``, ``locations`` (result of
        ``get_locations``) and ``pmid``. Entries that cannot be converted are
        printed and skipped.
    """
    records = []
    for entry in affiliations_result:
        try:
            author_string = entry['author'][2] + ', ' + entry['author'][0]
            records.append({'author': str(author_string),
                            'affiliation': entry['affiliation'],
                            'locations': get_locations(entry['affiliation']),
                            'pmid': entry['pmid']})
        except Exception:
            # Best effort: show the offending entry and continue. Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            print(entry)
    return records

affiliations = create_affil_list(affiliations_result)

In [133]:
def filter_affiliations_by_location(affiliations_by_author, locations_of_interest=None):
    """
    Keep only the authors that have at least one affiliation in a location of interest.

    Matching is a case-insensitive substring test of each location against the
    ``affiliation`` text of the author's entries.

    Args:
        affiliations_by_author - List: one dict per author with an ``affiliations`` key
            holding a list of ``{'affiliation': str, ...}`` dicts.
        locations_of_interest - List: location names to keep. Defaults to the
            notebook-level ``loi`` list, looked up at call time so that edits to
            ``loi`` take effect without re-defining this function.
    Returns:
        List - the matching author dicts, in input order, each at most once.
    """
    if locations_of_interest is None:
        locations_of_interest = loi
    wanted = [location.lower() for location in locations_of_interest]

    keep_affiliations_by_author = []
    for author in affiliations_by_author:
        names = [entry['affiliation'].lower() for entry in author['affiliations']]
        # any() stops at the first hit, so an author matching several
        # locations is no longer appended several times.
        if any(location in name for name in names for location in wanted):
            keep_affiliations_by_author.append(author)

    return keep_affiliations_by_author

def map_author_to_affil(authors_affils, locations_of_interest=None, n_authors=5):
    """
    Count the most common authors and find the top 3 locations of the first `n_authors` of them.

    Args:
        authors_affils - List: records with an ``author_string`` key and a ``locations`` key.
            ``locations`` may be a single string (counted as one location) or a
            list of location names (each counted); empty values are ignored.
        locations_of_interest - currently unused; kept for backward compatibility
            (the location filter step is disabled).
        n_authors - Int: how many of the most frequent authors to summarise
            (default 5, as before). ``None`` summarises all of them.

    Returns:
        Tuple - ``(affiliations_by_author, top_authors)``:
            ``affiliations_by_author`` is a list of dicts with the keys ``author``,
            ``total_papers`` and ``affiliations`` (up to 3
            ``{'affiliation': name, 'count': n}`` dicts, most frequent first).
            Note that ``total_papers`` is the sum of all location counts of the
            author, not a count of distinct papers.
            ``top_authors`` maps the (up to 200) most frequent author strings to
            their number of records.
    """
    # Records without an author string cannot be attributed and are skipped.
    author_counts = Counter(record['author_string'] for record in authors_affils
                            if record.get('author_string'))
    top_authors = dict(author_counts.most_common(200))

    affiliations_by_author = []
    for author_name in list(top_authors)[:n_authors]:
        affs = Counter()
        for record in authors_affils:
            if record.get('author_string') != author_name:
                continue
            locations = record.get('locations')
            if not locations:
                continue
            if isinstance(locations, str):
                # A plain string is ONE location; iterating it would count
                # its individual characters.
                affs[locations] += 1
            else:
                affs.update(locations)

        reformatted_affs = [{'affiliation': name, 'count': count}
                            for name, count in affs.most_common(3)]
        print(reformatted_affs)
        affiliations_by_author.append({'author': author_name,
                                       'total_papers': sum(affs.values()),
                                       'affiliations': reformatted_affs})

    return affiliations_by_author, top_authors

affiliations_by_author, top_authors = map_author_to_affil(authors_affils)

[{'affiliation': 'Australia', 'count': 33}, {'affiliation': 'Thailand', 'count': 27}, {'affiliation': 'Geelong', 'count': 27}]
[{'affiliation': 'Australia', 'count': 43}, {'affiliation': 'Geelong', 'count': 20}, {'affiliation': 'Melbourne', 'count': 20}]
[{'affiliation': 'Toronto', 'count': 24}, {'affiliation': 'Canada', 'count': 24}, {'affiliation': 'Ontario', 'count': 6}]
[{'affiliation': 'Australia', 'count': 14}, {'affiliation': 'Geelong', 'count': 10}, {'affiliation': 'Thailand', 'count': 9}]
[{'affiliation': 'Geelong', 'count': 8}, {'affiliation': 'Australia', 'count': 8}, {'affiliation': 'Brazil', 'count': 8}]


In [135]:
affiliations_by_author

[{'author': 'Maes, Michael',
  'total_papers': 219,
  'affiliations': [{'affiliation': 'Australia', 'count': 33},
   {'affiliation': 'Thailand', 'count': 27},
   {'affiliation': 'Geelong', 'count': 27}]},
 {'author': 'Berk, Michael',
  'total_papers': 137,
  'affiliations': [{'affiliation': 'Australia', 'count': 43},
   {'affiliation': 'Geelong', 'count': 20},
   {'affiliation': 'Melbourne', 'count': 20}]},
 {'author': 'McIntyre, Roger S',
  'total_papers': 67,
  'affiliations': [{'affiliation': 'Toronto', 'count': 24},
   {'affiliation': 'Canada', 'count': 24},
   {'affiliation': 'Ontario', 'count': 6}]},
 {'author': 'Kanchanatawan, Buranee',
  'total_papers': 80,
  'affiliations': [{'affiliation': 'Australia', 'count': 14},
   {'affiliation': 'Geelong', 'count': 10},
   {'affiliation': 'Thailand', 'count': 9}]},
 {'author': 'Carvalho, André F',
  'total_papers': 68,
  'affiliations': [{'affiliation': 'Geelong', 'count': 8},
   {'affiliation': 'Australia', 'count': 8},
   {'affiliatio

In [136]:

sorted(affiliations_by_author, key=lambda k: k['total_papers'], reverse=True)

[{'author': 'Maes, Michael',
  'total_papers': 219,
  'affiliations': [{'affiliation': 'Australia', 'count': 33},
   {'affiliation': 'Thailand', 'count': 27},
   {'affiliation': 'Geelong', 'count': 27}]},
 {'author': 'Berk, Michael',
  'total_papers': 137,
  'affiliations': [{'affiliation': 'Australia', 'count': 43},
   {'affiliation': 'Geelong', 'count': 20},
   {'affiliation': 'Melbourne', 'count': 20}]},
 {'author': 'Kanchanatawan, Buranee',
  'total_papers': 80,
  'affiliations': [{'affiliation': 'Australia', 'count': 14},
   {'affiliation': 'Geelong', 'count': 10},
   {'affiliation': 'Thailand', 'count': 9}]},
 {'author': 'Carvalho, André F',
  'total_papers': 68,
  'affiliations': [{'affiliation': 'Geelong', 'count': 8},
   {'affiliation': 'Australia', 'count': 8},
   {'affiliation': 'Brazil', 'count': 8}]},
 {'author': 'McIntyre, Roger S',
  'total_papers': 67,
  'affiliations': [{'affiliation': 'Toronto', 'count': 24},
   {'affiliation': 'Canada', 'count': 24},
   {'affiliation

In [137]:
def author_affil_total_df(affiliations_by_author, n=25):
    """
    Build a dataframe of the top publishing authors and their most frequent affiliations.

    Args:
        affiliations_by_author - List: dicts with the keys ``author``, ``total_papers``
            and ``affiliations`` (list of ``{'affiliation': name, 'count': n}`` dicts),
            as produced by `map_author_to_affil()`.
        n - Int: maximum number of rows to return (default 25).

    Returns:
        DataFrame with the columns ``author``, ``topAffiliations`` (dict mapping
        affiliation name to its count as a string) and ``totalPapers``, sorted by
        ``totalPapers`` in descending order. Empty input yields an empty frame
        with the same columns.
    """
    columns = ['author', 'topAffiliations', 'totalPapers']
    # `totalPapers` is taken from `total_papers`, which `map_author_to_affil()`
    # computes before any location filtering. It is deliberately not re-derived
    # from the overall author counts, because that total was occasionally
    # smaller than the sum of the affiliation counts.
    rows = [{'author': entry['author'],
             'topAffiliations': {aff['affiliation']: str(aff['count'])
                                 for aff in entry['affiliations']},
             'totalPapers': entry['total_papers']}
            for entry in affiliations_by_author]
    if not rows:
        # Sorting an empty, column-less frame would raise a KeyError.
        return pd.DataFrame(columns=columns)

    out_df = pd.DataFrame(rows, columns=columns)
    out_df = out_df.sort_values(by='totalPapers', ascending=False).reset_index(drop=True)
    return out_df.head(n)
    
author_affil_total_df(affiliations_by_author)

Unnamed: 0,author,topAffiliations,totalPapers
0,"Maes, Michael","{'Australia': '33', 'Thailand': '27', 'Geelong...",219
1,"Berk, Michael","{'Australia': '43', 'Geelong': '20', 'Melbourn...",137
2,"Kanchanatawan, Buranee","{'Australia': '14', 'Geelong': '10', 'Thailand...",80
3,"Carvalho, André F","{'Geelong': '8', 'Australia': '8', 'Brazil': '8'}",68
4,"McIntyre, Roger S","{'Toronto': '24', 'Canada': '24', 'Ontario': '6'}",67


In [138]:
def get_authors_papers(author_of_interest, authors, papers_result):
    """
    Return the papers of one author without the bulky list-valued columns.

    Args:
        author_of_interest - Str: author string to look up (compared with ``author_string``).
        authors - List: dicts with the keys ``author_string`` and ``pmid``.
        papers_result - List: paper dicts; each needs a ``pmid`` key and the columns
            ``pub_type_list``, ``journal_info_list``, ``author_list`` and ``keywords``.

    Returns:
        List - one dict per matching paper with those four columns removed.
        Empty list when the author has no matching paper.
    """
    # A set gives constant-time membership tests in the paper loop below.
    matching_pmids = {author['pmid'] for author in authors
                      if author['author_string'] == author_of_interest}
    matching_papers = [paper for paper in papers_result if paper['pmid'] in matching_pmids]
    if not matching_papers:
        # An empty DataFrame has no columns, so the drop() below would raise.
        return []

    matched_papers = pd.DataFrame(matching_papers).drop(
        columns=['pub_type_list', 'journal_info_list', 'author_list', 'keywords'])
    return matched_papers.to_dict('records')

# Collect the papers of every author in `top_authors` (the loop itself applies
# no limit of 25; `top_authors` holds up to 200 authors from
# map_author_to_affil()).
# NOTE(review): `authors` is not defined in the visible notebook cells --
# presumably it should be `authors_affils`; confirm before re-running.
paper_top_author_list = []
for author_of_interest in top_authors:
    matchedPapers_dicts = get_authors_papers(author_of_interest, authors, papers_result)
    for matchedPaper in matchedPapers_dicts:
        # Keep only the fields needed for the per-author paper table.
        top_paper = {'author' : author_of_interest, 
                     'title' : matchedPaper['title'], 
                     'pubdate' : matchedPaper['pubdate'], 
                     'link' : matchedPaper['link'], 
                     'pmid' : matchedPaper['pmid']}
        paper_top_author_list.append(top_paper)

paper_top_author_df = pd.DataFrame(paper_top_author_list)

affiliations_df = pd.DataFrame(affiliations)[['author', 'affiliation', 'pmid']]

# Attach each author's affiliation list to their papers; the join key is the
# (author, pmid) pair.
big_df = pd.merge(paper_top_author_df, affiliations_df, on = ['author', 'pmid'])
# One summary entry per author in `affiliations_by_author`, keyed by author
# name; `locations` is stored as the string form of the list of top
# affiliation names.
out_dict = {}
for author_dict in affiliations_by_author:
    out_dict[author_dict['author']] = {'author' : author_dict['author'], 
                         'total_count' : author_dict['total_papers'],
                         'locations' : str([d['affiliation'] for d in author_dict['affiliations']]),
                           'paper_data' : big_df.loc[big_df['author'] == author_dict['author'], :].to_dict('records')
                       }
    
    
out_dict

{'Maes, Michael': {'author': 'Maes, Michael',
  'total_count': 219,
  'locations': "['Australia', 'Thailand', 'Geelong']",
  'paper_data': [{'author': 'Maes, Michael',
    'link': 'https://www.ncbi.nlm.nih.gov/pubmed/32335809',
    'pmid': '32335809',
    'pubdate': '2020',
    'title': 'Major Depression in Children with Transfusion-Dependent Thalassemia Is Strongly Associated with the Combined Effects of Blood Transfusion Rate, Iron Overload, and Increased Pro-inflammatory Cytokines.',
    'affiliation': ['Department of Psychiatry, Faculty of Medicine, King Chulalongkorn Memorial Hospital, Bangkok, Thailand. dr.michaelmaes@hotmail.com.',
     'Department of Psychiatry, Medical University of Plovdiv, Plovdiv, Bulgaria. dr.michaelmaes@hotmail.com.',
     'IMPACT Strategic Research Centre, Deakin University, PO Box 281, Geelong, VIC, 3220, Australia. dr.michaelmaes@hotmail.com.']},
   {'author': 'Maes, Michael',
    'link': 'https://www.ncbi.nlm.nih.gov/pubmed/32217347',
    'pmid': '322

In [76]:
affiliations_by_author

[{'author': 'Pariante, Carmine M',
  'total_papers': 22,
  'affiliations': [{'affiliation': 'London', 'count': 15},
   {'affiliation': 'United Kingdom', 'count': 5},
   {'affiliation': 'none', 'count': 1}]},
 {'author': 'Penninx, Brenda W J H',
  'total_papers': 16,
  'affiliations': [{'affiliation': 'Amsterdam', 'count': 12},
   {'affiliation': 'Netherlands', 'count': 3},
   {'affiliation': 'none', 'count': 1}]},
 {'author': 'Fuchs, Dietmar',
  'total_papers': 22,
  'affiliations': [{'affiliation': 'Austria', 'count': 11},
   {'affiliation': 'Innsbruck', 'count': 11}]},
 {'author': 'Lamers, Femke',
  'total_papers': 11,
  'affiliations': [{'affiliation': 'Amsterdam', 'count': 8},
   {'affiliation': 'Netherlands', 'count': 2},
   {'affiliation': 'none', 'count': 1}]},
 {'author': 'Khandaker, Golam M',
  'total_papers': 14,
  'affiliations': [{'affiliation': 'Cambridge', 'count': 8},
   {'affiliation': 'Peterborough', 'count': 6}]},
 {'author': 'Milaneschi, Yuri',
  'total_papers': 10,


## Save results

In [None]:
#Save papers by author in order of total number of papers
# NOTE(review): `big_df_grouped` and `final_authors_df` are not defined in the
# visible notebook cells. `big_df_grouped` is iterated as (key, DataFrame)
# pairs and the key is indexed at positions 0 and 2 (presumably a groupby on
# author / locations / total count) -- confirm before running.
groups = {}
lengths = []
for key, df in big_df_grouped:
    groups[key] = df
    # Rank the groups by their number of distinct (author, pmid) pairs.
    unique_papers = df[['author', 'pmid']].drop_duplicates()
    lengths.append({'key' : key, 'length' : unique_papers.shape[0]})

lengths = sorted(lengths, key=lambda k: k['length'], reverse=True)

filename = query + '_TopAuthors.xlsx'
# The context manager saves and closes the workbook even if a sheet fails;
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter(filename) as writer:
    final_authors_df.to_excel(writer, sheet_name='LocationsByAuthor')

    # One sheet for each of the 20 groups with the most papers.
    for entry in lengths[0:20]:
        key = entry['key']
        key_string = key[0] + ' ' + str(key[2])
        # .copy() so the column assignment below does not write into a slice of
        # the grouped frame.
        trimmed_df = groups[key][['title', 'pubdate', 'link', 'affiliation']].copy()
        # The affiliation cells hold lists; join them into one string so the
        # rows become hashable for drop_duplicates().
        trimmed_df['affiliation'] = trimmed_df['affiliation'].apply(' -- '.join)
        trimmed_df = trimmed_df.drop_duplicates()
        # NOTE(review): Excel limits sheet names to 31 characters; very long
        # author names may need shortening.
        trimmed_df.to_excel(writer, sheet_name=key_string)

### BREAK

In [None]:
# from fuzzywuzzy import fuzz
# import copy
# ### If you remove this, it edits the original `paper_counts_affiliations` list
# paper_counts_affiliations_copy = copy.deepcopy(paper_counts_affiliations)

# fuzzy_threshold = 90

# out_author_affils = []
# for author_index, author_dict in enumerate(paper_counts_affiliations_copy):
#     author = paper_counts_affiliations_copy[author_index]['author']
#     main_affil = paper_counts_affiliations_copy[author_index]['affiliations'][0]
#     main_affil_count = main_affil['count']
#     out_affils = []
    
#     for other_affil in paper_counts_affiliations_copy[author_index]['affiliations'][1:]:
#         if fuzz.partial_ratio(main_affil['affiliation'].lower(), \
#                               other_affil['affiliation'].lower()) >= fuzzy_threshold:
#             main_affil_count = main_affil_count + other_affil['count']
#         else:
#             out_affils.append(other_affil)
#     main_affil['count'] = main_affil_count
#     out_affils = [main_affil] + out_affils
#     out_author_affils.append({'author' : author, 'affiliations' : out_affils})
    
# out_author_affils

In [None]:
# Get the papers of a single author of interest.
# NOTE(review): `top_authors_df`, `authors`, `res_dicts` and `top_papers` are
# not defined in the visible notebook cells; this cell appears to depend on
# state from an earlier version of the notebook -- confirm before running.
# .loc[1] selects the row labelled 1 (the second row if the index is the
# default RangeIndex).
author_of_interest = top_authors_df.loc[1]['author']
print('Papers Matching: ' + author_of_interest)
# PMIDs of all records whose author string equals the author of interest.
matching_pmids = []
for author in authors:
    if author['author_string'] == author_of_interest:
        matching_pmids.append(author['pmid'])
        
matching_papers = []
for paper in res_dicts:
    if paper['pmid'] in matching_pmids:
        matching_papers.append(paper)
        
# `papers_sample` is read by the journal and keyword cells below.
papers_sample = pd.DataFrame(matching_papers).to_dict('records')
# Only the first 10 matching papers are kept for display.
matching_papers_df = pd.DataFrame(matching_papers)[0:10]
matchedPapers = matching_papers_df.drop(columns=['pub_type_list', 'journal_info_list', 'author_list', 'link', 'keywords'])
# NOTE(review): `matchedPapers` is computed but `top_papers` is displayed --
# possibly the wrong name.
top_papers

In [None]:
# Get the affiliations of the author of interest and show the 5 most common.
# NOTE(review): `result` is not defined in the visible notebook cells --
# presumably the `response` tuple returned by get_article_ids(); confirm.
matching_affiliations = []
for author in result[1]:
    # Same author string format as elsewhere: element 2 + ', ' + element 0.
    author_name = author['author'][2] + ', ' + author['author'][0]
    if author_name == author_of_interest:
        for affiliation in author['affiliation']:
            matching_affiliations.append(affiliation)
print('Most Common Affiliations Matching: ' + author_of_interest)
affs = Counter(matching_affiliations)
# `affs` is already a Counter; wrapping it in Counter() again only copies it.
d = dict(Counter(affs).most_common(5))
pd.DataFrame.from_dict(d, orient='index').reset_index().rename(columns={'index':'Affiliation', 0:'Count'})

In [None]:
# Journals the author has published in: the first element of each paper's
# `journal_info_list` is counted and the 5 most frequent ones are shown.
journals = [paper['journal_info_list'][0] for paper in papers_sample]
print('Most Common Journals Matching: ' + author_of_interest)
d = dict(Counter(journals).most_common(5))
journal_table = pd.DataFrame.from_dict(d, orient='index').reset_index()
journal_table.rename(columns={'index':'Journal', 0:'Count'})

In [None]:
# Keywords related to the author: every keyword of every sampled paper is
# counted and the 25 most frequent ones are shown.
keywords = [keyword for paper in papers_sample for keyword in paper['keywords']]
print('Keywords Matching: ' + author_of_interest)
d = dict(Counter(keywords).most_common(25))
# The column holds keywords, so it is labelled 'Keyword' (it was previously
# mislabelled 'Journal').
pd.DataFrame.from_dict(d, orient='index').reset_index().rename(columns={'index':'Keyword', 0:'Count'})

In [None]:
# Build `authors_info`: for every author in `top_authors_df`, the 25 most
# common keywords of their papers and the lower-cased, comma-separated terms
# of their 5 most common affiliations. search_author() reads this list.
# NOTE(review): `top_authors_df`, `authors`, `res_dicts` and `result` are not
# defined in the visible notebook cells -- confirm before running.
#author_of_interest = top_authors_df.loc[0]['author']
authors_info = []
for author_of_interest in list(top_authors_df['author']):
    # PMIDs of all records belonging to this author.
    matching_pmids = []
    for author in authors:
        if author['author_string'] == author_of_interest:
            matching_pmids.append(author['pmid'])

    matching_papers = []
    for paper in res_dicts:
        if paper['pmid'] in matching_pmids:
            matching_papers.append(paper)

    papers_sample = pd.DataFrame(matching_papers).to_dict('records')

    #Keywords related to the author
    keywords = []
    for paper in papers_sample:
        if len(paper['keywords']) > 0:
            for i in paper['keywords']:
                keywords.append(i)
    d = dict(Counter(keywords).most_common(25))
    # Round trip through a DataFrame; the result is the list of the counted
    # keywords in most-common order.
    keywords = list(pd.DataFrame.from_dict(d, orient='index').reset_index().rename(columns={'index':'Keyword', 0:'Count'})['Keyword'])
    
    #Get matching affiliations
    matching_affiliations = []
    for author in result[1]:
        author_name = author['author'][2] + ', ' + author['author'][0]
        if author_name == author_of_interest:
            for affiliation in author['affiliation']:
                matching_affiliations.append(affiliation)
    affs = Counter(matching_affiliations)
    d = dict(Counter(affs).most_common(5))
    # Split each of the top affiliations on commas into lower-cased, stripped
    # terms, then de-duplicate them (set order is arbitrary).
    aff_terms = []
    for i in pd.DataFrame.from_dict(d, orient='index').reset_index().rename(columns={'index':'Affiliation', 0:'Count'})['Affiliation']:
        aff_terms = aff_terms + [d.lower().strip() for d in i.split(',')]
    aff_terms = list(set(aff_terms))
    
    authors_info.append({'author' : author_of_interest , 'keywords' : keywords, 'aff_terms' : aff_terms})

In [None]:
def search_author(keywords, location=None, authors_data=None):
    """
    Find authors by keyword and, optionally, by a term of their affiliations.

    Args:
        keywords - List: keywords to look for; an author matches when at least one of
            them equals one of the author's keywords (case-insensitive).
        location - Str: optional text that must occur inside at least one of the
            author's ``aff_terms`` (case-insensitive substring). Any falsy value
            (``None``, ``''``, ``[]``) disables the location filter.
        authors_data - List: author dicts with the keys ``keywords`` and ``aff_terms``.
            Defaults to the notebook-level ``authors_info`` list.

    Returns:
        DataFrame with one row per matching author, in the order of ``authors_data``.
    """
    if authors_data is None:
        authors_data = authors_info
    wanted_keywords = {keyword.lower() for keyword in keywords}
    # `aff_terms` are stored lower-cased, so the location is lower-cased too;
    # otherwise a search for 'Boston' could never match.
    wanted_location = location.lower() if location else None

    accept = []
    for author in authors_data:
        author_keywords = {keyword.lower() for keyword in author['keywords']}
        if wanted_keywords.isdisjoint(author_keywords):
            continue
        if wanted_location and not any(wanted_location in term for term in author['aff_terms']):
            continue
        if author not in accept:
            accept.append(author)
    return pd.DataFrame(accept)

In [None]:
search_author(['CD-8', 'Depression', 'Anxiety', 'neuroimmunology'], 'boston')

In [None]:
pd.DataFrame(authors_info)