<a href="https://colab.research.google.com/github/ericchagnon15/BERTeley/blob/main/Final%20Notebooks/TopicModel_RecEngine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## install and imports

In [None]:
!pip install --upgrade spacy
!python -m spacy download en_core_web_lg
!pip install -U kaleido
!pip install octis
!pip install bertopic
# !python -m spacy download en_core_web_lg

## The following cell needs to be run twice

In [None]:
# This cell may need to be run twice
import pandas as pd
import numpy as np
import multiprocessing
import time
from google.colab import drive
import os
import joblib
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer

import re
import string

from octis.evaluation_metrics.coherence_metrics import Coherence
from octis.evaluation_metrics.diversity_metrics import TopicDiversity
import gensim.corpora as corpora
from gensim.models.coherencemodel import CoherenceModel

import nltk
nltk.download('stopwords')
import spacy
from scipy import spatial

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('punkt')
import kaleido
nlp = spacy.load('en_core_web_lg')
multiprocessing.cpu_count()

## helper functions

In [4]:
def create_path_if_not_exists(datapath):
  '''
  Makes sure a directory exists at the given location

  Input:
  datapath: string of the directory path to create

  Output:
  none
  '''

  # nothing to do when something already exists at this path
  if os.path.exists(datapath):
    return

  # makedirs also creates any missing parent directories
  os.makedirs(datapath)

def remove_punct_df(row_string):
  '''
  Lowercases the text and drops punctuation tokens from it

  The text is split with nltk's word_tokenize. A token is dropped when its
  lowercased form occurs inside string.punctuation (a substring test);
  every other token is kept in lowercase.

  Input:
  row_string: single row from a dataframe that contains the text in the corpus, a single string

  Output:
  a single string with the kept tokens joined by spaces
  '''

  lowered_tokens = (token.lower() for token in word_tokenize(row_string))
  kept_tokens = [token for token in lowered_tokens if token not in string.punctuation]

  return " ".join(kept_tokens)

def format_dataframe(df):

  '''
  The ALS data has some extra fields that are not needed, we only want to keep
  Authors, Pub Year, Research Area, Pub TYpe
  Create a new field 'combined' which is the concatenation of the Title and Abstract
  Rows where both the title and the abstract are missing are removed

  Input:
  df: pandas dataframe containing the raw data downloaded from the ALS database
      (must contain Title, Abstract, Authors, Pub Year, Research Area, Pub TYpe).
      The dataframe passed in is not modified.

  Output:
  new dataframe with the columns authors, pub_year, research_area, combined, pub_type
  where pub_year is an int
  '''

  # work on a copy so the caller's dataframe is left untouched
  df = df.copy()

  # NA in title / abstract converted to blanks so the concatenation never yields NaN
  df.loc[df['Title'].isna(), 'Title'] = ""
  df.loc[df['Abstract'].isna(), 'Abstract'] = ""

  # combined - title + abstract
  df['Combined'] = df['Title'] + " " + df['Abstract']

  # remove rows where both parts were blank
  df = df[df['Combined'] != " "]

  # keep only selected cols
  df_sel = df[['Authors', 'Pub Year', 'Research Area', 'Combined', "Pub TYpe"]].rename(
      columns={'Pub Year': 'pub_year', "Research Area": "research_area",
               "Authors": "authors", "Combined": "combined", "Pub TYpe": "pub_type"})

  # remove markup patterns
  # NOTE(review): the bare 'inf' alternative also strips "inf" inside ordinary
  # lowercase words (e.g. "information"); kept unchanged to preserve existing output
  pattern = re.compile(r'<inf>|</inf>|<sup>|</sup>|inf|/inf')
  df_sel['combined'] = [pattern.sub('', text) for text in df_sel['combined']]

  # filter spurious years
  df_sel = df_sel[df_sel['pub_year'] != '12.0.1.2'].copy()

  # convert years to int (values such as "2001.0" lose the ".0" first)
  df_sel['pub_year'] = df_sel['pub_year'].astype(str).replace(r'\.0', '', regex=True).astype(int)

  # if year is 201, that is mistyped from 2001
  # .loc is required here: chained indexing would assign into a temporary copy
  df_sel.loc[df_sel['pub_year'] == 201, 'pub_year'] = 2001

  return df_sel



def lemma_spacy(row_string):

  '''
  Lemmatizes the text with the spacy pipeline loaded in the module-level `nlp`

  Input:
  row_string: single row from a dataframe that contains the text in the corpus, a single string

  Output:
  a string made of the lemma of every token, joined by spaces;
  tokens whose lemma is the '-PRON-' placeholder are left out
  '''

  lemmas = [token.lemma_ for token in nlp(row_string) if token.lemma_ != '-PRON-']

  return " ".join(lemmas)


def remove_stop_df(row_string, allow_abbrev = True):

  '''
  This function utilizes the stopwords in the nltk package to remove the stopwords from the string.
  Some further steps were taken for the specific ALS use case including removal of words that do not contain a letter
  and words that are in the supplemental stopword list created after looking at initial outputs from early iterations of the topic model

  Input:
  row_string: single row from a dataframe that contains the text in the corpus, a single string
  allow_abbrev: when False, words of two characters or fewer are removed as well

  Output:
  a string with removed stopwords ("" when nothing is left or the input is empty)
  '''

  # build the lookup sets once per call instead of re-reading the nltk list for every word
  nltk_stopwords = set(stopwords.words('english'))

  # all entries are lowercase because they are compared against the lowercased word
  als_stopwords = {'use', 'award', 'prize', 'academy', 'thailand', 'high', 'scientist', 'medal', 'science',
                   'fellow', 'career', 'presentation', 'early', 'shirley', 'david', 'achievement'}

  filt_combined = []
  for word in word_tokenize(row_string):
    lowered = word.lower()

    # nltk stopwords and ALS specific stopwords
    if lowered in nltk_stopwords or lowered in als_stopwords:
      continue

    # the word must contain at least one letter
    if re.search('[a-zA-Z]', lowered) is None:
      continue

    # optionally drop very short words (abbreviations)
    if not allow_abbrev and len(lowered) <= 2:
      continue

    # the plural is folded into the singular so both count as one term
    filt_combined.append("perovskite" if lowered == "perovskites" else word)

  # joined outside the loop so an empty row returns "" instead of raising
  return " ".join(filt_combined)




def calculate_metrics(topic_model, texts):
  '''
  calculates evaluation metrics for a given topic model
  in this case it calculates Topic Coherence and Topic Diversity

  Input
  topic_model: the BERTopic object the metrics will be calculated on
  texts: the list of documents the BERTopic model was trained on, this is a list of strings

  Output
  a dict of the form {"Coherence": coherence_score, "Diversity": diversity_score}
  '''

  # octis requires the texts input be in the form of a list of list of strings
  octis_texts = [sentence.split() for sentence in texts]

  npmi = Coherence(texts = octis_texts, topk = 10, measure = 'c_npmi')
  topic_diversity = TopicDiversity(topk=10)

  # every word of the corpus; the set gives constant time membership tests,
  # the list is kept because its first word is the replacement for unknown topic words
  all_words = [word for words in octis_texts for word in words]
  vocabulary = set(all_words)

  # NOTE: `topics` is the module-level list of per-document topic assignments.
  # The "- 1" presumably discounts the outlier topic -1 - confirm against the model.
  topic_count = len(set(topics)) - 1

  # reformat the output of BERTopic to the proper format
  # {topics: [[topic, words, for, topic1], [topic, words, for, topic2], [etc, etc, etc]]}

  # check to see if the topic word has a space, if it does then this is a bigram model
  if ' ' in topic_model.get_topic(0)[:1][0][0]:
    bertopic_topics = [
      [
        # keep the bigram when either of its first two words is in the corpus;
        # slicing avoids an IndexError on an entry with a single word
        vals[0] if any(part in vocabulary for part in vals[0].split()[:2]) else all_words[0]
        for vals in topic_model.get_topic(i)[:10]
      ]
      for i in range(topic_count)
    ]

    output_tm = {"topics": bertopic_topics}

    coherence_score = _calculate_coherence(topic_model, texts)

  # unigram
  else:
    bertopic_topics = [
      [
        vals[0] if vals[0] in vocabulary else all_words[0]
        for vals in topic_model.get_topic(i)[:10]
      ]
      for i in range(topic_count)
    ]

    output_tm = {"topics": bertopic_topics}

    coherence_score = npmi.score(output_tm)

  diversity_score = topic_diversity.score(output_tm)

  return {"Coherence": coherence_score, "Diversity": diversity_score}


def _calculate_coherence(topic_model, texts):

  '''
  Calculates the coherence metric for bigrams

  All documents of a topic are joined into one long document, tokenized with the
  analyzer of the model's vectorizer and scored with gensim's CoherenceModel

  Input
  topic_model: the BERTopic model object
  texts: list of documents used to train the BERTopic model, this is a list of strings

  Output
  returns the c_npmi coherence score computed by gensim
  '''

  # NOTE: `topics` is the module-level list of per-document topic assignments
  # and has to line up with `texts`

  # Preprocess Documents
  documents = pd.DataFrame({"Document": texts,
                            "ID": range(len(texts)),
                            "Topic": topics})
  documents_per_topic = documents.groupby(['Topic'], as_index=False).agg({'Document': ' '.join})
  cleaned_docs = topic_model._preprocess_text(documents_per_topic.Document.values)

  # Extract the analyzer from the vectorizer used by BERTopic
  # (the vectorizer's feature names were fetched here before but never used;
  # that call is dropped because newer scikit-learn no longer provides it)
  analyzer = topic_model.vectorizer_model.build_analyzer()

  # Extract features for Topic Coherence evaluation
  tokens = [analyzer(doc) for doc in cleaned_docs]
  dictionary = corpora.Dictionary(tokens)
  corpus = [dictionary.doc2bow(token) for token in tokens]
  topic_words = [[word for word, _ in topic_model.get_topic(topic)]
                 for topic in range(len(set(topics)) - 1)]

  # Evaluate
  coherence_model = CoherenceModel(topics=topic_words,
                                   texts=tokens,
                                   corpus=corpus,
                                   dictionary=dictionary,
                                   coherence='c_npmi')
  return coherence_model.get_coherence()


def flatten(t):
  '''
  Flattens one level of nesting

  Input:
  t: an iterable of iterables, e.g. a list of lists

  Output:
  a single list with the items of every inner iterable, in their original order
  '''

  flat = []
  for sublist in t:
    flat.extend(sublist)

  return flat


def get_authors(input_data,rep_docs):

  '''
  extract authors from the input data for the given documents(rep_docs)

  Input:
  input_data: dataframe with a 'combined' column (document text) and an 'authors' column
              (author names separated by " ,")
  rep_docs: documents that you want to find the authors for, each one has to be
            a value of the 'combined' column

  Output: list of all unique authors that authored the documents in rep_docs
          (in no particular order); raises ValueError for a document that is not in input_data
  '''

  # both columns are converted once, not once per document
  doc_texts = input_data['combined'].to_list()
  author_cells = input_data['authors'].to_list()

  # text -> position of its first occurrence, same row that list.index would pick
  first_row = {}
  for position, text in enumerate(doc_texts):
    first_row.setdefault(text, position)

  unique_authors = set()
  for d in rep_docs:
    if d not in first_row:
      raise ValueError("{!r} is not in input_data['combined']".format(d))

    auth_str = author_cells[first_row[d]]

    # a missing author cell is not a string (e.g. NaN) and has nothing to contribute
    if not isinstance(auth_str, str):
      continue

    unique_authors.update(auth_str.split(" ,"))

  # a trailing separator leaves an empty name behind
  unique_authors.discard('')

  return list(unique_authors)



def author_all_topics(topics,input_data):

  '''
  for all topics, get respective docs and then find respective authors

  Input:
  topics: topic number assigned to each document, in the same order as the rows of input_data
  input_data: dataframe with the 'combined' and 'authors' columns used by get_authors

  Output:
  author_topics: dataframe with one column per topic holding the authors of that topic
  dict_df: dict that maps each topic number to its list of authors
  The topic number -1 (outlier documents in BERTopic) is left out of both.
  '''

  # get a dict of all documents for each topic
  topic_docs = {}
  for topic, doc in zip(topics, input_data['combined']):
    topic_docs.setdefault(topic, []).append(doc)

  # iterate over the topic numbers that are really present instead of assuming
  # they run from 0 to n-1 with exactly one extra outlier topic
  dict_df = {
    topic: get_authors(input_data, topic_docs[topic])
    for topic in sorted(topic_docs)
    if topic != -1
  }

  # create df with topics and authors; Series pad the shorter columns with NaN
  author_topics = pd.DataFrame({k: pd.Series(v) for k, v in dict_df.items()})

  return author_topics,dict_df



def closest_author_docs(relevant_docs):

  '''
  find closest authors to each doc by using each doc as a search term

  Uses the module-level `topic_model` and `dict_authors`.

  Input:
  relevant_docs: list of document strings

  Output:
  list of unique authors (no particular order) of the topic closest to each document
  '''

  authors = set()

  for doc in relevant_docs:
    similar_topics, _ = topic_model.find_topics(doc, top_n=5)

    # use the most similar topic that has an author list; the best match may be
    # a topic without one (e.g. the outlier topic), which used to raise a KeyError
    for topic in similar_topics:
      if topic in dict_authors:
        authors.update(dict_authors[topic])
        break

  return list(authors)



In [8]:
def create_input_new_springer(df):

  '''
  Prepares a dataframe with Title and Abstract columns for the topic model

  Input:
  df: pandas dataframe with at least the columns 'Title' and 'Abstract'.
      The dataframe passed in is not modified.

  Output:
  copy of df with missing titles/abstracts replaced by "", a new column 'combined'
  (title + " " + abstract with the <inf>/<sup> markup removed) and without the rows
  where both title and abstract were missing; all other columns are kept as they are
  '''

  # work on a copy so the caller's dataframe is left untouched
  df = df.copy()

  # NA in title / abstract converted to blanks
  df.loc[df['Title'].isna(), 'Title'] = ""
  df.loc[df['Abstract'].isna(), 'Abstract'] = ""

  # combined - title + abstract
  df['combined'] = df['Title'] + " " + df['Abstract']

  # remove blanks; the explicit copy makes the column assignment below
  # act on an independent dataframe instead of a slice
  df = df[df['combined'] != " "].copy()

  # remove markup patterns
  # NOTE(review): the bare 'inf' alternative also strips "inf" inside ordinary
  # lowercase words (e.g. "information"); kept unchanged to preserve existing output
  pattern = re.compile(r'<inf>|</inf>|<sup>|</sup>|inf|/inf')
  df['combined'] = [pattern.sub('', text) for text in df['combined']]

  return df

## path of files 

In [5]:
# make the Google Drive contents available under /content/drive
drive.mount('/content/drive')

# folder that holds the csv files
data_path = '/content/drive/MyDrive/NLP/Data/ALS Spreadsheets/'

# alphabetically ordered listing of that folder
files = sorted(os.listdir(data_path))
#len(files)


Mounted at /content/drive


## topic model

In [10]:
# embedding model - "default" ["mini-LM-L6-v2"] or "specter"
model_use = "default"

# beamline whose publications are modelled
beamline = "5.0.3"

# ngram range - (1,1); (2,2); (1,2)
# unigram is (1,1), bigram is (2,2)
n_gram = (1,1)

# label used in the result folder name, empty for any other range
n_gram_type = {(1,1): "Unigram", (2,2): "Bigram"}.get(n_gram, "")

# name of folder the results will be saved in
iter_version = "test_results/"

# location of the folder for the results
base_path = '/content/drive/MyDrive/NLP/Results/'

In [7]:
# read the spreadsheet of the selected beamline, lines that cannot be parsed are skipped
beam_name = "".join(["Beamline_", beamline, ".xls"])
df = pd.read_table(data_path + beam_name, on_bad_lines='skip')

# keep the needed columns and build the 'combined' text column
input_data = format_dataframe(df)

# text cleaning of the combined column, in this order:
# 1. remove stopwords (short abbreviations are dropped too)
# 2. lemmatize
# 3. remove punctuation
cleaned_text = input_data['combined'].apply(remove_stop_df, allow_abbrev = False)
cleaned_text = cleaned_text.apply(lemma_spacy)
input_data['combined'] = cleaned_text.apply(remove_punct_df)

In [None]:
start_time = time.time()

n_topics = 10

# settings shared by both embedding choices
bertopic_args = {"verbose": True, "nr_topics": n_topics, "n_gram_range": n_gram}

# the specter embeddings are only loaded when asked for,
# otherwise BERTopic uses its default model - "all-miniLM-L6_v2"
if model_use == "specter":
  sentence_model = SentenceTransformer('allenai-specter')
  bertopic_args["embedding_model"] = sentence_model

topic_model = BERTopic(**bertopic_args)

# the documents as a list of strings
list_text = input_data['combined'].to_list()

# train topic model
topics, probs = topic_model.fit_transform(list_text)

print('Total training time taken (mins): ', (time.time()-start_time)/60)


###################################################


start_time = time.time()

## dynamic topic modeling - topics over time
# publication year of every document, same order as list_text
years = input_data['pub_year'].to_list()
topics_over_time = topic_model.topics_over_time(list_text, topics, years)


print('Dynamic Topic modeling total time taken (mins): ', (time.time()-start_time)/60)

## Saving results

In [19]:
import collections
# result folder of this run: <base_path><iter_version><beamline>_<model_use>_<n_gram_type>/
# all model files are saved into it, so it is created first
model_path = "".join([base_path, iter_version, beamline, "_", model_use, "_", n_gram_type, "/"])
create_path_if_not_exists(model_path)

# save topic model as pickle file
def save_model(filename):
  '''
  Saves the module-level topic_model as a pickle file with joblib

  Input:
  filename: not used by the body; the target path is rebuilt from the module-level
            settings (base_path, iter_version, beamline, model_use, n_gram_type, beam_name)

  Output:
  none
  '''
  target_dir = "".join([base_path, iter_version, beamline, "_", model_use, "_", n_gram_type, "/"])
  joblib.dump(topic_model, "".join([target_dir, "model", beam_name, ".pkl"]))


# visualize barchart of topics
def create_barcharts(topic_model):
  '''
  Draws the bar chart of the topics and saves it as html and as png in model_path

  Input:
  topic_model: the BERTopic model object

  Output:
  the figure returned by visualize_barchart
  '''
  fig = topic_model.visualize_barchart(top_n_topics = len(topic_model.topics))

  # both files share the same name, only the extension differs
  chart_stem = model_path + "bar_chart" + beam_name
  fig.write_html(chart_stem + ".html")
  fig.write_image(chart_stem + ".png")

  return fig

# save topics in excel file
def save_topic_results(topic_model, filename = "Topic_Results.xlsx"):
  '''
  Writes topic_model.topics to an excel file in model_path

  Input:
  topic_model: the BERTopic model object
  filename: name of the excel file

  Output:
  none
  '''
  pd.DataFrame(topic_model.topics).to_excel(model_path + filename, sheet_name="topic_words")


def create_linecharts(topic_model, top_n_topics = 20):
  '''
  Draws the topics over time figure and saves it as png and as html in model_path

  Uses the module-level `topics_over_time`, `model_path` and `beam_name`.

  Input:
  topic_model: the BERTopic model object
  top_n_topics: how many topics are shown in the figure

  Output:
  the figure returned by visualize_topics_over_time
  '''
  # the argument is passed through; it used to be ignored in favour of a fixed 20
  fig_time = topic_model.visualize_topics_over_time(topics_over_time, top_n_topics=top_n_topics)

  fig_name_png = model_path + "topic_time" + beam_name + ".png"
  fig_time.write_image(fig_name_png) # static image
  fig_name_html = model_path + "topic_time" + beam_name + ".html"
  fig_time.write_html(fig_name_html) # interactive html image
  return fig_time

def save_topic_sizes(topics):
  '''
  Counts how often every topic number occurs and saves the counts as csv in model_path

  Input:
  topics: list with one topic number per document

  Output:
  none
  '''
  size_per_topic = collections.Counter(topics)

  size_table = pd.DataFrame(list(size_per_topic.items()))
  size_table.columns = ['Topics', 'Size']

  size_table.to_csv(model_path + 'topic_size' + '.csv')

  

In [20]:
# pickle the trained model; save_model does not use its argument, it rebuilds the path itself
save_model(model_path) 
# bar chart of the topics, written as html and png
create_barcharts(topic_model)
# topic_model.topics written to an excel file
save_topic_results(topic_model)
# topics over time figure, written as png and html
create_linecharts(topic_model)
# number of documents per topic, written as csv
save_topic_sizes(topics)

## Rec Engine

In [None]:
# topic embeddings of the trained model, indexed by create_sim_df
topic_emb = topic_model.topic_embeddings


def create_sim_df(topic_num,doc_text):

  '''
  Ranks the given documents by their cosine similarity to one topic embedding

  Input:
  topic_num: row of topic_emb to compare against
  doc_text: list of all docs within that topic

  Output:
  dataframe with the columns 'Doc' and 'Sim', sorted by similarity in descending order;
  a document text that occurs more than once appears in a single row
  '''

  # the documents are embedded with the same model the topic model was trained with
  encoder_name = 'allenai-specter' if model_use == "specter" else "all-MiniLM-L6-v2"
  doc_emb = SentenceTransformer(encoder_name).encode(doc_text, show_progress_bar=False)

  # cosine similarity = 1 - cosine distance
  similarity_by_doc = {}
  for text, embedding in zip(doc_text, doc_emb):
    similarity_by_doc[text] = (1-spatial.distance.cosine(embedding, topic_emb[topic_num]))

  # most similar document first
  ranked = sorted(similarity_by_doc.items(), key=lambda pair: pair[1], reverse=True)

  return pd.DataFrame(ranked, columns=['Doc', 'Sim'])



In [None]:

###--------- for a given search term, find closest authors, documents-----------###
search_term = input("Find authors and documents related to the term : ")


# authors of every topic, also saved as an excel file
df_authors, dict_authors = author_all_topics(topics,input_data)
excel_name = model_path + "Author_Topics.xlsx"
df_authors.to_excel(excel_name,sheet_name="authors_topics")

# topics ordered by similarity to the search term, the first one is the closest
similar_topics, similarity = topic_model.find_topics(search_term, top_n=5)
closest_topic = similar_topics[0]

print("The closest topic is topic number {}: {}".format(closest_topic,topic_model.topic_names[closest_topic]))

# all authors of the closest topic
print("All authors who work in this space:")
author_similar = dict_authors[closest_topic]
print(author_similar)

# representative docs of the closest topic, taken from get_representative_docs of the package
rep_docs = topic_model.get_representative_docs(closest_topic)
print("Three Closest documents are:")
print(rep_docs)

# one row per document with its text and its topic number
docs_topics = pd.DataFrame({'Document': list_text, 'Topic_Number': topics})

# documents that belong to the closest topic
docs_similar = docs_topics.loc[docs_topics['Topic_Number']==closest_topic, 'Document']

print("There are {} documents in this topic. They are (in order of similarity to topic):".format(len(docs_similar)))

# the row in the topic embeddings is the topic number shifted by one
create_sim_df(closest_topic+1,docs_similar.to_list())



In [None]:
###----- for a given document, find closest authors and documents-----------###
# NOTE: this cell reuses dict_authors and docs_topics created by the search term cell


# adjacent string literals inside the parentheses are joined into one string;
# a backslash pair followed by a line break inside a quoted string is a syntax error
search_doc = (
  "Researchers have increased the lifetime of a promising electric vehicle "
  " battery to a record level, an important step toward the goal of lighter, less expensive and long-lasting batteries for future electric vehicles. The work is reported June 28 in the journal Nature Energy.Such batteries—the goal"
  "  of research groups the world over—are seen as an important part of the solution to reduce the effects of climate change, and scientists are exploring a dizzying array of options."
  "One solution on the horizon is a lithium-metal battery for electric vehicles. These batteries hold almost twice the energy of their widely used lithium-ion counterparts, and they’re lighter."
  " That combination offers the enticing prospect of an electric vehicle that would be lighter and go much farther on a single charge. But lithium-metal batteries in the laboratory have been plagued by premature death, lasting only a fraction of the time of today’s lithium-ion batteries."
)



# find most similar topic
similar_topics, similarity = topic_model.find_topics(search_doc, top_n=5)

# print closest topic
print("The closest topic is topic number {}: {}".format(similar_topics[0],topic_model.topic_names[similar_topics[0]]))


# find all authors in the most similar topic
print("All authors who work in this space:")
author_similar = dict_authors[similar_topics[0]]
print(author_similar)

# get the representative docs of the most similar topic
rep_docs = topic_model.get_representative_docs(similar_topics[0])
print("Three Closest documents are:")
print(rep_docs)

# get all docs related to that topic
# filter for the most similar topic
docs_similar = docs_topics[docs_topics['Topic_Number']==similar_topics[0]]['Document']

print("There are {} documents in this topic. They are (in order of similarity to topic):".format(len(docs_similar)))

# the row in the topic embeddings is the topic number shifted by one
create_sim_df(similar_topics[0]+1,docs_similar.to_list())




The closest topic is topic number 7: 7_lithium metal_block copolymer_current density_copolymer electrolyte
All authors who work in this space:
['Sulas, D.B.', 'Cheng, Lei', 'Platt, H.S.', 'Al-Jassim, M.M.', 'Ho, A.', 'Maslyn, Jacqueline', 'Yuca, N.', 'Frenck, Louise', 'Srinivasan, V.', 'Bird, O.F.', 'Renner, Peter', 'Wu, S.-L.', 'Parkinson, Dilworth Y.,McCloskey, Bryan', 'Sahu, S.', 'Parkinson, Dilworth Y.,Kundu, Shankhamala', 'Trask, S.E.', 'Chen, Yinlin', 'Meckler, S.M.', 'Jiang, X.', 'Ling, M.', 'Barai, P.', 'Yi, Eongyu', 'Mueller, K.T.', 'MacDowell, Alastair A.,Balsara, Nitash P.,', 'Amores, M.', 'Kim, J.', 'Tong, W.', 'Parkinson, Dilworth Y.,Liang, Hong', 'Pratt, R.C.', 'Müller, A.', 'Andrykowski, R.', 'Robbins, S.', 'Pylypenko, S.', 'Chen, Kelly', 'Doeff, Marca', 'Frenck, L.', 'Kim, H.W.', 'Marwaha, N.', 'Harry, Katherine', 'Baskin, A.', 'Heywood, S.', 'McEntush, Kyle', 'Zhang, Bixia', 'Viswanathan, V.', 'Teat, S.J.', 'Sankar, S.', 'Sung, Yung-Eun', 'Ells, A.W.', 'Seitzman, N.', 

Unnamed: 0,Doc,Sim
0,lithium Electrodeposition Rigid Block Copolyme...,0.558527
1,lithium dendrite growth solid polymer electrol...,0.542074
2,lithium Dendrite Growth Glassy Rubbery Nanostr...,0.532473
3,lithium Metal copper Vanadium Oxide Battery Bl...,0.525009
4,growth Lithium Dendrites Globules Solid Block ...,0.518955
5,Influence Electrolyte Modulus Local Current De...,0.509452
6,Failure Mode Lithium Metal battery block Copol...,0.502882
7,detection subsurface structure underneath dend...,0.502662
8,Electrochemical Deposition Stripping Behavior ...,0.50063
9,lithium metal foil low defect density pending,0.494996


In [None]:
###----- for a given author, find all other authors and closest documents-----------###

# iterate thru dict, pick up the topics where that author occurs and collect all authors of those topics
# another example: search_author = 'Balsara, N.P.'
search_author = 'Zenyuk, I.V.'

# starts empty so the cell still works when the author is in none of the topics
first_auth_list = []
for t in dict_authors:
  if search_author in dict_authors[t]:
    # print closest topic
    print("The given author works in Topic Number {}: {}".format(t,topic_model.topic_names[t]))
    first_auth_list.extend(dict_authors[t])


# find the docs in which the search author occurs, then find closest topic to each doc and return authors for that
# regex=False: the name is matched literally, the dots in the initials are not wildcards
relevant_docs = input_data[input_data.apply(lambda row: row.astype(str).str.contains(search_author, regex=False).any(), axis=1)]['combined'].to_list()
authors_docs = closest_author_docs(relevant_docs)

# merge both author lists without duplicates
final_auth_list = authors_docs + first_auth_list
final_auth_list = list(set(final_auth_list))
print("Closest authors to given author:",final_auth_list)



The given author works in Topic Number 4: 4_fuel cell_polymer electrolyte_liquid water_gas diffusion
Closest authors to given author: ['Spernjak, D.', 'Chan, T.', 'Pan, X.', 'Karan, K.', 'Sabarirajan, Dinesh C,Calzada, O.', 'Mukundan, R.', 'Dery, S.', 'Garcia-Salaberri, P.A.', 'Vera, M.', 'Ng, B.', 'Besli, M.M.', 'McElrone, Andrew Joseph,Choat, Brendan', 'Martin, Andreas', 'Parkinson, Dilworth Y.,Kusoglu, Ahmet', 'Tucker, M.C.', 'Sabarirajan, Dinesh C,Yared, D.G.', 'Parkinson, Dilworth Y.,Serov, A.', 'Gili, A.', 'Shum, A.D.', 'Connolly, L.G.', 'Parkinson, Dilworth Y.,Blair, J.', 'Cetinbas, F.C.', 'Gambetta, Gregory A.,Brodersen, Craig', 'Tieu, P.', 'Fendorf, Scott E.,Luthy, Richard G,', 'Serov, A.', 'Normile, S.J.', 'Weidner, J.W.', 'Medici, E.F.', 'Gostick, Jeff T.,Gunterman, Haluna Penelope F.,Kienitz, Brian L.,Newman, James', 'Khedekar, K.', 'Weber, Adam Z,Gostick, Jeff T.,', 'Mansour, Nagi N,', 'Zheng, Y.', 'Gross, Elad', 'Xiao, X.', 'Varcoe, J.R.', 'Weber, Adam Z,Kusoglu, Ahmet', 