# A-T modeling with Model Selection

### References:
https://nbviewer.jupyter.org/github/rare-technologies/gensim/blob/develop/docs/notebooks/atmodel_tutorial.ipynb

## 1. Clean Text

In [23]:
import nltk
import json
import spacy
import re

content = []

content = []
with open('total.json') as json_data:
    content = json.load(json_data)

### Author-Doc List

In [24]:
# Get all author names and their corresponding document IDs.
author2doc = dict()

i = 0
for entry in content:
    sender = entry['Sender'].replace('\n',' ')
    if not author2doc.get(sender):
        # This is a new author.
        #author2doc[sender] = []
        author2doc[sender] = [i]
    # Add document IDs to author.
    else:
        author2doc[sender].append(i)
    i = i + 1
    
i = 0    
for entry in content:
    receiver = entry['Receiver'].replace('\n',' ')
    if not author2doc.get(receiver):
        # This is a new author.
        author2doc[receiver] = []
        author2doc[receiver] = [i]
    # Add document IDs to author.
    else:
        author2doc[receiver].append(i)
    i = i + 1


### Clean text data

In [25]:
nlp = spacy.load('en')

### using both title and abstract
abstract = []
for entry in content:
    title = entry['Title'].replace('\n',' ')
    title = title.replace('/u',' ')
    #sender = entry['Sender'].replace('\n',' ')
    #receiver = entry['Receiver'].replace('\n',' ')
    abst = entry['Content'].replace('\n',' ')
    abst = abst.replace('/u',' ')
    abst = abst.replace('%',' ')
    entry_str = title+' '+abst
    entry_str = re.sub(r'\b\w{1,3}\b', '',entry_str)
    abstract.append(entry_str)

In [26]:
### Load stopwords

from nltk.corpus import stopwords
d = {}
stopword = stopwords.words('english')

In [27]:
### lemmatization, bigrams
#%%time
processed_docs = []    
for doc in nlp.pipe(abstract, n_threads=4, batch_size=100):
    # Process document using Spacy NLP pipeline.
    
    ents = doc.ents  # Named entities.

    # Keep only words (no numbers, no punctuation).
    # Lemmatize tokens, remove punctuation and remove stopwords.
    doc = [token.lemma_ for token in doc if token.is_alpha and not token.is_stop]

    # Remove common words from a stopword list.
    doc = [token for token in doc if token not in stopword]

    # Add named entities, but only if they are a compound of more than word.
    doc.extend([str(entity) for entity in ents if len(entity) > 1])
    
    processed_docs.append(doc)

abstract_all = processed_docs
del processed_docs

from gensim.models import Phrases
# Add bigrams and trigrams to docs (only ones that appear 20 times or more).
bigram = Phrases(abstract_all, min_count=20)
for idx in range(len(abstract_all)):
    for token in bigram[abstract_all[idx]]:
        if '_' in token:
            # Token is a bigram, add to document.
            abstract_all[idx].append(token)



### Remove rare and common tokens, customizable

In [28]:
from gensim.corpora import Dictionary
dictionary = Dictionary(abstract_all)

# Remove rare and common tokens.
# Filter out words that occur too frequently or too rarely.
max_freq = 0.2
min_wordcount = 80
dictionary.filter_extremes(no_below=min_wordcount, no_above=max_freq)

_ = dictionary[0]  # This sort of "initializes" dictionary.id2token.

## 2. Best Model Selection

In [29]:
import nltk
import json
import pyLDAvis
import pyLDAvis.gensim  # don't skip this
import matplotlib.pyplot as plt
%matplotlib inline
import random
from gensim.models import CoherenceModel
import gensim
from gensim import corpora,models
mallet_path = '/Users/Maggie/Downloads/mallet-2.0.8/bin/mallet' # update this path
from nltk.corpus import stopwords 
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.stem.wordnet import WordNetLemmatizer
import string


Total = []
for c in content:
    ##using both title and content
    total = c['Title']
    Total.append(total)
    
stop = set(stopwords.words('english'))
exclude = set(string.punctuation) 
lemma = WordNetLemmatizer()
def clean(doc):
    stop_free = " ".join([i for i in doc.lower().split() if i not in stop])
    punc_free = ''.join(ch for ch in stop_free if ch not in exclude)
    normalized = " ".join(lemma.lemmatize(word) for word in punc_free.split())
    return normalized

doc_clean = [clean(entry).split() for entry in Total]



[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/Maggie/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/Maggie/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [30]:
def bestModel(abstract_all):
    co_score_tfidf = []
    co_score_lda = []
    co_score_mallet = []

    for i in range(0,10):
        random.shuffle(abstract_all)

        training = abstract_all[:round(len(abstract_all)*0.6)]
        test = abstract_all[round(len(abstract_all)*0.6):]

        doc_clean_train = [entry for entry in training]
        doc_clean_test = [entry for entry in test]
        # Creating the term dictionary of our courpus, where every unique term is assigned an index. 
        dictionary_tr = corpora.Dictionary(doc_clean_train)
        dictionary_te = corpora.Dictionary(doc_clean_test)
        dictionary = corpora.Dictionary(abstract_all)
        # Converting list of documents (corpus) into Document Term Matrix using dictionary prepared above.
        doc_term_matrix_te = [dictionary_te.doc2bow(doc) for doc in doc_clean_test]
        doc_term_matrix_tr = [dictionary_tr.doc2bow(doc) for doc in doc_clean_train]
        doc_term_matrix = [dictionary.doc2bow(doc) for doc in abstract_all]

        #mystring = mystring..decode(‘utf-8’)

        tfidf = models.TfidfModel(doc_term_matrix)
        corpus_tfidf = tfidf[doc_term_matrix_tr]
        corpus_tfidf_te = tfidf[doc_term_matrix_te]

        lda_model_tfidf = gensim.models.LdaMulticore(corpus_tfidf, num_topics=5, id2word=dictionary_tr, passes=2, workers=4)

        Lda = gensim.models.ldamodel.LdaModel
        ldamodel = Lda(doc_term_matrix_tr, num_topics=5, id2word = dictionary_tr, passes=50)
        ldamallet = gensim.models.wrappers.LdaMallet(mallet_path, corpus=doc_term_matrix_tr, num_topics=20, id2word=dictionary_tr)
    
        #tfidf
        coherence_model_ldatfidf = CoherenceModel(model=lda_model_tfidf, texts=doc_clean_test, dictionary=dictionary_te, coherence='c_v')
        coherence_ldatfidf = coherence_model_ldatfidf.get_coherence()
    
        co_score_tfidf.append(coherence_ldatfidf)
    
        #lda
        coherence_model = CoherenceModel(model=ldamodel, texts=doc_clean_test, dictionary=dictionary_te, coherence='c_v')
        coherence_lda = coherence_model.get_coherence()
    
        co_score_lda.append(coherence_lda)
    
        #mallet
        coherence_model_ldamallet = CoherenceModel(model=ldamallet, texts=doc_clean_test, dictionary=dictionary_te, coherence='c_v')
        coherence_ldamallet = coherence_model_ldamallet.get_coherence()
    
        co_score_mallet.append(coherence_ldamallet)
    
      
    
    avg_co_lda = sum(co_score_lda)/10
    avg_co_tfidf = sum(co_score_lda)/10
    avg_co_mallet = sum(co_score_mallet)/10

    result = {avg_co_lda:'lda',avg_co_tfidf:'tfidf',avg_co_mallet:'mallet'}
    maximum = max([avg_co_lda,avg_co_tfidf,avg_co_mallet])
    best = result[maximum]
    
    return best
    
    
    

In [31]:
def compute_coherence_values(total, best, limit, start=2, step=3):
    """
    Compute c_v coherence for various number of topics

    Parameters:
    ----------
    dictionary : Gensim dictionary
    corpus : Gensim corpus
    texts : List of input texts
    limit : Max num of topics

    Returns:
    -------
    model_list : List of LDA topic models
    coherence_values : Coherence values corresponding to the LDA model with respective number of topics
    """
    random.shuffle(total)

    training = total[:round(len(total)*0.6)]
    test = total[round(len(total)*0.6):]

    doc_clean_train = [clean(entry).split() for entry in training]
    doc_clean_test = [clean(entry).split() for entry in test]
    # Creating the term dictionary of our courpus, where every unique term is assigned an index. 
    dictionary_tr = corpora.Dictionary(doc_clean_train)
    dictionary_te = corpora.Dictionary(doc_clean_test)
    dictionary = corpora.Dictionary(doc_clean)
    # Converting list of documents (corpus) into Document Term Matrix using dictionary prepared above.
    doc_term_matrix_te = [dictionary_te.doc2bow(doc) for doc in doc_clean_test]
    doc_term_matrix_tr = [dictionary_te.doc2bow(doc) for doc in doc_clean_train]
    doc_term_matrix = [dictionary.doc2bow(doc) for doc in doc_clean]
     
    coherence_values = []
    model_list = []
    if(best == 'lda'):
        for n in range(start, limit, step):
            Lda = gensim.models.ldamodel.LdaModel
            ldamodel = Lda(doc_term_matrix_tr, num_topics=n, id2word = dictionary_tr, passes=50)
            coherence_model = CoherenceModel(model=ldamodel, texts=doc_clean_test, dictionary=dictionary_te, coherence='c_v')
            coherence_lda = coherence_model.get_coherence()
    
            coherence_values.append(coherence_lda)
         
    if(best == 'tfidf'):
        for n in range(start, limit, step):
            tfidf = models.TfidfModel(doc_term_matrix)
            corpus_tfidf = tfidf[doc_term_matrix_tr]
            corpus_tfidf_te = tfidf[doc_term_matrix_te]

            lda_model_tfidf = gensim.models.LdaMulticore(corpus_tfidf, num_topics=n, id2word=dictionary_tr, passes=2, workers=4)
            coherence_model_ldatfidf = CoherenceModel(model=lda_model_tfidf, texts=doc_clean_test, dictionary=dictionary_te, coherence='c_v')
            coherence_ldatfidf = coherence_model_ldatfidf.get_coherence()
    
            coherence_values.append(coherence_ldatfidf)
   
    if(best == 'mallet'):
        for n in range(start, limit, step):
            ldamallet = gensim.models.wrappers.LdaMallet(mallet_path, corpus=doc_term_matrix_tr, num_topics=n, id2word=dictionary_tr)
            coherence_model_ldamallet = CoherenceModel(model=ldamallet, texts=doc_clean_test, dictionary=dictionary_te, coherence='c_v')
            coherence_ldamallet = coherence_model_ldamallet.get_coherence()
    
            coherence_values.append(coherence_ldamallet)
            

    return coherence_values





In [32]:
def getOptimal(start,limit,step,coherence):
    x = range(start, limit, step)
    xlist = []
    for i, cv in zip(x,coherence):
        #print("Num Topics =", i, " has Coherence Value of", round(cv, 4))
        xlist.append(i)
    
    optimal = []
    last_x = start
    last_y = coherence[0]
    #last_slope = 1
    for i,cv in enumerate(coherence):
        #print("Num Topics =", m, " has Coherence Value of", round(cv, 4))
        last_slope = (cv-last_y)/step
        if i < len(coherence) - 1:
            next_y = coherence[i + 1]
            next_slope = (next_y-cv)/step
            if next_slope <= last_slope and next_slope >= 0  and i > 0:
                optimal.append((xlist[i]))
        else:
            break
        last_y = cv
        #last_x = i
    return min(optimal)

In [33]:
##Run all of them together to get best model and number of topics 

## "Total" should be replaced with "abstract_all"
best = bestModel(abstract_all)
print(best)
coherence = compute_coherence_values(Total, best, limit=40, start=2, step=6)
optimal_topics = getOptimal(2,40,6,coherence)
print(optimal_topics)

mallet
14


In [34]:
print(type(abstract_all))

<class 'list'>


### Define corpus

In [38]:
dictionary = corpora.Dictionary(abstract_all)

In [41]:
### AT Corpus
atcorpus = [dictionary.doc2bow(doc) for doc in abstract_all]

### LDA Mallet Corpus
from gensim.test.utils import datapath, get_tmpfile, common_texts
from gensim.corpora import MalletCorpus
from gensim.corpora import Dictionary

# Write corpus in Mallet format to disk
output_fname = get_tmpfile("corpus.mallet")
MalletCorpus.serialize(output_fname, atcorpus, dictionary)

mallet_corpus = MalletCorpus(output_fname)

malcorpus = list()

for t in mallet_corpus:
    malcorpus.append(t)

### LDA-tfidf Corpus
from operator import itemgetter
import gensim
from gensim import corpora,models
tfidf = models.TfidfModel(atcorpus)
corpus_tfidf = tfidf[atcorpus]

l = list()
for t in corpus_tfidf:
    l.append(t)

index = 0
tfidfcorpus = []
for i in l:
    index +=1
    common_denom = min(i,key=itemgetter(1))[1] if i else None
    if common_denom is not None:
        new_list = []
        for f in i:
            n = f[1]/common_denom
            new_list.append((f[0],int(n)))
        tfidfcorpus.append(new_list)
    else:
        #print(index)
        new_list = []
        for f in i:
            new_list.append(f[0],f[1])
        tfidfcorpus.append(new_list)

In [8]:
#print(malcorpus[10])
#print(tfidfcorpus[10])
#print(atcorpus[10])

## 2. Build Models and Assess Coherence score

In [42]:
from gensim.models import AuthorTopicModel
def getModel(model_type, num):
    corpus_list = {'atcorpus': atcorpus, 'malcorpus':malcorpus,'tfidfcorpus':tfidfcorpus}
    model_list = []
    if model_type in corpus_list:
        for i in range(5):
            model = AuthorTopicModel(corpus=corpus_list[model_type], num_topics=num, id2word=dictionary.id2token, \
                        author2doc=author2doc, chunksize=2000, passes=100, gamma_threshold=1e-10, \
                        eval_every=0, iterations=1, random_state=i)
            top_topics = model.top_topics(texts = abstract_all,dictionary = dictionary, coherence = 'c_v')
            tc = sum([t[1] for t in top_topics])
            model_list.append((model, tc))
        return model_list
    else:
        return 'please put in current corpus name'

In [43]:
def getCoherence(model_list):
    model, tc = max(model_list, key=lambda x: x[1])
    return 'Topic coherence: %.3e' %tc

In [44]:
def saveModel(model,model_type):
    model.save('/tmp/model.'+model_type)
    return '/tmp/model.'+model_type

In [45]:
def loadModel(model_path):
    model = AuthorTopicModel.load(model_path)
    return model

In [46]:
%%time
ATmodel_list = getModel('atcorpus',optimal_topics)
LDAtfidf_list = getModel('tfidfcorpus',optimal_topics)
LDAmallet_list = getModel('malcorpus',optimal_topics)

CPU times: user 42min 6s, sys: 1min 57s, total: 44min 3s
Wall time: 45min 33s


In [47]:
ATmodel, AT_tc = max(ATmodel_list, key=lambda x: x[1])
TFIDFmodel, TFIDF_tc = max(LDAtfidf_list, key=lambda x: x[1])
Malletmodel, Mallet_tc = max(LDAmallet_list, key=lambda x: x[1])
#print(model)
print('AT Topic coherence: %.3e' %AT_tc)
print('TFIDF Topic coherence: %.3e' %TFIDF_tc)
print('Mallet Topic coherence: %.3e' %Mallet_tc)

AT Topic coherence: 6.869e+00
TFIDF Topic coherence: 7.555e+00
Mallet Topic coherence: 1.108e+01


## 3. Some Insights

### See Topics and Keywords

In [48]:
malletmodel = AuthorTopicModel.load(saveModel(Malletmodel, 'mallet'))

In [49]:
tfidfmodel = AuthorTopicModel.load(saveModel(TFIDFmodel, 'tfidf'))

In [50]:
atmodel = tfidfmodel = AuthorTopicModel.load(saveModel(ATmodel, 'lda'))

In [51]:
def showTopics(model, num):
    topics = []
    i = 1
    for topic in model.show_topics(num_topics=num):
        words = []
        for word, prob in model.show_topic(topic[0]):
            words.append(word)
        print('Topic '+str(i)+': ')
        print(words[2]+' '+words[1]+' '+words[0])
        print(*words)
        print()
        i += 1
        topics.append(words[2]+' '+words[1]+' '+words[0])
    return topics

In [52]:
topics = showTopics(tfidfmodel, optimal_topics)

Topic 1: 
base result method
method result base model propose image system time datum error

Topic 2: 
ligand complex reaction
reaction complex ligand catalyst synthesis bond product state catalyze study

Topic 3: 
case study patient
patient study case disease associate woman group factor diagnosis risk

Topic 4: 
gene cell protein
protein cell gene plant expression specie activity pathway role growth

Topic 5: 
expression gene cell
cell gene expression drug study effect treatment vitro level protein

Topic 6: 
health datum study
study datum health analysis medical student system result technology factor

Topic 7: 
study patient analysis
analysis patient study cancer meta meta_analysis risk result include review

Topic 8: 
task experiment effect
effect experiment task visual participant result study group influence stimulus

Topic 9: 
health study patient
patient study health treatment trial care clinical therapy intervention child

Topic 10: 
increase result study
study result increas

In [53]:
#topic_keyword = {}
#for t in topics:
#    topic_keyword[t] = 

In [54]:
from pprint import pprint
from operator import itemgetter
def show_author(model,name):
    print('\n%s' % name)
    print('Docs:', model.author2doc[name])
    print('Topics:')
    topic_sorted = sorted(atmodel[name],reverse=True, key = itemgetter(1))
    #pprint([(topics[topic[0]],topic[1]) for topic in atmodel[name]])
    pprint([(topics[topic[0]],topic[1]) for topic in topic_sorted])
    
show_author(atmodel, 'Ying Zhang')




Ying Zhang
Docs: [1667, 3620, 4304]
Topics:
[('health datum study', 0.35821758638582424),
 ('gene datum analysis', 0.3295214454780773),
 ('case study patient', 0.0871960669745464),
 ('gene cell protein', 0.08506952979980031),
 ('present report finding', 0.0808561427639484),
 ('study patient analysis', 0.056674362212548755)]


In [55]:
# Make a function that returns similarities based on the Hellinger distance.

from gensim import matutils
import pandas as pd

# Make a list of all the author-topic distributions.
author_vecs = [atmodel.get_author_topics(author) for author in atmodel.id2author.values()]

def similarity(vec1, vec2):
    '''Get similarity between two vectors'''
    dist = matutils.hellinger(matutils.sparse2full(vec1, model.num_topics), \
                              matutils.sparse2full(vec2, model.num_topics))
    sim = 1.0 / (1.0 + dist)
    return sim

def get_sims(vec):
    '''Get similarity of vector to all authors.'''
    sims = [similarity(vec, vec2) for vec2 in author_vecs]
    return sims

def get_table(name, top_n=10, smallest_author=1):
    '''
    Get table with similarities, author names, and author sizes.
    Return `top_n` authors as a dataframe.
    
    '''
    
    # Get similarities.
    sims = get_sims(model.get_author_topics(name))

    # Arrange author names, similarities, and author sizes in a list of tuples.
    table = []
    for elem in enumerate(sims):
        author_name = model.id2author[elem[0]]
        sim = elem[1]
        author_size = len(model.author2doc[author_name])
        if author_size >= smallest_author:
            table.append((author_name, sim, author_size))
            
    # Make dataframe and retrieve top authors.
    df = pd.DataFrame(table, columns=['Author', 'Score', 'Size'])
    df = df.sort_values('Score', ascending=False)[:top_n]
    
    return df

In [56]:
model = atmodel
#get_table('Jonathan L. Sessler')
get_table('Ying Zhang', smallest_author=3)

Unnamed: 0,Author,Score,Size
124,Ying Zhang,1.0,3
126,Yu Wang,0.749266,3
76,Peter J. Stang,0.695495,5
15,Dijana Miric,0.691572,3
4,Angela Stainthorpe,0.674391,3
58,Long Zhao,0.661927,3
97,Tao Liu,0.648764,3
116,Xing Chen,0.62885,3
9,Ben Zhong Tang,0.626219,4
103,Vanja P Nickovic,0.625271,3


In [57]:
author_dict = {}
author_topic_1  ={}
for a in author2doc:
    topic_sorted = sorted(atmodel[a],reverse=True, key = itemgetter(1))
    topic = [(topics[t[0]],t[1]) for t in topic_sorted]
    top1 = topic[0]
    topic = [t for t in topic if (t[1] > 0.75)]
    author_dict[a] = [i[0] for i in topic]
    author_topic_1[a] = top1[0]

In [58]:
#print(author_topic_1)

In [59]:
topic_author_list = {}
for t in topics:
    topic_author_list[t] = ''
    
#print(topic_author_list)
for author, topic_list in author_dict.items():
    if topic_list is not None:
        for t in topic_list:
            topic_author_list[t]+=author+', '

In [60]:
pd.DataFrame.from_dict(topic_author_list, orient='index')

Unnamed: 0,0
base result method,"Gerardo Bosco, Mehdi Pariav, Ramsis F Ghaly, W..."
ligand complex reaction,"Katrin Werwick, Muhammad Afzal, Chul-Hong Koo,..."
case study patient,"Yuyuan Fang, Yancheng Shi, Hao Sun, Bin-Sheng ..."
gene cell protein,"Yu Su, Dakun Yu, Sara Mahmoud, Disong Fu, Zees..."
expression gene cell,"Amin Ghafouri, Aaron A Comeault, Mourad F Rezk..."
health datum study,"Brandie Pugh, Mang Xiao, Abdullah H Baqui, Yua..."
study patient analysis,"Assad Munis, Oladunni Izobo-Matins, Girmay Tek..."
task experiment effect,"Fernando De Ory, Xing Zhang, Pengjun Jiang, Yi..."
health study patient,"Daniel T Nystrom, Johann Beghain, Francisco Ja..."
increase result study,"Hiroaki Matsubara, Jang Yoo, Rolf Sybren Postm..."


In [61]:
# Output JSON file for display
d = []


for topic, authors in topic_author_list.items():
    d.append({"topic": topic,
           "authors":authors})

jsonfile = open('output.json', 'w')
json.dump(d, jsonfile)

## 4. Visualize the Outcome

In [62]:
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, random_state=0)
smallest_author = 2  # Ignore authors with documents less than this.
authors = [malletmodel.author2id[a] for a in model.author2id.keys() if len(model.author2doc[a]) >= smallest_author]
_ = tsne.fit_transform(model.state.gamma[authors, :])  # Result stored in tsne.embedding_

In [63]:
from bokeh.io import output_notebook
output_notebook()

In [65]:
from bokeh.palettes import all_palettes
from sklearn.cluster import KMeans
import numpy as np
import bokeh.plotting
from bokeh.plotting import figure
from sklearn import datasets


kmean = KMeans(n_clusters=15)
kmean.fit(tsne.embedding_)

colormap = [
    "#1f77b4", "#aec7e8", "#ff7f0e", "#ffbb78", "#2ca02c",
    "#98df8a", "#d62728", "#ff9896", "#9467bd", "#c5b0d5",
    "#8c564b", "#c49c94", "#e377c2", "#f7b6d2", "#7f7f7f",
    "#c7c7c7", "#bcbd22", "#dbdb8d", "#17becf", "#9edae5"]
author_names = [model.id2author[a] for a in authors]
colorchoice = [colormap[i] for i in kmean.labels_]
#topicchoice = [topics[i] for i in kmean.labels_]
topicchoice = [author_topic_1[a] for a in author_names]

In [68]:
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.transform import linear_cmap
from bokeh.palettes import Spectral6

x = tsne.embedding_[:, 0]
y = tsne.embedding_[:, 1]


# Radius of each point corresponds to the number of documents attributed to that author.
scale = 0.4
author_sizes = [len(model.author2doc[a]) for a in author_names]
radii = [size * scale for size in author_sizes]

source = ColumnDataSource(
        data=dict(
            x=x,
            y=y,
            author_names=author_names,
            author_sizes=author_sizes,
            topic = topicchoice,
            #topic = topics,
            radii=radii,
            color = colorchoice,
        )
    )

# Add author names and sizes to mouse-over info.
hover = HoverTool(
        tooltips=[
        ("author", "@author_names"),
        ("size", "@author_sizes"),
        ("topic","@topic")
        ]
    )


plot = figure(tools=[hover, 'crosshair,pan,wheel_zoom,box_zoom,reset,save,lasso_select'])


plot.scatter('x', 'y', radius='radii', source=source, fill_color = 'color', fill_alpha=0.6, line_color=None)
show(plot)

In [69]:
from bokeh.plotting import figure, output_file, save

output_file("output.html")
save(plot)

'/Users/Maggie/Documents/School/Fall 2018/Capstone/final/output.html'