In [1]:
%load_ext autoreload
%autoreload 2

import os
import char_max_likelihood
import metrics
import pandas as pd
import LSTMGenerator

Importing Jupyter notebook from lstm_words.ipynb


# Generate Speeches

In [2]:
# Presidents whose corpora are modelled; each name is passed to the generators
# below and is also used to build the "<name>_generated" output directory names.
presidents = [
    "obama",
    "bush",
    "reagan",
    "trump",
]

# Number of speeches generated per president by each model.
n = 10

## LSTM

In [3]:
# Build an LSTMGenerator for every president, ask it for `n` speeches and hand
# the result to its `persist` method.
for pres in presidents:
    lstm = LSTMGenerator.LSTMGenerator(pres)
    # NOTE(review): the unit of `length` (words vs. tokens) is defined inside
    # LSTMGenerator and is not visible here -- confirm before changing 5000.
    speeches = lstm.generate_n(n, length=5000)
    # Presumably writes the speeches under ../data/lstm/<pres>_generated/, which
    # is where the evaluation cells read them from -- verify in LSTMGenerator.
    lstm.persist(speeches)

## CharPred

In [48]:
# Generate `n` speeches per president with the character-level language model
# and write them to ../data/charpred/<president>_generated/<i>.txt.

# `order` is passed unchanged to both train_char_lm and generate_text.
# NOTE(review): presumably the number of context characters of the model -- confirm.
order = 10
# Number of characters requested per speech.
# NOTE(review): 5000*5 looks like "5000 words x ~5 characters" to roughly match
# the LSTM's length=5000 -- confirm.
letters = 5000*5
filepath = "../data/charpred/{}_generated/{}.txt"

for pres in presidents:
    lm = char_max_likelihood.train_char_lm(pres, order)

    # The output directory depends only on the president, so create it once per
    # president rather than re-checking on every speech. Deriving it from
    # `filepath` keeps a single source of truth for the location, and
    # exist_ok=True replaces the separate os.path.exists() check.
    os.makedirs(os.path.dirname(filepath.format(pres, 0)), exist_ok=True)

    for i in range(n):
        s = char_max_likelihood.generate_text(lm, order, letters)
        with open(filepath.format(pres, i), "w") as f:
            f.write(s)

## NGram

In [None]:
# TODO: generate the n-gram speeches and save them to ../data/ngram/{}_generated/

# Qualitative Evaluation

In [47]:
# Print the opening of one generated speech per model so the outputs can be
# compared by eye.

# This cell sits above the Quantitative Evaluation section, where
# `generated_speech_locations` was originally first defined, so on a fresh kernel
# (Restart & Run All) the name did not exist yet. Define it here as well; keep
# the two lists in sync.
generated_speech_locations = ["lstm", "charpred"]

sample_president = presidents[1]  # "bush"
preview_chars = 700  # how many leading characters of each speech to show

for loc in generated_speech_locations:
    sample_path = "../data/" + loc + "/" + sample_president + "_generated/0.txt"
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(sample_path) as f:
        speech = f.read()
    print("Generated by " + loc+":")
    print(speech[:preview_chars])
    print("\n-------------------------------------\n")

Generated by lstm:
good evening ladies and gentlemen a lot of a better time . the president yes to the iraqi government will be able to the united states . and the united states of the world is to do the world . the world is to be a new challenge . and this will be a new nation . we must continue to work . and the last year that we have been a way to live in the world . and i ask congress to do you to be a lot of people . and so i 'm not a new man . we will not be accomplished with the united states . i ask you to work . i have brought a lot of freedom . the president i have seen the world . i ask congress to join me in the american people . we will make sure that the united states . and we will work alongside

-------------------------------------

Generated by charpred:
Madam Speaker, Vice President having to worry about?

THE PRESIDENT: Eight years. You used to be known as the sun sets on this issue. The funding. This year, we will not leave their employees to set up health savings 

# Quantitative Evaluation

In [22]:
# Column names of the metrics table built by generate_metrics, in display order.
metric_list = [
    "tfidf_cosine",
    "tfidf_distance",
    "rouge",
    "mean_sentence_len_ratio",
    "mean_word_len_ratio",
]

# Sub-directories of ../data holding generated speeches, one per model.
# "ngram" is not listed yet; the NGram generation cell is still a TODO.
generated_speech_locations = [
    "lstm",
    "charpred",
]

In [23]:
def generate_metrics(president):
    """Build the evaluation table for one president.

    For every entry of the module-level ``generated_speech_locations`` the
    generated corpus ``"<loc>/<president>_generated"`` is compared against the
    reference corpus ``president`` using the helpers in ``metrics``.

    Parameters
    ----------
    president : str
        Corpus name of the president (e.g. ``"obama"``); passed unchanged to the
        ``metrics`` helpers and used to build the generated-corpus name.

    Returns
    -------
    pandas.DataFrame
        One row per generator location (the index) and one column per entry of
        ``metric_list``, rounded to three decimals. The two ``*_ratio`` columns
        are generated-corpus mean divided by reference-corpus mean.

    Notes
    -----
    The rows are collected in a list and the frame is built once.
    ``DataFrame.append`` was removed in pandas 2.0, and growing a frame row by
    row copies it on every iteration.
    """
    # Reference statistics depend only on the president, not on the generator,
    # so compute them once instead of once per location.
    mean_sentence_l = metrics.calculate_mean_sentence_length(president)
    mean_word_l = metrics.calculate_mean_word_length(president)

    rows = []
    for loc in generated_speech_locations:
        generated = loc + "/" + president + "_generated"

        # NOTE(review): the meaning of the third positional argument (None) and
        # of the discarded tuple elements is defined in `metrics` -- confirm there.
        mean_cosine, _, _ = metrics.get_cosine_sim_tfidf(president, generated, None, print_results=False)
        mean_distance = metrics.get_top_n_rank_distance(president, generated, None, 15)
        mean_rouge, _, _ = metrics.get_rouge_score(president, generated, None, print_results=False)
        mean_sentence_l_g = metrics.calculate_mean_sentence_length(generated)
        mean_word_l_g = metrics.calculate_mean_word_length(generated)

        rows.append({
            "tfidf_cosine": mean_cosine,
            "tfidf_distance": mean_distance,
            "rouge": mean_rouge,
            "mean_sentence_len_ratio": mean_sentence_l_g / mean_sentence_l,
            "mean_word_len_ratio": mean_word_l_g / mean_word_l,
        })

    results = pd.DataFrame(rows, columns=metric_list, index=list(generated_speech_locations))
    return results.round(3)

## Obama

In [24]:
# Metric table for the Obama corpus; the bare `results` on the last line renders
# it as the cell output.
results = generate_metrics("obama")
results

Unnamed: 0,tfidf_cosine,tfidf_distance,rouge,mean_sentence_len_ratio,mean_word_len_ratio
lstm,0.046,601.267,0.408,0.643,0.716
charpred,0.092,3003.545,0.573,0.938,1.002


In [25]:
# Emit the current `results` table (the Obama table from the previous cell) as
# LaTeX source; print() is intentional so the markup appears as plain text.
print(results.to_latex())

\begin{tabular}{lrrrrr}
\toprule
{} &  tfidf\_cosine &  tfidf\_distance &  rouge &  mean\_sentence\_len\_ratio &  mean\_word\_len\_ratio \\
\midrule
lstm     &         0.046 &         601.267 &  0.408 &                    0.643 &                0.716 \\
charpred &         0.092 &        3003.545 &  0.573 &                    0.938 &                1.002 \\
\bottomrule
\end{tabular}



## Bush

In [26]:
# Metric table for the Bush corpus. Note that this rebinds the shared `results`
# name used by the other per-president cells.
results = generate_metrics("bush")
results

Unnamed: 0,tfidf_cosine,tfidf_distance,rouge,mean_sentence_len_ratio,mean_word_len_ratio
lstm,0.057,363.996,0.353,0.633,0.801
charpred,0.092,3021.826,0.488,0.999,0.996


## Reagan

In [27]:
# Metric table for the Reagan corpus. Note that this rebinds the shared `results`
# name used by the other per-president cells.
results = generate_metrics("reagan")
results

Unnamed: 0,tfidf_cosine,tfidf_distance,rouge,mean_sentence_len_ratio,mean_word_len_ratio
lstm,0.041,326.732,0.347,0.529,0.787
charpred,0.089,3334.681,0.521,0.954,0.998


## Trump

In [28]:
# Metric table for the Trump corpus. Note that this rebinds the shared `results`
# name used by the other per-president cells.
results = generate_metrics("trump")
results

Unnamed: 0,tfidf_cosine,tfidf_distance,rouge,mean_sentence_len_ratio,mean_word_len_ratio
lstm,0.035,208.642,0.285,0.867,0.618
charpred,0.083,3048.084,0.478,0.975,1.0
