In [13]:
%load_ext autoreload
%autoreload 2

import os
import char_max_likelihood
import metrics
import pandas as pd
import LSTMGenerator

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Generate Speeches

In [2]:
# Presidents whose speeches are generated and evaluated in this notebook.
presidents = [
    "obama",
    "bush",
    "reagan",
    "trump",
]
# Number of speeches to generate per president and per generator.
n = 2

## LSTM

In [3]:
# Generate n speeches per president with the LSTM generator and persist them.
# NOTE(review): the recorded output of this cell is a RuntimeError raised while
# deserializing a CUDA checkpoint on a machine without CUDA, even though
# force_cpu=True is passed — presumably LSTMGenerator has to forward a
# map_location to torch.load; confirm in LSTMGenerator.
for pres in presidents:
    lstm = LSTMGenerator.LSTMGenerator(pres, force_cpu=True)
    # length=5000 is forwarded to generate_n; its unit (tokens vs. characters)
    # is not visible from this notebook — TODO confirm.
    speeches = lstm.generate_n(n, length=5000)
    lstm.persist(speeches)

  return torch._C._cuda_getDeviceCount() > 0


RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

## CharPred

In [48]:
# Train one character-level language model per president and write n generated
# speeches to ../data/charpred/<president>_generated/<i>.txt.
order = 10  # passed to both train_char_lm and generate_text
letters = 5000*5  # presumably the number of characters per speech — TODO confirm
filepath = "../data/charpred/{}_generated/{}.txt"

for pres in presidents:
    lm = char_max_likelihood.train_char_lm(pres, order)
    # Create the output directory once per president. exist_ok=True replaces
    # the check-then-create sequence that used to run for every speech, and the
    # directory is derived from the single path template above.
    os.makedirs(os.path.dirname(filepath.format(pres, 0)), exist_ok=True)
    for i in range(n):
        s = char_max_likelihood.generate_text(lm, order, letters)
        with open(filepath.format(pres, i), "w") as f:
            f.write(s)

## NGram

In [None]:
# TODO: generate n-gram speeches and save them to ../data/ngram/{}_generated/

# Qualitative Evaluation

In [47]:
# Print the first 700 characters of speech 0 for presidents[1] ("bush"),
# once per generator.
# NOTE: this cell relies on generated_speech_locations, which is defined in the
# "Quantitative Evaluation" section further down; run that cell first.
for loc in generated_speech_locations:
    # Use a context manager so the file handle is closed instead of leaked.
    with open("../data/"+loc+"/"+presidents[1]+"_generated/0.txt") as f:
        speech = f.read()
    print("Generated by " + loc+":")
    print(speech[:700])
    print("\n-------------------------------------\n")

Generated by lstm:
good evening ladies and gentlemen a lot of a better time . the president yes to the iraqi government will be able to the united states . and the united states of the world is to do the world . the world is to be a new challenge . and this will be a new nation . we must continue to work . and the last year that we have been a way to live in the world . and i ask congress to do you to be a lot of people . and so i 'm not a new man . we will not be accomplished with the united states . i ask you to work . i have brought a lot of freedom . the president i have seen the world . i ask congress to join me in the american people . we will make sure that the united states . and we will work alongside

-------------------------------------

Generated by charpred:
Madam Speaker, Vice President having to worry about?

THE PRESIDENT: Eight years. You used to be known as the sun sets on this issue. The funding. This year, we will not leave their employees to set up health savings 

# Quantitative Evaluation

In [34]:
# Column names of the metrics table. They have to match the keys that
# generate_metrics writes for each row ("msl" = mean sentence length,
# "mwl" = mean word length); otherwise the table gains extra all-NaN columns.
metric_list = ["tfidf_cosine", "tfidf_distance", "rouge", "msl", "mwl"]
# Sub-directories of ../data that hold generated speeches, one per generator.
# TODO: add "ngram" once the NGram section writes its speeches.
generated_speech_locations = ["lstm", "charpred"]

In [35]:
def generate_metrics(president):
    """Compute the evaluation metrics of every generator for one president.

    For each entry of ``generated_speech_locations`` the speeches stored under
    ``<location>/<president>_generated`` are scored with the helpers of the
    ``metrics`` module.

    Parameters
    ----------
    president : str
        President name as used in the data directory names, e.g. ``"obama"``.

    Returns
    -------
    pandas.DataFrame
        One row per generator, indexed by ``generated_speech_locations``, with
        the columns ``tfidf_cosine``, ``tfidf_distance``, ``rouge``, ``msl``
        (mean sentence length) and ``mwl`` (mean word length).
    """
    # Explicit column order; also keeps the columns when there are no generators.
    columns = ["tfidf_cosine", "tfidf_distance", "rouge", "msl", "mwl"]
    rows = []
    for loc in generated_speech_locations:
        generated_dir = loc+"/"+president+"_generated"
        mean_cosine, _, _ = metrics.get_cosine_sim_tfidf(president, generated_dir, None, print_results=False)
        mean_distance = metrics.get_top_n_rank_distance(president, generated_dir, None, 15)
        mean_rouge, _, _ = metrics.get_rouge_score(president, generated_dir, None, print_results=False)
        mean_sentence_l = metrics.calculate_mean_sentence_length(generated_dir)
        mean_word_l = metrics.calculate_mean_word_length(generated_dir)
        rows.append({"tfidf_cosine": mean_cosine,
                     "tfidf_distance": mean_distance,
                     "rouge": mean_rouge,
                     "msl": mean_sentence_l,
                     "mwl": mean_word_l})

    # Build the frame once from the collected records: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    return pd.DataFrame(rows, index=list(generated_speech_locations), columns=columns)

## Obama

In [37]:
# Metrics for the generated Obama speeches (one row per generator); the bare
# last expression displays the table.
results = generate_metrics("obama")
results

Unnamed: 0,tfidf_cosine,tfidf_distance,rouge,msl,mwl
lstm,0.046214,601.266998,0.407979,11.772116,3.312004
charpred,0.09246,2964.815994,0.571858,19.314168,4.64863


## Bush

In [36]:
# Metrics for the generated Bush speeches (one row per generator); the bare
# last expression displays the table.
results = generate_metrics("bush")
results

Unnamed: 0,tfidf_cosine,tfidf_distance,rouge,msl,mwl
lstm,0.056604,363.995729,0.353213,10.978056,3.841696
charpred,0.092398,3036.46505,0.48746,17.192233,4.848614


In [31]:
# Render the metrics table currently held in `results` as Markdown.
# print() emits real line breaks; the bare expression only displayed the
# string repr with escaped "\n" sequences.
print(results.to_markdown())

'|          |   tfidf_cosine |   tfidf_distance |    rouge |     msl |     mwl |\n|:---------|---------------:|-----------------:|---------:|--------:|--------:|\n| lstm     |      0.0566038 |          363.996 | 0.353213 | 10.9781 | 3.8417  |\n| charpred |      0.092398  |         3036.47  | 0.48746  | 17.1922 | 4.84861 |'

## Reagan

In [38]:
# Metrics for the generated Reagan speeches (one row per generator); the bare
# last expression displays the table.
results = generate_metrics("reagan")
results

Unnamed: 0,tfidf_cosine,tfidf_distance,rouge,msl,mwl
lstm,0.04075,326.732364,0.34652,9.771818,3.71698
charpred,0.086771,3328.871243,0.520197,18.2,4.776102


## Trump

In [39]:
# Metrics for the generated Trump speeches (one row per generator); the bare
# last expression displays the table.
results = generate_metrics("trump")
results

Unnamed: 0,tfidf_cosine,tfidf_distance,rouge,msl,mwl
lstm,0.035442,208.641637,0.284851,10.834158,2.828583
charpred,0.087156,3139.097618,0.478781,11.947503,4.615896
