In [1]:
import pandas
from nltk.tokenize import sent_tokenize

import clean_text_gutentag
import models
import regression_paper

In [2]:
# Load one book from the locally downloaded Gutenberg collection.
# 1777 is the id passed to get_book(); the text printed below is Romeo and Juliet.
reg_paper_obj = regression_paper.Gutenberg_Emotion()
book_entry = reg_paper_obj.get_book(1777)
book = book_entry["text"]

In [3]:
# First 1000 characters of the book text
book[:1000]

"\n\n\n\n\n1595\n\nTHE TRAGEDY OF ROMEO AND JULIET\n\nby William Shakespeare\n\n\n\nDramatis Personae\n\n  Chorus.\n\n  Escalus, Prince of Verona.\n  Paris, a young Count, kinsman to the Prince.\n  Montague, heads of two houses at variance with each other.\n  Capulet, heads of two houses at variance with each other.\n  An old Man, of the Capulet family.\n  Romeo, son to Montague.\n  Tybalt, nephew to Lady Capulet.\n  Mercutio, kinsman to the Prince and friend to Romeo.\n  Benvolio, nephew to Montague, and friend to Romeo\n  Tybalt, nephew to Lady Capulet.\n  Friar Laurence, Franciscan.\n  Friar John, Franciscan.\n  Balthasar, servant to Romeo.\n  Abram, servant to Montague.\n  Sampson, servant to Capulet.\n  Gregory, servant to Capulet.\n  Peter, servant to Juliet's nurse.\n  An Apothecary.\n  Three Musicians.\n  An Officer.\n\n  Lady Montague, wife to Montague.\n  Lady Capulet, wife to Capulet.\n  Juliet, daughter to Capulet.\n  Nurse to Juliet.\n\n  Citizens of Verona; Gentlemen and 

In [4]:
def basic_formatting(text):
    """Clean raw book text and split it into sentences.

    The input is first passed through ``clean_text_gutentag.clean_text`` and
    the cleaned result is then split with NLTK's ``sent_tokenize``.

    Args:
        text: The book text to process.

    Returns:
        Whatever ``sent_tokenize`` returns for the cleaned text
        (a list of sentence strings).
    """
    cleaned = clean_text_gutentag.clean_text(text)
    return sent_tokenize(cleaned)

In [5]:
# Clean the book text, split it into sentences and report how many there are.
sentences = basic_formatting(book)
sentence_count = len(sentences)
print("No. of sentences: {}".format(sentence_count))

No. of sentences: 3334


In [6]:
# Instantiate the project's Models helper; the cells below call its methods
# (emobank_split, emobank_preprocess, gensim_build_vocab, gensim_train, model_vectors, svr).
model = models.Models()

In [7]:
# Model pipeline: split -> preprocess -> build vocab -> train -> extract vectors -> svr.
# Every step is a method of the `model` object created in the previous cell.
# emobank_split() returns three frames; df_test is not used by any cell shown here.
df_train, df_val, df_test = model.emobank_split()
# Only the train and validation frames are turned into inputs (X_*) and targets (y_*).
X_train, y_train, X_val, y_val = model.emobank_preprocess(df_train, df_val)
# Both X_train and X_val are handed to the vocab builder, which returns two models
# (presumably gensim Doc2Vec DM / DBOW models, going by the names -- confirm in models.py).
model_dm, model_dbow = model.gensim_build_vocab(X_train, X_val=X_val)

model.gensim_train(model_dm, model_dbow, X_train)
train_vecs = model.model_vectors(model_dm, model_dbow, X_train)

# NOTE(review): the same two models are trained again on X_val before val_vecs is
# extracted. If gensim_train updates the models in place, train_vecs and val_vecs
# come from different model states -- confirm this is intended.
model.gensim_train(model_dm, model_dbow, X_val)
val_vecs = model.model_vectors(model_dm, model_dbow, X_val)

# The result is a dict with keys 'model', 'score_train', 'score_val', 'rmse_val',
# 'rmse_train' (see the keys() output below); its 'model' entry is used for prediction later.
svr_model = model.svr(train_vecs, y_train, val_vecs, y_val)



In [8]:
# model.svr() returns a dict (not a JSON file), as the dict_keys output below shows.
# List its keys to see what it contains.
svr_model.keys()

dict_keys(['model', 'score_train', 'score_val', 'rmse_val', 'rmse_train'])

In [9]:
# For details on any pipeline method, uncomment and run, e.g.:
# help(model.svr)

In [10]:
# Uncomment to print the full result dict returned by model.svr():
# print(svr_model)

In [11]:
# Preprocess the book sentences and build a vocabulary for them
# (the actual training call is in the next cell).
# NOTE(review): this rebinds model_dm / model_dbow to the objects returned for the book,
# replacing the ones that produced train_vecs for the SVR. If gensim_build_vocab returns
# fresh models, book_vecs will not be in the same vector space the SVR was fitted on and
# its predictions may not be meaningful -- confirm against models.py.
X_book = model.text_preprocess(sentences)
model_dm, model_dbow = model.gensim_build_vocab(X_book)



In [12]:
# Train both models on the preprocessed book sentences, then extract the book's vectors
# with the same helper used for the training data. Order matters: vectors are read
# only after the training call.
model.gensim_train(model_dm, model_dbow, X_book)
book_vecs = model.model_vectors(model_dm, model_dbow, X_book)

In [13]:
# Run the book vectors through the estimator stored under the "model" key
# of the dict returned by model.svr().
fitted_estimator = svr_model["model"]
book_predict = fitted_estimator.predict(book_vecs)

In [14]:
# One prediction per sentence is expected: this should equal the sentence count printed earlier.
len(book_predict)

3334

In [15]:
# Spot-check a single sentence (index N) together with the value predicted for it.
N = 800
picked_sentence, picked_valence = sentences[N], book_predict[N]
print("SENTENCE: {} \n\n\n HAS VALENCE: {}".format(picked_sentence, picked_valence))

SENTENCE: Nay, gentlemen, prepare not to be gone;
    We have a trifling foolish banquet towards. 


 HAS VALENCE: 3.0490166442353015


In [16]:
# Summary statistics (count, mean, std, quartiles, min/max) of the predicted values
# across all sentences. pandas comes from the import cell at the top of the notebook.
book_predict_df = pandas.DataFrame(book_predict)
book_predict_df.describe()

Unnamed: 0,0
count,3334.0
mean,3.040078
std,0.093415
min,2.670226
25%,2.988049
50%,3.039004
75%,3.10055
max,3.341538


In [19]:
# Peek at the first 100 sentences to see how the text was split.
# NOTE(review): this cell's execution count (19) does not follow the previous cell (16);
# re-run the notebook top to bottom before sharing so the outputs are known to be in sync.
sentences[0:100]

['1595\n\nTHE TRAGEDY OF ROMEO AND JULIET\n\nby William Shakespeare\n\n\n\nDramatis Personae\n\n  Chorus.',
 'Escalus, Prince of Verona.',
 'Paris, a young Count, kinsman to the Prince.',
 'Montague, heads of two houses at variance with each other.',
 'Capulet, heads of two houses at variance with each other.',
 'An old Man, of the Capulet family.',
 'Romeo, son to Montague.',
 'Tybalt, nephew to Lady Capulet.',
 'Mercutio, kinsman to the Prince and friend to Romeo.',
 'Benvolio, nephew to Montague, and friend to Romeo\n  Tybalt, nephew to Lady Capulet.',
 'Friar Laurence, Franciscan.',
 'Friar John, Franciscan.',
 'Balthasar, servant to Romeo.',
 'Abram, servant to Montague.',
 'Sampson, servant to Capulet.',
 'Gregory, servant to Capulet.',
 "Peter, servant to Juliet's nurse.",
 'An Apothecary.',
 'Three Musicians.',
 'An Officer.',
 'Lady Montague, wife to Montague.',
 'Lady Capulet, wife to Capulet.',
 'Juliet, daughter to Capulet.',
 'Nurse to Juliet.',
 'Citizens of Verona; Gentl