In [None]:
# To avoid data leakage I could not use the traditional random 80/20 split for ML validation.
# Earlier on I created 2 dataframes (data_set for training and data_set_test for testing)
# so that I could be sure that I had different tweeters in my training and testing sets.
# This is why all the processing has to be run on two dataframes.
# If you were going to go the 80/20 route, this is how you would do it:
# from sklearn.model_selection import train_test_split
# train, test = train_test_split(data_set_bayes, test_size=0.2)
# It randomly selects 20% of the rows as the test set.

# Preparing the testing data:
import re

from nltk.stem.porter import PorterStemmer

# Keep only the relevant columns
data_set_np_test = data_set_test.loc[:, ['text', 'depressed']]
# Cast every cell to str, then strip the whitespace from the otherwise empty tweet cells
data_set_np_test['text'] = data_set_np_test['text'].astype("str").map(str.strip)
# Keep only the rows whose tweet is non-empty now that the whitespace is stripped.
# (The mask is not called `filter`, which would shadow the builtin.)
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the unfiltered frame.
has_text = data_set_np_test["text"] != ""
data_set_np_test = data_set_np_test[has_text].copy()

# Remove URLs and @handles FIRST: once punctuation has been replaced by spaces
# ("http://t.co/x" -> "http   t co x", "@name" -> " name") these patterns no longer match.
data_set_np_test['text'] = data_set_np_test['text'].apply(lambda x: re.sub(r"http\S+", "", x))
data_set_np_test['text'] = data_set_np_test['text'].apply(lambda x: re.sub(r"@\S+", "", x))
# Replace everything that is not an ASCII letter with a space
data_set_np_test['text'] = data_set_np_test['text'].apply(lambda x: re.sub(r"[^A-Za-z]", " ", x))
# Lower-case and collapse every run of whitespace to a single space
data_set_np_test['text'] = data_set_np_test['text'].apply(lambda x: re.sub(r"\s+", " ", x.lower()).strip())

# Stemming the text word by word. porter.stem() treats its argument as ONE word,
# so passing the whole tweet would only ever touch the end of the tweet.
porter = PorterStemmer()
data_set_np_test['text'] = data_set_np_test['text'].apply(
    lambda tweet: ' '.join(porter.stem(word) for word in tweet.split())
)

In [None]:
# Making sure that everything looks good.
# Positional lookup: rows were filtered out of data_set_np_test without resetting
# the index, so a row labelled 1 is not guaranteed to exist any more.
data_set_np_test['text'].iloc[1]

In [None]:
# Preparing the training data:

# .copy() so the column assignments below never write into data_set itself
data_set_bayes = data_set.loc[:, ['depressed', 'text']].copy()
# Removing URLs and @twitter handles
data_set_bayes['text'] = data_set_bayes['text'].apply(lambda x: re.sub(r"http\S+", "", x))
data_set_bayes['text'] = data_set_bayes['text'].apply(lambda x: re.sub(r"@\S+", "", x))
# Same letters-only normalisation that the test tweets receive, so both frames
# present the vectoriser with the same kind of tokens
data_set_bayes['text'] = data_set_bayes['text'].apply(lambda x: re.sub(r"[^A-Za-z]", " ", x))
# Lower-case and collapse every run of whitespace to a single space
data_set_bayes['text'] = data_set_bayes['text'].apply(lambda x: re.sub(r"\s+", " ", x.lower()).strip())

# Stemming word by word. porter.stem() treats its argument as ONE word,
# so passing the whole tweet would only ever touch the end of the tweet.
porter = PorterStemmer()
data_set_bayes['text'] = data_set_bayes['text'].apply(
    lambda tweet: ' '.join(porter.stem(word) for word in tweet.split())
)

In [None]:
#Logistic regression
import os
import pandas as pd
import numpy as np

# train_test_split lives in sklearn.model_selection (the module the grid-search
# cells of this notebook already import from); the old sklearn.cross_validation
# module no longer exists in current scikit-learn releases.
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression as LR
from sklearn.metrics import roc_auc_score as AUC
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Spot-check one cleaned training tweet (the row labelled 5)
training_text = data_set_bayes['text']
training_text[5]

In [None]:
# The following, in effect, is curbing data leakage.
# Because I had so few people providing all of the tweets, the algorithm was picking up on how
# the tweets of the individual people differed, as opposed to how the tweets within a category
# resembled each other and differed from the other category.
# That is, it was using the person as the feature.
# Through trial and error I realized that I can mitigate the impact by extracting the important
# features and deleting from all the tweets the ones that were probably unique to a single person.
# I also didn't want to remove all the stop words, but realized that I did want to remove some of them.
# These are most of the features that I removed:
from nltk.stem.porter import PorterStemmer

# Work on copies so re-running this cell always starts from the prepared frames
train = data_set_bayes.copy()
test = data_set_np_test.copy()

# Words that identify one particular tweeter rather than a category
person_specific_terms = [
    "homeschool", "kid", "kids", "lily", "lilly", "facebook", "marketing",
    "linkedin", "dad", "lebron", "buzzie", "voiceover", "stream", "george", "xxx",
]
# The handful of stop words that should go (deliberately not the full English list)
stop_words_to_drop = ["the", "it", "is", "in", "that", "than"]
terms_to_remove = person_specific_terms + stop_words_to_drop

# The tweets were run through a Porter stemmer during preparation, so each term is
# matched in its stemmed spelling as well as the spelling listed above.
term_stemmer = PorterStemmer()
term_variants = set(terms_to_remove) | {term_stemmer.stem(term) for term in terms_to_remove}

# \b...\b removes whole words only. A bare substring pattern such as "it" or "in"
# would also cut letters out of unrelated words ("thinking" -> "thkg"), and "kid"
# would eat the start of "kids" before the "kids" pattern ever ran.
removal_pattern = re.compile(
    r"\b(?:" + "|".join(re.escape(term) for term in sorted(term_variants)) + r")\b"
)

# Identical treatment for both frames
train['text'] = train['text'].apply(lambda x: removal_pattern.sub("", x))
test['text'] = test['text'].apply(lambda x: removal_pattern.sub("", x))

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfTransformer

In [None]:
#Random forest didn't work for me, but here is the code that I experimented with:
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

# `depressed` is a class label, so the forest has to be a classifier. A
# RandomForestRegressor returns averaged, continuous values, which almost never
# equal a label exactly, so the accuracy computed below was meaningless.
# verbose prints out information about the progress of tree building;
# random_state makes the experiment repeatable.
text_clf_rf = Pipeline([
    ('vect', CountVectorizer(stop_words='english')),
    ('tfidf', TfidfTransformer()),
    ('clf', RandomForestClassifier(n_jobs=-1, verbose=2, random_state=42)),
])

text_clf_rf = text_clf_rf.fit(train.text, train.depressed)

# Performance of the RF classifier: share of test tweets labelled correctly
import numpy as np
predicted = text_clf_rf.predict(test.text)
np.mean(predicted == test.depressed)

In [None]:
# Examining a Naive Bayes model

from sklearn.naive_bayes import MultinomialNB

# The three steps could be created inline inside the Pipeline call, but keeping a
# separate handle on each one is what later allows features of interest to be
# extracted from the individual steps.
tfidf = TfidfTransformer(use_idf=False)
clf = MultinomialNB()
vect = CountVectorizer(ngram_range=(1, 1), max_features=5500)

nb_steps = [('vect', vect), ('tfidf', tfidf), ('clf', clf)]
text_clf = Pipeline(steps=nb_steps).fit(train.text, train.depressed)

In [None]:
# Plotting and saving a confusion matrix for the Naive Bayes model

from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt

# Score the Naive Bayes pipeline here: the only `predicted` computed before this
# cell comes from the random forest experiment, not from text_clf.
predicted_nb = text_clf.predict(test.text)

# The result gets its own name so the imported confusion_matrix function is not overwritten
nb_confusion_matrix = confusion_matrix(test.depressed, predicted_nb)

sn.set(font_scale=1.4)  # for label size
# switch to cmap='binary' to make it black and white
sn.heatmap(nb_confusion_matrix, annot=True, annot_kws={"size": 16}, fmt='g', cmap='coolwarm')
# confusion_matrix puts the true labels on the rows and the predictions on the columns
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.savefig('confusion_matrix.png', bbox_inches='tight')  # 'tight' avoids white borders
plt.show()

In [None]:
#Saving the model:
import pickle
filename_nb_depression_model = '/home/nb_depression_model.sav'
# `with` closes (and flushes) the file even if pickling fails part-way
with open(filename_nb_depression_model, 'wb') as nb_model_file:
    pickle.dump(text_clf, nb_model_file)

In [None]:
#Loading the model:
import pickle
from sklearn.naive_bayes import MultinomialNB
filename_nb_depression_model = '/home/nb_depression_model.sav'
# NOTE: unpickling executes code stored in the file - only load model files you created yourself.
# `with` closes the file handle once the model has been read.
with open(filename_nb_depression_model, 'rb') as nb_model_file:
    nb_clf = pickle.load(nb_model_file)

In [None]:
# To have the model predict new text, put it in Xnew (one string per text)
Xnew = ['Put Text You Want Analyzed Here']
# make a prediction: predict_proba returns one row of class probabilities per text
ynew = nb_clf.predict_proba(Xnew)
# show the predicted probabilities for every input, not just the first one
# NOTE(review): column 1 is treated as "depressed" and column 0 as "not depressed";
# this assumes the labels are 0/1 - confirm with nb_clf.classes_.
for i in range(len(Xnew)):
    print("Percentage match to depressed people's tweets=%s" % ((ynew[i][1])*100))
    print("Percentage match to non-depressed people's tweets=%s" % ((ynew[i][0])*100))

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV

vect = CountVectorizer(max_features=5000, ngram_range=(1, 2))
# If you want to run a regular regression instead of a cross-validated one,
# just erase the two letters "CV"
clf = LogisticRegressionCV(max_iter=100)
tfidf = TfidfTransformer(use_idf=True)

text_clf_lr = Pipeline([('vect', vect), ('tfidf', tfidf), ('clf', clf)])

text_clf_lr = text_clf_lr.fit(train.text, train.depressed)

import numpy as np
# Score on `test`, the held-out frame every other model in this notebook is evaluated on
# (`test1` is not defined anywhere, and the confusion-matrix cells that follow
# compare `predicted` against test.depressed).
predicted = text_clf_lr.predict(test.text)
np.mean(predicted == test.depressed)

In [None]:
#Confusion matrix for the LR model:
from sklearn.metrics import confusion_matrix
# Store the result under its own name so the imported function is not overwritten
lr_confusion_matrix = confusion_matrix(test.depressed, predicted)
lr_confusion_matrix

In [None]:
#plotting the LR confusion matrix as a heatmap
from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt

# Store the result under its own name so the imported function is not overwritten
lr_confusion_matrix = confusion_matrix(test.depressed, predicted)

sn.set(font_scale=1.4)  # for label size
# switch to cmap='binary' to make it black and white
sn.heatmap(lr_confusion_matrix, annot=True, annot_kws={"size": 16}, fmt='g', cmap='coolwarm')
# confusion_matrix puts the true labels on the rows and the predictions on the columns
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.savefig('lr_confusion_matrix.png', bbox_inches='tight')
plt.show()

In [None]:
#Extracting the top 20 most influential features (aka words) from the model:
def show_most_informative_features(vect, clf, n=20):
    """Print the n lowest and n highest coefficients of a linear model side by side.

    vect -- fitted vectoriser; supplies the feature (word / n-gram) names
    clf  -- fitted linear classifier; the first row of its ``coef_`` is used
    n    -- number of features to print from each end of the ranking

    Each printed row holds the next most negative coefficient with its feature
    on the left and the next most positive one on the right. Returns nothing.
    """
    # Newer scikit-learn vectorisers expose get_feature_names_out(); older ones
    # only have get_feature_names(). Use whichever this vectoriser provides.
    get_names = getattr(vect, 'get_feature_names_out', None) or vect.get_feature_names
    feature_names = get_names()
    # Ascending by coefficient: most negative first, most positive last
    coefs_with_fns = sorted(zip(clf.coef_[0], feature_names))
    # Pair the first n entries with the last n entries taken in reverse order
    top = zip(coefs_with_fns[:n], coefs_with_fns[:-(n + 1):-1])
    for (coef_1, fn_1), (coef_2, fn_2) in top:
        print("\t%.4f\t%-15s\t\t%.4f\t%-15s" % (coef_1, fn_1, coef_2, fn_2))
        
# Take the vectoriser and classifier from the fitted LR pipeline itself: the globals
# `vect` and `clf` are reassigned by several other cells, so they may belong to a
# different model depending on which cell ran last.
show_most_informative_features(text_clf_lr.named_steps['vect'], text_clf_lr.named_steps['clf'], n=20)

In [None]:
#Saving the model
try:
    # joblib is a standalone package; recent scikit-learn no longer bundles it
    import joblib
except ImportError:
    # older scikit-learn releases ship their own copy
    from sklearn.externals import joblib
joblib.dump(text_clf_lr, 'model.pkl')

In [None]:
# Serialising the model to bytes and loading it straight back (nothing is written to disk)
lr_clf = pickle.loads(pickle.dumps(text_clf_lr))

In [None]:
#Another way to save and load the model:
import pickle
from sklearn.linear_model import LogisticRegression

# (the stray leading "=" that made this line a SyntaxError is removed)
filename_lr_depression_model = 'lr_depression_model.sav'
# `with` closes the file even if pickling fails part-way
with open(filename_lr_depression_model, 'wb') as lr_model_file:
    pickle.dump(text_clf_lr, lr_model_file)

# NOTE: unpickling executes code stored in the file - only load model files you created yourself.
with open(filename_lr_depression_model, 'rb') as lr_model_file:
    lr_clf = pickle.load(lr_model_file)

In [None]:
# To have the model predict new text, put it in Xnew (one string per text)
Xnew = ['Put Text You Want Analyzed Here']
# make a prediction: predict_proba returns one row of class probabilities per text
ynew = lr_clf.predict_proba(Xnew)
# show the predicted probabilities for every input, not just the first one
# NOTE(review): column 1 is treated as "depressed" and column 0 as "not depressed";
# this assumes the labels are 0/1 - confirm with lr_clf.classes_.
for i in range(len(Xnew)):
    print("Percentage match to depressed people's tweets=%s" % ((ynew[i][1])*100))
    print("Percentage match to non-depressed people's tweets=%s" % ((ynew[i][0])*100))

In [None]:
# "SVM" model: a linear classifier trained with stochastic gradient descent

from sklearn.linear_model import SGDClassifier

tfidf = TfidfTransformer(use_idf=False)
vect = CountVectorizer(ngram_range=(1, 1), max_features=5000)
# NOTE(review): per the scikit-learn docs loss='log' fits a logistic-regression
# objective (loss='hinge' is the linear SVM) - confirm which one is wanted.
# NOTE(review): `n_iter` and the spelling 'log' are only accepted by older
# scikit-learn releases - confirm against the installed version.
clf = SGDClassifier(random_state=42, n_iter=35, alpha=0.0009, penalty='l2', loss='log')

svm_steps = [('vect', vect), ('tfidf', tfidf), ('clf', clf)]
text_clf_svm = Pipeline(steps=svm_steps).fit(train.text, train.depressed)

# Share of test tweets whose predicted label matches the true one
predicted_svm = text_clf_svm.predict(test.text)
svm_hits = predicted_svm == test.depressed
np.mean(svm_hits)

In [None]:
# Precision / recall / F1 per class for the SVM predictions
from sklearn import metrics
svm_report = metrics.classification_report(test.depressed, predicted_svm)
print(svm_report)

In [None]:
#Graphing and calculating ROC curve
from sklearn import metrics
import matplotlib.pyplot as plt

# A ROC curve sweeps a threshold over a continuous score. Feeding it the hard 0/1
# predictions leaves a single operating point, so the curve and its AUC only
# describe that one threshold. decision_function gives the signed distance to the
# decision boundary for every test tweet.
svm_scores = text_clf_svm.decision_function(test.text)
fpr, tpr, threshold = metrics.roc_curve(test.depressed, svm_scores)
roc_auc = metrics.auc(fpr, tpr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')  # chance line
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

In [None]:
# Demo: what the vocabulary dictionary looks like when it is capped at 5 words.
# Raise max_features to see more words; widen ngram_range to look at groups of
# 2 or 3 words as well as / instead of single words.
# Note: this reassigns the global `vect` to the small demo vectoriser.
vect = CountVectorizer(ngram_range=(1, 1), max_features=5).fit(train.text)
print(vect.vocabulary_)

In [None]:
# Turn the training tweets into a count matrix over those 5 words and print
# its shape, its type and its dense contents
vector = vect.transform(train.text)
for summary in (vector.shape, type(vector), vector.toarray()):
    print(summary)

In [None]:
# Apply term-frequency weighting to the count matrix and print the result.
# Set use_idf=True to see the effect of IDF as well.
tfidf_transformer = TfidfTransformer(use_idf=False)
X_train_tfidf = tfidf_transformer.fit_transform(vector)
dense_tf = X_train_tfidf.toarray()
print(dense_tf)

In [None]:
# Display the (name, estimator) steps that make up the SVM pipeline
text_clf_svm.steps

In [None]:
#extracting the top 65 features of the SVM model:
# Read the vectoriser and classifier from the fitted pipeline. By this point the
# global `vect` is the 5-word demo vectoriser, so zipping its 5 names against the
# 5000 coefficients would silently pair the wrong words with the wrong weights.
svm_vect = text_clf_svm.named_steps['vect']
svm_clf = text_clf_svm.named_steps['clf']
# Newer scikit-learn vectorisers expose get_feature_names_out(); older ones
# only have get_feature_names(). Use whichever this vectoriser provides.
get_names = getattr(svm_vect, 'get_feature_names_out', None) or svm_vect.get_feature_names
feature_names = get_names()
coefs_with_fns = sorted(zip(svm_clf.coef_[0], feature_names))
#65 is the number of features we want extracted from each end of the ranking
n_top = 65
top = zip(coefs_with_fns[:n_top], coefs_with_fns[:-(n_top + 1):-1])
for (coef_1, fn_1), (coef_2, fn_2) in top:
        print("\t%.4f\t%-15s\t\t%.4f\t%-15s" % (coef_1, fn_1, coef_2, fn_2))

In [None]:
#heatmap confusion matrix for the SVM model
from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt

# Store the result under its own name so the imported function is not overwritten
svm_confusion_matrix = confusion_matrix(test.depressed, predicted_svm)

sn.set(font_scale=1.4)  # for label size
# switch to cmap='binary' to make it black and white
sn.heatmap(svm_confusion_matrix, annot=True, annot_kws={"size": 16}, fmt='g', cmap='coolwarm')
# confusion_matrix puts the true labels on the rows and the predictions on the columns
plt.xlabel('Predicted label')
plt.ylabel('True label')
# Own file name: saving to 'lr_confusion_matrix.png' would overwrite the LR figure
plt.savefig('svm_confusion_matrix.png', bbox_inches='tight')
plt.show()

In [None]:
# Grid search wasn't appropriate for me because of my data leakage problem,
# but it is usually a good way to work out how to tune the hyperparameters.
# Two things come out of it: the best parameter combination, and the best
# cross-validated score that goes with it.

# Fine tuning NB
from sklearn.model_selection import GridSearchCV

# Candidate values, keyed as <pipeline step name>__<parameter name>
nb_ngram_ranges = [(1, 1), (1, 2), (1, 3), (2, 3), (2, 2), (3, 3)]
parameters = {
    'vect__ngram_range': nb_ngram_ranges,
    'tfidf__use_idf': (True, False),
    'clf__alpha': (1e-2, 1e-3),
}
# Try every combination and keep the best-scoring one
gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1).fit(train.text, train.depressed)
# the winning parameter combination
gs_clf.best_params_

In [None]:
# Best mean cross-validated score found by the Naive Bayes grid search
gs_clf.best_score_

In [None]:
# Fine tuning the logistic regression pipeline
from sklearn.model_selection import GridSearchCV

# Candidate values, keyed as <pipeline step name>__<parameter name>
lr_ngram_ranges = [(1, 1), (1, 2), (2, 2), (1, 3), (2, 3), (3, 3)]
parameters_lr = {
    'vect__ngram_range': lr_ngram_ranges,
    'tfidf__use_idf': (True, False),
}
gs_clf_lr = GridSearchCV(text_clf_lr, parameters_lr, n_jobs=-1).fit(train.text, train.depressed)
# the winning parameter combination
gs_clf_lr.best_params_

In [None]:
# Best mean cross-validated score found by the logistic regression grid search
gs_clf_lr.best_score_

In [None]:
#Fine tuning SVM
from sklearn.model_selection import GridSearchCV
# Keys are <pipeline step name>__<parameter name>. The classifier step of
# text_clf_svm is called 'clf', so its alpha is addressed as 'clf__alpha'
# ('clf-svm__alpha' names a step that does not exist in the pipeline).
parameters_svm = {'vect__ngram_range': [(1, 1), (1, 2)],
               'tfidf__use_idf': (True, False),
               'clf__alpha': (1e-2, 1e-3, 1e-4, 1e-5, 1e-6),}
gs_clf_svm = GridSearchCV(text_clf_svm, parameters_svm, n_jobs=-1)
gs_clf_svm = gs_clf_svm.fit(train.text, train.depressed)
gs_clf_svm.best_params_

In [None]:
# Best mean cross-validated score found by the SVM grid search
gs_clf_svm.best_score_

In [None]:
#The neural network was probably overfitting badly because of the data leakage but it was fun to play with
#Checking that TensorFlow works
import tensorflow as tf
hello = tf.constant('Hello, TensorFlow!')
if hasattr(tf, 'Session'):
    # TensorFlow 1.x: run the constant inside a session; `with` closes the
    # session afterwards instead of leaving it open for the rest of the notebook
    with tf.Session() as sess:
        print(sess.run(hello))
else:
    # TensorFlow 2.x has no tf.Session; tensors can be read directly
    print(hello.numpy())
#Should print out Hello, TensorFlow! if it is working

In [None]:
#Taken from https://vgpena.github.io/classifying-tweets-with-keras-and-tensorflow/

import numpy as np

data_set_np= data_set.loc[:,['text', 'depressed']]
# .values returns the frame as a NumPy array (column 0 = text, column 1 = label).
# DataFrame.as_matrix() did the same but was removed in pandas 1.0.
training = data_set_np.values

In [None]:
# The tweets are the first column of the training matrix
train_x = list(training[:, 0])

In [None]:
# The labels are the second column of the training matrix
train_y = np.asarray(list(training[:, 1]))

In [None]:
import json
import keras
import keras.preprocessing.text as kpt
from keras.preprocessing.text import Tokenizer

In [None]:
# Vocabulary cap: only the 7000 most popular words found in our dataset are used
max_words = 7000

# Build a Tokenizer with that cap and let it learn the vocabulary from our tweets
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_x)

# word_index is the Tokenizer's word -> ID lookup
dictionary = tokenizer.word_index

# Write the lookup to disk so it can be reused later
with open('dictionary.json', 'w') as dictionary_file_nn:
    dictionary_file_nn.write(json.dumps(dictionary))



In [None]:
def convert_text_to_index_array(text):
    """Return the list of `dictionary` IDs for the words of one tweet.

    The text is split with keras' text_to_word_sequence and every resulting
    word is looked up in the module-level `dictionary`; a word that is not in
    the dictionary raises KeyError.
    """
    word_ids = []
    for word in kpt.text_to_word_sequence(text):
        word_ids.append(dictionary[word])
    return word_ids

# One list of word IDs per tweet, in the same order as train_x
allWordIndices = [convert_text_to_index_array(text) for text in train_x]

In [None]:
# Now we have a list of all tweets converted to lists of word IDs.
# Cast it to an array for future usage. Tweets have different lengths, so the
# list is ragged: dtype=object keeps one Python list per tweet, and recent NumPy
# versions refuse to build an array from ragged input without it.
allWordIndices = np.asarray(allWordIndices, dtype=object)

# create one-hot matrices out of the indexed tweets
train_x = tokenizer.sequences_to_matrix(allWordIndices, mode='binary')
# treat the labels as categories (2 classes)
train_y = keras.utils.to_categorical(train_y, 2)

In [None]:
from keras.layers import Dense
from keras.layers import Dropout
from keras.models import Sequential

In [None]:
#Keras’ Sequential() is a simple type of neural net that consists of a “stack” of layers executed in order
#The input and output layers are the most important, since they determine the overall shape of the neural net. 
#Our network will mostly consist of Dense layers — the “standard”, linear neural net layer of inputs, weights, and outputs.
#In our case, we’re inputting a sentence that will be converted to a one-hot matrix of length max_words — here, 7000. 
#We also include how many outputs we want to come out of that layer (512, for funsies) 
#and what kind of maximization (or “activation”) function to use.
#Activation functions are used when training the network; 
#they tell the network how to judge when a weight for a particular node has created a good fit.
# Activation functions differ, mostly in speed, but all the ones available in Keras and TensorFlow are viable; feel free to play around with them. 
#If you don’t explicitly add an activation function, that layer will use a linear one.
#Our output layer consists of two possible outputs, since that’s how many categories our data could get sorted into. 
#If you use a neural net to predict rather than classify, you’re actually creating a neural net with one possible output — the prediction.
model = Sequential()

In [None]:
#input_dim is set to the max words: use the max_words constant the Tokenizer was
#built with, so the input width cannot drift away from the width of the one-hot matrix
model.add(Dense(512, activation='relu', input_dim=max_words))

In [None]:
#Dropout randomly zeroes a share of the layer's inputs during training (here 50%),
#which can help avoid overfitting.
model.add(Dropout(0.5))

In [None]:
# Second hidden layer: 256 units with a sigmoid activation
model.add(Dense(256, activation='sigmoid'))

In [None]:
# Second dropout layer, again with a rate of 0.5
model.add(Dropout(0.5))

In [None]:
# Output layer: 2 units with softmax, one per label category
model.add(Dense(2, activation='softmax'))

In [None]:
# Compiling the network: RMSprop optimiser, binary cross-entropy loss,
# accuracy reported while training
compile_settings = {
    'loss': 'binary_crossentropy',
    'optimizer': 'rmsprop',
    'metrics': ['accuracy'],
}
model.compile(**compile_settings)

In [None]:
# Fitting (training) the model on inputs train_x and categories train_y.
fit_settings = dict(
    # Data is evaluated in groups of batch_size: check the network's accuracy,
    # tweak the node weights, then run the next batch. Small batches train much
    # faster than one batch the size of the entire training dataset.
    batch_size=128,
    # epochs is how many times the batch-by-batch pass is repeated. 5 was good
    # in this case; 7 ended up overfitting.
    epochs=5,
    verbose=1,
    # validation_split is the share of the input reserved for validation, which is
    # essential for seeing how accurate the network is at that point.
    # Recommended training-to-test ratios are 80:20 or 90:10.
    validation_split=0.1,
    shuffle=True,
)
history = model.fit(train_x, train_y, **fit_settings)
# The value to watch is not acc but val_acc (validation accuracy): the score the
# net gets when predicting the data in the validation split.
# Accuracy should start out low and rise within an epoch, and increase at least
# a little across epochs. If it starts decreasing, you're overfitting.

In [None]:
# NOTE(review): x_test, y_test and batch_size were not defined anywhere in the
# notebook as seen, so this cell could not run. They are built here from
# data_set_test (the held-out tweeters), encoded with the fitted tokenizer the
# same way as the training tweets - confirm this is the intended evaluation set.
batch_size = 128  # same batch size as used for training
x_test = tokenizer.texts_to_matrix(data_set_test['text'].astype(str).tolist(), mode='binary')
y_test = keras.utils.to_categorical(data_set_test['depressed'].values, 2)

score = model.evaluate(x_test, y_test,
                       batch_size=batch_size, verbose=1)
# score holds the loss first, then the accuracy metric requested at compile time
print('Test score:', score[0])
print('Test accuracy:', score[1])

In [None]:
# Saving the model so that all of those steps don't have to be repeated.
# The model is saved in two parts.
# Part one: the weights used in the model's nodes
model.save_weights('model_nn.h5')
# Part two: the structure of the model itself, as JSON
model_json_nn = model.to_json()
with open('model.json', 'w') as json_file:
    print(model_json_nn, end='', file=json_file)

In [None]:
import json
import numpy as np
import keras
import keras.preprocessing.text as kpt
from keras.preprocessing.text import Tokenizer
from keras.models import model_from_json

In [None]:
# we're still going to use a Tokenizer here, but we don't need to fit it
# num_words is 7000, the same value as max_words used when training
tokenizer = Tokenizer(num_words=7000)
# for human-friendly printing; the list position is used as the predicted class index
labels = ['not depressed', 'depressed']

In [None]:
# Reload the word -> ID dictionary that was saved to disk earlier
with open('dictionary.json', 'r') as dictionary_file_nn:
    dictionary = json.loads(dictionary_file_nn.read())

# Inference-time version of the text -> word ID conversion
def convert_text_to_index_array(text):
    """Return the `dictionary` IDs of the words in `text`, skipping unknown words.

    A word that is missing from the module-level `dictionary` is reported with a
    printed message and left out, so only registered words reach the matrix step.
    """
    wordIndices = []
    for word in kpt.text_to_word_sequence(text):
        if word not in dictionary:
            print("'%s' not in training corpus; ignoring." %(word))
            continue
        wordIndices.append(dictionary[word])
    return wordIndices

In [None]:
# read in your saved model structure; `with` closes the file even if the read fails
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
# and create a model from that
model = model_from_json(loaded_model_json)
# and weight your nodes with your saved values
model.load_weights('model_nn.h5')

In [None]:
# Interactive part: keep asking for sentences until an empty line is entered
while True:
    evalSentence = input('Input a sentence to be evaluated, or Enter to quit: ')
    if not evalSentence:
        break

    # turn the sentence into the one-hot row the neural net expects
    testArr = convert_text_to_index_array(evalSentence)
    inputed_text = tokenizer.sequences_to_matrix([testArr], mode='binary')

    # ask the model which bucket the input belongs in
    pred = model.predict(inputed_text)
    best_class = np.argmax(pred)

    # report the winning label together with its probability
    print("%s sentiment; %f%% confidence" % (labels[best_class], pred[0][best_class] * 100))

In [None]:
#If you have issues running the above section more than once try:
reset_selective inputed_text