In [21]:
#Import Libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import precision_score, \
    recall_score, confusion_matrix, classification_report, \
    accuracy_score, f1_score, plot_confusion_matrix
from sklearn_pandas import DataFrameMapper
import csv
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import fasttext
import joblib
import copy

## Import Datasets

In [5]:
columns = [
    "id",
    "label",
    "alpha",
    "text"
]

train = pd.read_csv('/Users/julian.hicks/Documents/mids/w207ml/W207FinalProject/datasets/train.tsv', names=columns, sep="\t")
test = pd.read_csv('/Users/julian.hicks/Documents/mids/w207ml/W207FinalProject/datasets/test.tsv', names=columns, sep="\t")
dev = pd.read_csv('/Users/julian.hicks/Documents/mids/w207ml/W207FinalProject/datasets/dev.tsv', names=columns, sep="\t")

train

Unnamed: 0,id,label,alpha,text
0,0,0,a,What is the most effective classroom managemen...
1,1,0,a,Can I study abroad after 10th class from Bangl...
2,2,0,a,How can I make friends as a college junior?
3,3,0,a,How do I download free APK Minecraft: Pocket E...
4,4,0,a,"Like Kuvera, is ""Groww"" also a free online inv..."
...,...,...,...,...
1044892,1044892,0,a,How is a video similar to ordinary graphics?
1044893,1044893,0,a,How does training with a speed bag make you a ...
1044894,1044894,0,a,What marketing strategies are implemented to i...
1044895,1044895,0,a,What are the characteristics of wireless adapt...


### Set Up `TextObject`s and Transform as Appropriate for Each Model

In [4]:
from nltk.stem.porter import PorterStemmer
import numpy as np
import pandas as pd
import nltk as nltk
import re
import gensim 
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from imblearn.over_sampling import SMOTE, RandomOverSampler

class TextObject(pd.DataFrame):
    """
    A collection of strings for processing. Is a DataFrame with specific 
    columns. 
    """

    def __init__(self):
        super().__init__(columns=[
            "original_text",
            "text",
            "label"
        ]
        )
        self._attrs["applied_transformations"] = []
        nltk.download("stopwords")
        nltk.download("punkt")

    def add_text(self, text_collection, labels=None):
        """
        Adds a collection of text to to the TextObject. Accepts an iterable or 
        array-like of all text strings. Optionally, accepts labels of the same 
        length. Modifies self to be a pandas DataFrame with column "text" and 
        potentially "labels".
        """
        self["original_text"] = text_collection
        self["text"] = self.original_text
        self._attrs["applied_transformations"] = []
        if type(labels) == type(None):
            return
        if len(labels) == len(self):
            self["label"] = labels

    # Text Processing Methods:

    def text_processing(self, function):
        """
        Applies a generic text processing function to the self.text field.
        """
        self.text = self.text.apply(function)
        self._attrs["applied_transformations"].append(function.__name__)

    def lower(self):
        self.text = self.text.str.lower()
        self._attrs["applied_transformations"].append("lower")
    
    def strip(self):
        self.text = self.text.str.strip()
        self._attrs["applied_transformations"].append("strip")

    def remove_single_digits(self):
        self.text = self.text.apply(lambda text: re.sub("([\d]+)", "", text))
        self._attrs["applied_transformations"].append("remove_single_digits")
    
    def remove_nonletter_chars(self):
        self.text = self.text.apply(lambda text: re.sub("[^A-Za-z0-9 \\n]", " ", text))
        self._attrs["applied_transformations"].append("remove_nonletter_chars")

    def stop_word_tokenize(self):
        def tokenize(text):
            self.stoplist = nltk.corpus.stopwords.words('english')
            finalTokens = []
            tokens = nltk.word_tokenize(text)
            for w in tokens:
                if (w not in self.stoplist):
                    finalTokens.append(w)
            text = " ".join(finalTokens)
            return text
        self.text = self.text.apply(tokenize)
        self._attrs["applied_transformations"].append("stop_word_tokenize")
    
    def stem_sentence(self):
        def stem(text):
            porter=PorterStemmer()
            token_words=nltk.tokenize.word_tokenize(text)
            token_words
            stem_sentence=[]
            for word in token_words:
                stem_sentence.append(porter.stem(word))
                stem_sentence.append(" ")
            return "".join(stem_sentence)
        self.text = self.text.apply(stem)
        self._attrs["applied_transformations"].append("stem_sentence")

    def lemmatize_sentence(self):
        def lem(text):
            wordnet_lemmatizer = nltk.stem.WordNetLemmatizer()
            #token_words
            token_words=nltk.tokenize.word_tokenize(text)
            lemm_sentence=[]
            for word in token_words:
                lemm_sentence.append(wordnet_lemmatizer.lemmatize(word))
                lemm_sentence.append(" ")
            return "".join(lemm_sentence)
        self.text = self.text.apply(lem)
        self._attrs["applied_transformations"].append("lemmatize_sentence")

    def process_all(self, lower=True, strip=True, remove_single_digits =True, 
        remove_nonletter_chars=True, stop_word_tokenize=True, stem_sentence=True,
        lemmatize_sentence=True):
        if lower:
            self.lower()
        if strip:
            self.strip()
        if remove_single_digits:
            self.remove_single_digits()
        if remove_nonletter_chars:
            self.remove_nonletter_chars()
        if stop_word_tokenize:
            self.stop_word_tokenize()
        if stem_sentence:
            self.stem_sentence()
        if lemmatize_sentence:
            self.lemmatize_sentence()

In [18]:
def createTextObject(df):
    out = TextObject()
    out.add_text(df.text, df.label)
    return out

train = createTextObject(train)
test = createTextObject(test)
dev = createTextObject(dev)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### Logit Model 
This model uses a CountVectorizer and no text transformations. Since we've saved down the model using joblib, we can just load it here:

In [30]:
lr_model = joblib.load("lrmodel.joblib")
lr_thresh = joblib.load("lr_ot.joblib")
lr_vect = joblib.load("lr_cv.joblib")
lr_model

LogisticRegression(C=1, max_iter=1000)

In [45]:
lr_train = lr_vect.transform(train.text)
lr_test = lr_vect.transform(test.text)
lr_dev = lr_vect.transform(dev.text)

### Gradient-Boosted Tree
This model uses a TF-IDF Vectorizer and a number of transformations: `.lower()`, `.strip()`, `.remove_single_digits()`, `.remove_nonletter_chars()` & `.stop_word_tokenize()`
We will load the model, and vectorizer and set up the right transformed `TextObject`s

In [31]:
gb_model = joblib.load("gbmodel.joblib")
gb_thresh = joblib.load("gb_ot.joblib")
gb_vect = joblib.load("gb_tv.joblib")

In [50]:

gb_train = createTextObject(train)
gb_test  = createTextObject(test)
gb_dev   = createTextObject(dev)

print(type(gb_train))

gb_train.lower()
gb_train.strip()
gb_train.remove_single_digits()
gb_train.remove_nonletter_chars()
gb_train.stop_word_tokenize()

for m in gb_train._attrs["applied_transformations"]:
    getattr(gb_dev, m)()
    getattr(gb_test, m)()

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/julian.hicks/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


<class '__main__.TextObject'>


  self.stoplist = nltk.corpus.stopwords.words('english')


In [51]:
gb_train_v = gb_vect.transform(gb_train.text)
gb_test_v  = gb_vect.transform(gb_test.text)
gb_dev_v   = gb_vect.transform(gb_dev.text)

### Naive Bayes

We also build a Naive Bayes model, which had similar transformations to our Gradient-Boosted Classifier. 

In [34]:
nb_model = joblib.load("nb_model.joblib")
nb_vect = joblib.load("nb_cv.joblib")
nb_thresh = joblib.load("nb_ot.joblib")

In [52]:
nb_train = nb_vect.transform(gb_train.text)
nb_test  = nb_vect.transform(gb_test.text)
nb_dev   = nb_vect.transform(gb_dev.text)

### FastText 
FastText is a pre-trained model meant to help with lightweight text-classification. We attempted both a transformed and untransformed methodology with this model, and got better results with the untransformed model. We will use that version, and therefore no major changes.

In [32]:
ft_model = fasttext.load_model("ft_model_orig.bin")
ft_thresh = joblib.load("ft_ot.joblib")



We also need to munge the outputs to get a decent result

In [62]:
def get_ft_preds(text_iter):
    preds = []
    for i in range(len(text_iter)):
        pred = ft_model.predict(re.sub('\\n','',text_iter[i]))
        if int(list(pred[0][0]).pop()) == 1:
            preds.append(pred[1][0])
        else:
            preds.append(1-pred[1][0])
    return preds

### Recurrent Neural Network
We trained a bespoke RNN to see if we could get strong performance. The vectorizer here is embedded in the input layer of the model. 

We found that a threshold of 0.80 worked well. 

In [33]:
rn_model = keras.models.load_model('quora_rnn')
rn_thresh = 0.80
rn_model

<tensorflow.python.keras.engine.sequential.Sequential at 0x7fee1912d070>

### BERT Model
We additionally trained a BERT-architecture model, which performed very well. 

## Combining our Models

There are a number of methodologies for combining models in an ensemble. Here, we're going to try three different methods:
1. Modal (Voting)
2. Mean Probability
3. Simple 2-Layer Network Combination

### Modal (Voting)

Here we will take the raw predictions from each model to determine the right selection. 

In [60]:
train_df = pd.DataFrame(train)
train_df["lr_prob"] = [i[1] for i in lr_model.predict_proba(lr_train)]
train_df["gb_prob"] = [i[1] for i in gb_model.predict_proba(gb_train_v)]
train_df["nb_prob"] = [i[1] for i in nb_model.predict_proba(nb_train)]
train_df["ft_prob"] = get_ft_preds(train.text)
train_df["rn_prob"] = rn_model.predict(train.text)
train_df

Unnamed: 0,original_text,text,label,lr_prob,gb_prob,nb_prob,ft_prob,rn_prob
0,What is the most effective classroom managemen...,What is the most effective classroom managemen...,0,0.000555,0.039844,9.134136e-07,0.999542,-1.653022
1,Can I study abroad after 10th class from Bangl...,Can I study abroad after 10th class from Bangl...,0,0.005821,0.063862,9.495923e-03,0.999979,-1.111974
2,How can I make friends as a college junior?,How can I make friends as a college junior?,0,0.004117,0.121463,9.901345e-03,0.999745,-0.571558
3,How do I download free APK Minecraft: Pocket E...,How do I download free APK Minecraft: Pocket E...,0,0.000692,0.149042,1.056736e-11,1.000005,-2.614906
4,"Like Kuvera, is ""Groww"" also a free online inv...","Like Kuvera, is ""Groww"" also a free online inv...",0,0.000191,0.083766,6.679665e-09,0.999964,-11.722013
...,...,...,...,...,...,...,...,...
1044892,How is a video similar to ordinary graphics?,How is a video similar to ordinary graphics?,0,0.003105,0.176345,4.799263e-03,0.999970,-0.391307
1044893,How does training with a speed bag make you a ...,How does training with a speed bag make you a ...,0,0.005284,0.120387,2.182861e-03,0.994148,-0.420259
1044894,What marketing strategies are implemented to i...,What marketing strategies are implemented to i...,0,0.000960,0.170836,4.564579e-11,0.999723,-3.145134
1044895,What are the characteristics of wireless adapt...,What are the characteristics of wireless adapt...,0,0.002340,0.150778,1.079880e-05,1.000007,-0.734787


In [64]:
test_df = pd.DataFrame(test)
test_df["lr_prob"] = [i[1] for i in lr_model.predict_proba(lr_test)]
test_df["gb_prob"] = [i[1] for i in gb_model.predict_proba(gb_test_v)]
test_df["nb_prob"] = [i[1] for i in nb_model.predict_proba(nb_test)]
test_df["ft_prob"] = get_ft_preds(test.text)
test_df["rn_prob"] = rn_model.predict(test.text)
test_df

Unnamed: 0,original_text,text,label,lr_prob,gb_prob,nb_prob,ft_prob,rn_prob
0,Does my BDS degree from India count if I want ...,Does my BDS degree from India count if I want ...,0,0.000071,0.224557,9.497170e-09,0.000098,-3.479209
1,Is there such a thing as an average face?,Is there such a thing as an average face?,0,0.035331,0.201973,4.246348e-01,0.007789,0.251670
2,Is Munich Volkshochschule a good choice to lea...,Is Munich Volkshochschule a good choice to lea...,0,0.021947,0.153941,1.355363e-03,0.004811,0.076729
3,What is The relationship between Texas and its...,What is The relationship between Texas and its...,0,0.007457,0.276170,6.619439e-01,0.001019,-0.625006
4,Will the current mining equipment work with se...,Will the current mining equipment work with se...,0,0.005579,0.100898,1.270251e-05,-0.000007,-1.466719
...,...,...,...,...,...,...,...,...
130607,What's the best way to teach your daughter to ...,What's the best way to teach your daughter to ...,0,0.041944,0.180011,4.178595e-02,0.020944,0.561008
130608,What are similarities and differences between ...,What are similarities and differences between ...,0,0.004699,0.110094,9.151326e-03,0.001823,-1.285976
130609,What is the best shoe cleaning product for tri...,What is the best shoe cleaning product for tri...,0,0.003072,0.192046,6.060790e-03,0.000542,0.658570
130610,Can I get a private medical college with 320 m...,Can I get a private medical college with 320 m...,0,0.001279,0.049469,5.716538e-07,0.000184,-10.727527


In [65]:
dev_df = pd.DataFrame(dev)
dev_df["lr_prob"] = [i[1] for i in lr_model.predict_proba(lr_dev)]
dev_df["gb_prob"] = [i[1] for i in gb_model.predict_proba(gb_dev_v)]
dev_df["nb_prob"] = [i[1] for i in nb_model.predict_proba(nb_dev)]
dev_df["ft_prob"] = get_ft_preds(dev.text)
dev_df["rn_prob"] = rn_model.predict(dev.text)
dev_df

Unnamed: 0,original_text,text,label,lr_prob,gb_prob,nb_prob,ft_prob,rn_prob
0,Where can I find best romantic shayaris?,Where can I find best romantic shayaris?,1,0.003461,0.066572,1.537171e-03,-1.192093e-07,-0.226986
1,When we check the UPSC rank list; many of the ...,When we check the UPSC rank list; many of the ...,0,0.003836,0.127764,8.641639e-07,1.839083e-02,-0.060640
2,Is there is any web site to create search engi...,Is there is any web site to create search engi...,0,0.003249,0.115049,1.516661e-04,2.480149e-04,-1.356835
3,How can you earn $50 just clicking ads?,How can you earn $50 just clicking ads?,0,0.002562,0.184455,1.987959e-02,-8.940697e-06,-0.865699
4,Which 3 branches of engineering have the most ...,Which 3 branches of engineering have the most ...,0,0.000199,0.051391,3.259093e-06,4.172325e-06,-2.535836
...,...,...,...,...,...,...,...,...
130608,How can the ViewSonic PA503S 3600 lumens SVGA ...,How can the ViewSonic PA503S 3600 lumens SVGA ...,0,0.001746,0.118619,6.697781e-13,1.001358e-05,-0.661709
130609,What are the biggest myths about Adolf Hitler?,What are the biggest myths about Adolf Hitler?,0,0.045431,0.534086,7.244850e-01,2.488196e-03,0.663470
130610,What song played in the movie of the gifted wh...,What song played in the movie of the gifted wh...,0,0.008959,0.131536,9.977263e-01,1.437485e-03,-0.627733
130611,"What do Socrates, Thomas Kuhn and Karl Popper ...","What do Socrates, Thomas Kuhn and Karl Popper ...",0,0.008071,0.233031,4.444433e-01,4.540682e-04,0.040162


Now we can "vote" among the methods to determine the right class.

In [68]:
def modal_prediction(row):
    votes = 0
    if row.lr_prob >= lr_thresh:
        votes +=  1
    else:
        votes += -1
    if row.gb_prob >= gb_thresh:
        votes +=  1
    else:
        votes += -1
    if row.nb_prob >= nb_thresh:
        votes +=  1
    else:
        votes += -1
    if row.ft_prob >= ft_thresh:
        votes +=  1
    else:
        votes += -1
    if row.rn_prob >= rn_thresh:
        votes +=  1
    else:
        votes += -1
    
    if votes > 0:
        return 1
    else:
        return 0


dev_df["modal_pred"] = dev_df.apply(modal_prediction, axis=1)
dev_df

Unnamed: 0,original_text,text,label,lr_prob,gb_prob,nb_prob,ft_prob,rn_prob,modal_pred
0,Where can I find best romantic shayaris?,Where can I find best romantic shayaris?,1,0.003461,0.066572,1.537171e-03,-1.192093e-07,-0.226986,0
1,When we check the UPSC rank list; many of the ...,When we check the UPSC rank list; many of the ...,0,0.003836,0.127764,8.641639e-07,1.839083e-02,-0.060640,0
2,Is there is any web site to create search engi...,Is there is any web site to create search engi...,0,0.003249,0.115049,1.516661e-04,2.480149e-04,-1.356835,0
3,How can you earn $50 just clicking ads?,How can you earn $50 just clicking ads?,0,0.002562,0.184455,1.987959e-02,-8.940697e-06,-0.865699,0
4,Which 3 branches of engineering have the most ...,Which 3 branches of engineering have the most ...,0,0.000199,0.051391,3.259093e-06,4.172325e-06,-2.535836,0
...,...,...,...,...,...,...,...,...,...
130608,How can the ViewSonic PA503S 3600 lumens SVGA ...,How can the ViewSonic PA503S 3600 lumens SVGA ...,0,0.001746,0.118619,6.697781e-13,1.001358e-05,-0.661709,0
130609,What are the biggest myths about Adolf Hitler?,What are the biggest myths about Adolf Hitler?,0,0.045431,0.534086,7.244850e-01,2.488196e-03,0.663470,0
130610,What song played in the movie of the gifted wh...,What song played in the movie of the gifted wh...,0,0.008959,0.131536,9.977263e-01,1.437485e-03,-0.627733,0
130611,"What do Socrates, Thomas Kuhn and Karl Popper ...","What do Socrates, Thomas Kuhn and Karl Popper ...",0,0.008071,0.233031,4.444433e-01,4.540682e-04,0.040162,0


In [70]:
print(classification_report(dev_df.label, dev_df.modal_pred))

              precision    recall  f1-score   support

           0       0.98      0.97      0.97    122465
           1       0.57      0.68      0.62      8148

    accuracy                           0.95    130613
   macro avg       0.77      0.82      0.80    130613
weighted avg       0.95      0.95      0.95    130613



We can see that this returns an F1 score of ~62%, which is equal to what we get on Logistic Regression. Let's try another method and see if we get better results.

### Mean Probability

If we average the probabilities of all methods, we may get a more accurate prediction. We will need to standardize the RNN-probabilities though, since it currently outputs values above 1 and below 0. We can do this by simply setting a floor and cieling on this field. 

In [88]:
def mean_proba(row):
    return sum([
        row.lr_prob,
        row.gb_prob,
        row.nb_prob,
        row.ft_prob,
        max(min(row.rn_prob,1),0)
    ])

dev_df["mean_prob"] = dev_df.apply(mean_proba, axis=1)

dev_df

Unnamed: 0,original_text,text,label,lr_prob,gb_prob,nb_prob,ft_prob,rn_prob,modal_pred,mean_prob
0,Where can I find best romantic shayaris?,Where can I find best romantic shayaris?,1,0.003461,0.066572,1.537171e-03,-1.192093e-07,-0.226986,0,0.071570
1,When we check the UPSC rank list; many of the ...,When we check the UPSC rank list; many of the ...,0,0.003836,0.127764,8.641639e-07,1.839083e-02,-0.060640,0,0.149992
2,Is there is any web site to create search engi...,Is there is any web site to create search engi...,0,0.003249,0.115049,1.516661e-04,2.480149e-04,-1.356835,0,0.118697
3,How can you earn $50 just clicking ads?,How can you earn $50 just clicking ads?,0,0.002562,0.184455,1.987959e-02,-8.940697e-06,-0.865699,0,0.206888
4,Which 3 branches of engineering have the most ...,Which 3 branches of engineering have the most ...,0,0.000199,0.051391,3.259093e-06,4.172325e-06,-2.535836,0,0.051597
...,...,...,...,...,...,...,...,...,...,...
130608,How can the ViewSonic PA503S 3600 lumens SVGA ...,How can the ViewSonic PA503S 3600 lumens SVGA ...,0,0.001746,0.118619,6.697781e-13,1.001358e-05,-0.661709,0,0.120375
130609,What are the biggest myths about Adolf Hitler?,What are the biggest myths about Adolf Hitler?,0,0.045431,0.534086,7.244850e-01,2.488196e-03,0.663470,0,1.969961
130610,What song played in the movie of the gifted wh...,What song played in the movie of the gifted wh...,0,0.008959,0.131536,9.977263e-01,1.437485e-03,-0.627733,0,1.139659
130611,"What do Socrates, Thomas Kuhn and Karl Popper ...","What do Socrates, Thomas Kuhn and Karl Popper ...",0,0.008071,0.233031,4.444433e-01,4.540682e-04,0.040162,0,0.726161


Let's apply our standard grid search to this probability.

In [89]:
def frange(start, stop, step):
  i = start
  while i < stop:
    yield i
    i += step

columnNames = ['threshold', 'f1Score', 'accuracy', 'recall', 'precision']
thresholdDF = pd.DataFrame(columns=columnNames)

for i in frange(0.05,1,0.05):
    pred2 = np.where(dev_df['mean_prob'] > i, 1, 0)
    f1Score = f1_score(dev_df.label, pred2)
    accuracy = accuracy_score(dev_df.label, pred2)
    recall = recall_score(dev_df.label, pred2)
    precision = precision_score(dev_df.label, pred2)
    thresholdDF = thresholdDF.append({'threshold': i, 'f1Score': f1Score, 
                                      'accuracy':accuracy, 'recall':recall, 
                                      'precision':precision}, 
                                      ignore_index=True)

thresholdDF

Unnamed: 0,threshold,f1Score,accuracy,recall,precision
0,0.05,0.120011,0.085152,1.0,0.063836
1,0.1,0.129459,0.162258,0.998527,0.069217
2,0.15,0.145699,0.271497,0.995827,0.0786
3,0.2,0.166679,0.380284,0.993495,0.09097
4,0.25,0.195718,0.492952,0.988954,0.108606
5,0.3,0.217573,0.557601,0.986009,0.122278
6,0.35,0.235009,0.600798,0.982941,0.133459
7,0.4,0.250695,0.634638,0.97975,0.143737
8,0.45,0.265234,0.662384,0.976804,0.15345
9,0.5,0.278595,0.685253,0.974227,0.162537


This isn't very good, and it also overfits to dev! We should move on to a different method. 

### Simple Neural Network

In [102]:
model = keras.Sequential()
model.add(tf.keras.layers.InputLayer(5))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='softmax'))


(None, 5)


(None, 1)