## Load Libraries

In [None]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
%matplotlib inline
import random
random.seed(30)
from sklearn.utils import compute_sample_weight
import pickle
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
def display_side_by_side(*args):
    """Show several HTML fragments next to each other in the notebook.

    Every positional argument is an HTML string. The fragments are
    concatenated, each occurrence of ``table`` is given an inline display
    style, and the result is handed to ``display_html`` as raw markup.
    """
    combined = ''.join(args)
    inline_markup = combined.replace('table', 'table style="display:inline"')
    display_html(inline_markup, raw=True)

def save_object(filename, object):
    """Pickle ``object`` into ``filename`` and print a confirmation line."""
    with open(filename, 'wb') as sink:
        pickle.Pickler(sink).dump(object)
        print('Saved to ' ,filename)
def read_object(filename):
    """Load and return the object pickled in ``filename``.

    input:
        - filename: path of a file written with pickle (e.g. by save_object)
    output:
        - the unpickled object
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(filename, 'rb') as filehandler:
        # The result must be returned; without it callers always get None.
        return pickle.load(filehandler)
        

## Data Preprocess

In [None]:
def load_data(filename):
    """
    Read a ``sentence@label`` text file into a dataframe.

    input:
        - filename: path of the file; every line is a sentence, an '@', and
          one of the labels 'positive', 'negative' or 'neutral'
    output:
        - dataframe with columns ``contexts`` (sentence text) and ``labels``
          (1 = positive, 0 = neutral, -1 = negative)
    raises:
        - ValueError when a line has no '@' separator or an unknown label
    """
    label_to_target = {'positive': 1, 'negative': -1, 'neutral': 0}

    def decodeText(text):
        # The file is read in binary mode: try UTF-8 first and fall back to
        # latin-1 only when the bytes are not valid UTF-8.
        if not isinstance(text, str):
            try:
                text = text.decode('utf-8')
            except UnicodeDecodeError:
                text = text.decode('latin-1')
        # Split on the LAST '@' only, so a sentence that itself contains '@'
        # still yields exactly (sentence, label).
        return text.rstrip().rsplit('@', 1)

    with open(filename, 'rb') as f:
        lines = f.readlines()

    contexts, labels = [], []
    for line in tqdm(lines):
        parts = decodeText(line)
        if len(parts) != 2:
            raise ValueError('ERROR: missing "@" separator in line ' + repr(line))
        context, label = parts
        if label not in label_to_target:
            # raise a real exception; raising a plain string is itself a TypeError
            raise ValueError('ERROR: ' + label)
        contexts.append(context)
        labels.append(label_to_target[label])

    return pd.DataFrame({'contexts': contexts, 'labels': labels})
def preprocessing(data):
    """
    Normalise, tokenise and lemmatise ``data.contexts`` in place.

    input:
        - data is a dataframe with columns contexts as series of text
    output:
        - the same dataframe object; ``contexts`` now holds one list of
          lemmatised tokens per row (the frame is modified in place and is
          also returned so the call can be chained)
    """
    import re
    from nltk import word_tokenize
    from nltk.stem.wordnet import WordNetLemmatizer

    # (pattern, replacement) pairs, applied to every sentence in this exact
    # order; the order matters because later rules see earlier rewrites.
    substitutions = [
        # expand negative contractions
        (r"don't", "do not"),
        (r"aren't", "are not"),
        (r"couldn't", "could not"),
        (r"didn't", "did not"),
        (r"doesn't", "does not"),
        (r"hadn't", "had not"),
        (r"hasn't", "has not"),
        (r"haven't", "have not"),
        (r"isn't", "is not"),
        (r"mightn't", "might not"),
        (r"mustn't", "must not"),
        (r"needn't", "need not"),
        (r"shan't", "shall not"),
        (r"wasn't", "was not"),
        (r"weren't", "were not"),
        (r"won't", "will not"),
        (r"wouldn't", "would not"),
        # apostrophe made optional: the previous pattern "shouldnt" never
        # matched the actual contraction "shouldn't"
        (r"shouldn'?t", "should not"),
        # remove numbers
        (r"\d+", ''),
        # remaining contractions and punctuation spacing
        (r"what's", "what is "),
        (r"\'s", " "),
        (r"\'ve", " have "),
        (r"i'm", "i am "),
        (r"\'re", " are "),
        (r"\'d", " would "),
        (r"!", " ! "),
        (r"\/", " "),
        (r"\^", " ^ "),
        (r"\+", " + "),
        (r"\-", " - "),
        # cannot match any more (digits were removed above); kept so the
        # rule list stays in its original order
        (r"(\d+)(k)", r"\g<1>000"),
        (r":", " : "),
        (r" e g ", " eg "),
        (r" b g ", " bg "),
        (r" u s ", " american "),
        # NOTE(review): in a regex "\0" is the NUL character, so this only
        # matches NUL followed by "s" - presumably not the intent; unchanged
        (r"\0s", "0"),
        # cannot match any more either (digits were removed above)
        (r" 9 11 ", "911"),
        (r"e - mail", "email"),
        (r"j k", "jk"),
        # collapse runs of whitespace
        (r"\s{2,}", " "),
    ]
    compiled = [(re.compile(pattern), replacement) for pattern, replacement in substitutions]
    lemmatizer = WordNetLemmatizer()

    def normalise(text):
        # lowercase (this also collapses whitespace between words)
        text = ' '.join(word.lower() for word in text.split())
        for pattern, replacement in compiled:
            text = pattern.sub(replacement, text)
        # remove leading/trailing whitespace
        return text.strip()

    def to_lemmas(text):
        # tokenization followed by lemmatisation; stop words are kept on purpose
        return [lemmatizer.lemmatize(word) for word in word_tokenize(normalise(text))]

    data['contexts'] = data['contexts'].apply(to_lemmas)
    return data

In [None]:
# Load the labelled sentences, then preprocess them. preprocessing() rewrites
# data3.contexts in place (raw text -> list of lemmatised tokens).
data3 = load_data('Sentences_AllAgree.txt')
preprocessing(data3)

In [None]:
from sklearn.model_selection import train_test_split
# 80% of the rows go to train; the held-out 20% is then halved into dev and
# test (10% each). Both splits are shuffled with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(data3.contexts, data3.labels,test_size = 0.2, shuffle = True, random_state = 1)
# X_test / y_test are rebound here to the final test half.
X_dev, X_test, y_dev, y_test = train_test_split(X_test, y_test,test_size = 0.5, shuffle = True, random_state = 1)

In [None]:
from keras.utils import to_categorical
# One-hot targets for the Keras models.
# NOTE(review): the labels are -1/0/1, so the -1 class presumably ends up in
# the last column through negative indexing (the Keras model_eval maps
# argmax 2 back to -1) - confirm with the installed Keras version.
y_cat_train = to_categorical(y_train, num_classes=3)
y_cat_dev = to_categorical(y_dev, num_classes=3)
y_cat_test = to_categorical(y_test, num_classes=3)

In [None]:
# Length, in tokens, of the longest preprocessed sentence.
print('max sentence length')
[data3.contexts.map(len).max()]

## EDA

In [None]:
def EDA(data):
    """
    Show a word cloud of the corpus next to a bar chart of the label counts.

    input:
        - data: dataframe whose ``contexts`` column holds token lists and
          whose ``labels`` column holds the class labels
    """
    from wordcloud import WordCloud
    corpus_text = ' '.join(word for text in data.contexts for word in text)
    wc = WordCloud().generate(corpus_text)
    fig, (ax0, ax1) = plt.subplots(nrows = 1, ncols = 2, figsize = [10,4])
    ax0.imshow(wc)
    ax0.axis('off')

    # pandas picks the target axes through ``ax=``. The previous ``axes=``
    # keyword is not a pandas plotting argument, so the chart only landed on
    # ax1 as long as ax1 happened to be the current axes.
    pd.Series(data.labels).value_counts().plot(kind = 'bar', ax = ax1)
    plt.tight_layout()

In [None]:
# Word cloud and label distribution of the preprocessed corpus.
EDA(data3)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import chi2

# TF-IDF over unigrams and bigrams. The token lists in data3.contexts are
# joined back into strings first; the sparse result is converted to a dense
# array for the chi2 ranking in the next cell.
tfidf = TfidfVectorizer(sublinear_tf=True, 
                        min_df=5, 
                        norm='l2', 
                        encoding='latin-1', 
                        ngram_range=(1, 2), 
                        stop_words='english')
features = tfidf.fit_transform(data3.contexts.apply(lambda x: ' '.join(x))).toarray()
labels = data3.labels

In [None]:
# For every class, rank the TF-IDF features by their chi2 score against a
# one-vs-rest indicator and print the N highest-scoring unigrams and bigrams.
N = 10
# Newer scikit-learn only provides get_feature_names_out(); older releases
# only get_feature_names(). Use whichever this installation has.
_get_feature_names = getattr(tfidf, 'get_feature_names_out', None) or tfidf.get_feature_names
# The vocabulary does not change between categories, so build it once.
all_feature_names = np.array(_get_feature_names())
for category_id in sorted(labels.unique()):
    features_chi2 = chi2(features, labels == category_id)
    indices = np.argsort(features_chi2[0])  # ascending: best features last
    feature_names = all_feature_names[indices]
    unigrams = [v for v in feature_names if len(v.split(' ')) == 1]
    bigrams = [v for v in feature_names if len(v.split(' ')) == 2]
    print("------------- Category {} ------------".format(category_id))
    print("Most correlated unigrams:")
    print(unigrams[-N:])
    print("Most correlated bigrams:")
    print(bigrams[-N:])
    print('\n')

## Sentiment Model

In [None]:
from IPython.display import display_html
def model_eval(model, X_train, X_dev, X_test, y_train, y_dev, y_test, name = 'Model'):
    """
    Score a fitted classifier on the train/dev/test splits and display, for
    each split, a precision/recall/F1 table next to the confusion matrix.

    input:
        - model: fitted object exposing predict(X) and
          score(X, y, method=...) (see SentimentClassifier)
        - X_train, X_dev, X_test: inputs of the three splits
        - y_train, y_dev, y_test: matching labels (-1, 0 or 1)
        - name: title printed in the banner line
    output:
        - list [train_f1, dev_f1, test_f1, train_acc, dev_acc, test_acc]

    NOTE: a later cell defines another ``model_eval`` with a different
    signature for the Keras models; whichever cell ran last is the one called.
    """
    print('--------------------------------------  {} --------------------------------------'.format(name))
    splits = [(X_train, y_train), (X_dev, y_dev), (X_test, y_test)]
    F1 = [model.score(X, y) for X, y in splits]
    acc = [model.score(X, y, method = 'accuracy') for X, y in splits]
    print("Training set F1score: ",F1[0]) 
    print("Dev set F1score: ", F1[1])
    print("Test set F1score: ", F1[2])
    
    def gen_confusion_matrix(pred_label, y_labels):
        # Fix the label order explicitly: the matrix is then always 3x3 and
        # lines up with the negative/neutral/positive names below, even when
        # a split happens to contain no sample of some class.
        conf_mat = confusion_matrix(y_labels, pred_label, labels = [-1, 0, 1])
        cm = sns.light_palette("green", as_cmap=True)
        df = pd.DataFrame(conf_mat, index = ['actual_negative','actual_neutral','actual_positive'], columns = ['pred_negative','pred_neutral','pred_positive']).style.background_gradient(cmap=cm)
        count = np.sum(conf_mat, axis = 1)
        # columns are predictions, rows are actual labels
        precision = np.diag(conf_mat) / np.sum(conf_mat, axis = 0)
        recall = np.diag(conf_mat) / np.sum(conf_mat, axis = 1)
        f1score = 2 * precision * recall / (precision + recall)
        macro_avg = [np.average(precision), np.average(recall), np.average(f1score)]

        result = pd.DataFrame([precision, recall, f1score, count], index = ['precision','recall','f1score','count'], columns = ['negative','neutral','positive']).T
        # the macro average has no meaningful "count", hence the NaN
        macro_avg = pd.Series(macro_avg + [np.nan], result.columns )
        result.loc['macro_avg',:] = macro_avg
        result = result.round(2).style.highlight_max( axis = 0, color = 'green')
    
        return result, df

    # NOTE(review): Styler.render() is deprecated in recent pandas in favour
    # of Styler.to_html() - confirm against the installed pandas version.
    result, df = gen_confusion_matrix(model.predict(X_train), y_train)
    print('------------  Train Confusion Matirx ------------')
    display_side_by_side(result.render(), df.render())    
    result, df = gen_confusion_matrix(model.predict(X_dev), y_dev)
    print('------------  Dev Confusion Matirx ------------')
    display_side_by_side(result.render(), df.render())
    result, df = gen_confusion_matrix(model.predict(X_test), y_test)
    print('------------  Test Confusion Matirx ------------')
    display_side_by_side(result.render(), df.render())
    return F1 + acc
def performance_compare_plot(perf):
    """
    Draw one panel per F1 column (train/val/test) comparing the models.

    input:
        - perf: dataframe with columns ``model_name``, ``feature_method``,
          ``train_f1``, ``val_f1`` and ``test_f1``

    Each panel overlays a strip plot on a box plot; titles and tick labels
    are drawn in white.
    """
    f1_columns = ['train_f1','val_f1','test_f1']
    fig, axes = plt.subplots(nrows = 1, ncols = 3, figsize = [5 * 3, 4], sharey = True)
    for panel, metric in zip(axes, f1_columns):
        subset = perf[['model_name','feature_method', metric]]
        sns.boxplot(x = 'model_name', y = metric, data = subset, ax = panel)
        chart = sns.stripplot(x = 'model_name', y = metric, data = subset, size=8, jitter=True, edgecolor="gray", linewidth=2, ax = panel)
        panel.set_xticklabels(chart.get_xticklabels(), rotation=45, color = 'white')
        panel.set_title(metric, color = 'white')
        panel.tick_params(labelcolor = 'white')
    plt.tight_layout()
    plt.show()

In [None]:
from sklearn.metrics import f1_score

In [None]:
from scipy.sparse import dok_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from collections import Counter

class SentimentClassifier:
    """
    Sparse-feature sentiment classifier: a featurizer that turns a token list
    into a ``{feature: value}`` mapping, plus an estimator trained on the
    resulting sparse matrix with class-balanced sample weights.
    """
    def __init__(self, feature_method, model = None, min_feature_ct = 10):
        """
        input:
            - feature_method: callable mapping one sentence (token list) to a
              dict-like {feature: value}
            - model: estimator with fit(X, y, sample_weight=...) and
              predict(X); a new LogisticRegression(C=1.0) when omitted
            - min_feature_ct: int, ignore the features appear less than this
              number to avoid overfitting
        """
        self.word2idx = {}
        self.idx2word = {}
        self.feature_method = feature_method
        self.min_feature_ct = min_feature_ct
        # Build the default estimator per instance; an estimator created in
        # the signature would be shared (and refitted) by every classifier.
        self.model = LogisticRegression(C=1.0) if model is None else model
    def pipeline(self, X, training = False):
        """
        input:
            - X: iterable of sentences (token lists)
            - training: when True, rebuild the feature vocabulary from X
        output:
            2d sparse matrix (one row per sentence, one column per feature;
            column 0 collects every feature outside the vocabulary)
        """
        # Featurize every sentence exactly once.
        featurized = [self.feature_method(sent) for sent in X]

        # Build feature_vocab during training
        if training:
            # Iterating a featurizer result yields its keys, so this counts
            # in how many sentences each feature occurs.
            fea_ct = Counter(key for feat in featurized for key in feat)
            kept = [key for key, val in fea_ct.items() if val >= self.min_feature_ct]
            self.word2idx = {key: idx + 1 for idx, key in enumerate(kept)}
            self.word2idx['_unknown_'] = 0

        sp_matrix = dok_matrix((len(featurized), len(self.word2idx)))
        for nrow, feat in enumerate(featurized):
            # Several unknown features share column 0: add their values up
            # instead of letting the last one overwrite the others.
            row_values = {}
            for key, val in feat.items():
                col = self.word2idx.get(key, 0)
                row_values[col] = row_values.get(col, 0) + val
            for col, val in row_values.items():
                sp_matrix[nrow, col] = val
        return sp_matrix
    def fit(self, X, y):
        """Build the vocabulary from X and fit the estimator; returns self."""
        X = self.pipeline(X, training = True)
        # weight every sample by the balanced weight of its class
        weight_dict = compute_class_weight(y)
        self.model.fit(X, y, sample_weight = [weight_dict.get(label) for label in y])
        return self
    def predict(self, X):
        """Return the estimator's predictions for the sentences in X."""
        X = self.pipeline(X, training = False)
        return self.model.predict(X)
    def score(self, X, y, method = 'F1-score'):
        """
        input:
            - X, y: sentences and their true labels
            - method: 'F1-score' (macro-averaged F1) or 'accuracy'
        output:
            the requested score as a float
        raises:
            ValueError for any other ``method``
        """
        pred_y = self.predict(X)
        if method == 'F1-score':
            return f1_score(y, pred_y, average = 'macro')
        if method == 'accuracy':
            # Reuse the predictions instead of featurizing and predicting a
            # second time through model.score().
            return accuracy_score(y, pred_y)
        raise ValueError("method must be 'F1-score' or 'accuracy', got {!r}".format(method))

In [None]:
def BOW_featurizer(text):
    """
    Bag-of-words featurizer.

    input:
        - text: a list of tokens (str or int)
    output:
        dictionary of {token: count}
    """
    # The docstring has to be the first statement of the function to be
    # picked up as documentation, so the import comes after it.
    from collections import Counter
    return Counter(text)
from nltk.util import pad_sequence
from nltk.util import everygrams, ngrams
def everyGram(text, N = 5):
    """
    Count the n-grams of ``text`` up to length N.

    input:
        - text: a list of tokens
        - N: maximum n-gram length; also passed to pad_sequence as ``n``
    output:
        Counter of {n-gram tuple: count}; both ends are padded with '<s>'
    """
    padded_tokens = list(pad_sequence(text,
                                      pad_left = True, left_pad_symbol = '<s>',
                                      pad_right = True, right_pad_symbol = '<s>',
                                      n = N))
    grams = everygrams(padded_tokens, max_len = N)
    return Counter(list(grams))
def NGram(text, N = 5):
    """
    Count the N-grams of ``text``.

    input:
        - text: a list of tokens
        - N: n-gram length; also passed to pad_sequence as ``n``
    output:
        Counter of {N-gram tuple: count}; both ends are padded with '<s>'
    """
    padded_tokens = list(pad_sequence(text,
                                      pad_left = True, left_pad_symbol = '<s>',
                                      pad_right = True, right_pad_symbol = '<s>',
                                      n = N))
    grams = ngrams(padded_tokens, n = N)
    return Counter(list(grams))

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
def compute_class_weight(y):
    """
    Map every distinct label in ``y`` to its balanced class weight:

    n_samples / (n_classes * n_samples_with_class)
    """
    label_counts = Counter(y)
    per_class = np.array(list(label_counts.values()))
    total = np.sum(per_class)
    balanced = total / (len(label_counts.keys()) * per_class)
    # Counter keeps first-seen order, so keys and weights line up
    return dict(zip(label_counts.keys(), balanced))


In [None]:
# NOTE(review): this `model` is rebound by the training loop's
# `for model_name, model in model_list.items()` before it is used there.
model = LogisticRegression(C=1.0)

# Estimators to compare, keyed by the name used in the results table.
model_list = {'randomForest':RandomForestClassifier(n_estimators=200, max_depth=3, random_state=0),
          'LinearSVC':LinearSVC(),
          'MultinomialNB':MultinomialNB(  ),
          'LogisticRegression':LogisticRegression(C=1.0, tol=1e-4, random_state=0, solver='lbfgs', multi_class='auto'),
             }
# class_weight = 'balanced' is not set on the estimators;
# SentimentClassifier.fit passes per-sample weights instead.

#### textfile4

In [None]:
# Train and evaluate every (estimator, featurizer) combination and collect
# accuracy / F1 per split into `perf`.
feature_method_list = {'BOW': BOW_featurizer, 'everyGram':everyGram}
perf = []
for model_name, model in model_list.items():
    print('---------------------------------------------- {} ------------------------------------------------'.format(model_name))
    for feature_method_name, feature_method in feature_method_list.items():
        # The same estimator object is passed for both featurizers and is
        # fitted again each time.
        cls = SentimentClassifier(model = model, feature_method=feature_method, min_feature_ct = 10)
        cls = cls.fit(X_train, y_train)
        # model_eval returns the three F1 scores followed by the three accuracies
        train_f1 , val_f1 , test_f1, train_acc , val_acc , test_acc  = model_eval(cls, X_train, X_dev, X_test, y_train, y_dev, y_test, name = feature_method_name)
        perf += [[model_name, feature_method_name,train_acc, train_f1 , val_acc, val_f1 , test_acc, test_f1 ]]
perf = pd.DataFrame(perf, columns = ['model_name','feature_method','train_acc','train_f1','val_acc','val_f1','test_acc','test_f1'])

In [None]:
# Compare the F1 scores of the sparse-feature models.
performance_compare_plot(perf)

In [None]:
# Fold the featurizer name into model_name ("<model>_<featurizer>") and drop
# the separate column. This mutates `perf`, so re-running the cell fails on
# the missing 'feature_method' column.
perf['model_name'] = perf['model_name'] + '_' + perf['feature_method']
perf.pop('feature_method')
perf.set_index('model_name').style.highlight_max( axis = 0, color = 'green')

In [None]:
# Persist the results table; the index is written as the first CSV column.
perf.to_csv('perf.csv')

In [None]:
# Reload the saved results so this cell does not depend on `tmp_perf` having
# been created by a later cell, then rank the models by test F1.
tmp_perf = pd.read_csv('perf.csv', index_col = 0)
tmp_perf = tmp_perf.sort_values('test_f1', ascending = False)
tmp_perf.set_index('model_name').style.highlight_max( axis = 0, color = 'green')

In [None]:
# Keep the first four rows of tmp_perf; reset_index() moves the old index
# into a column named 'index'.
tmp = tmp_perf[:4].reset_index()

In [None]:
# Show the model names of the selected rows (the reset_index() result is not stored).
tmp.reset_index()['model_name'].values

In [None]:
# Shorter display names, assigned by position.
# NOTE(review): the list must match the row order of `tmp` shown by the
# previous cell - verify before re-running on new results.
tmp.model_name = ['BI_LSTM_auc', 'LogReg_everyGram', 'CNN_acc',
       'LogReg_BOW']

In [None]:
# NOTE(review): `tmp` comes from perf.csv, which was saved without a
# 'feature_method' column, so this presumably relies on the later
# performance_compare_plot definition (which does not need it) - confirm.
performance_compare_plot(tmp)

## Embedding models

In [None]:
# Keras
#from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten, LSTM, Conv1D, MaxPooling1D, Dropout, Activation, Bidirectional, GlobalMaxPooling1D
from keras.layers.embeddings import Embedding
from keras.metrics import AUC
from keras.callbacks import EarlyStopping
## Plotly
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)
# Others
import nltk
import string
import numpy as np
import pandas as pd
from nltk.corpus import stopwords

from sklearn.manifold import TSNE
from collections import Counter

In [None]:
### Create sequence
max_vocab_size = 20000
max_sent_length = 109  # changed in version 2
embedding_dim = 100  
max_sent_length3 = 80   # newly added in version 2
hidden_dim = 256
n_epoches = 100
min_token_ct = 10

### Create sequence
max_vocab_size = 20000
max_sent_length = 109  # note this difference here from the previous version
max_sent_length3 = 80  # newly added
embedding_dim = 100
hidden_dim = 256
n_epoches = 100
min_token_ct = 10

In [None]:
class Tokenizer():
    """
    Minimal word-level tokenizer: builds a frequency-filtered vocabulary and
    encodes token lists as integer sequences.

    Two numerical encodings are reserved: {PAD: 0, UNK: 1}.
    """
    def __init__(self, min_token_ct):
        """
        input:
            - min_token_ct: tokens occurring fewer times than this in the
              fitted texts are encoded as UNK
        """
        print('Initialize tokenizer')
        self.word_index = {}
        self.min_token_ct = min_token_ct
        self.vocab = None
        
        self.UNK='unknown'
        self.PAD='_pad_'
    def fit_on_texts(self,X):
        """
        Build the vocabulary from training data given as a list of token
        lists, e.g.
        [["this", "set", "1"],
         ["this", "is", "another", "set"],
         ]

        Sets ``self.vocab`` (list, most frequent token first after the two
        reserved entries) and ``self.word_index`` ({token: index}); nothing
        is returned.
        """
        reserved = (self.PAD, self.UNK)
        token_ct = Counter(word for sent in X for word in sent)
        # Drop rare tokens. Corpus tokens that equal a reserved symbol are
        # dropped too: otherwise they would appear twice in the vocabulary
        # and move PAD/UNK away from their reserved indices 0 and 1.
        token_ct = {k: v for k, v in token_ct.items()
                    if v >= self.min_token_ct and k not in reserved}
        vocab = list(reserved) + sorted(token_ct, key=token_ct.get, reverse=True)

        self.vocab = vocab
        self.word_index = {word: idx for idx, word in enumerate(vocab)}
    def texts_to_sequences(self,X):
        """
        Encode a list of token lists, e.g.
        [["this", "set", "1"],
         ["this", "is", "another", "set"],
        ]
        as a list of index lists; tokens outside the vocabulary get the UNK
        index. fit_on_texts() must have been called first.
        """
        assert len(self.word_index) > 0, 'call fit_on_texts() before texts_to_sequences()'
        UNK_idx = self.word_index.get(self.UNK)
        return [[self.word_index.get(word, UNK_idx) for word in sent] for sent in X ]

In [None]:
# Fit the vocabulary on the training sentences only; vocabulary_size3 counts
# the reserved PAD/UNK entries as well.
tokenizer3 = Tokenizer(min_token_ct)
tokenizer3.fit_on_texts(X_train)
vocabulary_size3 = len(tokenizer3.vocab) 

print([ vocabulary_size3])

In [None]:
# Encode every split with the training vocabulary and pad at the end to
# max_sent_length3 tokens.
# NOTE(review): sentences longer than max_sent_length3 get truncated by
# pad_sequences; which end is cut depends on its `truncating` default - confirm.
X_cod_train = pad_sequences(tokenizer3.texts_to_sequences(X_train), maxlen=max_sent_length3, padding = 'post' )
X_cod_dev = pad_sequences(tokenizer3.texts_to_sequences(X_dev), maxlen=max_sent_length3, padding = 'post' )
X_cod_test = pad_sequences(tokenizer3.texts_to_sequences(X_test), maxlen=max_sent_length3, padding = 'post' )

In [None]:
from IPython.display import display_html
def bias_var_tradeoff_plot(history):
    """
    Plot the training and validation curve of every logged metric.

    input:
        - history: object whose ``history`` attribute maps metric names to
          per-epoch value lists, with validation curves stored under
          'val_' + name (as returned by model.fit)

    One panel is drawn per metric that has both a training and a validation
    curve.
    """
    logged = history.history
    # Select the metrics by name rather than by position: whether the
    # 'val_' keys come first or last in the mapping is not guaranteed.
    metrics = [key for key in logged
               if not key.startswith('val_') and 'val_' + key in logged]
    length = len(metrics)
    if length == 0:
        # nothing with a train/validation pair to draw
        return

    # squeeze=False keeps `axes` two-dimensional even for a single metric
    fig, axes = plt.subplots(nrows = 1, ncols = length, figsize = [5 * length,4], squeeze = False)
    for metric, ax in zip(metrics, axes[0]):
        ax.set_title(metric, color = 'white')
        ax.plot(logged[metric], label='train')
        ax.plot(logged['val_' + metric], label='validation')
        ax.legend()

    plt.show()
def model_eval(model, history, X_train, y_train_labels, X_dev, y_dev_labels, X_test, y_test, y_test_labels, model_name = None):
    """
    Evaluate a trained Keras model and display, for each split, a
    precision/recall/F1 table next to the confusion matrix.

    input:
        - model: trained model exposing evaluate(), predict() and metrics_names
        - history: object returned by model.fit(); its last-epoch 'accuracy'
          and 'val_accuracy' are reported as train/validation accuracy
        - X_train, X_dev, X_test: encoded inputs of the three splits
        - y_train_labels, y_dev_labels, y_test_labels: labels in -1/0/1 form
        - y_test: one-hot test targets passed to model.evaluate()
        - model_name: name printed in the banner line
    output:
        - tuple (train_acc, train_f1, val_acc, dev_f1, test_acc, test_f1),
          the F1 values being macro averages
    """
    test_accr = model.evaluate(X_test, y_test)
    # evaluate() returns the loss followed by the compiled metrics in compile
    # order, so position 1 is whatever metric was listed first and not
    # necessarily accuracy. Look it up by name; fall back to position 1 only
    # when no metric called 'accuracy' is reported.
    scores = dict(zip(model.metrics_names, test_accr))
    test_acc = scores.get('accuracy', test_accr[1])
    print('\n')
    print('----------------------- {}: Test_Loss: {:0.3f} Test_Accuracy: {:0.3f} -----------------------'.format(model_name, test_accr[0], test_acc))

    # output column -> label: column 0 is neutral (0), column 1 positive (1),
    # column 2 negative (-1)
    column_to_label = np.array([0, 1, -1])

    def gen_confusion_matrix(y_pred, y_labels):
        from sklearn.metrics import confusion_matrix
        import seaborn as sns
        pred_label = column_to_label[np.argmax(y_pred, axis = 1)]

        # Fix the label order so the matrix is always 3x3 and matches the
        # negative/neutral/positive names, even if a class is absent.
        conf_mat = confusion_matrix(y_labels, pred_label, labels = [-1, 0, 1])
        cm = sns.light_palette("green", as_cmap=True)
        df = pd.DataFrame(conf_mat, index = ['actual_negative','actual_neutral','actual_positive'], columns = ['pred_negative','pred_neutral','pred_positive']).style.background_gradient(cmap=cm)
        print('Confusion Matirx')

        count = np.sum(conf_mat, axis = 1)
        # columns are predictions, rows are actual labels
        precision = np.diag(conf_mat) / np.sum(conf_mat, axis = 0)
        recall = np.diag(conf_mat) / np.sum(conf_mat, axis = 1)
        f1score = 2 * precision * recall / (precision + recall)
        macro_avg = [np.average(precision), np.average(recall), np.average(f1score)]

        result = pd.DataFrame([precision, recall, f1score, count], index = ['precision','recall','f1score','count'], columns = ['negative','neutral','positive']).T
        # the macro average has no meaningful "count", hence the NaN
        macro_avg = pd.Series(macro_avg + [np.nan], result.columns )
        result.loc['macro_avg',:] = macro_avg
        result = result.round(2).style.highlight_max( axis = 0, color = 'green')

        # select the macro F1 by label instead of by position
        return result, df, macro_avg['f1score']

    # NOTE(review): Styler.render() is deprecated in recent pandas in favour
    # of Styler.to_html() - confirm against the installed pandas version.
    print('------------  Train Confusion Matirx ------------')
    result, df, train_f1 = gen_confusion_matrix(model.predict(X_train), y_train_labels)
    display_side_by_side(result.render(), df.render())
    print('------------  Dev Confusion Matirx ------------')
    result, df, dev_f1 = gen_confusion_matrix(model.predict(X_dev), y_dev_labels)
    display_side_by_side(result.render(), df.render())
    print('------------  Test Confusion Matirx ------------')
    result, df, test_f1 = gen_confusion_matrix(model.predict(X_test), y_test_labels)
    display_side_by_side(result.render(), df.render())
    
    # accuracies logged during the last training epoch
    train_acc = history.history['accuracy'][-1]
    val_acc = history.history['val_accuracy'][-1]
    
    return train_acc, train_f1 , val_acc, dev_f1 , test_acc, test_f1
def performance_compare_plot(perf):
    """
    Draw two figures comparing the models: accuracy per split, then F1 per
    split.

    input:
        - perf: dataframe with a ``model_name`` column plus
          train/val/test ``_acc`` and ``_f1`` columns

    Each figure has three panels sharing the y axis; every panel overlays a
    strip plot on a box plot, with titles and tick labels drawn in white.
    """
    import seaborn as sns
    metric_groups = (['train_acc','val_acc','test_acc'],
                     ['train_f1','val_f1','test_f1'])
    for metrics in metric_groups:
        fig, axes = plt.subplots(nrows = 1, ncols = 3, figsize = [5 * 3, 4], sharey = True)
        for panel, metric in zip(axes, metrics):
            frame = perf[['model_name', metric]]
            sns.boxplot(x = 'model_name', y = metric, data = frame, ax = panel)
            chart = sns.stripplot(x = 'model_name', y = metric, data = frame, size=8, jitter=True, edgecolor="gray", linewidth=2, ax = panel)
            panel.set_xticklabels(chart.get_xticklabels(), rotation=60, color = 'white')
            panel.set_title(metric, color = 'white')
            panel.tick_params(labelcolor = 'white')
        plt.tight_layout()
        plt.show()

### GLOVE

In [None]:
# NOTE(review): the next cell looks vectors up with glove.get(word), i.e. by
# column name, so this CSV presumably has one column per word - confirm.
glove = pd.read_csv("glove_6B_100d_top100k.csv"); glove.head()

In [None]:
# Fill one embedding row per vocabulary index from the GloVe table; words
# without a vector keep their all-zero row and are reported at the end.
embedding_matrix3 = np.zeros((vocabulary_size3, 100))
unseen_word_list = []
for word, index in tokenizer3.word_index.items():
    embedding_vector = glove.get(word)
    if embedding_vector is None:
        unseen_word_list.append(word)
        continue
    embedding_matrix3[index] = embedding_vector
print('{}/{}'.format(len(unseen_word_list), len(tokenizer3.word_index)))
print(unseen_word_list)

In [None]:
import keras
# Metrics compiled into every network below. The names given here are the
# keys used for early stopping ('val_auc', 'val_accuracy') and in the
# training history.
# NOTE(review): 'accuracy' is listed last, so positional indexing into
# model.evaluate() results must account for this order.
METRICS = [
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(curve="PR", name='auc'),
    'accuracy'
]

In [None]:
## Network architecture
def lstm(vocabulary_size, embedding_dim,hidden_dim, max_sent_length, embedding_matrix ):
    """
    Build and compile the Conv1D + LSTM classifier.

    Layers: trainable Embedding initialised from ``embedding_matrix`` ->
    Dropout(0.2) -> Conv1D(64, 5, relu) -> MaxPooling1D(4) ->
    LSTM(hidden_dim) -> Dense(3, softmax). Compiled with categorical
    cross-entropy, adam and the shared METRICS; prints the summary and
    returns the model.
    """
    stack = [
        Embedding(vocabulary_size, embedding_dim, input_length=max_sent_length,weights = [embedding_matrix], trainable=True),
        Dropout(0.2),
        Conv1D(64,5,activation = 'relu'),
        MaxPooling1D(pool_size = 4),
        LSTM(hidden_dim),
        Dense(3, activation='softmax'),
    ]
    model = Sequential(stack)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics= METRICS)

    model.summary()
    return model
## Network architecture
def CNN(vocabulary_size, embedding_dim, hidden_dim, max_sent_length, embedding_matrix ):
    """
    Build and compile the convolutional classifier.

    Layers: trainable Embedding initialised from ``embedding_matrix`` ->
    Dropout(0.2) -> Conv1D(100, 5, tanh) -> GlobalMaxPooling1D ->
    Dense(3, softmax). ``hidden_dim`` is accepted so the signature matches
    the other builders but is not used. Compiled with categorical
    cross-entropy, adam and the shared METRICS; prints the summary and
    returns the model.
    """
    stack = [
        Embedding(vocabulary_size, embedding_dim, input_length=max_sent_length,weights = [embedding_matrix], trainable=True),
        Dropout(0.2),
        Conv1D(100,5,activation = 'tanh'),
        GlobalMaxPooling1D(),
        Dense(3, activation='softmax'),
    ]
    model = Sequential(stack)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics= METRICS)

    model.summary()
    return model
## Network architecture
def bi_lstm(vocabulary_size, embedding_dim, hidden_dim, max_sent_length, embedding_matrix ):
    """
    Build and compile the bidirectional LSTM classifier.

    Layers: trainable Embedding initialised from ``embedding_matrix`` ->
    Dropout(0.2) -> Bidirectional(LSTM(hidden_dim / 2)) ->
    Dense(3, softmax). Compiled with categorical cross-entropy, adam and the
    shared METRICS; prints the summary and returns the model.
    """
    units_per_direction = int(hidden_dim / 2)
    stack = [
        Embedding(vocabulary_size, embedding_dim, input_length=max_sent_length,weights = [embedding_matrix], trainable=True),
        Dropout(0.2),
        Bidirectional(LSTM(units_per_direction)),
        Dense(3, activation='softmax'),
    ]
    model = Sequential(stack)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics= METRICS)

    model.summary()
    return model

# Builder functions keyed by model name (rebound by the later training cells).
model_list = {'LSTM': lstm, 'CNN': CNN, 'BI_LSTM':bi_lstm}

#### Stop by Accuracy

In [None]:
# Stop training once validation accuracy has not improved for 5 epochs.
es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=5)

In [None]:
# All three builders; the training cells below rebind model_list to one model each.
model_list = {'CNN': CNN, 'BI_LSTM':bi_lstm, 'LSTM': lstm, }

In [None]:
# Train the CNN with early stopping on validation accuracy.
model_list = {'CNN': CNN }
my_models = []
histories = []
for model_name, model in model_list.items():
    keras.backend.clear_session()
    my_model = model(vocabulary_size3, embedding_dim, hidden_dim, max_sent_length3, embedding_matrix3 )
    # Use the arrays this notebook actually defines (X_cod_train, y_cat_train,
    # X_cod_dev, y_cat_dev); the "...3"-suffixed names are not created anywhere
    # in this file.
    # NOTE(review): compute_sample_weight receives the one-hot matrix, which
    # scikit-learn treats as multi-output targets - confirm this weighting is
    # intended rather than weights computed from the 1-D labels.
    history = my_model.fit(X_cod_train, y_cat_train, validation_data = (X_cod_dev, y_cat_dev ), epochs = n_epoches, callbacks=[es],
                            sample_weight = compute_sample_weight('balanced',y_cat_train),verbose = 1)
    my_models += [my_model]
    histories += [history]
    bias_var_tradeoff_plot(history)

In [None]:
# Evaluate the models trained in the previous cell (Keras model_eval).
# The split variables are the ones defined in this notebook; the
# "...3"-suffixed names are not created anywhere in this file.
perf = []
for my_model, history, model_name in zip(my_models, histories, model_list.keys()):
    train_acc, train_f1 , val_acc, val_f1 , test_acc, test_f1 = model_eval(my_model, history, X_cod_train, y_train, X_cod_dev, y_dev, X_cod_test, y_cat_test, y_test, model_name = model_name)
    perf += [[model_name, train_acc, train_f1 , val_acc, val_f1 , test_acc, test_f1 ]]
perf = pd.DataFrame(perf, columns = ['model_name','train_acc','train_f1','val_acc','val_f1','test_acc','test_f1'])

In [None]:
# Train the bidirectional LSTM with early stopping on validation accuracy.
model_list = {'BI_LSTM':bi_lstm,}
my_models = []
histories = []
for model_name, model in model_list.items():
    keras.backend.clear_session()
    my_model = model(vocabulary_size3, embedding_dim, hidden_dim, max_sent_length3, embedding_matrix3 )
    # Use the arrays this notebook actually defines (X_cod_train, y_cat_train,
    # X_cod_dev, y_cat_dev); the "...3"-suffixed names are not created anywhere
    # in this file.
    # NOTE(review): compute_sample_weight receives the one-hot matrix, which
    # scikit-learn treats as multi-output targets - confirm this weighting is
    # intended rather than weights computed from the 1-D labels.
    history = my_model.fit(X_cod_train, y_cat_train, validation_data = (X_cod_dev, y_cat_dev ), epochs = n_epoches, callbacks=[es],
                            sample_weight = compute_sample_weight('balanced',y_cat_train),verbose = 1)
    my_models += [my_model]
    histories += [history]
    bias_var_tradeoff_plot(history)

In [None]:
# Evaluate the models trained in the previous cell (Keras model_eval).
# The split variables are the ones defined in this notebook; the
# "...3"-suffixed names are not created anywhere in this file.
# NOTE: `perf` is rebuilt from scratch, replacing the previous results frame.
perf = []
for my_model, history, model_name in zip(my_models, histories, model_list.keys()):
    train_acc, train_f1 , val_acc, val_f1 , test_acc, test_f1 = model_eval(my_model, history, X_cod_train, y_train, X_cod_dev, y_dev, X_cod_test, y_cat_test, y_test, model_name = model_name)
    perf += [[model_name, train_acc, train_f1 , val_acc, val_f1 , test_acc, test_f1 ]]
perf = pd.DataFrame(perf, columns = ['model_name','train_acc','train_f1','val_acc','val_f1','test_acc','test_f1'])

In [None]:
# Reload the accumulated results; the first CSV column is the saved index.
tmp_perf = pd.read_csv('perf.csv', index_col = 0)
tmp_perf

In [None]:
# Append the new results to the saved table. Row labels of both frames are
# kept as they are, so the written index can contain duplicates.
pd.concat([tmp_perf, perf]).to_csv('perf.csv')

In [None]:
# Plots the table as loaded above; rows appended in the previous cell are in
# the CSV only, not in tmp_perf.
performance_compare_plot(tmp_perf)

#### Stop by AUC

In [None]:
# Stop training once validation AUC (metric named 'auc') has not improved for 5 epochs.
es = EarlyStopping(monitor='val_auc', mode='max', verbose=1, patience=5)

In [None]:
# Train the CNN with early stopping on validation AUC.
model_list = {'CNN_auc': CNN }
my_models = []
histories = []
for model_name, model in model_list.items():
    # the Keras session is not cleared here (unlike the accuracy-stopped runs)
    #keras.backend.clear_session()
    my_model = model(vocabulary_size3, embedding_dim, hidden_dim, max_sent_length3, embedding_matrix3 )
    # NOTE(review): compute_sample_weight receives the one-hot matrix, which
    # scikit-learn treats as multi-output targets - confirm this weighting is
    # intended rather than weights computed from the 1-D labels.
    history = my_model.fit(X_cod_train, y_cat_train, validation_data = (X_cod_dev, y_cat_dev ), epochs = n_epoches, callbacks=[es],
                            sample_weight = compute_sample_weight('balanced',y_cat_train),verbose = 1)
    my_models += [my_model]
    histories += [history]
    bias_var_tradeoff_plot(history)
    

In [None]:
# Evaluate the models trained in the previous cell with the Keras model_eval
# (the one taking `history`); `perf` is rebuilt from scratch.
perf = []
for my_model, history, model_name in zip(my_models, histories, model_list.keys()):
    train_acc, train_f1 , val_acc, val_f1 , test_acc, test_f1 = model_eval(my_model, history, X_cod_train, y_train, X_cod_dev, y_dev, X_cod_test, y_cat_test, y_test, model_name = model_name)
    perf += [[model_name, train_acc, train_f1 , val_acc, val_f1 , test_acc, test_f1 ]]
perf = pd.DataFrame(perf, columns = ['model_name','train_acc','train_f1','val_acc','val_f1','test_acc','test_f1'])

In [None]:
# Reload the accumulated results; the first CSV column is the saved index.
tmp_perf = pd.read_csv('perf.csv', index_col = 0)
tmp_perf

In [None]:
# Append the new results to the saved table. Row labels of both frames are
# kept as they are, so the written index can contain duplicates.
pd.concat([tmp_perf, perf]).to_csv('perf.csv')

In [None]:
# Plots the table as loaded above; rows appended in the previous cell are in
# the CSV only, not in tmp_perf.
performance_compare_plot(tmp_perf)

In [None]:
# Train the bidirectional LSTM with early stopping on validation AUC.
model_list = {'BI_LSTM_auc': bi_lstm }
my_models = []
histories = []
for model_name, model in model_list.items():
    # the Keras session is not cleared here (unlike the accuracy-stopped runs)
    #keras.backend.clear_session()
    my_model = model(vocabulary_size3, embedding_dim, hidden_dim, max_sent_length3, embedding_matrix3 )
    # NOTE(review): compute_sample_weight receives the one-hot matrix, which
    # scikit-learn treats as multi-output targets - confirm this weighting is
    # intended rather than weights computed from the 1-D labels.
    history = my_model.fit(X_cod_train, y_cat_train, validation_data = (X_cod_dev, y_cat_dev ), epochs = n_epoches, callbacks=[es],
                            sample_weight = compute_sample_weight('balanced',y_cat_train),verbose = 1)
    my_models += [my_model]
    histories += [history]
    bias_var_tradeoff_plot(history)
    

In [None]:
# Evaluate the models trained in the previous cell with the Keras model_eval
# (the one taking `history`); `perf` is rebuilt from scratch.
perf = []
for my_model, history, model_name in zip(my_models, histories, model_list.keys()):
    train_acc, train_f1 , val_acc, val_f1 , test_acc, test_f1 = model_eval(my_model, history, X_cod_train, y_train, X_cod_dev, y_dev, X_cod_test, y_cat_test, y_test, model_name = model_name)
    perf += [[model_name, train_acc, train_f1 , val_acc, val_f1 , test_acc, test_f1 ]]
perf = pd.DataFrame(perf, columns = ['model_name','train_acc','train_f1','val_acc','val_f1','test_acc','test_f1'])

In [None]:
# Reload the accumulated results; the first CSV column is the saved index.
tmp_perf = pd.read_csv('perf.csv', index_col = 0)
tmp_perf

In [None]:
# Results of the most recent evaluation cell.
perf

In [None]:
# Append the new results to the saved table. Row labels of both frames are
# kept as they are, so the written index can contain duplicates.
pd.concat([tmp_perf, perf]).to_csv('perf.csv')

In [None]:
# Plot only the first six rows of the table as loaded above.
performance_compare_plot(tmp_perf[:6])

In [None]:
# Rank the models by test F1 (best first).
tmp_perf = tmp_perf.sort_values('test_f1', ascending = False)
# Display with the best value of every column highlighted.
tmp_perf.set_index('model_name').style.highlight_max( axis = 0, color = 'green')