In [1]:
import os
import re
import pandas as pd
import numpy as np

# Libraries for text preprocessing
import re
import nltk
#nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import RegexpTokenizer
#nltk.download('wordnet') 
from nltk.stem.wordnet import WordNetLemmatizer

In [2]:
pd.options.display.max_rows = 4000

In [3]:
##Creating a list of stop words and adding custom stopwords
stop_words = set(stopwords.words("english"))
print(stop_words)

{"didn't", 've', 'until', 'for', 'are', 'what', 'not', 'mightn', "she's", 'had', 'between', 'm', 'on', "doesn't", 'shouldn', 'weren', 'they', 'be', 'was', 'so', 'most', "weren't", 'i', 'themselves', 'can', 'do', 'we', 'before', 'under', 'over', 'itself', 'wouldn', 'ma', 're', 'd', 'while', 'him', 'isn', 'did', 'now', 'to', 'hers', 'their', "haven't", "aren't", "you'll", "isn't", "mustn't", 'at', 'from', 'y', 'ours', 'or', 'won', 'because', "hadn't", 'again', 'me', 'by', 'below', 'further', "wasn't", 'against', "don't", 'it', 'yours', 'no', 'about', 'them', 'its', 'himself', 'if', 'why', 'which', 'does', 'hadn', "should've", 'once', 'here', 'don', 'both', 'out', 'just', "shouldn't", 'she', "won't", "shan't", 'of', "needn't", 'then', 'he', 'nor', "you'd", 'off', 'the', 'some', 'shan', 'doing', 'ourselves', 'hasn', 'own', 'her', 'through', 'an', 'wasn', 'my', 'you', 'but', 'your', 'any', 'in', "hasn't", "it's", 'herself', 'same', 'his', 'above', 'those', 'more', 'ain', 'couldn', 'with', '

In [4]:
def preprocess(text):
    #Preprocess                
    text = text.replace('\n',' ')
    #Remove punctuations
    text = re.sub('[^a-zA-Z]', ' ', text)

    #Convert to lowercase
    text = text.lower()

    #remove tags
    text=re.sub("&lt;/?.*?&gt;"," &lt;&gt; ",text)

    # remove special characters and digits
    text=re.sub("(\\d|\\W)+"," ",text)

    ##Convert to list from string
    text = text.split()

    ##Stemming
    #ps=PorterStemmer()

    #Lemmatisation
    lem = WordNetLemmatizer()
    text = [lem.lemmatize(word) for word in text if not word in  
            stop_words] 
    text = " ".join(text)
    return text

path = '/home/user/Shyam/Code/Release_6.0/Dev/Snorkel/data/filtered/'
docs = []
filenames = []
labels = []

for root, dirs, files in os.walk(path):
    for file in files:
        with open (os.path.join(root+'/'+file), encoding='utf8') as f:            

            text = f.read()
            text = preprocess(text)                            
            docs.append(text)
            filenames.append(file)
            
            if 'msa' in root:
                labels.append('MSA')
            
            if 'sow' in root:
                labels.append('SOW')
            
            if 'addendum' in root:
                labels.append('Addendum')
                
            if 'nda' in root:
                labels.append('NDA')
                
            if 'other' in root:
                labels.append('Others')

print(len(docs))

df_labeled = pd.DataFrame(list(zip(filenames, docs, labels)), columns=['filename','text', 'label'])
df_labeled.head()

In [6]:
df_labeled = pd.read_csv('labeled_data.csv')
df_labeled.head()
print(df_labeled.shape)

(1400, 3)


In [7]:
df_labeled.label.value_counts()

MSA         467
Addendum    278
SOW         264
Others      257
NDA         134
Name: label, dtype: int64

In [9]:
#Split labelled data into test and dev sets
import numpy as np
msk = np.random.rand(len(df_labeled)) < 0.8

df_test = df_labeled[msk]
df_dev = df_labeled[~msk]

In [10]:
print(df_test.shape, df_dev.shape)

(1117, 3) (283, 3)


In [11]:
df_dev.label.value_counts()

MSA         89
Others      62
Addendum    55
SOW         46
NDA         31
Name: label, dtype: int64

df_test = pd.DataFrame()
for label in df_labeled.label.unique():
    df_test = df_test.append(df_labeled[df_labeled.label == label].sample(50))
df_test.shape

df_labeled = df_labeled[~df_labeled.isin(df_test)].dropna()
df_labeled.shape

df_dev = pd.DataFrame()
for label in df_labeled.label.unique():
    if label != 'NDA':
        df_dev = df_dev.append(df_labeled[df_labeled.label == label].sample(100))
    else:
        df_dev = df_dev.append(df_labeled[df_labeled.label == label].sample(50))
df_dev.shape

df_valid = df_labeled[~df_labeled.isin(df_dev)].dropna()
df_valid.shape

path = '/home/user/Shyam/DATASET/classified_corpus_text/'

docs = []
filenames = []

for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith('.txt') and file not in df_labeled.filename.values:
            with open (os.path.join(root+'/'+file), encoding='utf8') as f:            
                text = f.read()
                text = preprocess(text)                            
                docs.append(text)
                filenames.append(file)

print(len(docs))

df_train = pd.DataFrame(list(zip(filenames, docs)), columns=['filename','text'])
df_train.head()

In [13]:
df_unlabled = pd.read_csv('unlabeled_data.csv')
df_unlabled.shape

(15720, 2)

In [14]:
#Split labelled data into train and valid sets
import numpy as np
msk = np.random.rand(len(df_unlabled)) < 0.8

df_train = df_unlabled[msk]
df_valid = df_unlabled[~msk]

In [15]:
print(df_train.shape, df_valid.shape)

(12508, 2) (3212, 2)


In [16]:
y_test = df_test.label.map({'Addendum': 0, 'MSA': 1, 'SOW': 4, 'NDA': 2, 'Others': 3})
y_test = np.array(y_test)
np.bincount(y_test)

array([223, 378, 103, 195, 218])

In [17]:
y_dev = df_dev.label.map({'Addendum': 0, 'MSA': 1, 'SOW': 4, 'NDA': 2, 'Others': 3})
y_dev = np.array(y_dev)
np.bincount(y_dev)

array([55, 89, 31, 62, 46])

In [19]:
df_test.drop('label', axis=1, inplace=True)
df_dev.drop('label', axis=1, inplace=True)
#df_valid.drop('label', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [20]:
from snorkel.labeling import LabelingFunction
from snorkel.labeling import labeling_function
from snorkel.labeling import PandasLFApplier

ABSTAIN = -1
MSA = 1
SOW = 4
ADDENDUM = 0
NDA = 2
OTHERS = 3

labl_functions = []

def keyword_lookup(x, keywords, label):
    if any(word in x.text.lower() for word in keywords):
        return label
    return ABSTAIN

#===============MSA===================11
msa_keywords = ['indemnified party', 'indemnifying party', 'force majeure', 'intellectual industrial', 
                'wk service provider', 'intellectual industrial property', 'industrial property right', 
                'privacy restricted data', 'prior written notice', 'force majeure event', 'subject matter hereof']

def make_keyword_lf_msa(keywords, label=MSA):
    return LabelingFunction(
        name=f"keyword_{keywords[0]}",
        f=keyword_lookup,
        resources=dict(keywords=keywords, label=label))

for key in msa_keywords:
    labl_functions.append(make_keyword_lf_msa([key]))

@labeling_function()
def regex_agreement(x):
    return MSA if re.search(r"agreement (.+?) between (.+?) and (.+?)", x.text) else ABSTAIN
    
#labl_functions.append(regex_agreement)




#===============SOW===================12
sow_keywords = ['sow effective date', 'work sow', 'sow shall', 'sow term', 'service sow', 'defined sow', 
                'specified sow', 'outlined sow', 'addendum sow', 'client sow', 'sow agreement', 
                'statement work effective', 'sow end date', 'sow duration']

def make_keyword_lf_sow(keywords, label=SOW):
    return LabelingFunction(
        name=f"keyword_{keywords[0]}",
        f=keyword_lookup,
        resources=dict(keywords=keywords, label=label),
    )

for key in sow_keywords:
    labl_functions.append(make_keyword_lf_sow([key]))


    
    
#===============ADDENDUM===================25
addendum_keywords = ['rom work', 'addendum number', 'addendum part', 'amendment part',
                     'term addendum', 'term amendment', 'addendum made entered',
                     'addendum entered', 'duration addendum', 
                     'purpose addendum', 
                     'addendum executed', 'subsequent addendum', 'amendment number', 
                     'amendment date', 'amendment entered', 'amendment made', 'amendment executed', 
                     'amendment effective date', 
                     'addendum may executed', 'effective date addendum', 
                     'amendment made entered', 
                     'agreement hereby amended', 'service agreement amendment']

#'addendum effective date', 'cost plus maintenance', 'cost plus component', 'eligible cost plus', 'amendment become effective',
#'addendum become effective', 'purpose amendment', 'duration amendment', 'production health', 'effective date amendment', 

def make_keyword_lf_addendum(keywords, label=ADDENDUM):
    return LabelingFunction(
        name=f"keyword_{keywords[0]}",
        f=keyword_lookup,
        resources=dict(keywords=keywords, label=label),
    )

for key in addendum_keywords:
    labl_functions.append(make_keyword_lf_addendum([key]))

@labeling_function()
def regex_addendum(x):
    return ADDENDUM if re.search(r"(?:addendum|amendment) (?:is the (first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|[0-9](st|nd|rd|th))|no.\s?[0-9]|number (one|two|three|four|five|six|seven|eight|nine|ten|[0-9])|#\s?[0-9])", x.text) else ABSTAIN
    
#labl_functions.append(regex_addendum)



#===============NDA===================8
nda_keywords = ['mutual confidentiality', 'affiliated entity', 'agreement negotiation', 'disclosure hereunder', 
                'mutual confidentiality agreement', 'non confidential basis', 'confidential information agent', 
                'confidentiality non disclosure', 'party certain confidential information',
                'party desire disclose party', 'party wish protect','party furnish']
#'consider mutual agreement'


def make_keyword_lf_nda(keywords, label=NDA):
    return LabelingFunction(
        name=f"keyword_{keywords[0]}",
        f=keyword_lookup,
        resources=dict(keywords=keywords, label=label),
    )

for key in nda_keywords:
    labl_functions.append(make_keyword_lf_nda([key]))
    
    

#===============OTHERS===================
other_keywords = ['sir madam letter', 'letter inform', 'engagement letter', 'service order form',
                  'change request form', 'signature form', 
                  'agreement service order', 'service component order', 'term service order', 'component order']
#'software order form', 'sale order form', 'order form order', 

def make_keyword_lf_others(keywords, label=OTHERS):
    return LabelingFunction(
        name=f"keyword_{keywords[0]}",
        f=keyword_lookup,
        resources=dict(keywords=keywords, label=label),
    )

for key in other_keywords:
    labl_functions.append(make_keyword_lf_others([key]))

    
@labeling_function()
def others_lookup(x):
    if all(word not in x.text for word in list(set(msa_keywords + sow_keywords + nda_keywords + addendum_keywords))):
        return OTHERS
    return ABSTAIN

#labl_functions.append(others_lookup)

print(labl_functions)

[LabelingFunction keyword_indemnified party, Preprocessors: [], LabelingFunction keyword_indemnifying party, Preprocessors: [], LabelingFunction keyword_force majeure, Preprocessors: [], LabelingFunction keyword_intellectual industrial, Preprocessors: [], LabelingFunction keyword_wk service provider, Preprocessors: [], LabelingFunction keyword_intellectual industrial property, Preprocessors: [], LabelingFunction keyword_industrial property right, Preprocessors: [], LabelingFunction keyword_privacy restricted data, Preprocessors: [], LabelingFunction keyword_prior written notice, Preprocessors: [], LabelingFunction keyword_force majeure event, Preprocessors: [], LabelingFunction keyword_subject matter hereof, Preprocessors: [], LabelingFunction keyword_sow effective date, Preprocessors: [], LabelingFunction keyword_work sow, Preprocessors: [], LabelingFunction keyword_sow shall, Preprocessors: [], LabelingFunction keyword_sow term, Preprocessors: [], LabelingFunction keyword_service sow

count = 0
keys = ['agree obligate','consider mutual covenant','mutual benefit']

df_nda = df_dev[df_dev.label == 'MSA']
for text in df_nda.text.values:    
        for key in keys:
            if key in text:
                count += 1
            
print(count)

In [21]:
#Apply the label functions to the train and valid sets
applier = PandasLFApplier(lfs=labl_functions)
L_train = applier.apply(df=df_train)
L_dev = applier.apply(df=df_dev)
L_valid = applier.apply(df=df_valid)

  from pandas import Panel
100%|██████████| 12508/12508 [00:18<00:00, 692.02it/s]
100%|██████████| 283/283 [00:00<00:00, 590.90it/s]
100%|██████████| 3212/3212 [00:04<00:00, 690.20it/s]


In [22]:
#Check the performance of label functions

from snorkel.labeling import LFAnalysis
LFAnalysis(L=L_train, lfs=labl_functions).lf_summary().sort_values(by='Coverage')

Unnamed: 0,j,Polarity,Coverage,Overlaps,Conflicts
keyword_rom work,25,[0],0.00016,0.00016,0.00016
keyword_service component order,67,[3],0.000959,0.000959,0.000799
keyword_component order,69,[3],0.001199,0.001119,0.000959
keyword_wk service provider,4,[1],0.001599,0.001279,0.00024
keyword_signature form,65,[3],0.002638,0.001039,0.001039
keyword_sir madam letter,60,[3],0.003358,0.003358,0.0
keyword_sow end date,23,[4],0.003678,0.003678,0.001839
keyword_agreement hereby amended,46,[0],0.004797,0.003358,0.001839
keyword_amendment made entered,45,[0],0.005676,0.005676,0.002398
keyword_service agreement amendment,47,[0],0.005756,0.004957,0.001839


In [23]:
LFAnalysis(L=L_dev, lfs=labl_functions).lf_summary(y_dev).sort_values(by='Emp. Acc.')

Unnamed: 0,j,Polarity,Coverage,Overlaps,Conflicts,Correct,Incorrect,Emp. Acc.
keyword_sow end date,23,[],0.0,0.0,0.0,0,0,0.0
keyword_service order form,63,[3],0.003534,0.003534,0.003534,0,1,0.0
keyword_engagement letter,62,[],0.0,0.0,0.0,0,0,0.0
keyword_rom work,25,[],0.0,0.0,0.0,0,0,0.0
keyword_agreement hereby amended,46,[0],0.007067,0.003534,0.0,1,1,0.5
keyword_amendment date,38,[0],0.010601,0.0,0.0,2,1,0.666667
keyword_term service order,68,[3],0.021201,0.021201,0.007067,4,2,0.666667
keyword_sow term,14,[4],0.056537,0.04947,0.017668,12,4,0.75
keyword_term addendum,29,[0],0.031802,0.031802,0.010601,7,2,0.777778
keyword_force majeure,2,[1],0.116608,0.09894,0.021201,26,7,0.787879


from snorkel.analysis import get_label_buckets

buckets = get_label_buckets(y_dev, L_dev[:, 42])
buckets
#df_dev.iloc[buckets[(OTHERS, 1)]]

In [24]:
from snorkel.labeling import MajorityLabelVoter

majority_model = MajorityLabelVoter(5)
preds_train = majority_model.predict(L=L_train)

In [25]:
majority_acc = majority_model.score(L=L_dev, Y=y_dev)["accuracy"]
print(f"{'Majority Vote Accuracy:':<25} {majority_acc * 100:.1f}%")



Majority Vote Accuracy:   89.6%


In [26]:
from snorkel.labeling import LabelModel
label_model = LabelModel(cardinality=5, verbose=True)
label_model.fit(L_train, n_epochs=500, lr=0.001, log_freq=50, seed=123)

In [27]:
label_model_acc = label_model.score(L=L_dev, Y=y_dev)["accuracy"]
print(f"{'Label Model Accuracy:':<25} {label_model_acc * 100:.1f}%")



Label Model Accuracy:     87.5%


In [28]:
probs_train = label_model.predict_proba(L_train)
probs_train

array([[9.99962036e-01, 3.51458219e-07, 1.44226200e-06, 1.77799754e-05,
        1.83903225e-05],
       [5.24696881e-02, 9.04975673e-02, 4.18662470e-02, 6.18365632e-02,
        7.53329934e-01],
       [9.06454349e-01, 3.55441348e-03, 1.25847661e-03, 4.77458684e-03,
        8.39581745e-02],
       ...,
       [6.27365059e-19, 1.42218917e-24, 1.00000000e+00, 1.10575803e-15,
        1.34283491e-19],
       [6.27365059e-19, 1.42218917e-24, 1.00000000e+00, 1.10575803e-15,
        1.34283491e-19],
       [2.57528153e-09, 2.45968824e-11, 9.99999937e-01, 5.85094270e-08,
        2.02912788e-09]])

In [29]:
probs_valid = label_model.predict_proba(L_valid)
probs_valid

array([[2.74031266e-11, 6.58467341e-12, 2.51105914e-14, 8.65840742e-12,
        1.00000000e+00],
       [9.99995005e-01, 5.51636277e-08, 1.33356781e-07, 2.14394546e-06,
        2.66219812e-06],
       [2.00000000e-01, 2.00000000e-01, 2.00000000e-01, 2.00000000e-01,
        2.00000000e-01],
       ...,
       [3.41155511e-14, 2.28266254e-11, 1.00000000e+00, 4.54966554e-12,
        4.55157911e-14],
       [6.04504604e-13, 4.70909390e-17, 1.00000000e+00, 5.78060588e-11,
        3.47020614e-13],
       [1.40769293e-13, 6.33675595e-20, 1.00000000e+00, 3.15736168e-11,
        2.88973455e-14]])

In [30]:
from snorkel.labeling import filter_unlabeled_dataframe

df_train_filtered, probs_train_filtered = filter_unlabeled_dataframe(
    X=df_train, y=probs_train, L=L_train
)

In [31]:
print(df_train_filtered.shape, probs_train_filtered.shape)

(6345, 2) (6345, 5)


In [32]:
x_train = df_train_filtered.text.values
y_train = probs_train
x_valid = df_valid.text.values
y_valid = probs_valid

In [33]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# prepare tokenizer
t = Tokenizer(num_words=10000)
t.fit_on_texts(x_train)
t.fit_on_texts(x_valid)
post_seq_train = t.texts_to_sequences(x_train)
post_seq_valid = t.texts_to_sequences(x_valid)
post_seq_padded_train = pad_sequences(post_seq_train, maxlen=200, padding='post')
post_seq_padded_valid = pad_sequences(post_seq_valid, maxlen=200, padding='post')

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [34]:
print(post_seq_padded_train.shape, y_train.shape, post_seq_padded_valid.shape, y_valid.shape)

(6345, 200) (6345, 5) (3212, 200) (3212, 5)


In [39]:
from snorkel.analysis import metric_score
from snorkel.utils import preds_to_probs
from utils import get_keras_lstm, get_keras_early_stopping

# Define a vanilla logistic regression model with Keras
keras_model = get_keras_lstm(20000)

keras_model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_valid, y_valid),
    callbacks=[get_keras_early_stopping()],
    epochs=20,
    verbose=2
)

ValueError: A target array with shape (6345, 5) was passed for an output of shape (None, 1) while using as loss `binary_crossentropy`. This loss expects targets to have the same shape as the output.

In [40]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from utils import get_keras_early_stopping

max_features = 20000 # cut texts after this number of words
maxlen = 100 # (among top max_features most common words)
batch_size = 32

model = Sequential()
model.add(Embedding(max_features, 128, input_length=200))
model.add(Bidirectional(LSTM(64)))
#model.add(Dropout(0.5))
model.add(Dense(5, activation='softmax'))

In [41]:
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

In [42]:
model.fit(post_seq_padded_train, y_train,
          batch_size=batch_size,
          epochs=25,
          validation_data=[post_seq_padded_valid, y_valid],
          callbacks=[get_keras_early_stopping()])

Train on 6345 samples, validate on 3212 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Restoring model weights from the end of the best epoch.
Epoch 00012: early stopping


<keras.callbacks.History at 0x7f6696fb7278>

In [50]:
preds_valid = model.predict(post_seq_padded_valid).argmax(axis=1)
preds_valid

array([4, 4, 1, ..., 1, 1, 1])

In [51]:
i = np.bincount(preds_valid)
ii = np.nonzero(i)[0]
np.vstack((ii,i[ii])).T

array([[   0,  266],
       [   1, 1307],
       [   2,    1],
       [   3,  791],
       [   4,  761]])

In [55]:
i = np.bincount(y_valid.argmax(axis=1))
ii = np.nonzero(i)[0]
np.vstack((ii,i[ii])).T

array([[   0,  335],
       [   1, 1255],
       [   3,  791],
       [   4,  745]])

In [56]:
from snorkel.analysis import metric_score
test_acc = metric_score(golds=y_valid.argmax(axis=1), preds=preds_valid, metric="accuracy")
print(f"Test Accuracy: {test_acc * 100:.1f}%")

Test Accuracy: 84.9%


In [172]:
L_test = applier.apply(df=df_test)

  from pandas import Panel
100%|██████████| 250/250 [00:00<00:00, 424.35it/s]


In [107]:
probs_test = label_model.predict_proba(L_test)
probs_test

array([[9.99502253e-01, 1.09733980e-04, 1.28094946e-04, 1.15306255e-13,
        2.59917615e-04],
       [9.88115653e-01, 7.79762757e-03, 1.11658684e-03, 1.34293003e-10,
        2.97013292e-03],
       [9.99502253e-01, 1.09733980e-04, 1.28094946e-04, 1.15306255e-13,
        2.59917615e-04],
       ...,
       [1.53994039e-08, 9.55237364e-11, 9.99999982e-01, 8.49228287e-23,
        2.37759142e-09],
       [4.95747546e-10, 2.02490512e-09, 9.99999997e-01, 2.65123577e-27,
        1.10078215e-10],
       [6.45498946e-04, 2.71567553e-02, 9.71639731e-01, 4.42465519e-13,
        5.58015256e-04]])

In [175]:
x_test = df_test.text.values
t.fit_on_texts(x_test)
post_seq_test = t.texts_to_sequences(x_test)
post_seq_padded_test = pad_sequences(post_seq_test, maxlen=200, padding='post')

In [176]:
preds_test = model.predict(post_seq_padded_test).argmax(axis=1)
preds_test

array([1, 3, 3, 1, 1, 4, 3, 1, 3, 1, 3, 3, 2, 1, 3, 3, 4, 1, 3, 1, 3, 2,
       4, 4, 3, 3, 3, 4, 3, 1, 1, 3, 2, 1, 3, 3, 1, 1, 2, 3, 4, 3, 1, 3,
       1, 4, 1, 1, 4, 1, 1, 3, 3, 3, 3, 3, 1, 3, 3, 1, 1, 3, 1, 0, 1, 0,
       1, 3, 4, 3, 4, 0, 1, 3, 3, 4, 3, 3, 1, 1, 1, 1, 3, 3, 1, 3, 4, 1,
       3, 3, 1, 1, 3, 3, 3, 1, 1, 1, 1, 3, 1, 4, 1, 4, 1, 2, 3, 3, 3, 1,
       1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 3, 1, 1, 1, 1, 4,
       3, 1, 1, 1, 0, 1, 4, 1, 1, 3, 1, 1, 1, 3, 1, 1, 0, 3, 1, 1, 1, 4,
       1, 4, 0, 2, 0, 3, 1, 3, 3, 1, 1, 4, 3, 1, 1, 1, 1, 4, 4, 3, 4, 1,
       3, 3, 4, 0, 0, 4, 4, 3, 4, 1, 4, 1, 0, 3, 0, 3, 3, 1, 0, 0, 4, 4,
       1, 4, 1, 3, 3, 3, 1, 1, 4, 1, 1, 1, 4, 4, 1, 0, 1, 3, 1, 4, 1, 3,
       1, 1, 1, 4, 3, 3, 4, 1, 1, 1, 4, 1, 1, 3, 1, 1, 4, 1, 3, 1, 1, 3,
       1, 1, 0, 4, 4, 1, 4, 1])

In [113]:
i = np.bincount(preds_test)
ii = np.nonzero(i)[0]
np.vstack((ii,i[ii])).T

array([[  0,  17],
       [  1, 107],
       [  2,   5],
       [  3,  74],
       [  4,  47]])

In [114]:
i = np.bincount(y_test)
ii = np.nonzero(i)[0]
np.vstack((ii,i[ii])).T

array([[ 0, 50],
       [ 1, 50],
       [ 2, 50],
       [ 3, 50],
       [ 4, 50]])

In [177]:
from snorkel.analysis import metric_score
test_acc = metric_score(golds=y_test, preds=preds_test, metric="accuracy")
print(f"Test Accuracy: {test_acc * 100:.1f}%")

Test Accuracy: 27.6%
