In [None]:
!python -m spacy download en_core_web_lg

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from tqdm.notebook import tqdm
import spacy

import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier

from string import punctuation

from sklearn.metrics.pairwise import cosine_similarity
from sklearn import svm

import random
import pickle

#---------------downloading ---------------------
spacy = spacy.load("en_core_web_lg")
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('punkt')
stopWords = stopwords.words('english')

In [2]:
#SNI dataset:
train_raw_sni = pd.read_json("./drive/MyDrive/ColabData/snli_1.0_train.jsonl",lines = True)
train_raw_sni = train_raw_sni[train_raw_sni.gold_label!='-'][:3633]


#RTE dataset
train_raw_rte3_train = pd.read_xml("./drive/MyDrive/ColabData/rte3_dev.xml")# 800 sample data
train_raw_rte3_train.drop(columns=("length"),inplace = True)
train_raw_rte2_train = pd.read_xml("./drive/MyDrive/ColabData/rte2_dev.xml")# 400 sample data
train_raw_rte2_test = pd.read_xml("./drive/MyDrive/ColabData/rte2_test.xml")# 800 sample data
train_raw_rte1_train = pd.read_xml("./drive/MyDrive/ColabData/rte1_dev.xml")# 567 sample data
train_raw_rte1_test = pd.read_xml("./drive/MyDrive/ColabData/rte1_test.xml")# 800 sample data
train_raw_rte1_train.rename(columns = {"value":"entailment"},inplace = True)
train_raw_rte1_test.rename(columns = {"value":"entailment"},inplace = True)


train_raw = train_raw_rte3_train.append(train_raw_rte2_train,ignore_index=True)
train_raw = train_raw.append(train_raw_rte2_test,ignore_index=True)
train_raw = train_raw.append(train_raw_rte1_train,ignore_index=True)
train_raw = train_raw.append(train_raw_rte1_test,ignore_index=True)




train_raw.loc[ train_raw["entailment"] == "YES",   "entailment" ] = "entailment"
train_raw.loc[ train_raw["entailment"] == True,  "entailment" ] = "entailment"
train_raw.loc[ train_raw["entailment"] == "NO",    "entailment" ] = "other"
train_raw.loc[ train_raw["entailment"] == False, "entailment" ] = "other"

train_raw.rename(columns = {"t":"sentence1","h":"sentence2","entailment":"gold_label"},inplace = True)
train_raw = train_raw.append(train_raw_sni,ignore_index = True)



#RTE dataset
test_raw_rte3 = pd.read_xml("./drive/MyDrive/ColabData/rte3_test.xml")
test_raw_sni = pd.read_json("./drive/MyDrive/ColabData/snli_1.0_test.jsonl",lines = True)
test_raw_sni = train_raw_sni[train_raw_sni.gold_label!='-'][:1200]

test_raw_rte3.loc[test_raw_rte3["entailment"]=="YES","entailment"] = "entailment"
test_raw_rte3.loc[test_raw_rte3["entailment"]=="NO","entailment"] = "other"
test_raw_rte3.rename(columns = {"t":"sentence1","h":"sentence2","entailment":"gold_label"},inplace = True)

test_raw = test_raw_rte3.append(test_raw_sni,ignore_index = True)



In [3]:
train = train_raw
train.replace("neutral","other", inplace = True)
train.replace("contradiction","other", inplace = True)
train = train.reset_index()

test = test_raw

print("len test:",len(test),"len train:",len(train))

len test: 2000 len train: 7000


In [4]:
def normalizedTokenizer(text):
   wnl = WordNetLemmatizer()
   ps = PorterStemmer()
   return [ps.stem(wnl.lemmatize(x)) for x in word_tokenize(text.lower()) if x not in punctuation and x not in stopWords] #

# First approach, cosine-similarity

In [5]:
def generateBagOfWord(sentences):
  BOW = defaultdict(lambda: defaultdict(lambda: 0))
  corpus = list(enumerate(sentences))
  for i, document in corpus:
    tokens = normalizedTokenizer(document)
    for token in tokens:
        BOW[i][token] += 1
  return BOW

In [6]:
def predict(T,H,threshold):
  bow = generateBagOfWord([T,H])
  data = (pd.DataFrame(bow).fillna(0, inplace=False).T).values
  similarity = cosine_similarity(data[0].reshape(1, -1),data[1].reshape(1, -1))
  if similarity > threshold:
    return "entailment"
  else:
    return "other"

In [7]:
def evaluation(threshold):
  tp = fp = fn = tn = 0
  for index in range(len(test)):
    prediction = predict(test.sentence1[index],test.sentence2[index],threshold)
    actualLable = test.gold_label[index]
    if actualLable == "entailment":
      if prediction == "entailment":
        tp +=1
      else:
        fn +=1
    else:
        if prediction == "other":
          tn+=1
        else:
            fp+=1
  return {"Accuracy":round( (tp+tn) / (tp+tn+fp+fn),2),"Percision":round(tp/(tp+fp),2),"Recall":round(tp/(tp+fn),2)}

In [8]:
threshold = 0.3
evaluation(threshold)

{'Accuracy': 0.61, 'Percision': 0.52, 'Recall': 0.79}

In [9]:
T = "Tim managed to stop the car"
H = "Tim stopped the car"
H1 = "Tim tried to stop the car"
H2 = "Tim did something to the car"
H3 = "the color of the cor is blue"
H4 = "car is not moving any more"
print(" T->H",predict (T,H,threshold),"correct:entrailment\n","T->H1",predict (T,H1,threshold),"correct:entrailment\n","T->H2",predict (T,H2,threshold),"correct:entrailment\n",
      "T->H3",predict (T,H3,threshold),"correct:other\n","T->H4",predict (T,H4,threshold),"correct:entrailment\n",)


T = "Tim has bought fish tonight"
H = "Tim has spent money"
H1 = "Tim has bought something"
H2 = "Tim Is relaxed and happy!"
print(" T->H",predict (T,H,threshold),"correct:entrailment\n","T->H1",predict (T,H1,threshold),"correct:entrailment\n","T->H2",predict (T,H2,threshold),"correct:other\n",)


T = "A girl playing a violin along with a group of people"
H = "A girl is playing an instrument."
H1 = "A girl is playing outdoor alone"
H2 = "A girl is playing volleyball with other groups of people"
print(" T->H",predict (T,H,threshold),"correct:entrailment\n","T->H1",predict (T,H1,threshold),"correct:other\n","T->H2",predict (T,H2,threshold),"correct:other\n",)

 T->H entailment correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 entailment correct:entrailment
 T->H3 other correct:other
 T->H4 entailment correct:entrailment

 T->H other correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 other correct:other

 T->H entailment correct:entrailment
 T->H1 entailment correct:other
 T->H2 entailment correct:other



# Second approach: Cosine Directional Similarity

In [10]:
def predict(T,H,threshold):
  threshold_one = threshold[0]
  threshold_two = threshold[1]
  threshold_three = threshold[2]
  sentence_one = normalizedTokenizer(T)
  sentence_two = normalizedTokenizer(H)
  c = len([x for x in sentence_one if x in sentence_two])
  cos_T = np.sqrt(c/len(sentence_one))
  cos_H = np.sqrt(c/len(sentence_two))
  cos_HT = np.sqrt( (4*np.power(c,2)) / ( (len(sentence_two)+c) * (len(sentence_one)+c) ) )
  if (cos_HT - cos_T) <= threshold_one and (cos_H - cos_HT) <= threshold_two and max(cos_H,cos_T,cos_HT) >=threshold_three:
    return "entailment"
  else:
    return "other"

In [11]:
def evaluation(threshold):
  tp = fp = fn = tn = 0
  for index in range(len(test)):
    prediction = predict(test.sentence1[index],test.sentence2[index],threshold)
    actualLable = test.gold_label[index]
    if actualLable == "entailment":
      if prediction == "entailment":
        tp +=1
      else:
        fn +=1
    else:
        if prediction == "other":
          tn+=1
        else:
            fp+=1
  return {"Accuracy":round( (tp+tn) / (tp+tn+fp+fn),2),"Percision":round(tp/(tp+fp),2),"Recall":round(tp/(tp+fn),2)}

In [12]:
threshold = [0.5,0.7,0.7]
evaluation(threshold)

{'Accuracy': 0.6, 'Percision': 0.51, 'Recall': 0.87}

In [13]:
T = "Tim managed to stop the car"
H = "Tim stopped the car"
H1 = "Tim tried to stop the car"
H2 = "Tim did something to the car"
H3 = "the color of the cor is blue"
H4 = "car is not moving any more"
print(" T->H",predict (T,H,threshold),"correct:entrailment\n","T->H1",predict (T,H1,threshold),"correct:entrailment\n","T->H2",predict (T,H2,threshold),"correct:entrailment\n",
      "T->H3",predict (T,H3,threshold),"correct:other\n","T->H4",predict (T,H4,threshold),"correct:entrailment\n",)


T = "Tim has bought fish tonight"
H = "Tim has spent money"
H1 = "Tim has bought something"
H2 = "Tim Is relaxed and happy!"
print(" T->H",predict (T,H,threshold),"correct:entrailment\n","T->H1",predict (T,H1,threshold),"correct:entrailment\n","T->H2",predict (T,H2,threshold),"correct:other\n",)


T = "A girl playing a violin along with a group of people"
H = "A girl is playing an instrument."
H1 = "A girl is playing outdoor alone"
H2 = "A girl is playing volleyball with other groups of people"
print(" T->H",predict (T,H,threshold),"correct:entrailment\n","T->H1",predict (T,H1,threshold),"correct:other\n","T->H2",predict (T,H2,threshold),"correct:other\n",)

 T->H entailment correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 entailment correct:entrailment
 T->H3 other correct:other
 T->H4 entailment correct:entrailment

 T->H other correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 other correct:other

 T->H entailment correct:entrailment
 T->H1 entailment correct:other
 T->H2 entailment correct:other



# Third Approach, Modified Lev. Distance

In [14]:
def editDistance(a,b):
  remove_cost = 3
  insert_cost = 3
  swap_cost = 2
  substitude_cost = 5
  cost = 0
  if len(b) < len(a):
    difference = (len(a) - len(b))
    cost += (remove_cost * difference)
    a = a[:-difference]
  elif len(b) > len(a):
    difference = (len(b) - len(a))
    cost += (insert_cost * difference)
    a = a + b[-difference:]
  for index in range(len(b)):
    if a[index] != b[index]:
      if index != len(b)-1 and index != 0:
         if a[index + 1] == b[index]:
           cost += swap_cost
           a = a[:index]+a[index+1]+a[index]+a[index+2:]
         elif a[index - 1] == b[index]:
           cost += swap_cost
           a = a[:index-1]+a[index-1]+a[index]+a[index+1:]
      else:
        cost += substitude_cost
        a = a[:index]+b[index]+a[index+1:]
  return cost


def predict(T,H):
  sentence_one = "".join(normalizedTokenizer(T))
  sentence_two = "".join(normalizedTokenizer(H))
  ed_TH = editDistance(sentence_one,sentence_two)
  ed_HT = editDistance(sentence_two,sentence_one)
  if ed_TH < ed_HT:
    return "entailment"
  else:
    return "other"

In [15]:
def evaluation():
  tp = fp = fn = tn = 0
  for index in range(len(test)):
    prediction = predict(test.sentence1[index],test.sentence2[index])
    actualLable = test.gold_label[index]
    if actualLable == "entailment":
      if prediction == "entailment":
        tp +=1
      else:
        fn +=1
    else:
        if prediction == "other":
          tn+=1
        else:
            fp+=1
  return {"Accuracy":round( (tp+tn) / (tp+tn+fp+fn),2),"Percision":round(tp/(tp+fp),2),"Recall":round(tp/(tp+fn),2)}

In [16]:
evaluation()

{'Accuracy': 0.55, 'Percision': 0.23, 'Recall': 0.05}

In [17]:
T = "Tim managed to stop the car"
H = "Tim stopped the car"
H1 = "Tim tried to stop the car"
H2 = "Tim did something to the car"
H3 = "the color of the cor is blue"
H4 = "car is not moving any more"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:entrailment\n",
      "T->H3",predict (T,H3),"correct:other\n","T->H4",predict (T,H4),"correct:entrailment\n",)


T = "Tim has bought fish tonight"
H = "Tim has spent money"
H1 = "Tim has bought something"
H2 = "Tim Is relaxed and happy!"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:other\n",)


T = "A girl playing a violin along with a group of people"
H = "A girl is playing an instrument."
H1 = "A girl is playing outdoor alone"
H2 = "A girl is playing volleyball with other groups of people"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:other\n","T->H2",predict (T,H2),"correct:other\n",)

 T->H other correct:entrailment
 T->H1 other correct:entrailment
 T->H2 other correct:entrailment
 T->H3 other correct:other
 T->H4 other correct:entrailment

 T->H other correct:entrailment
 T->H1 other correct:entrailment
 T->H2 other correct:other

 T->H other correct:entrailment
 T->H1 entailment correct:other
 T->H2 other correct:other



# Fourth Approach, SVM and GaussianNB

In [18]:
def tagsDifference(T,H):
  counter = 0
  sentence_one = spacy(T)
  sentence_two = spacy(H)
  tags_T = []
  tags_H = []
  for token in sentence_one:
    tags_T.append(token.tag_)
  for token in sentence_two:
    tags_H.append(token.tag_)  
  for tag in tags_H:
    if tag in tags_T:
      tags_T.remove(tag)
      counter +=1
  return counter / len(tags_H)



def tagsSimilarity(T,H):
  
  sentence_one = spacy(T)
  sentence_two = spacy(H)
  tags_T = []
  tags_H = []
  for token in sentence_one:
    tags_T.append(token.tag_)
  for token in sentence_two:
    tags_H.append(token.tag_)  

  bow = generateBagOfWord([" ".join(tags_T)," ".join(tags_H)])
  data = (pd.DataFrame(bow).fillna(0, inplace=False).T).values
  similarity = cosine_similarity(data[0].reshape(1, -1),data[1].reshape(1, -1)).flatten()[0]
  return similarity

In [19]:
def generateData(T,H):
  vector = []
  bow = generateBagOfWord([T,H])
  data = (pd.DataFrame(bow).fillna(0, inplace=False).T).values

  similarity = cosine_similarity(data[0].reshape(1, -1),data[1].reshape(1, -1)).flatten()[0]

  sentence_one = normalizedTokenizer(T)
  sentence_two = normalizedTokenizer(H)
  c = len([x for x in sentence_one if x in sentence_two])
  cos_T = np.sqrt(c/len(sentence_one))
  cos_H = np.sqrt(c/len(sentence_two))
  cos_HT = np.sqrt( (4*np.power(c,2)) / ( (len(sentence_two)+c) * (len(sentence_one)+c) ) )

  sentence_one = "".join(normalizedTokenizer(T))
  sentence_two = "".join(normalizedTokenizer(H))
  ed_TH = editDistance(sentence_one,sentence_two)
  ed_HT = editDistance(sentence_two,sentence_one)

  vector.append(similarity)
  vector.append(cos_T)
  vector.append(cos_H)
  vector.append(cos_HT)
  vector.append(ed_HT - ed_TH)
  vector.append(tagsDifference(T,H))
  vector.append(tagsSimilarity(T,H))
  vector.append(np.sqrt(tagsSimilarity(T,H)))
  return vector


def getX_Y(indecces,dataset):
  x = []
  y = []
  for index in tqdm(indecces):
    x.append(generateData(dataset.sentence1[index], dataset.sentence2[index]))
    if dataset.gold_label[index] == "entailment":
      y.append(1)
    else:
      y.append(0)
  return x,y

In [20]:
indecces = train.index.tolist()
random.shuffle(indecces)
X,Y = getX_Y(indecces,train)

indecces = test.index.tolist()
random.shuffle(indecces)
X_test,Y_test = getX_Y(indecces,test)

  0%|          | 0/7000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

In [21]:
#---------------------Store prepared Data--------------------------------
with open('./drive/MyDrive/ColabData/X_train.data', 'wb') as f:
        pickle.dump(X, f)

with open('./drive/MyDrive/ColabData/Y_train.data', 'wb') as f:
        pickle.dump(Y, f)

with open('./drive/MyDrive/ColabData/X_test.data', 'wb') as f:
        pickle.dump(X_test, f)

with open('./drive/MyDrive/ColabData/Y_test.data', 'wb') as f:
        pickle.dump(Y_test, f)

In [22]:
#---------------------Load prepared Data--------------------------------
with open('./drive/MyDrive/ColabData/X_train.data', 'rb') as f:
        X = pickle.load(f)

with open('./drive/MyDrive/ColabData/Y_train.data', 'rb') as f:
        Y = pickle.load(f)

with open('./drive/MyDrive/ColabData/X_test.data', 'rb') as f:
        X_test = pickle.load(f)

with open('./drive/MyDrive/ColabData/Y_test.data', 'rb') as f:
        Y_test = pickle.load(f)

**SVM**

In [58]:
def clf_model():
  clf = svm.SVC(kernel = 'rbf', C = 200, gamma = 200)
  clf.fit(X,Y)
  return clf

model_SVM = clf_model()

In [24]:
def predict(T,H):
  test = [generateData(T,H)]
  if model_SVM.predict(test)[0] == 1:
      return "entailment"
  else:
      return "other"

def evaluate():
  tp = fp = fn = tn = 0
  for index in tqdm(range(len(test))):

    prediction =model_SVM.predict( [X_test[index]] )[0]
    actualLable = Y_test[index]
    if actualLable == 1:
      if prediction == 1:
        tp +=1
      else:
        fn +=1
    else:
        if prediction == 0:
          tn+=1
        else:
            fp+=1
  return {"Accuracy":round( (tp+tn) / (tp+tn+fp+fn),2),"Percision":round(tp/(tp+fp),2),"Recall":round(tp/(tp+fn),2)}

In [59]:
#{'Accuracy': 0.79, 'Percision': 0.77, 'Recall': 0.7}  C = 10 gamma = 200 and 1 epoch
#{'Accuracy': 0.70, 'Percision': 0.64, 'Recall': 0.58}  C = 10 gamma = 10 and 2 epochs
#{'Accuracy': 0.70, 'Percision': 0.64, 'Recall': 0.58}  C = 10 gamma = 10 and 5 epochs
evaluate() 

  0%|          | 0/2000 [00:00<?, ?it/s]

{'Accuracy': 0.81, 'Percision': 0.79, 'Recall': 0.71}

In [60]:
T = "Tim managed to stop the car"
H = "Tim stopped the car"
H1 = "Tim tried to stop the car"
H2 = "Tim did something to the car"
H3 = "the color of the cor is blue"
H4 = "car is not moving any more"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:entrailment\n",
      "T->H3",predict (T,H3),"correct:other\n","T->H4",predict (T,H4),"correct:entrailment\n",)


T = "Tim has bought fish tonight"
H = "Tim has spent money tonight"
H1 = "Tim has bought something"
H2 = "Tim Is relaxed and happy!"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:other\n",)


T = "A girl playing a violin along with a group of people"
H = "A girl is playing an instrument."
H1 = "A girl is playing outdoor alone"
H2 = "A girl is playing volleyball with other groups of people"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:other\n","T->H2",predict (T,H2),"correct:other\n",)

 T->H other correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 other correct:entrailment
 T->H3 other correct:other
 T->H4 other correct:entrailment

 T->H other correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 other correct:other

 T->H other correct:entrailment
 T->H1 other correct:other
 T->H2 other correct:other



**GaussianNB Classifier**

In [80]:
def clf_model():
  clf = GaussianNB(var_smoothing= 1e-3)
  clf.fit(X,Y)
  return clf

model_NB = clf_model()

In [81]:
def predict(T,H):
  test = [generateData(T,H)]
  if model_NB.predict(test)[0] == 1:
      return "entailment"
  else:
      return "other"

def evaluate():
  tp = fp = fn = tn = 0
  for index in tqdm(range(len(test))):
    prediction = model_NB.predict( [X_test[index]] )[0]
    actualLable = Y_test[index]
    if actualLable == 1:
      if prediction == 1:
        tp +=1
      else:
        fn +=1
    else:
        if prediction == 0:
          tn+=1
        else:
            fp+=1
  return {"Accuracy":round( (tp+tn) / (tp+tn+fp+fn),2),"Percision":round(tp/(tp+fp),2),"Recall":round(tp/(tp+fn),2)}

In [82]:
#{'Accuracy': 0.66, 'Percision': 0.56, 'Recall': 0.72} for 1 epoch
#{'Accuracy': 0.64, 'Percision': 0.55, 'Recall': 0.74} for 2 epochs
#{'Accuracy': 0.64, 'Percision': 0.55, 'Recall': 0.74} for 5 epochs
evaluate()

  0%|          | 0/2000 [00:00<?, ?it/s]

{'Accuracy': 0.66, 'Percision': 0.56, 'Recall': 0.72}

In [83]:
T = "Tim managed to stop the car"
H = "Tim stopped the car"
H1 = "Tim tried to stop the car"
H2 = "Tim did something to the car"
H3 = "the color of the cor is blue"
H4 = "car is not moving any more"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:entrailment\n",
      "T->H3",predict (T,H3),"correct:other\n","T->H4",predict (T,H4),"correct:entrailment\n",)


T = "Tim has bought fish tonight"
H = "Tim has spent money"
H1 = "Tim has bought something"
H2 = "Tim Is relaxed and happy!"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:other\n",)


T = "A girl playing a violin along with a group of people"
H = "A girl is playing an instrument."
H1 = "A girl is playing outdoor alone"
H2 = "A girl is playing volleyball with other groups of people"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:other\n","T->H2",predict (T,H2),"correct:other\n",)

 T->H entailment correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 entailment correct:entrailment
 T->H3 other correct:other
 T->H4 other correct:entrailment

 T->H other correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 other correct:other

 T->H entailment correct:entrailment
 T->H1 other correct:other
 T->H2 entailment correct:other



**Decision Tree classifier**

In [117]:
def clf_model():
  clf = DecisionTreeClassifier()
  clf.fit(X,Y)
  return clf

model_DTC = clf_model()

In [118]:
def predict(T,H):
  test = [generateData(T,H)]
  if model_DTC.predict(test)[0] == 1:
      return "entailment"
  else:
      return "other"


def evaluate():
  tp = fp = fn = tn = 0
  for index in tqdm(range(len(test))):
    prediction = model_DTC.predict( [X_test[index]] )[0]
    actualLable = Y_test[index]
    if actualLable == 1:
      if prediction == 1:
        tp +=1
      else:
        fn +=1
    else:
        if prediction == 0:
          tn+=1
        else:
            fp+=1
  return {"Accuracy":round( (tp+tn) / (tp+tn+fp+fn),2),"Percision":round(tp/(tp+fp),2),"Recall":round(tp/(tp+fn),2)}

In [119]:
#{'Accuracy': 0.71, 'Percision': 0.67, 'Recall': 0.55} for 1 epoch
#{'Accuracy': 0.82, 'Percision': 0.78, 'Recall': 0.78}
#{'Accuracy': 0.71, 'Percision': 0.67, 'Recall': 0.55} for 2 epochs
#{'Accuracy': 0.71, 'Percision': 0.67, 'Recall': 0.55} for 5 epochs

evaluate() 

  0%|          | 0/2000 [00:00<?, ?it/s]

{'Accuracy': 0.82, 'Percision': 0.78, 'Recall': 0.78}

In [120]:
T = "Tim managed to stop the car"
H = "Tim stopped the car"
H1 = "Tim tried to stop the car"
H2 = "Tim did something to the car"
H3 = "the color of the cor is blue"
H4 = "car is not moving any more"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:entrailment\n",
      "T->H3",predict (T,H3),"correct:other\n","T->H4",predict (T,H4),"correct:entrailment\n",)


T = "Tim has bought fish tonight"
H = "Tim has spent money"
H1 = "Tim has bought something"
H2 = "Tim Is relaxed and happy!"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:other\n",)


T = "A girl playing a violin along with a group of people"
H = "A girl is playing an instrument."
H1 = "A girl is playing outdoor alone"
H2 = "A girl is playing volleyball with other groups of people"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:other\n","T->H2",predict (T,H2),"correct:other\n",)

 T->H other correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 entailment correct:entrailment
 T->H3 other correct:other
 T->H4 other correct:entrailment

 T->H other correct:entrailment
 T->H1 other correct:entrailment
 T->H2 entailment correct:other

 T->H other correct:entrailment
 T->H1 entailment correct:other
 T->H2 entailment correct:other



**AdaBoost Classifier**

In [139]:
def clf_model():
  clf = AdaBoostClassifier(n_estimators=10)
  clf.fit(X,Y)
  return clf

model_Ada = clf_model()

In [140]:
def predict(T,H):
  test = [generateData(T,H)]
  if model_Ada.predict(test)[0] == 1:
      return "entailment"
  else:
      return "other"

def evaluate():
  tp = fp = fn = tn = 0
  for index in tqdm(range(len(test))):
    prediction = model_Ada.predict( [X_test[index]] )[0]
    actualLable = Y_test[index]
    if actualLable == 1:
      if prediction == 1:
        tp +=1
      else:
        fn +=1
    else:
        if prediction == 0:
          tn+=1
        else:
            fp+=1
  return {"Accuracy":round( (tp+tn) / (tp+tn+fp+fn),2),"Percision":round(tp/(tp+fp),2),"Recall":round(tp/(tp+fn),2)}

In [141]:
#{'Accuracy': 0.66, 'Percision': 0.56, 'Recall': 0.77} for 1 epoch
#{'Accuracy': 0.66, 'Percision': 0.56, 'Recall': 0.77} for 2 epochs
#{'Accuracy': 0.66, 'Percision': 0.56, 'Recall': 0.77} for 5 epochs
evaluate() 

  0%|          | 0/2000 [00:00<?, ?it/s]

{'Accuracy': 0.69, 'Percision': 0.6, 'Recall': 0.65}

In [142]:
T = "Tim managed to stop the car"
H = "Tim stopped the car"
H1 = "Tim tried to stop the car"
H2 = "Tim did something to the car"
H3 = "the color of the cor is blue"
H4 = "car is not moving any more"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:entrailment\n",
      "T->H3",predict (T,H3),"correct:other\n","T->H4",predict (T,H4),"correct:entrailment\n",)


T = "Tim has bought fish tonight"
H = "Tim has spent money"
H1 = "Tim has bought something"
H2 = "Tim Is relaxed and happy!"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:other\n",)


T = "A girl playing a violin along with a group of people"
H = "A girl is playing an instrument."
H1 = "A girl is playing outdoor alone"
H2 = "A girl is playing volleyball with other groups of people"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:other\n","T->H2",predict (T,H2),"correct:other\n",)

 T->H entailment correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 other correct:entrailment
 T->H3 other correct:other
 T->H4 other correct:entrailment

 T->H other correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 other correct:other

 T->H other correct:entrailment
 T->H1 other correct:other
 T->H2 other correct:other



**MLP Classifier**

In [155]:
def clf_model():
  clf = MLPClassifier(random_state=50)
  clf.fit(X,Y)
  return clf

model_MLP = clf_model()

In [156]:
def predict(T,H):
  test = [generateData(T,H)]
  if model_MLP.predict(test)[0] == 1:
      return "entailment"
  else:
      return "other"

def evaluate():
  tp = fp = fn = tn = 0
  for index in tqdm(range(len(test))):
    prediction = model_MLP.predict( [X_test[index]] )[0]
    actualLable = Y_test[index]
    if actualLable == 1:
      if prediction == 1:
        tp +=1
      else:
        fn +=1
    else:
        if prediction == 0:
          tn+=1
        else:
            fp+=1
  return {"Accuracy":round( (tp+tn) / (tp+tn+fp+fn),2),"Percision":round(tp/(tp+fp),2),"Recall":round(tp/(tp+fn),2)}

In [157]:
#{'Accuracy': 0.72, 'Percision': 0.66, 'Recall': 0.61} for 1 epoch
#{'Accuracy': 0.7, 'Percision': 0.63, 'Recall': 0.69} for 2 epochs
#{'Accuracy': 0.69, 'Percision': 0.64, 'Recall': 0.57} for 5 epochs
evaluate()

  0%|          | 0/2000 [00:00<?, ?it/s]

{'Accuracy': 0.7, 'Percision': 0.63, 'Recall': 0.64}

In [158]:
T = "Tim managed to stop the car"
H = "Tim stopped the car"
H1 = "Tim tried to stop the car"
H2 = "Tim did something to the car"
H3 = "the color of the cor is blue"
H4 = "car is not moving any more"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:entrailment\n",
      "T->H3",predict (T,H3),"correct:other\n","T->H4",predict (T,H4),"correct:entrailment\n",)


T = "Tim has bought fish tonight"
H = "Tim has spent money"
H1 = "Tim has bought something"
H2 = "Tim Is relaxed and happy!"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:other\n",)


T = "A girl playing a violin along with a group of people"
H = "A girl is playing an instrument."
H1 = "A girl is playing outdoor alone"
H2 = "A girl is playing volleyball with other groups of people"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:other\n","T->H2",predict (T,H2),"correct:other\n",)

 T->H entailment correct:entrailment
 T->H1 other correct:entrailment
 T->H2 other correct:entrailment
 T->H3 other correct:other
 T->H4 other correct:entrailment

 T->H other correct:entrailment
 T->H1 other correct:entrailment
 T->H2 other correct:other

 T->H other correct:entrailment
 T->H1 other correct:other
 T->H2 entailment correct:other



**Combine**

In [225]:
def predict(T,H):
  test = [generateData(T,H)]
  prediction = []
  models = [model_SVM, model_MLP, model_Ada, model_DTC, model_NB]
  for model in models:
    prediction.append(model.predict(test)[0])
  number_of_entailments = len(list(filter(lambda x: x==1,prediction)))
  number_of_other = len(list(filter(lambda x: x==0,prediction)))
  if number_of_entailments>number_of_other:
      return "entailment"
  else:
      return "other"




def evaluate():
  tp = fp = fn = tn = 0
  models = [model_SVM, model_MLP, model_Ada, model_DTC, model_NB]

  for index in tqdm(range(len(test))):
    sentence = [X_test[index]]
    prediction = []
    for model in models:
      prediction.append(model.predict(sentence)[0])
      if model is model_DTC:
         prediction.append(model.predict(sentence)[0])
         if model.predict(sentence)[0] == 1:
          prediction.append(model.predict(sentence)[0])
          prediction.append(model.predict(sentence)[0])
          prediction.append(model.predict(sentence)[0])
          

    number_of_entailments = len(list(filter(lambda x: x==1,prediction)))
    number_of_other = len(list(filter(lambda x: x==0,prediction)))
    if number_of_entailments>number_of_other:
        prediction = 1
    else:
        prediction = 0
    actualLable = Y_test[index]
    if actualLable == 1:
      if prediction == 1:
        tp +=1
      else:
        fn +=1
    else:
        if prediction == 0:
          tn+=1
        else:
            fp+=1
  return {"Accuracy":round( (tp+tn) / (tp+tn+fp+fn),2),"Percision":round(tp/(tp+fp),2),"Recall":round(tp/(tp+fn),2)}

In [226]:
evaluate()

  0%|          | 0/2000 [00:00<?, ?it/s]

{'Accuracy': 0.83, 'Percision': 0.77, 'Recall': 0.83}

In [214]:
T = "Tim managed to stop the car"
H = "Tim stopped the car"
H1 = "Tim tried to stop the car"
H2 = "Tim did something to the car"
H3 = "the color of the cor is blue"
H4 = "car is not moving any more"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:entrailment\n",
      "T->H3",predict (T,H3),"correct:other\n","T->H4",predict (T,H4),"correct:entrailment\n",)


T = "Tim has bought fish tonight"
H = "Tim has spent money tonight"
H1 = "Tim has bought something"
H2 = "Tim Is relaxed and happy!"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:entrailment\n","T->H2",predict (T,H2),"correct:other\n",)


T = "A girl playing a violin along with a group of people"
H = "A girl is playing an instrument."
H1 = "A girl is playing outdoor alone"
H2 = "A girl is playing volleyball with other groups of people"
print(" T->H",predict (T,H),"correct:entrailment\n","T->H1",predict (T,H1),"correct:other\n","T->H2",predict (T,H2),"correct:other\n",)

 T->H entailment correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 other correct:entrailment
 T->H3 other correct:other
 T->H4 other correct:entrailment

 T->H other correct:entrailment
 T->H1 entailment correct:entrailment
 T->H2 other correct:other

 T->H other correct:entrailment
 T->H1 other correct:other
 T->H2 entailment correct:other



Cosine similarity:
{'Accuracy': 0.61, 'Percision': 0.52, 'Recall': 0.79}


---



Cosine Directional Similarity:
{'Accuracy': 0.6, 'Percision': 0.51, 'Recall': 0.87}


---



Modified Lev. Distance:
{'Accuracy': 0.55, 'Percision': 0.23, 'Recall': 0.05}


---



SVM:
{'Accuracy': 0.81, 'Percision': 0.79, 'Recall': 0.71}


---




GaussianNB Classifier:
{'Accuracy': 0.66, 'Percision': 0.56, 'Recall': 0.72}


---



Decision Tree classifier:
{'Accuracy': 0.82, 'Percision': 0.78, 'Recall': 0.78}


---



AdaBoost Classifier:
{'Accuracy': 0.69, 'Percision': 0.6, 'Recall': 0.65}


---



MLP Classifier:
{'Accuracy': 0.7, 'Percision': 0.63, 'Recall': 0.64}


---



Combine:
{'Accuracy': 0.83, 'Percision': 0.77, 'Recall': 0.83}
