In [1]:
import pandas as pd
import numpy as np
from pgmpy.factors.discrete import TabularCPD
from pgmpy.models import BayesianModel
import matplotlib.pyplot as plt
import networkx as nx
from pgmpy.inference import VariableElimination
from tqdm import tqdm

In [2]:
feature_data = pd.read_csv("../dataset/15features.csv")

In [3]:
for idx,columns in enumerate(feature_data.columns):
    if columns != "imagename":
        feature_data[str(columns)] = feature_data[str(columns)] - 1

In [4]:
feature_data.head()

Unnamed: 0,imagename,pen_pressure,letter_spacing,size,dimension,is_lowercase,is_continuous,slantness,tilt,entry_stroke_a,staff_of_a,formation_n,staff_of_d,exit_stroke_d,word_formation,constancy
0,0968c_num1.png,1,1,1,0,1,1,2,1,0,1,1,2,1,1,0
1,0809c_num2.png,1,1,1,1,1,1,2,0,0,1,1,2,0,1,1
2,0237b_num6.png,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1
3,0069b_num2.png,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0
4,0966c_num4.png,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1


In [204]:
seen_train = pd.read_csv("../dataset/seen-dataset/dataset_seen_training_siamese.csv")
val_data = pd.read_csv("../dataset/seen-dataset/dataset_seen_validation_siamese.csv")
val_data.head()

Unnamed: 0.1,Unnamed: 0,left,right,label
0,0,0901a_num1.png,0901a_num2.png,1
1,1,1194a_num2.png,1194a_num1.png,1
2,2,0300a_num2.png,0300a_num1.png,1
3,3,0872a_num2.png,0872a_num1.png,1
4,4,0080a_num2.png,0080a_num1.png,1


In [205]:
trainData = pd.merge(seen_train,feature_data.add_suffix('1'),left_on="left",right_on="imagename1",how="inner")
trainData = pd.merge(trainData,feature_data.add_suffix('2'),left_on="right",right_on="imagename2",how="inner")
trainData = trainData.drop(["Unnamed: 0","imagename1","imagename2"],axis=1)
val_data = pd.merge(val_data,feature_data.add_suffix('1'),left_on="left",right_on="imagename1",how="inner")
val_data = pd.merge(val_data,feature_data.add_suffix('2'),left_on="right",right_on="imagename2",how="inner")
val_data = val_data.drop(["Unnamed: 0","imagename1","imagename2"],axis=1)

In [206]:
val_data.head()

Unnamed: 0,left,right,label,pen_pressure1,letter_spacing1,size1,dimension1,is_lowercase1,is_continuous1,slantness1,...,is_continuous2,slantness2,tilt2,entry_stroke_a2,staff_of_a2,formation_n2,staff_of_d2,exit_stroke_d2,word_formation2,constancy2
0,0901a_num1.png,0901a_num2.png,1,0,2,2,2,1,0,0,...,0,0,1,0,3,1,1,0,1,1
1,0901a_num1.png,1194a_num2.png,0,0,2,2,2,1,0,0,...,1,1,1,0,1,1,1,1,1,1
2,0901a_num1.png,0300a_num2.png,0,0,2,2,2,1,0,0,...,1,1,0,0,1,1,2,3,1,1
3,0901a_num1.png,0872a_num2.png,0,0,2,2,2,1,0,0,...,1,0,0,0,3,1,1,1,1,1
4,0901a_num1.png,0080a_num2.png,0,0,2,2,2,1,0,0,...,0,0,0,0,1,1,1,0,1,1


In [8]:
combined_model = BayesianModel([('pen_pressure1','is_pen_pressure_sim'),
                                ('pen_pressure2','is_pen_pressure_sim'),
                                ('slantness1','is_slantness_sim'),
                                ('slantness2','is_slantness_sim'),
                                ('tilt1','is_tilt_sim'),
                                ('tilt2','is_tilt_sim'),
                                ('is_slantness_sim','is_tilt_sim'),
                                ('staff_of_a1','is_staff_of_a_sim'),
                                ('staff_of_a2','is_staff_of_a_sim'),
                                ('staff_of_d1','is_staff_of_d_sim'),
                                ('staff_of_d2','is_staff_of_d_sim'),
                                ('is_staff_of_a_sim','is_staff_of_d_sim'),
                                ('entry_stroke_a1','entry_stroke_a_sim'),
                                ('entry_stroke_a2','entry_stroke_a_sim'),
                                ('exit_stroke_d1','is_exit_stroke_d_sim'),
                                ('exit_stroke_d2','is_exit_stroke_d_sim'),
                                ('entry_stroke_a_sim','is_exit_stroke_d_sim'),
                                ('is_lowercase1','is_lowercase_sim'),
                                ('is_lowercase2','is_lowercase_sim'),
                                ('is_continuous1','is_continuous_sim'),
                                ('is_continuous2','is_continuous_sim'),
                                ('is_lowercase_sim','is_continuous_sim'),
                                ('dimension1','dimension_sim'),
                                ('dimension2','dimension_sim'),
                                ('letter_spacing1','letter_spacing_sim'),
                                ('letter_spacing2','letter_spacing_sim'),
                                ('size1','size_sim'),
                                ('size2','size_sim'),
                                ('dimension_sim','size_sim'),
                                ('letter_spacing_sim','size_sim'),
                                ('constancy1','constancy_sim'),
                                ('constancy2','constancy_sim'),
                                ('size_sim','constancy_sim'),
                                ('word_formation1','word_formation_sim'),
                                ('word_formation2','word_formation_sim'),
                                ('constancy_sim','word_formation_sim'),
                                ('formation_n1','formation_n_sim'),
                                ('formation_n2','formation_n_sim'),
                                ('word_formation_sim','formation_n_sim')
                               ])

cpd_pen_pressure1 = TabularCPD('pen_pressure1',2,[[0.5],
                                                [0.5]],
                                                evidence=[], evidence_card=[])
cpd_pen_pressure2 = TabularCPD('pen_pressure2',2,[[0.5],
                                                [0.5]],
                                                evidence=[], evidence_card=[])
cpd_is_pen_pressure_sim = TabularCPD('is_pen_pressure_sim',2,[[0.1,0.9,0.9,0.1],
                                                            [0.9,0.1,0.1,0.9]],
                                                            evidence=['pen_pressure1','pen_pressure2'], 
                                                            evidence_card=[2,2])
cpd_slantness1 = TabularCPD('slantness1',4,[[0.25],[0.25],[0.25],[0.25]],
                                                evidence=[], evidence_card=[])
cpd_slantness2 = TabularCPD('slantness2',4,[[0.25],[0.25],[0.25],[0.25]],
                                                evidence=[], evidence_card=[])
cpd_is_slantness_sim = TabularCPD('is_slantness_sim',2,[[0.1,0.2,0.3,0.4,0.2,0.1,0.3,0.4,0.3,0.2,0.1,0.4,0.4,0.3,0.2,0.1],
                                                            [0.9,0.8,0.7,0.6,0.8,0.9,0.7,0.6,0.7,0.8,0.9,0.6,0.6,0.7,0.8,0.9]],
                                                            evidence=['slantness1','slantness2'], 
                                                            evidence_card=[4,4])
cpd_tilt1 = TabularCPD('tilt1',2,[[0.5],
                                                [0.5]],
                                                evidence=[], evidence_card=[])
cpd_tilt2 = TabularCPD('tilt2',2,[[0.5],
                                                [0.5]],
                                                evidence=[], evidence_card=[])
cpd_is_tilt_sim = TabularCPD('is_tilt_sim',2,[[0.4,0.1,0.9,0.6,0.9,0.6,0.4,0.1],
                                                            [0.6,0.9,0.1,0.4,0.1,0.4,0.6,0.9]],
                                                            evidence=['tilt1','tilt2','is_slantness_sim'], 
                                                            evidence_card=[2,2,2])
cpd_staff_of_a1 = TabularCPD('staff_of_a1',4,[[0.25],[0.25],[0.25],[0.25]],
                                                evidence=[], evidence_card=[])
cpd_staff_of_a2 = TabularCPD('staff_of_a2',4,[[0.25],[0.25],[0.25],[0.25]],
                                                evidence=[], evidence_card=[])
cpd_is_staff_of_a_sim = TabularCPD('is_staff_of_a_sim',2,[[0.1,0.2,0.3,0.4,0.2,0.1,0.3,0.4,0.3,0.2,0.1,0.4,0.4,0.3,0.2,0.1],
                                                            [0.9,0.8,0.7,0.6,0.8,0.9,0.7,0.6,0.7,0.8,0.9,0.6,0.6,0.7,0.8,0.9]],
                                                            evidence=['staff_of_a1','staff_of_a2'], 
                                                            evidence_card=[4,4])
cpd_staff_of_d1 = TabularCPD('staff_of_d1',3,[[0.33],
                                    [0.34],[0.33]],
                                    evidence=[], evidence_card=[])
cpd_staff_of_d2 = TabularCPD('staff_of_d2',3,[[0.33],
                                    [0.34],[0.33]],
                                    evidence=[], evidence_card=[])
cpd_is_staff_of_d_sim = TabularCPD('is_staff_of_d_sim',2,[[0.4,0.1,0.9,0.6,0.9,0.6,0.1,0.6,0.4,0.1,0.9,0.6,0.9,0.6,0.9,0.6,0.4,0.9],
                                              [0.6,0.9,0.1,0.4,0.1,0.4,0.9,0.4,0.6,0.9,0.1,0.4,0.1,0.4,0.1,0.4,0.6,0.1]],
                             evidence=['staff_of_d1','staff_of_d2','is_staff_of_a_sim'], 
                             evidence_card=[3,3,2])
cpd_exit_stroke_d1 = TabularCPD('exit_stroke_d1',4,[[0.25],[0.25],[0.25],[0.25]],
                                                evidence=[], evidence_card=[])
cpd_exit_stroke_d2 = TabularCPD('exit_stroke_d2',4,[[0.25],[0.25],[0.25],[0.25]],
                                                evidence=[], evidence_card=[])
cpd_is_exit_stroke_d_sim = TabularCPD('is_exit_stroke_d_sim',2,[[0.9,0.1,0.9,0.6,0.9,0.6,0.9,0.6,0.9,0.6,0.4,0.1,0.9,0.6,0.9,0.6,0.9,0.6,0.9,0.6,0.4,0.1,0.9,0.6,0.9,0.6,0.9,0.6,0.9,0.6,0.4,0.1],
                                                            [0.1,0.9,0.1,0.4,0.1,0.4,0.1,0.4,0.1,0.4,0.6,0.9,0.1,0.4,0.1,0.4,0.1,0.4,0.1,0.4,0.6,0.9,0.1,0.4,0.1,0.4,0.1,0.4,0.1,0.4,0.6,0.9]],
                                                            evidence=['exit_stroke_d1','exit_stroke_d2','entry_stroke_a_sim'], 
                                                            evidence_card=[4,4,2])

cpd_is_lowercase1 = TabularCPD('is_lowercase1',2,[[0.5],
                                                [0.5]],
                                                evidence=[], evidence_card=[])
cpd_is_lowercase2 = TabularCPD('is_lowercase2',2,[[0.5],
                                                [0.5]],
                                                evidence=[], evidence_card=[])
cpd_is_continuous1 = TabularCPD('is_continuous1',2,[[0.5],
                                                [0.5]],
                                                evidence=[], evidence_card=[])
cpd_is_continuous2 = TabularCPD('is_continuous2',2,[[0.5],
                                                [0.5]],
                                                evidence=[], evidence_card=[])
cpd_dimension1 = TabularCPD('dimension1',3,[[0.33],
                                    [0.34],[0.33]],
                                                evidence=[], evidence_card=[])
cpd_dimension2 = TabularCPD('dimension2',3,[[0.33],
                                    [0.34],[0.33]],
                                                evidence=[], evidence_card=[])
cpd_letter_spacing1 = TabularCPD('letter_spacing1',3,[[0.33],
                                    [0.34],[0.33]],
                                    evidence=[], evidence_card=[])
cpd_letter_spacing2 = TabularCPD('letter_spacing2',3,[[0.33],
                                    [0.34],[0.33]],
                                    evidence=[], evidence_card=[])
cpd_size1 = TabularCPD('size1',3,[[0.33],
                                    [0.34],[0.33]],
                                    evidence=[], evidence_card=[])
cpd_size2 = TabularCPD('size2',3,[[0.33],
                                    [0.34],[0.33]],
                                    evidence=[], evidence_card=[])
cpd_constancy1 = TabularCPD('constancy1',2,[[0.5],
                                    [0.5]],
                                    evidence=[], evidence_card=[])
cpd_constancy2 = TabularCPD('constancy2',2,[[0.5],
                                    [0.5]],
                                    evidence=[], evidence_card=[])
cpd_word_formation1 = TabularCPD('word_formation1',2,[[0.5],
                                    [0.5]],
                                    evidence=[], evidence_card=[])
cpd_word_formation2 = TabularCPD('word_formation2',2,[[0.5],
                                    [0.5]],
                                    evidence=[], evidence_card=[])
cpd_formation_n1 = TabularCPD('formation_n1',2,[[0.5],
                                    [0.5]],
                                    evidence=[], evidence_card=[])
cpd_formation_n2 = TabularCPD('formation_n2',2,[[0.5],
                                    [0.5]],
                                    evidence=[], evidence_card=[])
cpd_entry_stroke_a1 = TabularCPD('entry_stroke_a1',2,[[0.5],
                                    [0.5]],
                                    evidence=[], evidence_card=[])
cpd_entry_stroke_a2 = TabularCPD('entry_stroke_a2',2,[[0.5],
                                    [0.5]],
                                    evidence=[], evidence_card=[])
cpd_is_lowercase_sim = TabularCPD('is_lowercase_sim',2,[[0.1,0.9,0.9,0.1],
                                                            [0.9,0.1,0.1,0.9]],
                                                            evidence=['is_lowercase1','is_lowercase2'], 
                                                            evidence_card=[2,2])
cpd_is_continuous_sim = TabularCPD('is_continuous_sim',2,[[0.9,0.1,0.9,0.6,0.9,0.6,0.9,0.1],
                                                            [0.1,0.9,0.1,0.4,0.1,0.4,0.1,0.9]],
                                                            evidence=['is_continuous1','is_continuous2','is_lowercase_sim'], 
                                                            evidence_card=[2,2,2])
cpd_dimension_sim = TabularCPD('dimension_sim',2,[[0.1,0.8,0.9,0.8,0.1,0.8,0.9,0.8,0.1],
                                                [0.9,0.2,0.1,0.2,0.9,0.2,0.1,0.2,0.9]],
                                                evidence=['dimension1','dimension2'], evidence_card=[3,3])
cpd_letter_spacing_sim = TabularCPD('letter_spacing_sim',2,[[0.1,0.8,0.9,0.8,0.1,0.8,0.9,0.8,0.1],
                                                [0.9,0.2,0.1,0.2,0.9,0.2,0.1,0.2,0.9]],
                                                evidence=['letter_spacing1','letter_spacing2'], evidence_card=[3,3])
cpd_size_sim = TabularCPD('size_sim',2,[[0.6,0.3,0.3,0.1,0.8,0.7,0.7,0.3,0.9,0.8,0.7,0.4,0.7,0.6,0.6,0.3,0.6,0.3,0.3,0.1,0.8,0.4,0.4,0.85,0.9,0.8,0.8,0.3,0.8,0.4,0.4,0.85,0.6,0.3,0.3,0.1],
                                        [0.4,0.7,0.7,0.9,0.2,0.3,0.3,0.7,0.1,0.2,0.3,0.6,0.3,0.4,0.4,0.7,0.4,0.7,0.7,0.9,0.2,0.6,0.6,0.15,0.1,0.2,0.2,0.7,0.2,0.6,0.6,0.15,0.4,0.7,0.7,0.9]],
                                        evidence=['size1','size2','dimension_sim','letter_spacing_sim'], evidence_card=[3,3,2,2])
cpd_constancy_sim = TabularCPD('constancy_sim',2,[[0.9,0.1,0.9,0.6,0.9,0.6,0.7,0.1],
                                        [0.1,0.9,0.1,0.4,0.1,0.4,0.3,0.9]],
                                        evidence=['constancy1','constancy2','size_sim'], evidence_card=[2,2,2])
cpd_word_formation_sim = TabularCPD('word_formation_sim',2,[[0.9,0.1,0.9,0.7,0.9,0.7,0.9,0.1],
                                        [0.1,0.9,0.1,0.3,0.1,0.3,0.1,0.9]],
                                        evidence=['word_formation1','word_formation2','constancy_sim'], evidence_card=[2,2,2])
cpd_formation_n_sim = TabularCPD('formation_n_sim',2,[[0.7,0.1,0.9,0.4,0.9,0.4,0.6,0.1],
                                        [0.3,0.9,0.1,0.6,0.1,0.6,0.4,0.9]],
                                        evidence=['formation_n1','formation_n2','word_formation_sim'], evidence_card=[2,2,2])
cpd_entry_stroke_a_sim = TabularCPD('entry_stroke_a_sim',2,[[0.1,0.9,0.9,0.1],
                                                            [0.9,0.1,0.1,0.9]],
                                        evidence=['entry_stroke_a1','entry_stroke_a2'], evidence_card=[2,2])

combined_model.add_cpds(cpd_pen_pressure1,
                        cpd_pen_pressure2,
                        cpd_is_pen_pressure_sim,
                        cpd_slantness1,
                        cpd_slantness2,
                        cpd_is_slantness_sim,
                        cpd_tilt1,
                        cpd_tilt2,
                        cpd_is_tilt_sim,
                        cpd_staff_of_a1,
                        cpd_staff_of_a2,
                        cpd_is_staff_of_a_sim,
                        cpd_staff_of_d1,
                        cpd_staff_of_d2,
                        cpd_is_staff_of_d_sim,
                        cpd_exit_stroke_d1,
                        cpd_exit_stroke_d2,
                        cpd_is_exit_stroke_d_sim,
                        cpd_is_lowercase1,
                        cpd_is_lowercase2,
                        cpd_is_lowercase_sim,
                        cpd_is_continuous1,
                        cpd_is_continuous2,
                        cpd_is_continuous_sim,
                        cpd_dimension1,
                        cpd_dimension2,
                        cpd_dimension_sim,
                        cpd_letter_spacing1,
                        cpd_letter_spacing2,
                        cpd_letter_spacing_sim,
                        cpd_size1,
                        cpd_size2,
                        cpd_size_sim,
                        cpd_constancy1,
                        cpd_constancy2,
                        cpd_constancy_sim,
                        cpd_word_formation1,
                        cpd_word_formation2,
                        cpd_word_formation_sim,
                        cpd_formation_n1,
                        cpd_formation_n2,
                        cpd_formation_n_sim,
                        cpd_entry_stroke_a1,
                        cpd_entry_stroke_a2,
                        cpd_entry_stroke_a_sim
                       )
combined_model.check_model()

True

In [9]:
mle = VariableElimination(combined_model)

In [10]:
for idx,columns in enumerate(feature_data.columns):
    if idx != 0:
        print(str(np.unique(feature_data[columns]))+columns)

[0 1]pen_pressure
[0 1 2]letter_spacing
[0 1 2]size
[0 1 2]dimension
[0 1]is_lowercase
[0 1]is_continuous
[0 1 2 3]slantness
[0 1]tilt
[0 1]entry_stroke_a
[0 1 2 3]staff_of_a
[0 1]formation_n
[0 1 2]staff_of_d
[0 1 2 3]exit_stroke_d
[0 1]word_formation
[0 1]constancy


## Learning the weights in Structured CPD

### Training

In [12]:
simFeatures = [[] for _ in range(100)]
var = {'is_pen_pressure_sim',
       'is_slantness_sim',
       'is_tilt_sim',
       'is_staff_of_a_sim',
       'is_staff_of_d_sim',
       'entry_stroke_a_sim',
       'is_exit_stroke_d_sim',
      'is_lowercase_sim',
      'is_continuous_sim',
      'dimension_sim',
      'letter_spacing_sim',
       'size_sim',
       'constancy_sim',
       'word_formation_sim',
       'formation_n_sim'
      }
evidence_labels = trainData.columns[3:]
for idx in tqdm(range(100)):
    inf = mle.query(variables=var,evidence=dict(zip(evidence_labels,trainData.iloc[idx,3:].tolist())))
    for simfeature in var:
        simFeatures[idx].append(np.argmax(inf[simfeature].values))

  phi.values = phi.values[slice_]
  phi1.values = phi1.values[slice_]
100%|██████████| 100/100 [00:16<00:00,  6.22it/s]


In [13]:
simDf = pd.DataFrame(data=simFeatures,columns=var)

simDf = pd.concat([simDf,trainData.label],axis=1)

simDf.to_csv("./sigTrainData.csv")

In [17]:
simDf = pd.read_csv("./sigTrainData.csv")

In [63]:
xTrain = simDf.iloc[:99,0:15].values.tolist()
xTrain = np.array(xTrain)

In [133]:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import precision_recall_fscore_support

In [65]:
model = LogisticRegressionCV(cv=10, random_state=0,
                            fit_intercept=True,max_iter=10000).fit(xTrain, yTrain)

pred = model.predict(xTrain)

precision,recall,f1,_ = precision_recall_fscore_support(yTrain, pred, average='binary')
print(precision,recall,f1)

In [102]:
weights = model.coef_

In [103]:
b = model.intercept_

In [167]:
nodeOutputs = simDf.iloc[2,:15].values
nodeOutputs

array([1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.])

In [162]:
def deterministic_sigmoid_node(biasNodeAccumulatorNode):
    return 1/(1+np.exp(biasNodeAccumulatorNode*-1))

def deterministic_verification_node(nodeOutputs,weights,b):
    weightFeaturesAccumulatorNode = np.dot(weights,nodeOutputs)
    biasNodeAccumulatorNode = b[0]+weightFeaturesAccumulatorNode[0]
    return deterministic_sigmoid_node(biasNodeAccumulatorNode)

In [168]:
deterministic_verification_node(nodeOutputs,weights,b)

0.0014751012148143032

In [188]:
deterministicNodePred = []
for nodeOutputs in xTrain:
    deterministicNodePred.append(deterministic_verification_node(nodeOutputs,weights,b))

### Validation

In [385]:
simFeatures_val = [[] for _ in range(len(val_data))]
evidence_labels = val_data.columns[3:]
for idx in tqdm(range(len(val_data))):
    inf = mle.query(variables=var,evidence=dict(zip(evidence_labels,val_data.iloc[idx,3:].tolist())))
    for simfeature in var:
        simFeatures_val[idx].append(np.argmax(inf[simfeature].values))




  phi.values = phi.values[slice_]
  phi1.values = phi1.values[slice_]



  0%|          | 1/894 [00:00<02:56,  5.06it/s][A[A[A


  0%|          | 2/894 [00:00<02:51,  5.19it/s][A[A[A


  0%|          | 3/894 [00:00<02:46,  5.35it/s][A[A[A


  0%|          | 4/894 [00:00<02:45,  5.36it/s][A[A[A


  1%|          | 5/894 [00:00<02:44,  5.39it/s][A[A[A


  1%|          | 6/894 [00:01<02:44,  5.41it/s][A[A[A


  1%|          | 7/894 [00:01<02:41,  5.49it/s][A[A[A


  1%|          | 8/894 [00:01<02:42,  5.44it/s][A[A[A


  1%|          | 9/894 [00:01<02:43,  5.43it/s][A[A[A


  1%|          | 10/894 [00:01<02:40,  5.50it/s][A[A[A


  1%|          | 11/894 [00:02<02:42,  5.44it/s][A[A[A


  1%|▏         | 12/894 [00:02<02:41,  5.47it/s][A[A[A


  1%|▏         | 13/894 [00:02<02:42,  5.42it/s][A[A[A


  2%|▏         | 14/894 [00:02<02:44,  5.35it/s][A[A[A


  2%|▏         | 15/894 [00:02<02:51,  5.11it/s][A[A[A


  2%|▏         | 16/894 [00:02<02

 14%|█▎        | 122/894 [00:22<02:18,  5.56it/s][A[A[A


 14%|█▍        | 123/894 [00:22<02:19,  5.53it/s][A[A[A


 14%|█▍        | 124/894 [00:22<02:17,  5.60it/s][A[A[A


 14%|█▍        | 125/894 [00:23<02:19,  5.53it/s][A[A[A


 14%|█▍        | 126/894 [00:23<02:18,  5.53it/s][A[A[A


 14%|█▍        | 127/894 [00:23<02:19,  5.50it/s][A[A[A


 14%|█▍        | 128/894 [00:23<02:17,  5.57it/s][A[A[A


 14%|█▍        | 129/894 [00:23<02:18,  5.54it/s][A[A[A


 15%|█▍        | 130/894 [00:24<02:17,  5.56it/s][A[A[A


 15%|█▍        | 131/894 [00:24<02:17,  5.54it/s][A[A[A


 15%|█▍        | 132/894 [00:24<02:20,  5.41it/s][A[A[A


 15%|█▍        | 133/894 [00:24<02:27,  5.16it/s][A[A[A


 15%|█▍        | 134/894 [00:24<02:25,  5.21it/s][A[A[A


 15%|█▌        | 135/894 [00:25<02:23,  5.31it/s][A[A[A


 15%|█▌        | 136/894 [00:25<02:25,  5.21it/s][A[A[A


 15%|█▌        | 137/894 [00:25<02:25,  5.19it/s][A[A[A


 15%|█▌        | 138/894

 29%|██▊       | 256/894 [00:47<02:14,  4.73it/s][A[A[A


 29%|██▊       | 257/894 [00:47<02:13,  4.76it/s][A[A[A


 29%|██▉       | 258/894 [00:47<02:11,  4.85it/s][A[A[A


 29%|██▉       | 259/894 [00:48<02:06,  5.01it/s][A[A[A


 29%|██▉       | 260/894 [00:48<02:05,  5.07it/s][A[A[A


 29%|██▉       | 261/894 [00:48<02:00,  5.23it/s][A[A[A


 29%|██▉       | 262/894 [00:48<02:13,  4.73it/s][A[A[A


 29%|██▉       | 263/894 [00:49<02:22,  4.43it/s][A[A[A


 30%|██▉       | 264/894 [00:49<02:21,  4.44it/s][A[A[A


 30%|██▉       | 265/894 [00:49<02:13,  4.73it/s][A[A[A


 30%|██▉       | 266/894 [00:49<02:07,  4.93it/s][A[A[A


 30%|██▉       | 267/894 [00:49<02:03,  5.06it/s][A[A[A


 30%|██▉       | 268/894 [00:49<02:03,  5.08it/s][A[A[A


 30%|███       | 269/894 [00:50<02:00,  5.19it/s][A[A[A


 30%|███       | 270/894 [00:50<01:58,  5.26it/s][A[A[A


 30%|███       | 271/894 [00:50<01:56,  5.33it/s][A[A[A


 30%|███       | 272/894

 44%|████▎     | 390/894 [01:12<01:27,  5.76it/s][A[A[A


 44%|████▎     | 391/894 [01:12<01:26,  5.80it/s][A[A[A


 44%|████▍     | 392/894 [01:12<01:26,  5.83it/s][A[A[A


 44%|████▍     | 393/894 [01:12<01:25,  5.84it/s][A[A[A


 44%|████▍     | 394/894 [01:12<01:25,  5.85it/s][A[A[A


 44%|████▍     | 395/894 [01:12<01:26,  5.76it/s][A[A[A


 44%|████▍     | 396/894 [01:13<01:27,  5.70it/s][A[A[A


 44%|████▍     | 397/894 [01:13<01:26,  5.73it/s][A[A[A


 45%|████▍     | 398/894 [01:13<01:24,  5.84it/s][A[A[A


 45%|████▍     | 399/894 [01:13<01:23,  5.90it/s][A[A[A


 45%|████▍     | 400/894 [01:13<01:22,  6.00it/s][A[A[A


 45%|████▍     | 401/894 [01:13<01:21,  6.03it/s][A[A[A


 45%|████▍     | 402/894 [01:14<01:23,  5.87it/s][A[A[A


 45%|████▌     | 403/894 [01:14<01:23,  5.86it/s][A[A[A


 45%|████▌     | 404/894 [01:14<01:24,  5.80it/s][A[A[A


 45%|████▌     | 405/894 [01:14<01:25,  5.74it/s][A[A[A


 45%|████▌     | 406/894

 59%|█████▊    | 524/894 [01:35<01:07,  5.49it/s][A[A[A


 59%|█████▊    | 525/894 [01:35<01:07,  5.45it/s][A[A[A


 59%|█████▉    | 526/894 [01:35<01:07,  5.45it/s][A[A[A


 59%|█████▉    | 527/894 [01:36<01:07,  5.41it/s][A[A[A


 59%|█████▉    | 528/894 [01:36<01:07,  5.38it/s][A[A[A


 59%|█████▉    | 529/894 [01:36<01:08,  5.30it/s][A[A[A


 59%|█████▉    | 530/894 [01:36<01:10,  5.17it/s][A[A[A


 59%|█████▉    | 531/894 [01:36<01:10,  5.14it/s][A[A[A


 60%|█████▉    | 532/894 [01:37<01:13,  4.95it/s][A[A[A


 60%|█████▉    | 533/894 [01:37<01:12,  4.98it/s][A[A[A


 60%|█████▉    | 534/894 [01:37<01:13,  4.88it/s][A[A[A


 60%|█████▉    | 535/894 [01:37<01:13,  4.88it/s][A[A[A


 60%|█████▉    | 536/894 [01:37<01:10,  5.06it/s][A[A[A


 60%|██████    | 537/894 [01:38<01:10,  5.07it/s][A[A[A


 60%|██████    | 538/894 [01:38<01:09,  5.15it/s][A[A[A


 60%|██████    | 539/894 [01:38<01:06,  5.35it/s][A[A[A


 60%|██████    | 540/894

 74%|███████▎  | 658/894 [02:00<00:40,  5.78it/s][A[A[A


 74%|███████▎  | 659/894 [02:00<00:41,  5.70it/s][A[A[A


 74%|███████▍  | 660/894 [02:00<00:41,  5.63it/s][A[A[A


 74%|███████▍  | 661/894 [02:00<00:41,  5.59it/s][A[A[A


 74%|███████▍  | 662/894 [02:00<00:40,  5.69it/s][A[A[A


 74%|███████▍  | 663/894 [02:01<00:40,  5.67it/s][A[A[A


 74%|███████▍  | 664/894 [02:01<00:39,  5.77it/s][A[A[A


 74%|███████▍  | 665/894 [02:01<00:39,  5.79it/s][A[A[A


 74%|███████▍  | 666/894 [02:01<00:39,  5.75it/s][A[A[A


 75%|███████▍  | 667/894 [02:01<00:38,  5.88it/s][A[A[A


 75%|███████▍  | 668/894 [02:01<00:37,  6.00it/s][A[A[A


 75%|███████▍  | 669/894 [02:02<00:38,  5.90it/s][A[A[A


 75%|███████▍  | 670/894 [02:02<00:38,  5.77it/s][A[A[A


 75%|███████▌  | 671/894 [02:02<00:39,  5.65it/s][A[A[A


 75%|███████▌  | 672/894 [02:02<00:38,  5.71it/s][A[A[A


 75%|███████▌  | 673/894 [02:02<00:38,  5.72it/s][A[A[A


 75%|███████▌  | 674/894

 89%|████████▊ | 792/894 [02:23<00:18,  5.65it/s][A[A[A


 89%|████████▊ | 793/894 [02:23<00:17,  5.68it/s][A[A[A


 89%|████████▉ | 794/894 [02:23<00:17,  5.70it/s][A[A[A


 89%|████████▉ | 795/894 [02:23<00:17,  5.72it/s][A[A[A


 89%|████████▉ | 796/894 [02:24<00:17,  5.65it/s][A[A[A


 89%|████████▉ | 797/894 [02:24<00:17,  5.64it/s][A[A[A


 89%|████████▉ | 798/894 [02:24<00:16,  5.80it/s][A[A[A


 89%|████████▉ | 799/894 [02:24<00:16,  5.80it/s][A[A[A


 89%|████████▉ | 800/894 [02:24<00:15,  5.92it/s][A[A[A


 90%|████████▉ | 801/894 [02:24<00:15,  5.89it/s][A[A[A


 90%|████████▉ | 802/894 [02:25<00:15,  5.86it/s][A[A[A


 90%|████████▉ | 803/894 [02:25<00:15,  5.85it/s][A[A[A


 90%|████████▉ | 804/894 [02:25<00:15,  5.91it/s][A[A[A


 90%|█████████ | 805/894 [02:25<00:15,  5.71it/s][A[A[A


 90%|█████████ | 806/894 [02:25<00:15,  5.63it/s][A[A[A


 90%|█████████ | 807/894 [02:26<00:15,  5.63it/s][A[A[A


 90%|█████████ | 808/894

In [384]:
deterministicNodePred = []
for nodeOutputs in simFeatures_val:
    deterministicNodePred.append(deterministic_verification_node(nodeOutputs,weights,b))

precision,recall,f1,_ = precision_recall_fscore_support(list(val_data.label), np.round(deterministicNodePred), average='binary')
print(precision,recall,f1)

0.49776286353467564 1.0 0.6646751306945482


In [217]:
'''
Just training on 100 rows gave f1 64
Distantly supervised learning?
'''

'\nJust training on 100 rows gave f1 64\nDistantly supervised learning?\n'

### Entropy based Approach

In [305]:
entropyDict={}
for feature in trainData.columns[3:]:
    feature_states = np.unique(trainData[feature])
    temp = []
    for state in feature_states:
        query = str(feature+"=="+str(state))
        prob = len(trainData.query(query))/len(trainData[feature])
        temp.append(prob)
    entropyDict[feature] = temp 

In [376]:
entropyRows_pos = []
entropyRows_neg = []
for idx_r in tqdm(range(len(trainData))):
    entropy = 0
    for idx_c,cols in enumerate(trainData.columns[3:]):
        prob = entropyDict[cols][int(trainData.iloc[idx_r,idx_c+3])]
        entropy -= prob * np.log2(prob)
    if(trainData.iloc[idx_r,2] == 1):
        entropyRows_pos.append(entropy)
    if(trainData.iloc[idx_r,2] == 0):
        entropyRows_neg.append(entropy)
entropyRows_pos = np.array(entropyRows_pos)
entropyRows_neg = np.array(entropyRows_neg)




  0%|          | 0/111753 [00:00<?, ?it/s][A[A[A


  0%|          | 75/111753 [00:00<02:29, 747.07it/s][A[A[A


  0%|          | 182/111753 [00:00<02:15, 820.70it/s][A[A[A


  0%|          | 294/111753 [00:00<02:05, 890.89it/s][A[A[A


  0%|          | 399/111753 [00:00<01:59, 932.96it/s][A[A[A


  0%|          | 521/111753 [00:00<01:50, 1003.07it/s][A[A[A


  1%|          | 633/111753 [00:00<01:47, 1035.27it/s][A[A[A


  1%|          | 748/111753 [00:00<01:44, 1065.18it/s][A[A[A


  1%|          | 852/111753 [00:00<01:44, 1056.79it/s][A[A[A


  1%|          | 966/111753 [00:00<01:42, 1080.19it/s][A[A[A


  1%|          | 1075/111753 [00:01<01:42, 1082.60it/s][A[A[A


  1%|          | 1182/111753 [00:01<01:43, 1065.20it/s][A[A[A


  1%|          | 1288/111753 [00:01<01:50, 1001.99it/s][A[A[A


  1%|          | 1396/111753 [00:01<01:47, 1022.32it/s][A[A[A


  1%|▏         | 1522/111753 [00:01<01:41, 1082.18it/s][A[A[A


  1%|▏         | 16

 13%|█▎        | 14742/111753 [00:12<01:20, 1199.61it/s][A[A[A


 13%|█▎        | 14863/111753 [00:12<01:21, 1188.06it/s][A[A[A


 13%|█▎        | 14985/111753 [00:12<01:20, 1195.55it/s][A[A[A


 14%|█▎        | 15118/111753 [00:12<01:18, 1232.70it/s][A[A[A


 14%|█▎        | 15242/111753 [00:12<01:19, 1212.31it/s][A[A[A


 14%|█▍        | 15379/111753 [00:13<01:16, 1253.80it/s][A[A[A


 14%|█▍        | 15506/111753 [00:13<01:20, 1198.50it/s][A[A[A


 14%|█▍        | 15637/111753 [00:13<01:18, 1229.40it/s][A[A[A


 14%|█▍        | 15761/111753 [00:13<01:20, 1195.16it/s][A[A[A


 14%|█▍        | 15889/111753 [00:13<01:18, 1219.32it/s][A[A[A


 14%|█▍        | 16021/111753 [00:13<01:16, 1247.54it/s][A[A[A


 14%|█▍        | 16147/111753 [00:13<01:17, 1232.04it/s][A[A[A


 15%|█▍        | 16285/111753 [00:13<01:15, 1271.84it/s][A[A[A


 15%|█▍        | 16413/111753 [00:13<01:15, 1266.05it/s][A[A[A


 15%|█▍        | 16541/111753 [00:14<01:15, 1262

 27%|██▋       | 30002/111753 [00:25<01:07, 1205.69it/s][A[A[A


 27%|██▋       | 30124/111753 [00:25<01:09, 1175.77it/s][A[A[A


 27%|██▋       | 30250/111753 [00:25<01:08, 1198.21it/s][A[A[A


 27%|██▋       | 30375/111753 [00:25<01:07, 1213.03it/s][A[A[A


 27%|██▋       | 30504/111753 [00:25<01:05, 1234.49it/s][A[A[A


 27%|██▋       | 30639/111753 [00:25<01:04, 1265.26it/s][A[A[A


 28%|██▊       | 30773/111753 [00:25<01:03, 1284.77it/s][A[A[A


 28%|██▊       | 30909/111753 [00:25<01:01, 1304.40it/s][A[A[A


 28%|██▊       | 31040/111753 [00:25<01:02, 1294.64it/s][A[A[A


 28%|██▊       | 31170/111753 [00:25<01:03, 1265.86it/s][A[A[A


 28%|██▊       | 31297/111753 [00:26<01:06, 1211.03it/s][A[A[A


 28%|██▊       | 31419/111753 [00:26<01:06, 1200.29it/s][A[A[A


 28%|██▊       | 31540/111753 [00:26<01:08, 1178.40it/s][A[A[A


 28%|██▊       | 31663/111753 [00:26<01:07, 1192.18it/s][A[A[A


 28%|██▊       | 31793/111753 [00:26<01:05, 1220

 40%|████      | 44988/111753 [00:37<00:52, 1266.15it/s][A[A[A


 40%|████      | 45115/111753 [00:37<00:54, 1221.54it/s][A[A[A


 40%|████      | 45243/111753 [00:37<00:53, 1238.43it/s][A[A[A


 41%|████      | 45368/111753 [00:37<00:54, 1228.29it/s][A[A[A


 41%|████      | 45495/111753 [00:38<00:53, 1240.28it/s][A[A[A


 41%|████      | 45631/111753 [00:38<00:51, 1273.50it/s][A[A[A


 41%|████      | 45759/111753 [00:38<00:52, 1248.11it/s][A[A[A


 41%|████      | 45885/111753 [00:38<00:53, 1239.23it/s][A[A[A


 41%|████      | 46012/111753 [00:38<00:52, 1247.06it/s][A[A[A


 41%|████▏     | 46141/111753 [00:38<00:52, 1257.99it/s][A[A[A


 41%|████▏     | 46273/111753 [00:38<00:51, 1275.04it/s][A[A[A


 42%|████▏     | 46410/111753 [00:38<00:50, 1301.97it/s][A[A[A


 42%|████▏     | 46541/111753 [00:38<00:50, 1302.69it/s][A[A[A


 42%|████▏     | 46672/111753 [00:38<00:51, 1269.36it/s][A[A[A


 42%|████▏     | 46800/111753 [00:39<00:52, 1238

 54%|█████▎    | 59942/111753 [00:50<00:41, 1258.73it/s][A[A[A


 54%|█████▍    | 60069/111753 [00:50<00:43, 1197.06it/s][A[A[A


 54%|█████▍    | 60193/111753 [00:50<00:42, 1208.97it/s][A[A[A


 54%|█████▍    | 60315/111753 [00:50<00:44, 1162.63it/s][A[A[A


 54%|█████▍    | 60436/111753 [00:50<00:43, 1176.19it/s][A[A[A


 54%|█████▍    | 60565/111753 [00:50<00:42, 1205.87it/s][A[A[A


 54%|█████▍    | 60697/111753 [00:50<00:41, 1237.54it/s][A[A[A


 54%|█████▍    | 60832/111753 [00:50<00:40, 1265.99it/s][A[A[A


 55%|█████▍    | 60961/111753 [00:50<00:39, 1271.77it/s][A[A[A


 55%|█████▍    | 61089/111753 [00:51<00:40, 1246.47it/s][A[A[A


 55%|█████▍    | 61215/111753 [00:51<00:41, 1223.62it/s][A[A[A


 55%|█████▍    | 61338/111753 [00:51<00:41, 1202.29it/s][A[A[A


 55%|█████▌    | 61470/111753 [00:51<00:40, 1234.73it/s][A[A[A


 55%|█████▌    | 61598/111753 [00:51<00:40, 1245.60it/s][A[A[A


 55%|█████▌    | 61728/111753 [00:51<00:39, 1257

 68%|██████▊   | 75434/111753 [01:02<00:32, 1131.43it/s][A[A[A


 68%|██████▊   | 75563/111753 [01:02<00:30, 1173.18it/s][A[A[A


 68%|██████▊   | 75697/111753 [01:02<00:29, 1216.55it/s][A[A[A


 68%|██████▊   | 75831/111753 [01:02<00:28, 1250.49it/s][A[A[A


 68%|██████▊   | 75960/111753 [01:02<00:28, 1259.98it/s][A[A[A


 68%|██████▊   | 76087/111753 [01:03<00:30, 1162.28it/s][A[A[A


 68%|██████▊   | 76206/111753 [01:03<00:33, 1068.47it/s][A[A[A


 68%|██████▊   | 76316/111753 [01:03<00:35, 985.02it/s] [A[A[A


 68%|██████▊   | 76418/111753 [01:03<00:37, 936.32it/s][A[A[A


 68%|██████▊   | 76515/111753 [01:03<00:39, 896.48it/s][A[A[A


 69%|██████▊   | 76620/111753 [01:03<00:37, 937.53it/s][A[A[A


 69%|██████▊   | 76735/111753 [01:03<00:35, 992.03it/s][A[A[A


 69%|██████▉   | 76850/111753 [01:03<00:33, 1033.82it/s][A[A[A


 69%|██████▉   | 76979/111753 [01:03<00:31, 1097.65it/s][A[A[A


 69%|██████▉   | 77092/111753 [01:04<00:31, 1106.21i

 81%|████████  | 90617/111753 [01:15<00:17, 1223.34it/s][A[A[A


 81%|████████  | 90740/111753 [01:15<00:17, 1207.72it/s][A[A[A


 81%|████████▏ | 90865/111753 [01:15<00:17, 1219.34it/s][A[A[A


 81%|████████▏ | 90996/111753 [01:15<00:16, 1244.56it/s][A[A[A


 82%|████████▏ | 91131/111753 [01:15<00:16, 1272.09it/s][A[A[A


 82%|████████▏ | 91267/111753 [01:15<00:15, 1296.46it/s][A[A[A


 82%|████████▏ | 91402/111753 [01:15<00:15, 1311.81it/s][A[A[A


 82%|████████▏ | 91534/111753 [01:15<00:15, 1286.01it/s][A[A[A


 82%|████████▏ | 91663/111753 [01:15<00:16, 1234.65it/s][A[A[A


 82%|████████▏ | 91793/111753 [01:15<00:15, 1252.51it/s][A[A[A


 82%|████████▏ | 91919/111753 [01:16<00:16, 1204.87it/s][A[A[A


 82%|████████▏ | 92041/111753 [01:16<00:16, 1185.10it/s][A[A[A


 82%|████████▏ | 92161/111753 [01:16<00:16, 1179.36it/s][A[A[A


 83%|████████▎ | 92283/111753 [01:16<00:16, 1188.99it/s][A[A[A


 83%|████████▎ | 92404/111753 [01:16<00:16, 1193

 95%|█████████▍| 105822/111753 [01:27<00:04, 1227.42it/s][A[A[A


 95%|█████████▍| 105949/111753 [01:27<00:04, 1238.25it/s][A[A[A


 95%|█████████▍| 106083/111753 [01:27<00:04, 1266.17it/s][A[A[A


 95%|█████████▌| 106217/111753 [01:27<00:04, 1285.54it/s][A[A[A


 95%|█████████▌| 106352/111753 [01:27<00:04, 1301.92it/s][A[A[A


 95%|█████████▌| 106488/111753 [01:27<00:03, 1317.45it/s][A[A[A


 95%|█████████▌| 106621/111753 [01:27<00:03, 1289.72it/s][A[A[A


 96%|█████████▌| 106751/111753 [01:27<00:03, 1277.04it/s][A[A[A


 96%|█████████▌| 106880/111753 [01:28<00:03, 1279.65it/s][A[A[A


 96%|█████████▌| 107009/111753 [01:28<00:03, 1281.62it/s][A[A[A


 96%|█████████▌| 107138/111753 [01:28<00:03, 1238.09it/s][A[A[A


 96%|█████████▌| 107265/111753 [01:28<00:03, 1247.03it/s][A[A[A


 96%|█████████▌| 107402/111753 [01:28<00:03, 1280.58it/s][A[A[A


 96%|█████████▌| 107531/111753 [01:28<00:03, 1283.04it/s][A[A[A


 96%|█████████▋| 107665/111753 [01

In [388]:
top100_pos = entropyRows_pos.argsort()[-100:][::-1]
top100_neg = entropyRows_neg.argsort()[-250:][::-1]
entropyData = np.append(top100_pos,top100_neg)

In [389]:
entropyTrain = [[] for _ in range(len(entropyData))]
entropyLabels = []
for i,idx in enumerate(tqdm(entropyData)):
    inf = mle.query(variables=var,evidence=dict(zip(evidence_labels,trainData.iloc[idx,3:].tolist())))
    entropyLabels.append(trainData.iloc[idx,32])
    for simfeature in var:
        entropyTrain[i].append(np.argmax(inf[simfeature].values))
entropyLabels = np.array(entropyLabels)
entropyTrain = np.array(entropyTrain)





  0%|          | 0/350 [00:00<?, ?it/s][A[A[A[A



  0%|          | 1/350 [00:00<01:11,  4.89it/s][A[A[A[A



  1%|          | 2/350 [00:00<01:08,  5.11it/s][A[A[A[A



  1%|          | 3/350 [00:00<01:06,  5.19it/s][A[A[A[A



  1%|          | 4/350 [00:00<01:04,  5.33it/s][A[A[A[A



  1%|▏         | 5/350 [00:00<01:05,  5.27it/s][A[A[A[A



  2%|▏         | 6/350 [00:01<01:04,  5.35it/s][A[A[A[A



  2%|▏         | 7/350 [00:01<01:04,  5.30it/s][A[A[A[A



  2%|▏         | 8/350 [00:01<01:04,  5.27it/s][A[A[A[A



  3%|▎         | 9/350 [00:01<01:08,  4.96it/s][A[A[A[A



  3%|▎         | 10/350 [00:01<01:10,  4.80it/s][A[A[A[A



  3%|▎         | 11/350 [00:02<01:07,  5.06it/s][A[A[A[A



  3%|▎         | 12/350 [00:02<01:05,  5.12it/s][A[A[A[A



  4%|▎         | 13/350 [00:02<01:06,  5.06it/s][A[A[A[A



  4%|▍         | 14/350 [00:02<01:08,  4.92it/s][A[A[A[A



  4%|▍         | 15/350 [00:02<01:09,  4.79it/s][A[A

 36%|███▋      | 127/350 [00:23<00:38,  5.74it/s][A[A[A[A



 37%|███▋      | 128/350 [00:23<00:38,  5.75it/s][A[A[A[A



 37%|███▋      | 129/350 [00:23<00:39,  5.60it/s][A[A[A[A



 37%|███▋      | 130/350 [00:23<00:38,  5.65it/s][A[A[A[A



 37%|███▋      | 131/350 [00:24<00:38,  5.73it/s][A[A[A[A



 38%|███▊      | 132/350 [00:24<00:37,  5.81it/s][A[A[A[A



 38%|███▊      | 133/350 [00:24<00:37,  5.83it/s][A[A[A[A



 38%|███▊      | 134/350 [00:24<00:37,  5.77it/s][A[A[A[A



 39%|███▊      | 135/350 [00:24<00:37,  5.67it/s][A[A[A[A



 39%|███▉      | 136/350 [00:24<00:37,  5.68it/s][A[A[A[A



 39%|███▉      | 137/350 [00:25<00:38,  5.56it/s][A[A[A[A



 39%|███▉      | 138/350 [00:25<00:37,  5.64it/s][A[A[A[A



 40%|███▉      | 139/350 [00:25<00:36,  5.71it/s][A[A[A[A



 40%|████      | 140/350 [00:25<00:37,  5.63it/s][A[A[A[A



 40%|████      | 141/350 [00:25<00:36,  5.69it/s][A[A[A[A



 41%|████      | 142/350 

 72%|███████▏  | 253/350 [00:45<00:18,  5.28it/s][A[A[A[A



 73%|███████▎  | 254/350 [00:46<00:18,  5.30it/s][A[A[A[A



 73%|███████▎  | 255/350 [00:46<00:18,  5.17it/s][A[A[A[A



 73%|███████▎  | 256/350 [00:46<00:18,  5.18it/s][A[A[A[A



 73%|███████▎  | 257/350 [00:46<00:17,  5.24it/s][A[A[A[A



 74%|███████▎  | 258/350 [00:46<00:17,  5.38it/s][A[A[A[A



 74%|███████▍  | 259/350 [00:46<00:16,  5.41it/s][A[A[A[A



 74%|███████▍  | 260/350 [00:47<00:17,  5.16it/s][A[A[A[A



 75%|███████▍  | 261/350 [00:47<00:17,  5.10it/s][A[A[A[A



 75%|███████▍  | 262/350 [00:47<00:16,  5.20it/s][A[A[A[A



 75%|███████▌  | 263/350 [00:47<00:16,  5.28it/s][A[A[A[A



 75%|███████▌  | 264/350 [00:47<00:17,  4.98it/s][A[A[A[A



 76%|███████▌  | 265/350 [00:48<00:17,  4.79it/s][A[A[A[A



 76%|███████▌  | 266/350 [00:48<00:17,  4.79it/s][A[A[A[A



 76%|███████▋  | 267/350 [00:48<00:18,  4.58it/s][A[A[A[A



 77%|███████▋  | 268/350 

In [390]:
model = LogisticRegressionCV(cv=10, random_state=0,
                            fit_intercept=True,max_iter=100000).fit(entropyTrain, entropyLabels)

pred = model.predict(entropyTrain)

precision,recall,f1,_ = precision_recall_fscore_support(entropyLabels, pred, average='binary')
print(precision,recall,f1)

0.7176470588235294 0.8551401869158879 0.7803837953091685


In [392]:
weights = model.coef_
b = model.intercept_

In [393]:
deterministicNodePred = []
for nodeOutputs in simFeatures_val:
    deterministicNodePred.append(deterministic_verification_node(nodeOutputs,weights,b))
    
precision,recall,f1,_ = precision_recall_fscore_support(list(val_data.label), np.round(deterministicNodePred), average='binary')
print(precision,recall,f1)

0.44782608695652176 0.6943820224719102 0.5444933920704845
