In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
import pickle
import statistics
import pandas as pd
import os
from scipy.spatial import distance

from collections import Counter
from sklearn.model_selection import train_test_split
from tqdm import trange
from statistics import mode
from tqdm import tqdm
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import roc_auc_score
from scipy.stats import entropy
from collections import Counter

In [None]:
# Mount Google Drive at /content/gdrive; later cells chdir into a folder under
# "My Drive" and read/write their pickles and saved classifiers there.
from google.colab import drive
drive.mount('/content/gdrive',force_remount=True)

Mounted at /content/gdrive


# **CIFAR dataset and functions**

In [None]:
# Load CIFAR-10, convert the images to float32 divided by 255 and the labels to int32.
(train_imgs, train_lbls), (test_imgs, test_lbls) = tf.keras.datasets.cifar10.load_data()

train_imgs = train_imgs.astype(np.float32) / 255.
train_imgs = train_imgs.reshape((-1, 32, 32, 3))
test_imgs = test_imgs.astype(np.float32) / 255.
test_imgs = test_imgs.reshape((-1, 32, 32, 3))

train_lbls = train_lbls.astype(np.int32)
test_lbls = test_lbls.astype(np.int32)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
class Model():
  """Wrapper around one CNN classifier of the leave-one-class-out ensemble.

  The Keras network lives in ``self.model`` and is set either by
  ``build_model`` or by ``load_model``. Temperature calibration is done by the
  module-level ``temp_cal`` function, not by this class.
  """

  def __init__(self, name=None):
    """Store the classifier name and create loss / metric / optimizer objects.

    Args:
      name: optional identifier for this classifier.
    """
    self.name = name
    # build_model() ends with a softmax layer, so the loss is fed probabilities
    # and must NOT apply softmax again (from_logits=False).
    self.loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=False)
    self.train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
    self.test_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
    self.opt = tf.optimizers.Adam()

  def build_model(self,neurons_in_dense_layer):
    """Build the CNN and store it in ``self.model``.

    Three groups of three Conv-BatchNorm-LeakyReLU layers (each group followed
    by dropout), then max-pooling, Dense(128), a Dense layer of
    ``neurons_in_dense_layer`` units and a final softmax activation layer.

    Args:
      neurons_in_dense_layer: number of output units (classes).

    Returns:
      The constructed ``tf.keras.Sequential`` model.
    """
    conv_groups = ((32, 64, 64), (128, 128, 128), (128, 128, 128))

    self.model = tf.keras.Sequential()
    is_first_layer = True
    for group_filters in conv_groups:
      for filters in group_filters:
        # Only the very first layer declares the input shape.
        extra_kwargs = {'input_shape': (32, 32, 3)} if is_first_layer else {}
        is_first_layer = False
        self.model.add(tf.keras.layers.Conv2D(filters, (3, 3), padding='same', strides=1,
                                              use_bias=False, **extra_kwargs))
        self.model.add(tf.keras.layers.BatchNormalization())
        self.model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
      self.model.add(tf.keras.layers.Dropout(0.25))

    self.model.add(tf.keras.layers.MaxPooling2D((2,2)))
    self.model.add(tf.keras.layers.Flatten())
    self.model.add(tf.keras.layers.Dense(128))
    # The softmax is kept as a separate last layer so the layer before it can be
    # tapped for raw scores (the notebook reads `layers[-2]` for that purpose).
    self.model.add(tf.keras.layers.Dense(neurons_in_dense_layer))
    self.model.add(tf.keras.layers.Activation('softmax'))
    return self.model

  def train(self, train_imgs, train_lbls, val_imgs, val_lbls, epochs=20):
    """Compile and fit ``self.model``.

    Args:
      train_imgs, train_lbls: training images and integer (sparse) labels.
      val_imgs, val_lbls: validation images and integer labels.
      epochs: number of training epochs (default 20, the original setting).

    Returns:
      The Keras ``History`` object returned by ``fit``.
    """
    self.model.compile(
      optimizer='adam',
      # The model outputs softmax probabilities, hence from_logits=False.
      loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
      metrics = ['accuracy']
      )
    return self.model.fit(train_imgs, train_lbls, validation_data= (val_imgs, val_lbls), epochs=epochs)

  def apply_softmax(self, logits):
    """Return ``tf.nn.softmax(logits)``."""
    return tf.nn.softmax(logits)

  def save_model(self, path):
    """Save ``self.model`` to ``path``."""
    self.model.save(path)

  def load_model(self, path):
    """Load a saved Keras model from ``path`` into ``self.model`` and return it."""
    self.model = tf.keras.models.load_model(path)
    return self.model

  @staticmethod
  def temp_scaling(y_pred,temp):
    """Divide ``y_pred`` by the temperature ``temp``.

    Declared static because it takes no ``self``; without the decorator a call
    on an instance would pass the instance as ``y_pred`` and fail.
    """
    return tf.math.divide(y_pred,temp)



In [None]:
def temp_cal(y_pred,val_labels,num_class,num_steps=300,learning_rate=0.01):
  """Fit a single temperature that minimises cross-entropy of ``y_pred / temp``.

  Args:
    y_pred: raw scores passed to ``softmax_cross_entropy_with_logits`` after
      division by the temperature.
    val_labels: integer labels, one-hot encoded with ``num_class`` classes.
    num_class: number of classes used for the one-hot encoding.
    num_steps: number of Adam steps (default 300, the original setting).
    learning_rate: Adam learning rate (default 0.01, the original setting).

  Returns:
    The optimised temperature as a scalar ``tf.Variable``.
  """
  temp = tf.Variable(initial_value=1.0, trainable=True, dtype=tf.float32)

  # Targets and scores do not change between steps: convert them once instead
  # of rebuilding the one-hot matrix inside every loss evaluation.
  one_hot_labels = tf.convert_to_tensor(keras.utils.to_categorical(val_labels,num_classes=num_class))
  scores = tf.convert_to_tensor(y_pred)

  def compute_loss():
      y_pred_model_w_temp = tf.math.divide(scores, temp)
      return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
          labels=one_hot_labels, logits=y_pred_model_w_temp))

  optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

  print('Temperature Initial value: {}'.format(temp.numpy()))

  for _ in range(num_steps):
      optimizer.minimize(compute_loss, var_list=[temp])

  print('Temperature Final value: {}'.format(temp.numpy()))

  return temp

In [None]:
def temp_scaling(y_pred,temp):
    """Return ``y_pred`` divided element-wise by the temperature ``temp``."""
    scaled = tf.math.divide(y_pred, temp)
    return scaled

In [None]:
def remove_anomalous_class(train_img,train_lbl,val_req,anomalous_class,random_state=None):
  """Drop every sample whose label is listed in ``anomalous_class``.

  Args:
    train_img: array-like of samples, first axis indexes samples.
    train_lbl: array-like of labels, one row (or scalar) per sample.
    val_req: if truthy, additionally split the kept samples 70/30
      (stratified by label) into train and validation parts.
    anomalous_class: list of label values to remove.
    random_state: optional seed forwarded to ``train_test_split`` so the
      split can be reproduced (default ``None`` keeps the previous behaviour).

  Returns:
    ``(x_train, x_val, y_train, y_val)`` when ``val_req`` is truthy, otherwise
    ``(images, labels)`` restricted to the kept samples.
  """
  images = np.asarray(train_img)
  targets = np.asarray(train_lbl)

  # Vectorised membership test instead of a per-sample Python loop.
  is_anomalous = np.isin(targets, list(anomalous_class))
  if is_anomalous.ndim > 1:
    # Labels stored as (N, 1): collapse to one flag per sample.
    is_anomalous = is_anomalous.any(axis=tuple(range(1, is_anomalous.ndim)))
  keep = ~is_anomalous

  # Boolean indexing keeps the trailing dimensions even when nothing is kept.
  kept_imgs = images[keep]
  kept_lbls = targets[keep]

  if val_req:
    x_train,x_val,y_train,y_val = train_test_split(
        kept_imgs, kept_lbls, test_size=0.30, stratify=kept_lbls, random_state=random_state)
    return x_train,x_val,y_train,y_val

  return kept_imgs, kept_lbls

# **9 as anomaly**
---



In [None]:
# Switch the working directory to the Drive folder for the "leave 9" experiment,
# so the relative pickle / model paths used by the following cells resolve there.
import os
print(os.getcwd())
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 9/")
# os.chdir("/content/gdrive/My Drive/leave 8/")
print(os.listdir())

/content
['predictions_on_test.pkl', 'reference_vector_in_out.pkl']


In [None]:
# train_imgs9, val_imgs9 , train_lbl9, val_lbl9 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[9])
# print(train_imgs9.shape, val_imgs9.shape , train_lbl9.shape, val_lbl9.shape)
# open_file = open(os.path.join('Training_data.pkl'), "wb")
# pickle.dump((train_imgs9, val_imgs9 , train_lbl9, val_lbl9 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Load the cached train/validation split (class 9 already removed).
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
# The `with` block closes the file handle, which the original never did.
with open(os.path.join('Training_data.pkl'), "rb") as open_file:
  (train_imgs9, val_imgs9 , train_lbl9, val_lbl9 ) = pickle.load(open_file)
print(train_imgs9.shape, val_imgs9.shape , train_lbl9.shape, val_lbl9.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
print( np.unique(train_lbl9),len(np.unique(train_lbl9)))

[0 1 2 3 4 5 6 7 8] 9


In [None]:
# One wrapper per remaining class: classifier i is the one trained without labels[i].
n_classifiers = len(np.unique(train_lbl9))
models = [Model(name='9anomaly:classifier'+str(i)) for i in range(n_classifiers)]

names = [wrapper.name for wrapper in models]
names

['9anomaly:classifier0',
 '9anomaly:classifier1',
 '9anomaly:classifier2',
 '9anomaly:classifier3',
 '9anomaly:classifier4',
 '9anomaly:classifier5',
 '9anomaly:classifier6',
 '9anomaly:classifier7',
 '9anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Map original class labels onto consecutive indices ``0..k-1``.

  The i-th entry of ``unique_lbl`` is mapped to ``i``. The mapping is built for
  however many labels ``unique_lbl`` contains (it used to be hard-coded to
  exactly eight).

  Args:
    train_lbl: array-like of training labels, shape (N,) or (N, 1).
    val_lbl: array-like of validation labels, shape (M,) or (M, 1).
    unique_lbl: the distinct labels, in the order defining the new indices.

  Returns:
    ``(train_lbl, val_lbl)`` as 1-D integer arrays of remapped labels.

  Raises:
    KeyError: if a label is not present in ``unique_lbl``.
  """
  index_map = {int(lbl): idx for idx, lbl in enumerate(np.asarray(unique_lbl).ravel())}

  def _remap(lbls):
    # ravel() lets (N, 1) label columns and flat (N,) vectors be handled alike.
    return np.array([index_map[int(x)] for x in np.asarray(lbls).ravel()])

  return _remap(train_lbl), _remap(val_lbl)

In [None]:
labels = np.unique(train_lbl9)
labels

array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int32)

In [None]:
# for indx,i in enumerate(labels):
#   # if i == 9:
      
#     train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(train_imgs9,train_lbl9,val_req= False, anomalous_class = [i])
#     val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(val_imgs9,val_lbl9,val_req= False, anomalous_class = [i])
#     unique_lbls = np.unique(train_lbls_wt_clas_i)
#     (train_lbls_wt_clas_i,val_lbls__wt_clas_i) = remap_labels(train_lbls_wt_clas_i,val_lbls__wt_clas_i,unique_lbls)
#     print(np.unique(train_lbls_wt_clas_i),np.unique(val_lbls__wt_clas_i))
#     models[indx].build_model(8)
#     models[indx].train(train_imgs_wt_clas_i, train_lbls_wt_clas_i, val_imgs_wt_clas_i, val_lbls__wt_clas_i)
#     models[indx].save_model('classifier'+str(indx))

In [None]:
# Evaluate every saved leave-one-class-out classifier on the training and
# validation data from which its held-out class has been removed.
train_loss, train_acc = [], []
val_loss, val_acc = [], []

for indx, held_out in enumerate(labels):
    kept_train_imgs, kept_train_lbls = remove_anomalous_class(
        train_imgs9, train_lbl9, val_req=False, anomalous_class=[held_out])
    kept_val_imgs, kept_val_lbls = remove_anomalous_class(
        val_imgs9, val_lbl9, val_req=False, anomalous_class=[held_out])

    # Remap the remaining labels to consecutive indices before evaluating.
    unique_lbls = np.unique(kept_train_lbls)
    kept_train_lbls, kept_val_lbls = remap_labels(kept_train_lbls, kept_val_lbls, unique_lbls)

    model = models[indx].load_model('classifier'+str(indx))
    train_ = model.evaluate(kept_train_imgs, kept_train_lbls)
    val_ = model.evaluate(kept_val_imgs, kept_val_lbls)

    # evaluate() results are read as [loss, accuracy].
    train_loss.append(train_[0])
    train_acc.append(train_[1])
    val_loss.append(val_[0])
    val_acc.append(val_[1])



In [None]:
print('training accuracy average :',np.average(train_acc))
print('validation accuracy average :',np.average(val_acc))

training accuracy average : 0.8270992040634155
validation accuracy average : 0.8253055546018813


In [None]:
print(train_acc)
print(val_acc)

[0.786214292049408, 0.8174285888671875, 0.8722500205039978, 0.8502500057220459, 0.8193214535713196, 0.8693214058876038, 0.8463214039802551, 0.791857123374939, 0.7909285426139832]
[0.7774166464805603, 0.8191666603088379, 0.8684166669845581, 0.846833348274231, 0.8166666626930237, 0.8702499866485596, 0.847083330154419, 0.7854166626930237, 0.796500027179718]


In [None]:
# Load the saved Keras model for every wrapper; classifier i comes from 'classifier<i>'.
classifiers = []
for i, wrapper in enumerate(models):
  classifiers.append(wrapper.load_model('classifier'+str(i)))

In [None]:
classifiers[0].summary()

In [None]:
# Name of the second-to-last layer of each classifier (the one just before the final layer).
layer_name_list = []
for classifier in classifiers:
  last_two_layers = classifier.layers[-2:]
  layer_name_list.append(last_two_layers[0].name)
layer_name_list

['dense_1',
 'dense_3',
 'dense_5',
 'dense_7',
 'dense_9',
 'dense_11',
 'dense_13',
 'dense_15',
 'dense_17']

In [None]:
# For every classifier build a sub-model that outputs the activations of the
# layer named in layer_name_list (its second-to-last layer).
# tf.keras.Model is referenced explicitly: `from keras import Model` would rebind
# the name `Model`, hiding the wrapper class defined earlier in this notebook,
# and would mix the standalone keras package with tf.keras.
model_dict = {}

for i in range(len(models)):
  penultimate_output = classifiers[i].get_layer(layer_name_list[i]).output
  model_dict[i] = tf.keras.Model(inputs=classifiers[i].input, outputs=penultimate_output)

In [None]:
# Outputs of each truncated classifier on the validation images that do not
# belong to the class it was trained without.
y_pred = []
for i, held_out in enumerate(labels):
  kept_val_imgs, kept_val_lbls = remove_anomalous_class(
      val_imgs9, val_lbl9, val_req=False, anomalous_class=[held_out])
  print(i,np.unique(kept_val_lbls))
  y_pred.append(model_dict[i].predict(kept_val_imgs))

0 [1 2 3 4 5 6 7 8]
1 [0 2 3 4 5 6 7 8]
2 [0 1 3 4 5 6 7 8]
3 [0 1 2 4 5 6 7 8]
4 [0 1 2 3 5 6 7 8]
5 [0 1 2 3 4 6 7 8]
6 [0 1 2 3 4 5 7 8]
7 [0 1 2 3 4 5 6 8]
8 [0 1 2 3 4 5 6 7]


In [None]:
len(y_pred)

9

In [None]:
# Calibrate one temperature per classifier on its validation outputs.
temp_val = []
for i, held_out in enumerate(labels):
    _, kept_val_lbls = remove_anomalous_class(
        val_imgs9, val_lbl9, val_req=False, anomalous_class=[held_out])

    unique_lbls = np.unique(kept_val_lbls)
    print(unique_lbls,y_pred[i].shape)

    # Both return values are the same remapped validation labels here.
    _, kept_val_lbls = remap_labels(kept_val_lbls, kept_val_lbls, unique_lbls)

    temp_val.append(temp_cal(y_pred[i], kept_val_lbls, len(unique_lbls)))

[1 2 3 4 5 6 7 8] (12000, 8)
[1 2 3 4 5 6 7 8]
Temperature Initial value: 1.0
Temperature Final value: 2.279046058654785
[0 2 3 4 5 6 7 8] (12000, 8)
[0 2 3 4 5 6 7 8]
Temperature Initial value: 1.0
Temperature Final value: 2.088531494140625
[0 1 3 4 5 6 7 8] (12000, 8)
[0 1 3 4 5 6 7 8]
Temperature Initial value: 1.0
Temperature Final value: 2.101140260696411
[0 1 2 4 5 6 7 8] (12000, 8)
[0 1 2 4 5 6 7 8]
Temperature Initial value: 1.0
Temperature Final value: 2.1739182472229004
[0 1 2 3 5 6 7 8] (12000, 8)
[0 1 2 3 5 6 7 8]
Temperature Initial value: 1.0
Temperature Final value: 2.0981557369232178
[0 1 2 3 4 6 7 8] (12000, 8)
[0 1 2 3 4 6 7 8]
Temperature Initial value: 1.0
Temperature Final value: 1.960584044456482
[0 1 2 3 4 5 7 8] (12000, 8)
[0 1 2 3 4 5 7 8]
Temperature Initial value: 1.0
Temperature Final value: 2.043466329574585
[0 1 2 3 4 5 6 8] (12000, 8)
[0 1 2 3 4 5 6 8]
Temperature Initial value: 1.0
Temperature Final value: 2.260460138320923
[0 1 2 3 4 5 6 7] (12000, 8)
[

In [None]:
temp_val 

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.279046>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0885315>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1011403>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1739182>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0981557>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.960584>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0434663>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.2604601>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0815485>]

In [None]:
# Per-classifier temperatures as plain floats. The values match the tf.Variable
# results displayed by the calibration cell above.
# NOTE(review): these are copied by hand and must be refreshed if the classifiers are recalibrated.
temp_val = [2.279046,2.0885315,2.1011403,2.1739182,2.0981557,1.960584,2.0434663,2.2604601,2.0815485]

**Method 1**
---



In [None]:
# labels --> the non-anomalous (in-distribution) classes, e.g. here, with class 9 held out, labels are 0 1 2 3 4 5 6 7 8

In [None]:
# Accumulate, per classifier, the maximum softmax probability (after temperature
# scaling) and the normalised entropy of its prediction on every validation image.
# "in"  : the image's class is one the classifier was trained on.
# "out" : the image's class is the one the classifier was trained without.
n_classifiers = len(labels)  # accumulators sized from `labels` instead of a literal 9

entropy_in = 0
count_entr_in = 0
avg_in = [0] * n_classifiers
count_in = [0] * n_classifiers

entropy_out = 0
count_entr_out = 0
avg_out = [0] * n_classifiers
count_out = [0] * n_classifiers

for img, lbl in tqdm(zip(val_imgs9, val_lbl9), total=len(val_imgs9)):
  img = img.reshape([-1, 32, 32, 3])

  for i,anmls_lbl in enumerate(labels):
    # Same computation for both cases; only the accumulator differs.
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])
    pred = tf.nn.softmax(logits)
    max_val = np.max(pred)
    entrpy = entropy(pred[0], base=len(pred[0]))

    if anmls_lbl == lbl:
      avg_out[i] = avg_out[i] + max_val
      count_out[i] = count_out[i] + 1
      entropy_out = entropy_out + entrpy
      count_entr_out = count_entr_out + 1
    else:
      avg_in[i] = avg_in[i] + max_val
      count_in[i] = count_in[i] + 1
      entropy_in = entropy_in + entrpy
      count_entr_in = count_entr_in + 1


13500it [18:40, 12.05it/s]


In [None]:
# Mean max-softmax per classifier for in-/out-of-distribution validation images,
# followed by the mean of those per-classifier means.
classifier_avg_in = [total / n for total, n in zip(avg_in, count_in)]
classifier_avg_out = [total / n for total, n in zip(avg_out, count_out)]

treshold_value_in = sum(classifier_avg_in, 0.0) / len(classifier_avg_in)
treshold_value_out = sum(classifier_avg_out, 0.0) / len(classifier_avg_out)


In [None]:
# Mean entropy over all "in" and all "out" (classifier, image) pairs.
entropy_ref_in_9 = entropy_in/count_entr_in
entropy_ref_out_9 = entropy_out/ count_entr_out

# Print the reference vectors, their means and the entropy references, in that order.
for value in (classifier_avg_in, classifier_avg_out,
              treshold_value_in, treshold_value_out,
              entropy_ref_in_9, entropy_ref_out_9):
  print(value)

[0.7397078724031647, 0.733639513635387, 0.7965798389203846, 0.7757283639038602, 0.7550478136365613, 0.786048243680348, 0.7636079909739395, 0.729369676442196, 0.7276645988461872]
[0.7265342693924904, 0.6396735051174959, 0.5793993158340454, 0.581146329353253, 0.5706985271970431, 0.6310341851313909, 0.6112863792677721, 0.5898039894302686, 0.6793687089085579]
0.7563771013824476
0.6232161344035908
0.33198197531818585
0.48556956059152756


In [None]:
# Persist the reference vectors and entropy references; the `with` block makes
# sure the file is flushed and closed (the handle used to be left open).
with open(os.path.join('reference_vector_in_out.pkl'), "wb") as open_file:
  pickle.dump((classifier_avg_in,classifier_avg_out,entropy_ref_in_9,entropy_ref_out_9), open_file)

In [None]:
# Reload the cached reference vectors.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open(os.path.join('reference_vector_in_out.pkl'), "rb") as open_file:
  ref_vector_in_9,ref_vector_out_9,entropy_ref_in_9,entropy_ref_out_9 = pickle.load(open_file)

In [None]:
# For every test image collect, across all classifiers: the max softmax value,
# the predicted (original) class label, and the mean normalised entropy.
# Each record: [max softmax per classifier, predicted label per classifier,
#               true class, ID(1)/OOD(0) flag, mean entropy].
max_sm_all_wt_9 = []
ood = 0
ind= 1

# Classifier i is evaluated/calibrated on labels remapped to 0..k-1 after
# removing labels[i], so output index k corresponds to np.delete(labels, i)[k].
# The original always used np.delete(labels, 0), which is only right for i == 0.
labels_removed_anmls_per_clf = [np.delete(labels, i) for i in range(len(labels))]

for data, label in tqdm(zip(test_imgs, test_lbls), total=len(test_imgs)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []

  # Class 9 is the held-out anomaly in this section.
  lbl = ood if label[0] == 9 else ind

  for i,anmls_lbl in enumerate(labels):
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    pred_lbl = labels_removed_anmls_per_clf[i][pred_ind]   ### reverse mapping to the actual label

    prediction_lbl.append(pred_lbl)

    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_9.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

10000it [14:14, 11.70it/s]


In [None]:
# Cache the per-image test predictions; the `with` block closes the file handle.
with open(os.path.join('predictions_on_test.pkl'), "wb") as open_file:
  pickle.dump(max_sm_all_wt_9, open_file)

In [None]:
# Reload the cached per-image test predictions.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open(os.path.join('predictions_on_test.pkl'), "rb") as open_file:
  max_sm_all_wt_9 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean distance to the ID/OOD references)

In [None]:
from scipy.spatial import distance

# Score every test image by its similarity (1 / (1 + Euclidean distance)) to the
# in-distribution and out-of-distribution references, once on the vector of
# max-softmax values and once on the mean entropy.
id = 1
ood = 0

sim_score_in, sim_score_out = [], []
sim_score_entr_in, sim_score_entr_out = [], []
sim_score_add = []
y_true, y_true_ood = [], []
y_pred, y_entr_pred = [], []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_9):

  sim_in = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_in_9))
  sim_out = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_out_9))
  sim_entr_in = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_in_9))
  sim_entr_out = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_out_9))

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_in + sim_out)

  # Hard decisions: ID when the sample is closer to the in-distribution reference.
  y_pred.append(id if sim_in > sim_out else ood)
  y_entr_pred.append(id if sim_entr_in > sim_entr_out else ood)

  # Inverted ground truth (1 = OOD) for scores where higher should mean OOD.
  if lbl_id_or_ood == 1:
    label_true_ood = 0
  elif lbl_id_or_ood ==0:
    label_true_ood = 1

  y_true.append(lbl_id_or_ood)
  y_true_ood.append(label_true_ood)

100%|██████████| 10000/10000 [00:01<00:00, 5390.48it/s]


In [None]:
from sklearn.metrics import roc_auc_score  # AUROC for the similarity-based scores computed in the previous cell
# y_true uses 1 = in-distribution; y_true_ood is the inverted flag (1 = OOD).
# NOTE(review): y_pred and y_entr_pred hold hard 0/1 decisions, so their AUROC
# reflects a single operating point rather than a ranking of scores.
print('Auroc with prediction as ID or OOD  ',roc_auc_score(y_true, y_pred))
print('Auroc prediction with sim score to in distribution only  :',roc_auc_score(y_true,sim_score_in))
print('Auroc prediction with sim score to ood distribution only  :',roc_auc_score(y_true_ood,sim_score_out))

print('\nAuroc prediction with sim added score to for in and ood distribution only  :',roc_auc_score(y_true_ood,sim_score_add))

print('\nAuroc with prediction as ID or OOD Entropy ',roc_auc_score(y_true, y_entr_pred))
print('Auroc prediction with sim score to in distribution only Entropy  :',roc_auc_score(y_true,sim_score_entr_in))
print('Auroc prediction with sim score to out distribution only Entropy  :',roc_auc_score(y_true_ood,sim_score_entr_out))


Auroc with prediction as ID or OOD   0.5092777777777778
Auroc prediction with sim score to in distribution only  : 0.5436018888888889
Auroc prediction with sim score to ood distribution only  : 0.4811276666666667

Auroc prediction with sim added score to for in and ood distribution only  : 0.4618586666666667

Auroc with prediction as ID or OOD Entropy  0.4966666666666666
Auroc prediction with sim score to in distribution only Entropy  : 0.47209055555555557
Auroc prediction with sim score to out distribution only Entropy  : 0.5070576666666666


Decision Rule 

In [None]:
############################### simple decision rule: no ">= 7 / >= 8 votes" threshold, just the majority prediction ##################
# A sample is called ID when the label predicted most often across the
# classifiers equals label_actual, otherwise OOD.
# NOTE(review): the rule compares against label_actual, i.e. the ground-truth
# class of the test image. That information is not available at inference time,
# so this AUROC is presumably optimistic -- confirm whether this is intended.

id = 1
ood = 0
y_true = []
y_pred=[]

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_9):
  # Most frequent predicted label among the classifiers.
  count = Counter(pred_lbl_each_cls)
  most_common= count.most_common(1)[0][0]

  if most_common == label_actual:
    pred = id
  else:
    pred = ood 
  y_pred.append(pred)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 142724.08it/s]

 Auroc with simple decision rule about majority_:   0.7216666666666667





In [None]:
id = 1
ood = 0

In [None]:
# Vote-count decision rule: a sample is ID when at least 7 (resp. 8) of the
# classifiers agree on the same predicted label, otherwise OOD.
y_true_dec_rule = []
y_pred_mr_than8_ = []
y_pred_mr_than7_ = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_9):
  _, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)  # size of the largest agreeing group

  y_pred_mr_than7_.append(id if top_votes >= 7 else ood)
  y_pred_mr_than8_.append(id if top_votes >= 8 else ood)

  # Class 9 is the held-out anomaly in this section.
  y_true_dec_rule.append(ood if label_actual == 9 else id)

# The thresholds are inclusive (>=); the printed labels used to say "> 7" / "> 8".
print('\nAuroc for decision rule >= 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_))
print('Auroc for decision rule >= 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_))

100%|██████████| 10000/10000 [00:00<00:00, 25212.44it/s]



Auroc for decision rule > 7  0.49211111111111105
Auroc for decision rule > 8  0.5451111111111112


In [None]:
# Combined rule: a sample is ID only if it is closer to the in-distribution
# reference (vector or entropy variant) AND enough classifiers agree on a label.
sim_score_in, sim_score_entr_in = [], []
y_true_ = []
y_pred_mr_than7_, y_pred_mr_than8_ = [], []
y_entr_pred_mr_than7_, y_entr_pred_mr_than8_ = [], []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_9):

  sim_in = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_in_9))
  sim_out = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_out_9))
  sim_entr_in = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_in_9))
  sim_entr_out = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_out_9))

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  _, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)
  vector_says_id = sim_in > sim_out
  entropy_says_id = sim_entr_in > sim_entr_out

  y_pred_mr_than7_.append(id if (vector_says_id and top_votes >= 7) else ood)
  y_pred_mr_than8_.append(id if (vector_says_id and top_votes >= 8) else ood)
  y_entr_pred_mr_than7_.append(id if (entropy_says_id and top_votes >= 7) else ood)
  y_entr_pred_mr_than8_.append(id if (entropy_says_id and top_votes >= 8) else ood)

  # Class 9 is the held-out anomaly in this section.
  y_true_.append(ood if label_actual == 9 else id)


100%|██████████| 10000/10000 [00:01<00:00, 9422.53it/s]


In [None]:
print('Decision Rule(>=7) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than7_))
print('Decision Rule(>=8) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than8_))
print('Decision Rule(>=7) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than7_))
print('Decision Rule(>=8) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than8_))

Decision Rule(>=7) + Reference Vector  0.49661111111111106
Decision Rule(>=8) + Reference Vector  0.5463333333333333
Decision Rule(>=7) + Entropy Reference Value  0.4932222222222222
Decision Rule(>=8) + Entropy Reference Value  0.5431666666666667


**Method 2**
---



Normal weighted SVM




In [None]:
##########################  after appying temperature scaling and softmax  ###########################################
########################## the y_pred used is In + ood distribution for the ensemble classifier ############################
id = 1
ood = 0

# One batched forward pass per classifier over the whole validation set instead
# of a separate predict() call for every (image, classifier) pair, which is what
# made this cell take about an hour.
val_lbls_flat = np.asarray(val_lbl9).reshape(-1)  # labels are stored as (N, 1)
softmax_per_clf = []
for i in range(len(labels)):
    raw_scores = model_dict[i].predict(val_imgs9)
    scaled_scores = temp_scaling(raw_scores, temp_val[i])
    softmax_per_clf.append(np.array(tf.nn.softmax(scaled_scores)))

# Same row order as before: for each image, one row per classifier.
# A row is [temperature-scaled softmax vector, ID/OOD label], where the label is
# OOD when the image's class is the one that classifier was trained without.
new_dataset = []
for n, label in enumerate(tqdm(val_lbls_flat)):
    for i,lbl in enumerate(labels):
      new_label = ood if label == lbl else id
      new_dataset.append([softmax_per_clf[i][n].tolist(), new_label])
      

0it [00:00, ?it/s]



13500it [1:04:51,  3.47it/s]


In [None]:
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions = df['predictions'].to_numpy()
new_labels = df['label'].to_numpy()

In [None]:
df.to_pickle("predictions_dataset.pkl")

In [None]:
df = pd.read_pickle('predictions_dataset.pkl')
df

Unnamed: 0,predictions,label
0,"[1.4516434930555988e-05, 0.01251536887139082, ...",1
1,"[0.00035242579178884625, 0.020869748666882515,...",1
2,"[0.00024803896667435765, 2.345687425986398e-05...",1
3,"[0.00010106699483003467, 1.4105333320912905e-0...",1
4,"[4.42785894847475e-05, 2.3258107262336125e-07,...",1
...,...,...
121495,"[4.03901249228511e-05, 3.0043961487535853e-06,...",1
121496,"[3.731064907697146e-06, 2.7976262572337873e-05...",0
121497,"[4.11072323913686e-05, 2.2107105905888602e-05,...",1
121498,"[2.174438577640103e-06, 3.5793652841675794e-06...",1


In [None]:
from collections import Counter
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Separate the rows into in-distribution (label 1) and out-of-distribution rows.
in_data = [[vec, flag] for vec, flag in zip(predictions, new_labels) if flag == 1]
out_data = [[vec, flag] for vec, flag in zip(predictions, new_labels) if not flag == 1]

print(len(in_data),len(out_data))

# import random 
# in_data = random.sample(in_data,(len(out_data)))
# print(len(in_data))

108000 13500


In [None]:
in_data[1]

[[0.00035242579178884625,
  0.020869748666882515,
  0.0014996699756011367,
  0.6293578147888184,
  0.0018116352148354053,
  0.00017902693070936948,
  0.3458864390850067,
  4.328813520260155e-05],
 1]

In [None]:
# Flatten the (vector, label) rows into parallel feature / target lists,
# in-distribution rows first, then out-of-distribution rows.
# NOTE(review): `input` shadows the Python builtin and `labels` overwrites the
# class-label array used by earlier cells, so those cells cannot be re-run after
# this one without re-creating `labels`. The names are kept because the next
# cell reads them.
input = [row[0] for row in in_data] + [row[0] for row in out_data]
labels = [row[1] for row in in_data] + [row[1] for row in out_data]
# print(input,labels)
print(len(input),len(labels))

121500 121500


In [None]:
# Stratified 80/20 split for the SVM. A fixed random_state makes the split (and
# therefore the reported SVM scores) reproducible across kernel restarts.
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(
    input, labels, test_size=0.20, stratify=labels, random_state=42)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
# NOTE(review): this estimator is not used by the cells visible below (the
# pipeline further down creates its own SVC), and the assignment overwrites the
# `model` variable left by the evaluation loop -- presumably a leftover.
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight
# `classes` and `y` are passed by keyword: current scikit-learn releases reject
# them as positional arguments.
svm_classes = np.unique(svm_train_lb)
balanced_wts = compute_class_weight('balanced', classes=svm_classes, y=np.asarray(svm_train_lb))
# Pair each class with its own weight instead of relying on positions 0 and 1.
class_wts = {int(cls): float(wt) for cls, wt in zip(svm_classes, balanced_wts)}
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',class_weight=class_wts))
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
pred = clf.predict(svm_test_ip)
len(pred) ### svm fitted on whole data with weights

24300

In [None]:
# Score of the fitted SVM pipeline on the held-out SVM split.
# Author's note: with equal classes score is 82; with weighted classes score is 92.
# NOTE(review): the output recorded for this cell (~0.61) does not match those figures -- re-check.
clf.score(svm_test_ip,svm_test_lb)

0.6055144032921811

In [None]:
# Ensemble decision on the CIFAR test set: each classifier's temperature-scaled
# softmax vector is classified by the SVM as ID (1) / OOD (0), and the majority
# vote over the classifiers is the final prediction.
y_true = []
y_pred = []

# Batched inference: one predict() per classifier over all test images and one
# clf.predict() per classifier, instead of one call per (image, classifier).
svm_votes = []
for i in range(len(model_dict)):
    raw_scores = model_dict[i].predict(test_imgs)
    scaled_scores = temp_scaling(raw_scores, temp_val[i])
    softmax_scores = np.array(tf.nn.softmax(scaled_scores))
    svm_votes.append(clf.predict(softmax_scores))
svm_votes = np.stack(svm_votes, axis=1)  # one row per image, one column per classifier

for votes, label in tqdm(zip(svm_votes, test_lbls), total=len(test_lbls)):
    # Most frequent SVM decision among the classifiers.
    y_pred.append(Counter(votes.tolist()).most_common()[0][0])
    # Class 9 is the held-out anomaly in this section.
    if not label == 9:
      y_true.append(1)
    else:
      y_true.append(0)

10000it [51:24,  3.24it/s]


In [None]:
print(y_pred)
print(y_true)

[0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 

In [None]:
print(Counter(y_pred))
print(Counter(y_true))

Counter({1: 5558, 0: 4442})
Counter({1: 9000, 0: 1000})


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
roc_auc_score(y_true, y_pred)

0.4682222222222222

In [None]:
### predictions with weighted SVM
print(Counter(y_pred)) 
print(Counter(y_true))

Counter({1: 7362, 0: 2638})
Counter({1: 9026, 0: 974})


In [None]:
df1= pd.DataFrame(y_pred, columns=["y_pred"])
df2= pd.DataFrame(y_true, columns=["y_true"])
df = pd.concat([df1, df2], axis=1)
df

Unnamed: 0,y_pred,y_true
0,0,1
1,1,1
2,0,1
3,0,1
4,1,1
...,...,...
9995,0,1
9996,0,1
9997,0,1
9998,1,1


In [None]:
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
y_true = df['y_true'].to_numpy().tolist()
y_pred = df['y_pred'].to_numpy().tolist()
print(y_true)
print(y_pred)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 

# **8 as anomaly**
---



In [None]:
import os
print(os.getcwd())
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 8/")
# os.chdir("/content/gdrive/My Drive/leave 8/")
print(os.listdir())

/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 8
['reference_vector_in_out.pkl', 'predictions_on_test.pkl', 'classifier0', 'classifier1', 'classifier2', 'classifier3', 'classifier4', 'classifier5', 'classifier8', 'classifier7', 'classifier6']


In [None]:
# train_imgs8, val_imgs8 , train_lbl8, val_lbl8 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[8])
# print(train_imgs8.shape, val_imgs8.shape , train_lbl8.shape, val_lbl8.shape)
# open_file = open(os.path.join('Training_data.pkl'), "wb")
# pickle.dump((train_imgs8, val_imgs8 , train_lbl8, val_lbl8 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Load the cached train/validation split for the "leave 8" experiment.
# The `with` block closes the handle once the pickle is read (the original
# left the file open for the lifetime of the kernel).
with open(os.path.join('Training_data.pkl'), "rb") as open_file:
    (train_imgs8, val_imgs8, train_lbl8, val_lbl8) = pickle.load(open_file)
print(train_imgs8.shape, val_imgs8.shape , train_lbl8.shape, val_lbl8.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
print( np.unique(train_lbl8),len(np.unique(train_lbl8)))

[0 1 2 3 4 5 6 7 9] 9


In [None]:
# One Model wrapper per class present in the training labels.
n_classifiers = len(np.unique(train_lbl8))
models = [Model(name='8anomaly:classifier' + str(i)) for i in range(n_classifiers)]

names = [wrapper.name for wrapper in models]
names

['8anomaly:classifier0',
 '8anomaly:classifier1',
 '8anomaly:classifier2',
 '8anomaly:classifier3',
 '8anomaly:classifier4',
 '8anomaly:classifier5',
 '8anomaly:classifier6',
 '8anomaly:classifier7',
 '8anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Map original class ids onto contiguous indices 0..len(unique_lbl)-1.

  Args:
    train_lbl: training labels; any array-like whose flattened items convert
      with int() (e.g. shape (N,) or (N, 1)).
    val_lbl: validation labels, same conventions as train_lbl.
    unique_lbl: sequence of the distinct original labels; the position of a
      label in this sequence becomes its new index.

  Returns:
    (train_lbl, val_lbl) as 1-D numpy arrays of remapped indices.

  Raises:
    KeyError: if a label does not occur in unique_lbl.
  """
  print(unique_lbl)
  # Build the map from however many labels are given instead of assuming exactly 8.
  index_map = {int(lbl): idx for idx, lbl in enumerate(unique_lbl)}
  # ravel() turns (N, 1) label columns into scalars before int() is applied.
  train_lbl = np.array([index_map[int(x)] for x in np.asarray(train_lbl).ravel()])
  val_lbl = np.array([index_map[int(x)] for x in np.asarray(val_lbl).ravel()])

  return train_lbl,val_lbl

In [None]:
labels = np.unique(train_lbl8)
labels

array([0, 1, 2, 3, 4, 5, 6, 7, 9], dtype=int32)

In [None]:
# for indx,i in enumerate(labels):
#   # if i == 9:
#     print(i)
#     train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(train_imgs8,train_lbl8,val_req= False, anomalous_class = [i])
#     val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(val_imgs8,val_lbl8,val_req= False, anomalous_class = [i])
#     unique_lbls = np.unique(train_lbls_wt_clas_i)
#     print(np.unique(train_lbls_wt_clas_i),np.unique(val_lbls__wt_clas_i))
#     (train_lbls_wt_clas_i,val_lbls__wt_clas_i) = remap_labels(train_lbls_wt_clas_i,val_lbls__wt_clas_i,unique_lbls)
#     models[indx].build_model(8)
#     models[indx].train(train_imgs_wt_clas_i, train_lbls_wt_clas_i, val_imgs_wt_clas_i, val_lbls__wt_clas_i)
#     models[indx].save_model('classifier'+str(indx))

In [None]:
# Evaluate every saved classifier on the train/validation subsets that exclude
# its held-out class, collecting the first two values returned by evaluate().
train_loss, train_acc = [], []
val_loss, val_acc = [], []

for indx, held_out in enumerate(labels):
    train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(
        train_imgs8, train_lbl8, val_req=False, anomalous_class=[held_out])
    val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(
        val_imgs8, val_lbl8, val_req=False, anomalous_class=[held_out])

    # Remap the remaining labels onto contiguous indices.
    unique_lbls = np.unique(train_lbls_wt_clas_i)
    train_lbls_wt_clas_i, val_lbls__wt_clas_i = remap_labels(
        train_lbls_wt_clas_i, val_lbls__wt_clas_i, unique_lbls)

    model = models[indx].load_model('classifier' + str(indx))
    train_metrics = model.evaluate(train_imgs_wt_clas_i, train_lbls_wt_clas_i)
    val_metrics = model.evaluate(val_imgs_wt_clas_i, val_lbls__wt_clas_i)

    # Index 0 is stored as the loss, index 1 as the accuracy.
    train_loss.append(train_metrics[0])
    train_acc.append(train_metrics[1])
    val_loss.append(val_metrics[0])
    val_acc.append(val_metrics[1])

[1 2 3 4 5 6 7 9]
[0 2 3 4 5 6 7 9]
[0 1 3 4 5 6 7 9]
[0 1 2 4 5 6 7 9]
[0 1 2 3 5 6 7 9]
[0 1 2 3 4 6 7 9]
[0 1 2 3 4 5 7 9]
[0 1 2 3 4 5 6 9]
[0 1 2 3 4 5 6 7]


In [None]:
print('training accuracy average :',np.average(train_acc))
print('validation accuracy average :',np.average(val_acc))

training accuracy average : 0.8693254060215421
validation accuracy average : 0.7286666697925992


In [None]:
print(train_acc)
print(val_acc)

[0.8637857437133789, 0.8770714402198792, 0.8907142877578735, 0.8848214149475098, 0.9181428551673889, 0.8783214092254639, 0.8565000295639038, 0.8335000276565552, 0.821071445941925]
[0.7107499837875366, 0.718666672706604, 0.7512500286102295, 0.753000020980835, 0.7599999904632568, 0.7494999766349792, 0.7103333473205566, 0.7151666879653931, 0.6893333196640015]


In [None]:
classifiers = [models[i].load_model('classifier'+str(i)) for i in range(len(models))]

In [None]:
classifiers

[<tensorflow.python.keras.engine.sequential.Sequential at 0x7f0a536a6390>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f09fe83d470>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f09fde9c400>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f09fddc0400>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f09fca249b0>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f09fc263080>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f09fc2ae080>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f09fb64f208>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f09fa282978>]

In [None]:
classifiers[0].summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 32, 32, 32)        864       
_________________________________________________________________
batch_normalization_18 (Batc (None, 32, 32, 32)        128       
_________________________________________________________________
leaky_re_lu_18 (LeakyReLU)   (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 32, 32, 64)        18432     
_________________________________________________________________
batch_normalization_19 (Batc (None, 32, 32, 64)        256       
_________________________________________________________________
leaky_re_lu_19 (LeakyReLU)   (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 32, 32, 64)       

In [None]:
# For each classifier, record the name of the first of its last two layers.
layer_name_list = []
for classifier in classifiers:
    last_two_layers = classifier.layers[-2:]
    layer_name_list.append(last_two_layers[0].name)
layer_name_list

['dense_5',
 'dense_7',
 'dense_9',
 'dense_11',
 'dense_13',
 'dense_15',
 'dense_17',
 'dense_19',
 'dense_21']

In [None]:
# Build, for every classifier, a sub-model that maps the classifier input to
# the output of the layer named in layer_name_list.
# tf.keras.Model is referenced explicitly: `from keras import Model` rebinds the
# name `Model` and shadows this notebook's own `Model` wrapper class, which
# breaks every later `Model(name=...)` cell; it also mixes standalone keras
# with the tf.keras models loaded above.
model_dict = {}

for i in range(len(models)):
  model_output_i = classifiers[i].get_layer(layer_name_list[i]).output
  m0 = tf.keras.Model(inputs=classifiers[i].input, outputs=model_output_i)
  model_dict[i] = m0

In [None]:
# Outputs of each sub-model on the validation images that exclude its
# held-out class; y_pred[i] belongs to classifier i.
y_pred = []
for i, label in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs8, val_lbl8, val_req=False, anomalous_class=[label])
  print(i, np.unique(val_lbls_wt_clas_i))
  y_pred.append(model_dict[i].predict(val_imgs_wt_clas_i))

0 [1 2 3 4 5 6 7 9]
1 [0 2 3 4 5 6 7 9]
2 [0 1 3 4 5 6 7 9]
3 [0 1 2 4 5 6 7 9]
4 [0 1 2 3 5 6 7 9]
5 [0 1 2 3 4 6 7 9]
6 [0 1 2 3 4 5 7 9]
7 [0 1 2 3 4 5 6 9]
8 [0 1 2 3 4 5 6 7]


In [None]:
len(y_pred)

9

In [None]:
# Fit one temperature per classifier from its validation outputs.
temp_val = []
for i, label in enumerate(labels):
    val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
        val_imgs8, val_lbl8, val_req=False, anomalous_class=[label])
    unique_lbls = np.unique(val_lbls_wt_clas_i)
    print(unique_lbls, y_pred[i].shape)

    # remap_labels returns a pair; both inputs are the validation labels here,
    # so either element is the remapped validation label array.
    val_lbls_wt_clas_i = remap_labels(val_lbls_wt_clas_i, val_lbls_wt_clas_i, unique_lbls)[1]

    temp_val.append(temp_cal(y_pred[i], val_lbls_wt_clas_i, len(unique_lbls)))

[1 2 3 4 5 6 7 9] (12000, 8)
[1 2 3 4 5 6 7 9]
Temperature Initial value: 1.0
Temperature Final value: 2.3371341228485107
[0 2 3 4 5 6 7 9] (12000, 8)
[0 2 3 4 5 6 7 9]
Temperature Initial value: 1.0
Temperature Final value: 2.081923723220825
[0 1 3 4 5 6 7 9] (12000, 8)
[0 1 3 4 5 6 7 9]
Temperature Initial value: 1.0
Temperature Final value: 2.139570951461792
[0 1 2 4 5 6 7 9] (12000, 8)
[0 1 2 4 5 6 7 9]
Temperature Initial value: 1.0
Temperature Final value: 2.1890313625335693
[0 1 2 3 5 6 7 9] (12000, 8)
[0 1 2 3 5 6 7 9]
Temperature Initial value: 1.0
Temperature Final value: 1.9953128099441528
[0 1 2 3 4 6 7 9] (12000, 8)
[0 1 2 3 4 6 7 9]
Temperature Initial value: 1.0
Temperature Final value: 2.1096439361572266
[0 1 2 3 4 5 7 9] (12000, 8)
[0 1 2 3 4 5 7 9]
Temperature Initial value: 1.0
Temperature Final value: 2.1665432453155518
[0 1 2 3 4 5 6 9] (12000, 8)
[0 1 2 3 4 5 6 9]
Temperature Initial value: 1.0
Temperature Final value: 2.1177663803100586
[0 1 2 3 4 5 6 7] (12000, 

In [None]:
temp_val 

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.3371341>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0819237>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.139571>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1890314>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.9953128>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.109644>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1665432>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1177664>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.287348>]

In [None]:
temp_val = [2.3371341,2.0819237,2.139571,2.1890314,1.9953128,2.109644,2.166543,2.1177664,2.287348]

**Method 1**
---



In [None]:
# labels --> gives the non anomalous classes ex: here labels are 0 1 2 3 4 5 6 7 9

In [None]:
from tqdm import tqdm

# Running sums and counters of the max softmax value (per classifier) and of
# the normalised entropy (global). "out" = the image's class is the one paired
# with classifier i in `labels`; "in" = any other class.
entropy_in = 0
count_entr_in = 0
avg_in = [0] * 9
count_in = [0] * 9

entropy_out = 0
count_entr_out = 0
avg_out = [0] * 9
count_out = [0] * 9

for img, lbl in tqdm(zip(val_imgs8, val_lbl8)):
  img = img.reshape([-1, 32, 32, 3])

  for i, anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i for this image.
    logits = tf.math.divide(model_dict[i](img), temp_val[i])
    pred = tf.nn.softmax(logits)
    max_val = np.max(pred)
    # Entropy in base len(pred[0]), i.e. normalised to [0, 1].
    entrpy = entropy(pred[0], base=len(pred[0]))

    if anmls_lbl == lbl:
      avg_out[i] += max_val
      count_out[i] += 1
      entropy_out += entrpy
      count_entr_out += 1
    else:
      avg_in[i] += max_val
      count_in[i] += 1
      entropy_in += entrpy
      count_entr_in += 1


13500it [18:32, 12.13it/s]


In [None]:
# Per-classifier mean of the max softmax value for "in" and "out" samples.
classifier_avg_in = [avg_in[i] / count_in[i] for i in range(9)]
classifier_avg_out = [avg_out[i] / count_out[i] for i in range(9)]

# Mean over the nine classifiers of those per-classifier averages.
treshold_value_in = 0.0
treshold_value_out = 0.0
for mean_in, mean_out in zip(classifier_avg_in, classifier_avg_out):
  treshold_value_in = treshold_value_in + mean_in
  treshold_value_out = treshold_value_out + mean_out

treshold_value_in /= len(classifier_avg_in)
treshold_value_out /= len(classifier_avg_out)


In [None]:
# Mean normalised entropy over all "in" and all "out" evaluations.
entropy_ref_in_8 = entropy_in / count_entr_in
entropy_ref_out_8 = entropy_out / count_entr_out

# Print the reference vectors, their means and the entropy references, one per line.
for shown in (classifier_avg_in, classifier_avg_out,
              treshold_value_in, treshold_value_out,
              entropy_ref_in_8, entropy_ref_out_8):
  print(shown)

[0.7447037442798415, 0.7135843219694992, 0.751141863618046, 0.7637083080895245, 0.7566406482694049, 0.7498043323382735, 0.7194110998436809, 0.718959378151844, 0.7170880638050536]
[0.6800563244521618, 0.843279836277167, 0.5841164163748424, 0.6318720944325129, 0.5703352094391981, 0.5816669312020143, 0.5541618521511554, 0.6039093466500441, 0.7148440079589685]
0.7372268622627964
0.6404713354375627
0.35079637161983535
0.46113486762898714


In [None]:
# Persist the reference vectors and entropy references.
# The `with` block flushes and closes the file; the original never closed the
# write handle, so the next cell could read an incompletely written pickle.
with open(os.path.join('reference_vector_in_out.pkl'), "wb") as open_file:
    pickle.dump((classifier_avg_in, classifier_avg_out, entropy_ref_in_8, entropy_ref_out_8), open_file)

In [None]:
# Reload the reference vectors / entropy references saved by this notebook.
# `with` closes the handle after reading (the original leaked it).
with open(os.path.join('reference_vector_in_out.pkl'), "rb") as open_file:
    ref_vector_in_8, ref_vector_out_8, entropy_ref_in_8, entropy_ref_out_8 = pickle.load(open_file)

In [None]:
# For every test image collect, across the nine classifiers:
#   [max softmax per classifier, predicted original label per classifier,
#    true label, ID(1)/OOD(0) flag, mean normalised entropy]
max_sm_all_wt_8 = []
ood = 0
ind= 1
for data, label in tqdm(zip(test_imgs, test_lbls)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []

  # Class 8 is the anomaly in this section.
  if label[0] == 8:
    lbl = ood
  else:
    lbl = ind

  for i,anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i.
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    # Classifier i works on `labels` without its own held-out class labels[i],
    # remapped to 0..7. The original deleted index 0 for every classifier,
    # which mis-maps the predictions of classifiers 1..8.
    labels_removed_anmls = np.delete(labels, i)
    pred_lbl = labels_removed_anmls[pred_ind]   ### reverse mapping the actual label

    prediction_lbl.append(pred_lbl)

    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_8.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

0it [00:00, ?it/s]

In [None]:
# Cache the per-image test predictions. `with` flushes and closes the file so
# the reload in the next cell sees the complete pickle (the original never
# closed the write handle).
with open(os.path.join('predictions_on_test.pkl'), "wb") as open_file:
    pickle.dump(max_sm_all_wt_8, open_file)

In [None]:
# Reload the cached per-image test predictions; `with` closes the handle after
# reading (the original leaked it).
with open(os.path.join('predictions_on_test.pkl'), "rb") as open_file:
    max_sm_all_wt_8 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean distance to the ID/OOD references)

In [None]:
from scipy.spatial import distance

# Flags: 1 = in-distribution, 0 = out-of-distribution.
id = 1
ood = 0
sim_score_in, sim_score_out = [], []
sim_score_entr_in, sim_score_entr_out = [], []
sim_score_add = []
y_true, y_pred, y_entr_pred, y_true_ood = [], [], [], []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_8):

  # Similarity = 1 / (1 + Euclidean distance) to the "in" / "out" references,
  # once for the max-softmax vector and once for the mean entropy.
  sim_in = 1 / (1 + distance.euclidean(max_pred_each_cls, ref_vector_in_8))
  sim_out = 1 / (1 + distance.euclidean(max_pred_each_cls, ref_vector_out_8))
  sim_add = sim_in + sim_out

  sim_entr_in = 1 / (1 + distance.euclidean(avg_entr, entropy_ref_in_8))
  sim_entr_out = 1 / (1 + distance.euclidean(avg_entr, entropy_ref_out_8))

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_add)

  # Hard decision: whichever reference the sample is closer to.
  prd = id if sim_in > sim_out else ood
  prd_e = id if sim_entr_in > sim_entr_out else ood

  # Inverted ground truth (1 = OOD), used to score the "out" similarities.
  if lbl_id_or_ood == 1:
    label_true_ood = 0
  elif lbl_id_or_ood == 0:
    label_true_ood = 1

  y_pred.append(prd)
  y_entr_pred.append(prd_e)
  y_true.append(lbl_id_or_ood)
  y_true_ood.append(label_true_ood)

100%|██████████| 10000/10000 [00:01<00:00, 6272.92it/s]


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM

# (caption, ground truth, score) triples, printed in order. Scores built from
# the "out" similarities are evaluated against the inverted labels y_true_ood.
auroc_rows = [
    ('Auroc with prediction as ID or OOD  ', y_true, y_pred),
    ('Auroc prediction with sim score to in distribution only  :', y_true, sim_score_in),
    ('Auroc prediction with sim score to ood distribution only  :', y_true_ood, sim_score_out),
    ('\nAuroc prediction with sim added score to for in and ood distribution only  :', y_true_ood, sim_score_add),
    ('\nAuroc with prediction as ID or OOD Entropy ', y_true, y_entr_pred),
    ('Auroc prediction with sim score to in distribution only Entropy  :', y_true, sim_score_entr_in),
    ('Auroc prediction with sim score to out distribution only Entropy  :', y_true_ood, sim_score_entr_out),
]
for caption, truth, score in auroc_rows:
  print(caption, roc_auc_score(truth, score))


Auroc with prediction as ID or OOD   0.538888888888889
Auroc prediction with sim score to in distribution only  : 0.47915588888888894
Auroc prediction with sim score to ood distribution only  : 0.5682364444444445

Auroc prediction with sim added score to for in and ood distribution only  : 0.5568879999999999

Auroc with prediction as ID or OOD Entropy  0.5389999999999999
Auroc prediction with sim score to in distribution only Entropy  : 0.42880544444444446
Auroc prediction with sim score to out distribution only Entropy  : 0.5808275555555555


Decision Rule 

In [None]:
############################### simple decision rule: no ">= 7 / >= 8 votes" threshold, just the majority prediction ##################
# NOTE(review): the prediction is obtained by comparing the majority vote with
# the ground-truth label (label_actual), so the score depends on information
# that is not available for unlabeled data - confirm this is intended.

id = 1
ood = 0
y_true, y_pred = [], []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_8):
  # Label predicted by the largest number of classifiers.
  majority_lbl = Counter(pred_lbl_each_cls).most_common(1)[0][0]
  y_pred.append(id if majority_lbl == label_actual else ood)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 152500.18it/s]

 Auroc with simple decision rule about majority_:   0.7272777777777778





In [None]:
############################### get the prediction ##################
# Majority-vote prediction per test sample; the similarity of each sample to
# the in-distribution reference vector is collected alongside it.

id = 1
ood = 0
y_true = []
y_pred=[]
sim_score_in = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_8):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_8)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_8)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)

  # The original line was `sim_score(sim_in)`, a call to a name that is not
  # defined in this part of the notebook (NameError on a fresh run). Storing
  # the score mirrors how the neighbouring cells use `sim_score_in`.
  # NOTE(review): confirm this was the intent.
  sim_score_in.append(sim_in)

  count = Counter(pred_lbl_each_cls)
  most_common= count.most_common(1)[0][0]

  # NOTE(review): the vote is compared with the ground-truth label - confirm.
  if most_common == label_actual:
    pred = id
  else:
    pred = ood 
  y_pred.append(pred)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 36508.81it/s]

 Auroc with simple decision rule about majority_:   0.40355555555555556





In [None]:
# Vote-count decision rule: a sample is called in-distribution when at least
# 7 (respectively 8) classifiers agree on the same predicted label.
# The flags are declared here so the cell does not depend on an earlier cell
# having defined them (otherwise `id` would be the Python builtin).
id = 1
ood = 0
y_true_dec_rule = []
y_pred_mr_than8_ = []
y_pred_mr_than7_ = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_8):
  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  max_votes = np.max(counts)  # size of the largest agreeing group

  y_pred_mr_than7_.append(id if max_votes >= 7 else ood)
  y_pred_mr_than8_.append(id if max_votes >= 8 else ood)

  # Class 8 is the anomaly in this section.
  y_true_dec_rule.append(ood if label_actual == 8 else id)

# Captions now say ">=" to match the comparisons above (they used to say ">").
print('\nAuroc for decision rule >= 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_))
print('Auroc for decision rule >= 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_))

100%|██████████| 10000/10000 [00:00<00:00, 24985.65it/s]


Auroc for decision rule > 7  0.40355555555555556
Auroc for decision rule > 8  0.39522222222222225





In [None]:
# Combine the vote-count rule with the reference-vector / entropy-reference
# decisions: a sample is ID only if it is closer to the "in" reference AND the
# largest agreeing group of classifiers reaches the vote threshold.
sim_score_in, sim_score_entr_in = [], []
y_true_ = []
y_pred_mr_than7_, y_pred_mr_than8_ = [], []
y_entr_pred_mr_than7_, y_entr_pred_mr_than8_ = [], []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_8):

  sim_in = 1 / (1 + distance.euclidean(max_pred_each_cls, ref_vector_in_8))
  sim_out = 1 / (1 + distance.euclidean(max_pred_each_cls, ref_vector_out_8))

  sim_entr_in = 1 / (1 + distance.euclidean(avg_entr, entropy_ref_in_8))
  sim_entr_out = 1 / (1 + distance.euclidean(avg_entr, entropy_ref_out_8))

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)
  closer_to_in = sim_in > sim_out
  entr_closer_to_in = sim_entr_in > sim_entr_out

  y_pred_mr_than7_.append(id if (closer_to_in and top_votes >= 7) else ood)
  y_pred_mr_than8_.append(id if (closer_to_in and top_votes >= 8) else ood)
  y_entr_pred_mr_than7_.append(id if (entr_closer_to_in and top_votes >= 7) else ood)
  y_entr_pred_mr_than8_.append(id if (entr_closer_to_in and top_votes >= 8) else ood)

  # Class 8 is the anomaly in this section.
  y_true_.append(ood if label_actual == 8 else id)


100%|██████████| 10000/10000 [00:01<00:00, 7960.82it/s]


In [None]:
# AUROC of each combined rule against the ID/OOD ground truth.
for caption, decisions in (
    ('Decision Rule(>=7) + Reference Vector ', y_pred_mr_than7_),
    ('Decision Rule(>=8) + Reference Vector ', y_pred_mr_than8_),
    ('Decision Rule(>=7) + Entropy Reference Value ', y_entr_pred_mr_than7_),
    ('Decision Rule(>=8) + Entropy Reference Value ', y_entr_pred_mr_than8_),
):
  print(caption, roc_auc_score(y_true_, decisions))

Decision Rule(>=7) + Reference Vector  0.42861111111111105
Decision Rule(>=8) + Reference Vector  0.40655555555555556
Decision Rule(>=7) + Entropy Reference Value  0.4568888888888889
Decision Rule(>=8) + Entropy Reference Value  0.4261666666666667


**Method 2**
---






In [None]:
# Build the SVM dataset from temperature-scaled softmax outputs.
# Every (validation image, classifier) pair gives one row: the softmax vector
# and a flag that is OOD (0) when the image's class is the one paired with
# that classifier in `labels`, ID (1) otherwise.
id = 1
ood = 0

new_dataset = []

for img, label in tqdm(zip(val_imgs8, val_lbl8)):
    img = img.reshape([-1, 32, 32, 3])
    for i, lbl in enumerate(labels):
      new_label = ood if label == lbl else id

      raw_out = model_dict[i].predict(img)
      scaled_out = temp_scaling(raw_out, temp_val[i])
      probs = models[i].apply_softmax(scaled_out)
      # Keep the single row as a plain Python list.
      new_dataset.append([np.array(probs).tolist()[0], new_label])
      

0it [00:00, ?it/s]



13500it [1:21:10,  2.77it/s]


In [None]:
# Tabulate the SVM dataset and expose its two columns as numpy arrays.
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions, new_labels = (df[column].to_numpy() for column in ("predictions", "label"))

In [None]:
df.to_pickle("predictions_dataset.pkl")

In [None]:
df = pd.read_pickle('predictions_dataset.pkl')
df

Unnamed: 0,predictions,label
0,"[0.7514774799346924, 0.07023820281028748, 0.00...",1
1,"[0.02117880992591381, 0.0002242798072984442, 7...",0
2,"[0.02842085063457489, 0.045984990894794464, 0....",1
3,"[0.5566503405570984, 0.07701873779296875, 0.19...",1
4,"[0.009021895006299019, 0.8602408766746521, 0.0...",1
...,...,...
121495,"[0.01587953418493271, 0.020534485578536987, 0....",0
121496,"[0.00024105727788992226, 9.990726539399475e-05...",1
121497,"[0.013296177610754967, 0.001057616900652647, 0...",1
121498,"[0.016943851485848427, 0.002913502510637045, 0...",1


In [None]:
from collections import Counter
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Partition the rows into in-distribution (flag == 1) and the rest.
in_data, out_data = [], []

for features, flag in zip(predictions, new_labels):
  bucket = in_data if flag == 1 else out_data
  bucket.append([features, flag])

print(len(in_data),len(out_data))

# import random 
# in_data = random.sample(in_data,(len(out_data)))
# print(len(in_data))

108000 13500


In [None]:
in_data[1]

In [None]:
# Flatten the in/out rows into parallel feature and target lists
# (in-distribution rows first, then out-of-distribution rows).
# NOTE(review): `input` shadows the builtin and `labels` replaces the array of
# class ids used by earlier cells, so those cells cannot be re-run afterwards.
combined_rows = in_data + out_data
input = [row[0] for row in combined_rows]
labels = [row[1] for row in combined_rows]
print(len(input),len(labels))

121500 121500


In [None]:
# Stratified 80/20 split of the SVM dataset. random_state pins the shuffle so
# the split, and every score derived from it, is reproducible across runs
# (the original split changed on every execution).
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(
    input, labels, test_size=0.20, stratify=labels, random_state=42)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight
# 'balanced' weights for the two SVM targets (0 and 1).
# The arguments are passed by keyword: scikit-learn >= 1.0 rejects positional
# `classes` / `y` with a TypeError, while keywords work on old and new versions.
class_wts = compute_class_weight(class_weight='balanced',
                                 classes=np.unique(svm_train_lb),
                                 y=svm_train_lb)
class_wts ={0:class_wts[0], 1:class_wts[1]}
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',class_weight=class_wts))
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
pred = clf.predict(svm_test_ip)
len(pred) ### svm fitted on whole data with weights

24300

In [None]:
# Score of the fitted pipeline on the held-out SVM split.
# NOTE(review): the figures in the inline comment (82 / 92) do not match the
# value recorded for this run (about 0.59); they look stale - confirm.
clf.score(svm_test_ip,svm_test_lb) # with equal classes score is 82  # with weighted classes score is 92

0.5922222222222222

In [None]:
# Majority vote over the nine per-classifier SVM decisions for every test image.
# y_true is 1 for every class except 8 (the anomaly in this section).
y_true = []
y_pred = []
for img, label in tqdm(zip(test_imgs, test_lbls)):
    img = img.reshape([-1, 32, 32, 3])  # add a leading batch axis for predict()

    svm_output = []
    for i in range(9):
        raw_out = model_dict[i].predict(img)
        scaled_out = temp_scaling(raw_out, temp_val[i])
        probs = models[i].apply_softmax(scaled_out)
        # Round-trip through a Python list, keep the single row, shape it (1, n).
        features = np.array(np.array(probs).tolist()[0]).reshape([1, -1])
        svm_output.append(clf.predict(features)[0])

    # Most frequent SVM decision among the nine classifiers.
    y_pred.append(Counter(list(svm_output)).most_common()[0][0])
    y_true.append(0 if label == 8 else 1)

10000it [1:05:25,  2.55it/s]


In [None]:
print(y_pred)
print(y_true)

[1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 

In [None]:
print(Counter(y_pred))
print(Counter(y_true))

Counter({1: 5363, 0: 4637})
Counter({1: 9000, 0: 1000})


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
roc_auc_score(y_true, y_pred)

0.5196111111111111

In [None]:
### predictions with weighted SVM
print(Counter(y_pred)) 
print(Counter(y_true))

Counter({1: 5363, 0: 4637})
Counter({1: 9000, 0: 1000})


In [None]:
df1= pd.DataFrame(y_pred, columns=["y_pred"])
df2= pd.DataFrame(y_true, columns=["y_true"])
df = pd.concat([df1, df2], axis=1)
df

Unnamed: 0,y_pred,y_true
0,1,1
1,0,0
2,0,0
3,0,1
4,1,1
...,...,...
9995,0,0
9996,0,1
9997,0,1
9998,0,1


In [None]:
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
y_true = df['y_true'].to_numpy().tolist()
y_pred = df['y_pred'].to_numpy().tolist()
print(y_true)
print(y_pred)

[1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

# 7 as anomaly

In [None]:
import os
# Switch the process-wide working directory to the "leave 7" experiment folder;
# every relative path used below (pickles, saved classifiers) resolves against it.
# NOTE(review): hard-coded Google Drive path; requires the drive mount from the top of the notebook.
print(os.getcwd())
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 7/")
# os.chdir("/content/gdrive/My Drive/leave 7/")
print(os.listdir())

/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 8
['predictions_on_test.pkl', 'reference_vector_in_out.pkl']


In [None]:
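# One-off data preparation, kept for reference: builds the train/validation split with class 7 removed and caches it in Training_data.pkl.
# train_imgs7, val_imgs7 , train_lbl7, val_lbl7 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[7])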
# print(train_imgs7.shape, val_imgs7.shape , train_lbl7.shape, val_lbl7.shape)
# open_file = open(os.path.join('Training_data.pkl'), "wb")
# pickle.dump((train_imgs7, val_imgs7 , train_lbl7, val_lbl7 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Load the cached train/validation split (class 7 removed) from the working directory.
# The context manager closes the file handle, which the original left open.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(os.path.join('Training_data.pkl'), "rb") as open_file:
  (train_imgs7, val_imgs7 , train_lbl7, val_lbl7 )= pickle.load(open_file)
print(train_imgs7.shape, val_imgs7.shape , train_lbl7.shape, val_lbl7.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
print( np.unique(train_lbl7),len(np.unique(train_lbl7)))

[0 1 2 3 4 5 6 8 9] 9


In [None]:
# One wrapper object per remaining class: classifier k is the one that
# additionally leaves out the k-th of the nine non-anomalous classes.
models = [
  Model(name='7anomaly:classifier'+str(clf_idx))
  for clf_idx in range(len(np.unique(train_lbl7)))
]

names = [wrapper.name for wrapper in models]
names

['7anomaly:classifier0',
 '7anomaly:classifier1',
 '7anomaly:classifier2',
 '7anomaly:classifier3',
 '7anomaly:classifier4',
 '7anomaly:classifier5',
 '7anomaly:classifier6',
 '7anomaly:classifier7',
 '7anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Map original class ids onto consecutive indices 0..K-1.

  The index assigned to a class is its position in ``unique_lbl``. The
  original implementation hard-coded exactly eight entries; this version
  accepts any number of classes.

  Args:
    train_lbl: array-like of training labels, shape (N,) or (N, 1).
    val_lbl: array-like of validation labels, shape (M,) or (M, 1).
    unique_lbl: sequence of the distinct class ids, in the order that
      defines the new indices.

  Returns:
    Tuple ``(train_lbl, val_lbl)`` of 1-D numpy arrays with remapped labels.

  Raises:
    KeyError: if a label does not occur in ``unique_lbl``.
  """
  print(unique_lbl)
  index_map = {int(lbl): new_idx for new_idx, lbl in enumerate(unique_lbl)}
  # Flatten first so (N, 1) label columns yield scalars; int() on a
  # one-element array is deprecated in recent NumPy.
  train_lbl = np.array([index_map[int(x)] for x in np.asarray(train_lbl).reshape(-1)])
  val_lbl = np.array([index_map[int(x)] for x in np.asarray(val_lbl).reshape(-1)])

  return train_lbl,val_lbl

In [None]:
# Sorted ids of the classes present in the training split; position k in this
# array identifies classifier k throughout this section.
labels = np.unique(train_lbl7)
labels

array([0, 1, 2, 3, 4, 5, 6, 8, 9], dtype=int32)

In [None]:
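# One-off training run, kept for reference: fits and saves one classifier per additionally held-out class. The cells below load the saved 'classifier<k>' models instead.
# for indx,i in enumerate(labels):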
#   # if i == 9:
      
#     train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(train_imgs7,train_lbl7,val_req= False, anomalous_class = [i])
#     val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(val_imgs7,val_lbl7,val_req= False, anomalous_class = [i])
#     unique_lbls = np.unique(train_lbls_wt_clas_i)
#     (train_lbls_wt_clas_i,val_lbls__wt_clas_i) = remap_labels(train_lbls_wt_clas_i,val_lbls__wt_clas_i,unique_lbls)
#     print(np.unique(train_lbls_wt_clas_i),np.unique(val_lbls__wt_clas_i))
#     models[indx].build_model(8)
#     models[indx].train(train_imgs_wt_clas_i, train_lbls_wt_clas_i, val_imgs_wt_clas_i, val_lbls__wt_clas_i)
#     models[indx].save_model('classifier'+str(indx))

In [None]:
train_loss, train_acc = [], []
val_loss, val_acc = [], []

# Score every saved classifier on exactly the data it was fit for: the split
# with its own held-out class removed and the remaining ids remapped.
for indx, held_out in enumerate(labels):
  train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(
      train_imgs7, train_lbl7, val_req=False, anomalous_class=[held_out])
  val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(
      val_imgs7, val_lbl7, val_req=False, anomalous_class=[held_out])

  unique_lbls = np.unique(train_lbls_wt_clas_i)
  train_lbls_wt_clas_i, val_lbls__wt_clas_i = remap_labels(
      train_lbls_wt_clas_i, val_lbls__wt_clas_i, unique_lbls)

  model = models[indx].load_model('classifier'+str(indx))
  train_metrics = model.evaluate(train_imgs_wt_clas_i, train_lbls_wt_clas_i)
  val_metrics = model.evaluate(val_imgs_wt_clas_i, val_lbls__wt_clas_i)

  # Entry 0 is stored as the loss and entry 1 as the accuracy.
  train_loss.append(train_metrics[0])
  train_acc.append(train_metrics[1])
  val_loss.append(val_metrics[0])
  val_acc.append(val_metrics[1])

[1 2 3 4 5 6 8 9]
[0 2 3 4 5 6 8 9]
[0 1 3 4 5 6 8 9]
[0 1 2 4 5 6 8 9]
[0 1 2 3 5 6 8 9]
[0 1 2 3 4 6 8 9]
[0 1 2 3 4 5 8 9]
[0 1 2 3 4 5 6 9]
[0 1 2 3 4 5 6 8]


In [None]:
print('training accuracy average :',np.average(train_acc))
print('validation accuracy average :',np.average(val_acc))

training accuracy average : 0.8386706444952223
validation accuracy average : 0.8395555549197726


In [None]:
print(train_acc)
print(val_acc)

[0.8293928503990173, 0.8193214535713196, 0.8518571257591248, 0.883821427822113, 0.848642885684967, 0.8552142977714539, 0.8525000214576721, 0.7854285836219788, 0.8218571543693542]
[0.8297500014305115, 0.8227499723434448, 0.8462499976158142, 0.8825833201408386, 0.8484166860580444, 0.856083333492279, 0.8585833311080933, 0.7924166917800903, 0.8191666603088379]


In [None]:
# Reload every saved classifier, in the same order as `models`.
classifiers = []
for clf_idx, wrapper in enumerate(models):
  classifiers.append(wrapper.load_model('classifier'+str(clf_idx)))

In [None]:
classifiers

In [None]:
# Name of the first of the last two layers of each classifier, i.e. the
# second-to-last layer whenever the model has at least two layers.
# NOTE(review): downstream cells treat this layer's output as logits — confirm against build_model.
layer_name_list = [classifier.layers[-2:][0].name for classifier in classifiers]
layer_name_list

['dense_1',
 'dense_3',
 'dense_5',
 'dense_7',
 'dense_9',
 'dense_11',
 'dense_13',
 'dense_15',
 'dense_17']

In [None]:
# Build one feature extractor per classifier that stops at the layer named in
# layer_name_list (instead of the classifier's final layer).
#
# tf.keras.Model is referenced explicitly: `from keras import Model` would
# rebind the notebook-level name `Model`, which is this notebook's own wrapper
# class, so later `Model(name=...)` cells would silently build a Keras model.
model_dict = {}

for i in range(len(models)):
  model_output_i = classifiers[i].get_layer(layer_name_list[i]).output
  m0 = tf.keras.Model(inputs=classifiers[i].input, outputs=model_output_i)
  model_dict[i] = m0

In [None]:
# Collect, per classifier, the extractor outputs on the validation images that
# remain after removing that classifier's own held-out class.
y_pred = []
for clf_idx, held_out in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs7, val_lbl7, val_req=False, anomalous_class=[held_out])
  extractor = model_dict[clf_idx]
  print(clf_idx, np.unique(val_lbls_wt_clas_i))
  y_pred.append(extractor.predict(val_imgs_wt_clas_i))

0 [1 2 3 4 5 6 8 9]
1 [0 2 3 4 5 6 8 9]
2 [0 1 3 4 5 6 8 9]
3 [0 1 2 4 5 6 8 9]
4 [0 1 2 3 5 6 8 9]
5 [0 1 2 3 4 6 8 9]
6 [0 1 2 3 4 5 8 9]
7 [0 1 2 3 4 5 6 9]
8 [0 1 2 3 4 5 6 8]


In [None]:
len(y_pred)

9

In [None]:
# Fit one temperature per classifier from its validation outputs (y_pred[k])
# and the matching remapped validation labels.
temp_val = []
for clf_idx, held_out in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs7, val_lbl7, val_req=False, anomalous_class=[held_out])

  unique_lbls = np.unique(val_lbls_wt_clas_i)
  print(unique_lbls, y_pred[clf_idx].shape)

  # remap_labels returns a pair; both halves are the remapped validation labels here.
  remapped = remap_labels(val_lbls_wt_clas_i, val_lbls_wt_clas_i, unique_lbls)
  val_lbls_wt_clas_i = remapped[1]

  temp_val.append(temp_cal(y_pred[clf_idx], val_lbls_wt_clas_i, len(unique_lbls)))

[1 2 3 4 5 6 8 9] (12000, 8)
[1 2 3 4 5 6 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.09006667137146
[0 2 3 4 5 6 8 9] (12000, 8)
[0 2 3 4 5 6 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.1367156505584717
[0 1 3 4 5 6 8 9] (12000, 8)
[0 1 3 4 5 6 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.2199161052703857
[0 1 2 4 5 6 8 9] (12000, 8)
[0 1 2 4 5 6 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.0220091342926025
[0 1 2 3 5 6 8 9] (12000, 8)
[0 1 2 3 5 6 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.9407479763031006
[0 1 2 3 4 6 8 9] (12000, 8)
[0 1 2 3 4 6 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.9514660835266113
[0 1 2 3 4 5 8 9] (12000, 8)
[0 1 2 3 4 5 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.233781099319458
[0 1 2 3 4 5 6 9] (12000, 8)
[0 1 2 3 4 5 6 9]
Temperature Initial value: 1.0
Temperature Final value: 2.175659418106079
[0 1 2 3 4 5 6 8] (12000, 8)

In [None]:
temp_val  ### expected to be a list; if tensors are displayed, the calibration cell above has run

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0900667>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1367157>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.219916>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0220091>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.940748>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.9514661>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.233781>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1756594>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1226878>]

In [None]:
# Plain-float copy of the fitted temperatures displayed above, so later cells
# can run without repeating the calibration.
# NOTE(review): hard-coded; must be updated by hand if the classifiers are retrained.
temp_val = [2.0900667,2.1367157,2.219916,2.0220091,1.940748,1.9514661,2.233781,2.1756594,2.1226878]

**Method 1**
---



In [None]:
# labels --> gives the non-anomalous classes, e.g. here labels are 0 1 2 3 4 5 6 8 9 (class 7 is the held-out anomaly)

In [None]:
from tqdm import tqdm

# Running sums and counts per classifier of the maximum calibrated softmax
# value, kept separately for "in" samples (class seen by the classifier) and
# "out" samples (class held out from it), plus global entropy sums.
entropy_in = 0
count_entr_in = 0
avg_in = [0] * 9
count_in = [0] * 9

entropy_out = 0
count_entr_out = 0
avg_out = [0] * 9
count_out = [0] * 9

for img, lbl in tqdm(zip(val_imgs7, val_lbl7)):
  img = img.reshape([-1, 32, 32, 3])

  for i,anmls_lbl in enumerate(labels):
    # Both cases share the same forward pass; only the accumulators differ.
    logits = tf.math.divide(model_dict[i](img), temp_val[i])
    pred = tf.nn.softmax(logits)
    max_val = np.max(pred)
    # Entropy normalised by using the number of outputs as the log base.
    entrpy = entropy(pred[0], base=len(pred[0]))

    if anmls_lbl == lbl:
      avg_out[i] += max_val
      count_out[i] += 1
      entropy_out += entrpy
      count_entr_out += 1
    else:
      avg_in[i] += max_val
      count_in[i] += 1
      entropy_in += entrpy
      count_entr_in += 1


13500it [18:24, 12.23it/s]


In [None]:
# Per-classifier mean of the maximum softmax value for "in" and "out" samples.
classifier_avg_in = [total / n for total, n in zip(avg_in, count_in)]
classifier_avg_out = [total / n for total, n in zip(avg_out, count_out)]

# Mean of those per-classifier means, accumulated left to right.
treshold_value_in = 0.0
for mean_in in classifier_avg_in:
  treshold_value_in += mean_in
treshold_value_in /= len(classifier_avg_in)

treshold_value_out = 0.0
for mean_out in classifier_avg_out:
  treshold_value_out += mean_out
treshold_value_out /= len(classifier_avg_out)


In [None]:
# Reference vectors: per-classifier mean max-softmax for "in" and "out" samples.
print(classifier_avg_in)
print(classifier_avg_out)

# Their averages over the nine classifiers.
print(treshold_value_in)
print(treshold_value_out)

# Scalar entropy references: mean normalised entropy over all "in" / "out" evaluations.
entropy_ref_in_7 = entropy_in/count_entr_in
entropy_ref_out_7 = entropy_out/ count_entr_out

print(entropy_ref_in_7)
print(entropy_ref_out_7)

[0.7529683401448031, 0.7421929782852531, 0.7887351045732697, 0.8059035551585257, 0.7759936836026609, 0.7838684013336897, 0.7742825755787393, 0.7224755025232832, 0.7381705360586445]
[0.6538507265547912, 0.7024856757024924, 0.6088091837366422, 0.6724368843734264, 0.5627503734529018, 0.6280661062498888, 0.6005390967726707, 0.5482979671259721, 0.7053185583651066]
0.7649545196954298
0.6313949524815435
0.31825021373072365
0.4749488777462869


In [None]:
# Cache the reference statistics. The context manager flushes and closes the
# file; the original left the handle open, so the data reached disk only once
# the handle happened to be garbage collected.
with open(os.path.join('reference_vector_in_out.pkl'), "wb") as open_file:
  pickle.dump((classifier_avg_in,classifier_avg_out,entropy_ref_in_7,entropy_ref_out_7), open_file)

In [None]:
# Reload the cached reference statistics, closing the handle afterwards.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(os.path.join('reference_vector_in_out.pkl'), "rb") as open_file:
  ref_vector_in_7,ref_vector_out_7,entropy_ref_in_7,entropy_ref_out_7 = pickle.load(open_file)

In [None]:
# Run every test image through all classifiers and record, per image:
#   [max softmax per classifier, predicted original class id per classifier,
#    true class id, ID(1)/OOD(0) flag, mean normalised entropy].
max_sm_all_wt_7 = []
ood = 0
ind= 1
for data, label in tqdm(zip(test_imgs, test_lbls)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []

  # Class 7 is the anomaly in this section; everything else is in-distribution.
  lbl = ood if label[0] == 7 else ind

  for i,anmls_lbl in enumerate(labels):
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])   # temperature scaling
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    # Classifier i was fit without labels[i], with the remaining ids remapped
    # to 0..7 in sorted order. The reverse mapping must therefore drop
    # position i. The original always dropped position 0, which mislabels the
    # predictions of every classifier with i > 0.
    labels_removed_anmls = np.delete(labels, i)
    pred_lbl = labels_removed_anmls[pred_ind]   ### reverse mapping the actual label

    prediction_lbl.append(pred_lbl)

    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_7.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

10000it [13:58, 11.93it/s]


In [None]:
# Cache the per-image test records; the context manager flushes and closes the
# file, which the original left open.
with open(os.path.join('predictions_on_test.pkl'), "wb") as open_file:
  pickle.dump(max_sm_all_wt_7, open_file)

In [None]:
# Reload the cached per-image test records, closing the handle afterwards.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(os.path.join('predictions_on_test.pkl'), "rb") as open_file:
  max_sm_all_wt_7 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean distance to the ID/OOD references)

In [None]:
from scipy.spatial import distance

# Score every test record by its similarity to the "in" and "out" references:
#   * vector form:  Euclidean distance between the max-softmax vector and the
#     per-classifier reference vectors;
#   * entropy form: absolute difference between the mean entropy and the
#     scalar entropy references.
# Similarity is 1 / (1 + distance); the closer reference gives the hard vote.
id = 1    # NOTE: shadows the builtin `id`; name kept because later cells read it
ood = 0
sim_score_in =[]
sim_score_out = []
sim_score_entr_in =[]
sim_score_entr_out = []
sim_score_add = []
y_true = []
y_pred=[]
y_entr_pred=[]
y_true_ood = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_7):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_7)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_7)

  # Both operands are scalars, for which the Euclidean distance is the
  # absolute difference. distance.euclidean expects 1-D vectors and rejects
  # scalars on SciPy versions that validate input dimensionality.
  dist_entr_in = abs(avg_entr - entropy_ref_in_7)
  dist_entr_out = abs(avg_entr - entropy_ref_out_7)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)
  sim_add = sim_in + sim_out

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_add)

  prd = id if sim_in > sim_out else ood
  prd_e = id if sim_entr_in > sim_entr_out else ood

  # Inverted ground truth (OOD = 1), for scoring the "similarity to out" values.
  label_true_ood = 1 - lbl_id_or_ood

  y_pred.append(prd)
  y_entr_pred.append(prd_e)
  y_true.append(lbl_id_or_ood)
  y_true_ood.append(label_true_ood)

100%|██████████| 10000/10000 [00:01<00:00, 6891.01it/s]


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
# y_true uses ID = 1; y_true_ood is its inverse (OOD = 1) and is paired with the
# "similarity to out" scores so that a higher score means the positive class.
# NOTE(review): y_pred / y_entr_pred are hard 0/1 votes, so those two AUROCs
# come from a single operating point.
print('Auroc with prediction as ID or OOD  ',roc_auc_score(y_true, y_pred))
print('Auroc prediction with sim score to in distribution only  :',roc_auc_score(y_true,sim_score_in))
print('Auroc prediction with sim score to ood distribution only  :',roc_auc_score(y_true_ood,sim_score_out))

print('\nAuroc prediction with sim added score to for in and ood distribution only  :',roc_auc_score(y_true_ood,sim_score_add))

print('\nAuroc with prediction as ID or OOD Entropy ',roc_auc_score(y_true, y_entr_pred))
print('Auroc prediction with sim score to in distribution only Entropy  :',roc_auc_score(y_true,sim_score_entr_in))
print('Auroc prediction with sim score to out distribution only Entropy  :',roc_auc_score(y_true_ood,sim_score_entr_out))


Auroc with prediction as ID or OOD   0.6815555555555556
Auroc prediction with sim score to in distribution only  : 0.6655147777777777
Auroc prediction with sim score to ood distribution only  : 0.6464668888888888

Auroc prediction with sim added score to for in and ood distribution only  : 0.5018665555555555

Auroc with prediction as ID or OOD Entropy  0.6964444444444444
Auroc prediction with sim score to in distribution only Entropy  : 0.610096
Auroc prediction with sim score to out distribution only Entropy  : 0.6870474444444444


Decision Rule 

In [None]:
############################### simple decision rule: no 7-or-8 agreement threshold, just take the majority prediction ##################
# NOTE(review): the rule compares the majority vote with `label_actual`, i.e.
# the ground-truth class of the test sample. That information is not available
# at inference time, so this AUROC is affected by label leakage — confirm the
# intended rule before reporting the number.

id = 1
ood = 0
y_true = []
y_pred=[]

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_7):
  # Most frequent predicted class among the per-classifier votes.
  count = Counter(pred_lbl_each_cls)
  most_common= count.most_common(1)[0][0]

  if most_common == label_actual:
    pred = id
  else:
    pred = ood 
  y_pred.append(pred)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 159263.66it/s]

 Auroc with simple decision rule about majority_:   0.7355





In [None]:
# Agreement-based decision rule: a sample is called in-distribution when at
# least 7 (resp. 8) of the classifiers vote for the same class.
# The flags are defined here so the cell does not depend on an earlier cell.
id = 1    # NOTE: shadows the builtin `id`; name kept because other cells read it
ood = 0
y_true_dec_rule = []
y_pred_mr_than8_ = []
y_pred_mr_than7_ = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_7):
  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  max_votes = np.max(counts)   # size of the largest group of agreeing classifiers

  y_pred_mr_than7_.append(id if max_votes >= 7 else ood)
  y_pred_mr_than8_.append(id if max_votes >= 8 else ood)

  # Class 7 is the anomaly in this section.
  y_true_dec_rule.append(ood if label_actual == 7 else id)

# The thresholds are inclusive, so the labels read ">=" (the original printed ">").
print('\nAuroc for decision rule >= 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_))
print('Auroc for decision rule >= 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_))

100%|██████████| 10000/10000 [00:00<00:00, 19420.58it/s]


Auroc for decision rule > 7  0.6389444444444444
Auroc for decision rule > 8  0.5532777777777778





In [None]:
# Combined rule: a sample is in-distribution only if it is closer to the "in"
# reference than to the "out" reference AND at least 7 (resp. 8) classifiers
# agree on the predicted class. Evaluated for the vector reference and for the
# entropy reference.
# The flags are defined here so the cell does not depend on an earlier cell.
id = 1    # NOTE: shadows the builtin `id`; name kept because other cells read it
ood = 0
sim_score_in =[]
sim_score_entr_in =[]
y_true_ = []
y_pred_mr_than7_ = []
y_pred_mr_than8_ = []
y_entr_pred_mr_than7_ = []
y_entr_pred_mr_than8_ = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_7):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_7)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_7)

  # Scalars: the Euclidean distance reduces to the absolute difference.
  # distance.euclidean expects 1-D vectors and rejects scalars on SciPy
  # versions that validate input dimensionality.
  dist_entr_in = abs(avg_entr - entropy_ref_in_7)
  dist_entr_out = abs(avg_entr - entropy_ref_out_7)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  max_votes = np.max(counts)   # size of the largest group of agreeing classifiers

  closer_to_in = sim_in > sim_out
  entr_closer_to_in = sim_entr_in > sim_entr_out

  y_pred_mr_than7_.append(id if (closer_to_in and max_votes >= 7) else ood)
  y_pred_mr_than8_.append(id if (closer_to_in and max_votes >= 8) else ood)
  y_entr_pred_mr_than7_.append(id if (entr_closer_to_in and max_votes >= 7) else ood)
  y_entr_pred_mr_than8_.append(id if (entr_closer_to_in and max_votes >= 8) else ood)

  # Class 7 is the anomaly in this section.
  y_true_.append(ood if label_actual == 7 else id)


100%|██████████| 10000/10000 [00:01<00:00, 5571.23it/s]


In [None]:
# AUROC of the four combined rules against the ID(1)/OOD(0) ground truth.
# NOTE(review): all four predictions are hard 0/1 votes, so each AUROC comes
# from a single operating point.
print('Decision Rule(>=7) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than7_))
print('Decision Rule(>=8) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than8_))
print('Decision Rule(>=7) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than7_))
print('Decision Rule(>=8) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than8_))

Decision Rule(>=7) + Reference Vector  0.6337222222222222
Decision Rule(>=8) + Reference Vector  0.5513333333333333
Decision Rule(>=7) + Entropy Reference Value  0.6278888888888889
Decision Rule(>=8) + Entropy Reference Value  0.5495555555555556


**Method 2**
---



Normal weighted SVM




In [None]:
# Build the SVM training set from the validation images: one row per
# (image, classifier) pair, holding the temperature-scaled softmax vector and
# a flag that is OOD (0) when the image's class is the one that classifier
# never saw, and ID (1) otherwise.
id = 1
ood = 0

new_dataset = []

for sample, sample_lbl in tqdm(zip(val_imgs7, val_lbl7)):
  batch = sample.reshape([-1,32,32,3])
  for clf_idx, held_out in enumerate(labels):
    new_label = ood if sample_lbl == held_out else id

    raw_scores = model_dict[clf_idx].predict(batch)
    calibrated = temp_scaling(raw_scores, temp_val[clf_idx])
    # Nested Python list for the single image in the batch; keep its first row.
    probs = np.array(models[clf_idx].apply_softmax(calibrated)).tolist()[0]
    new_dataset.append([probs, new_label])
      

0it [00:00, ?it/s]



13500it [1:06:44,  3.37it/s]


In [None]:
# Tabulate the (softmax vector, ID/OOD flag) rows and expose the two columns as arrays.
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions = df['predictions'].to_numpy()
new_labels = df['label'].to_numpy()

In [None]:
df.to_pickle("predictions_dataset.pkl")

In [None]:
df = pd.read_pickle('predictions_dataset.pkl')
df

Unnamed: 0,predictions,label
0,"[0.0006662760279141366, 0.03042178601026535, 0...",1
1,"[0.0007441784837283194, 0.011668458580970764, ...",1
2,"[0.00011140484275529161, 4.954008909408003e-05...",1
3,"[2.0139041225775145e-05, 1.2311524187680334e-0...",1
4,"[0.0010511927539482713, 0.0001922359224408865,...",1
...,...,...
121495,"[0.009468583390116692, 0.001008165767416358, 0...",1
121496,"[0.007320940028876066, 0.0043786088936030865, ...",1
121497,"[0.0014624235918745399, 4.300913496990688e-05,...",1
121498,"[8.398467616643757e-05, 5.723839421989396e-05,...",1


In [None]:
from collections import Counter
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Separate the rows by flag: 1 goes to in_data, anything else to out_data.
in_data = []
out_data = []

for features, flag in zip(predictions, new_labels):
  bucket = in_data if flag == 1 else out_data
  bucket.append([features, flag])

print(len(in_data),len(out_data))

# Disabled option: balance the classes by subsampling in_data down to
# len(out_data) with random.sample.
# import random
# in_data = random.sample(in_data,(len(out_data)))
# print(len(in_data))

108000 13500


In [None]:
in_data[1]

[[0.0007441784837283194,
  0.011668458580970764,
  0.01295829750597477,
  0.025754621252417564,
  0.00714450515806675,
  0.9409600496292114,
  0.0003947654040530324,
  0.00037502526538446546],
 1]

In [None]:
# Flatten the two buckets into parallel feature / flag lists (ID rows first).
# NOTE(review): this rebinds `labels` (until now the array of class ids used
# to index the classifiers) and shadows the builtin `input`; re-running earlier
# cells after this one will misbehave. The names are kept because the next
# cell reads them.
input = [row[0] for row in in_data] + [row[0] for row in out_data]
labels = [row[1] for row in in_data] + [row[1] for row in out_data]
print(len(input),len(labels))

121500 121500


In [None]:
# Stratified 80/20 split of the SVM dataset. A fixed random_state makes the
# split, and every number derived from it, reproducible across runs.
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(
    input, labels, test_size=0.20, stratify=labels, random_state=42)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
# NOTE(review): this estimator does not appear to be fitted or used in the
# cells that follow (they use the `clf` pipeline); it also rebinds `model`.
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight
# 'balanced' weights are inversely proportional to class frequency.
# `classes` and `y` are passed by keyword: current scikit-learn releases accept
# them only as keywords, and older releases accept keywords as well.
svm_classes = np.unique(svm_train_lb)
class_wts = compute_class_weight(class_weight='balanced', classes=svm_classes, y=svm_train_lb)
# Map each class id to its weight (the original hard-coded the keys 0 and 1).
class_wts = {int(cls): wt for cls, wt in zip(svm_classes, class_wts)}
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Standardise the softmax features, then fit a class-weighted SVC so the rarer
# OOD class (0) is not swamped by the ID class (1).
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',class_weight=class_wts))
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
pred = clf.predict(svm_test_ip)
len(pred) ### svm fitted on whole data with weights

24300

In [None]:
clf.score(svm_test_ip,svm_test_lb) 

0.6455967078189301

In [None]:
# Classify every test image: each of the nine classifiers produces a
# calibrated softmax vector, the SVM labels each vector ID (1) / OOD (0),
# and the most frequent of the nine SVM votes is the final prediction.
y_true = []
y_pred = []
for sample, sample_lbl in tqdm(zip(test_imgs, test_lbls)):
  batch = sample.reshape([-1,32,32,3])

  svm_output = []
  for clf_idx in range(0,9):
    raw_scores = model_dict[clf_idx].predict(batch)
    calibrated = temp_scaling(raw_scores, temp_val[clf_idx])
    probs = np.array(models[clf_idx].apply_softmax(calibrated)).tolist()
    features = np.array(probs[0]).reshape([1,-1])   # one row, as the SVM expects
    svm_output.append(clf.predict(features)[0])

  majority_vote = Counter(list(svm_output)).most_common()[0][0]
  y_pred.append(majority_vote)

  # Class 7 is the anomaly in this section.
  y_true.append(0 if sample_lbl == 7 else 1)

10000it [53:36,  3.11it/s]


In [None]:
print(y_pred)
print(y_true)

[1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 

In [None]:
print(Counter(y_pred))
print(Counter(y_true))

Counter({1: 5845, 0: 4155})
Counter({1: 9000, 0: 1000})


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
# NOTE(review): y_pred holds hard 0/1 majority votes, so this AUROC comes from
# a single operating point, not from a ranking score.
roc_auc_score(y_true, y_pred)

0.7236111111111112

In [None]:
# Put predictions and ground truth side by side in one frame (row i = test sample i).
df1= pd.DataFrame(y_pred, columns=["y_pred"])
df2= pd.DataFrame(y_true, columns=["y_true"])
df = pd.concat([df1, df2], axis=1)
df

Unnamed: 0,y_pred,y_true
0,1,1
1,1,1
2,1,1
3,1,1
4,1,1
...,...,...
9995,0,1
9996,0,1
9997,1,1
9998,0,1


In [None]:
# Persist the final predictions in the current working directory and read them
# straight back, so the cells below can also run from the cached file.
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
# Restore plain Python lists from the cached frame.
# NOTE(review): the two prints dump every element; consider printing a slice.
y_true = df['y_true'].to_numpy().tolist()
y_pred = df['y_pred'].to_numpy().tolist()
print(y_true)
print(y_pred)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 

# 6 as anomaly

In [None]:
import os
# Switch the process-wide working directory to the "leave 6" experiment folder;
# every relative path used below (pickles, saved classifiers) resolves against it.
# NOTE(review): hard-coded Google Drive path; requires the drive mount from the top of the notebook.
print(os.getcwd())
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 6/")
# os.chdir("/content/gdrive/My Drive/leave 6/")
print(os.listdir())

/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 6
['predictions_on_test.pkl', 'reference_vector_in_out.pkl']


In [None]:
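# One-off data preparation, kept for reference: builds the train/validation split with class 6 removed and caches it in Training_data.pkl.
# train_imgs6, val_imgs6 , train_lbl6, val_lbl6 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[6])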
# print(train_imgs6.shape, val_imgs6.shape , train_lbl6.shape, val_lbl6.shape)
# open_file = open(os.path.join('Training_data.pkl'), "wb")
# pickle.dump((train_imgs6, val_imgs6 , train_lbl6, val_lbl6 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Load the cached train/validation split (class 6 removed) from the working directory.
# The context manager closes the file handle, which the original left open.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(os.path.join('Training_data.pkl'), "rb") as open_file:
  (train_imgs6, val_imgs6 , train_lbl6, val_lbl6 )= pickle.load(open_file)
print(train_imgs6.shape, val_imgs6.shape , train_lbl6.shape, val_lbl6.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
print( np.unique(train_lbl6),len(np.unique(train_lbl6)))

[0 1 2 3 4 5 7 8 9] 9


In [None]:
# One wrapper object per remaining class: classifier k is the one that
# additionally leaves out the k-th of the nine non-anomalous classes.
models = [
  Model(name='6anomaly:classifier'+str(clf_idx))
  for clf_idx in range(len(np.unique(train_lbl6)))
]

names = [wrapper.name for wrapper in models]
names

['6anomaly:classifier0',
 '6anomaly:classifier1',
 '6anomaly:classifier2',
 '6anomaly:classifier3',
 '6anomaly:classifier4',
 '6anomaly:classifier5',
 '6anomaly:classifier6',
 '6anomaly:classifier7',
 '6anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Map original class ids onto consecutive indices 0..K-1.

  The index assigned to a class is its position in ``unique_lbl``. The
  original implementation hard-coded exactly eight entries; this version
  accepts any number of classes.

  NOTE(review): a function with this name is also defined earlier in the
  notebook; consider defining it once near the top.

  Args:
    train_lbl: array-like of training labels, shape (N,) or (N, 1).
    val_lbl: array-like of validation labels, shape (M,) or (M, 1).
    unique_lbl: sequence of the distinct class ids, in the order that
      defines the new indices.

  Returns:
    Tuple ``(train_lbl, val_lbl)`` of 1-D numpy arrays with remapped labels.

  Raises:
    KeyError: if a label does not occur in ``unique_lbl``.
  """
  print(unique_lbl)
  index_map = {int(lbl): new_idx for new_idx, lbl in enumerate(unique_lbl)}
  # Flatten first so (N, 1) label columns yield scalars; int() on a
  # one-element array is deprecated in recent NumPy.
  train_lbl = np.array([index_map[int(x)] for x in np.asarray(train_lbl).reshape(-1)])
  val_lbl = np.array([index_map[int(x)] for x in np.asarray(val_lbl).reshape(-1)])

  return train_lbl,val_lbl

In [None]:
# Sorted ids of the classes present in the training split; position k in this
# array identifies classifier k throughout this section.
labels = np.unique(train_lbl6)
labels

array([0, 1, 2, 3, 4, 5, 7, 8, 9], dtype=int32)

In [None]:
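# One-off training run, kept for reference: fits and saves one classifier per additionally held-out class. The cells below load the saved 'classifier<k>' models instead.
# for indx,i in enumerate(labels):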
#   # if i == 9:
      
#     train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(train_imgs6,train_lbl6,val_req= False, anomalous_class = [i])
#     val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(val_imgs6,val_lbl6,val_req= False, anomalous_class = [i])
#     unique_lbls = np.unique(train_lbls_wt_clas_i)
#     (train_lbls_wt_clas_i,val_lbls__wt_clas_i) = remap_labels(train_lbls_wt_clas_i,val_lbls__wt_clas_i,unique_lbls)
#     print(np.unique(train_lbls_wt_clas_i),np.unique(val_lbls__wt_clas_i))
#     models[indx].build_model(8)
#     models[indx].train(train_imgs_wt_clas_i, train_lbls_wt_clas_i, val_imgs_wt_clas_i, val_lbls__wt_clas_i)
#     models[indx].save_model('classifier'+str(indx))

In [None]:
train_loss, train_acc = [], []
val_loss, val_acc = [], []

# Score every saved classifier on exactly the data it was fit for: the split
# with its own held-out class removed and the remaining ids remapped.
for indx, held_out in enumerate(labels):
  train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(
      train_imgs6, train_lbl6, val_req=False, anomalous_class=[held_out])
  val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(
      val_imgs6, val_lbl6, val_req=False, anomalous_class=[held_out])

  unique_lbls = np.unique(train_lbls_wt_clas_i)
  train_lbls_wt_clas_i, val_lbls__wt_clas_i = remap_labels(
      train_lbls_wt_clas_i, val_lbls__wt_clas_i, unique_lbls)

  model = models[indx].load_model('classifier'+str(indx))
  train_metrics = model.evaluate(train_imgs_wt_clas_i, train_lbls_wt_clas_i)
  val_metrics = model.evaluate(val_imgs_wt_clas_i, val_lbls__wt_clas_i)

  # Entry 0 is stored as the loss and entry 1 as the accuracy.
  train_loss.append(train_metrics[0])
  train_acc.append(train_metrics[1])
  val_loss.append(val_metrics[0])
  val_acc.append(val_metrics[1])

[1 2 3 4 5 7 8 9]
[0 2 3 4 5 7 8 9]
[0 1 3 4 5 7 8 9]
[0 1 2 4 5 7 8 9]
[0 1 2 3 5 7 8 9]
[0 1 2 3 4 7 8 9]
[0 1 2 3 4 5 8 9]
[0 1 2 3 4 5 7 9]
[0 1 2 3 4 5 7 8]


In [None]:
print('training accuracy average :',np.average(train_acc))
print('validation accuracy average :',np.average(val_acc))

training accuracy average : 0.8417261838912964
validation accuracy average : 0.840444438987308


In [None]:
print(train_acc)
print(val_acc)

In [None]:
# Reload every saved classifier, in the same order as `models`.
classifiers = []
for clf_idx, wrapper in enumerate(models):
  classifiers.append(wrapper.load_model('classifier'+str(clf_idx)))

In [None]:
classifiers

[<tensorflow.python.keras.engine.sequential.Sequential at 0x7f731a04e9b0>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f72c02471d0>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f72b086cdd8>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f72a5ddd208>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f72b008f470>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f72a4a399e8>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f72a42bb710>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f729b65ea58>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f729ac78240>]

In [None]:
# Name of the first of the last two layers of each classifier, i.e. the
# second-to-last layer whenever the model has at least two layers.
# NOTE(review): downstream cells treat this layer's output as logits — confirm against build_model.
layer_name_list = [classifier.layers[-2:][0].name for classifier in classifiers]
layer_name_list

['dense_1',
 'dense_3',
 'dense_5',
 'dense_7',
 'dense_9',
 'dense_11',
 'dense_13',
 'dense_15',
 'dense_17']

In [None]:
# Build, for every classifier, a model that shares its input but stops at the
# layer named in layer_name_list (the second-to-last layer).
#
# tf.keras.Model is referenced explicitly rather than via `from keras import Model`:
# that import rebinds the notebook-level name `Model`, which is the custom
# wrapper class defined near the top of the notebook and instantiated again
# later as `Model(name=...)`.
model_dict = {}

for i in range(len(models)):
  model_output_i = classifiers[i].get_layer(layer_name_list[i]).output
  m0 = tf.keras.Model(inputs=classifiers[i].input, outputs=model_output_i)
  model_dict[i] = m0

In [None]:
# For each classifier i, drop the validation samples of class labels[i] and
# store the truncated model's raw outputs on the remaining samples.
y_pred = []
for i, label in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs6, val_lbl6, val_req=False, anomalous_class=[label])
  print(i, np.unique(val_lbls_wt_clas_i))
  y_pred.append(model_dict[i].predict(val_imgs_wt_clas_i))

0 [1 2 3 4 5 7 8 9]
1 [0 2 3 4 5 7 8 9]
2 [0 1 3 4 5 7 8 9]
3 [0 1 2 4 5 7 8 9]
4 [0 1 2 3 5 7 8 9]
5 [0 1 2 3 4 7 8 9]
6 [0 1 2 3 4 5 8 9]
7 [0 1 2 3 4 5 7 9]
8 [0 1 2 3 4 5 7 8]


In [None]:
len(y_pred)

9

In [None]:
# Fit one temperature per classifier on the validation outputs stored in y_pred.
temp_val = []
for i, label in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs6, val_lbl6, val_req=False, anomalous_class=[label])
  unique_lbls = np.unique(val_lbls_wt_clas_i)
  print(unique_lbls, y_pred[i].shape)

  # remap_labels works on a (train, val) pair; the validation labels are passed
  # twice and only the second result is kept.
  val_lbls_wt_clas_i = remap_labels(val_lbls_wt_clas_i, val_lbls_wt_clas_i, unique_lbls)[1]

  temp_val.append(temp_cal(y_pred[i], val_lbls_wt_clas_i, len(unique_lbls)))

[1 2 3 4 5 7 8 9] (12000, 8)
[1 2 3 4 5 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.6970902681350708
[0 2 3 4 5 7 8 9] (12000, 8)
[0 2 3 4 5 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.4235918521881104
[0 1 3 4 5 7 8 9] (12000, 8)
[0 1 3 4 5 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.6054538488388062
[0 1 2 4 5 7 8 9] (12000, 8)
[0 1 2 4 5 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.486562967300415
[0 1 2 3 5 7 8 9] (12000, 8)
[0 1 2 3 5 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.6344951391220093
[0 1 2 3 4 7 8 9] (12000, 8)
[0 1 2 3 4 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.5588122606277466
[0 1 2 3 4 5 8 9] (12000, 8)
[0 1 2 3 4 5 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.542219638824463
[0 1 2 3 4 5 7 9] (12000, 8)
[0 1 2 3 4 5 7 9]
Temperature Initial value: 1.0
Temperature Final value: 1.3975123167037964
[0 1 2 3 4 5 7 8] (12000, 

In [None]:
# Hard-coded per-classifier temperatures (index i belongs to model_dict[i]).
# The first eight match the "Temperature Final value" lines printed by the
# calibration cell above, rounded; the ninth printed value is cut off in the
# saved output -- TODO confirm 1.5239877 against a fresh calibration run.
temp_val = [1.6970903,1.4235919,1.6054538,1.486563,1.6344951,1.5588123,1.5422196,1.3975123,1.5239877]

**Method 1**
---



In [None]:
# labels --> the non-anomalous classes; in this section (class 6 held out) labels are 0 1 2 3 4 5 7 8 9

In [None]:
from tqdm import tqdm

# Running sums and counts of the max softmax probability, one slot per classifier.
#   *_in  : the sample's label differs from the class tied to classifier i
#   *_out : the sample's label equals the class tied to classifier i
avg_in = [0] * 9
count_in = [0] * 9
avg_out = [0] * 9
count_out = [0] * 9

# Running sums and counts of the prediction entropy, pooled over all classifiers.
entropy_in = 0
count_entr_in = 0
entropy_out = 0
count_entr_out = 0

for img, lbl in tqdm(zip(val_imgs6, val_lbl6)):
  img = img.reshape([-1, 32, 32, 3])

  for i,anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i for this single image.
    pred = tf.nn.softmax(tf.math.divide(model_dict[i](img), temp_val[i]))
    max_val = np.max(pred)
    # Entropy taken in base = number of outputs.
    entrpy = entropy(pred[0], base=len(pred[0]))

    if anmls_lbl == lbl:
      avg_out[i] = avg_out[i] + max_val
      count_out[i] = count_out[i] + 1
      entropy_out = entropy_out + entrpy
      count_entr_out = count_entr_out + 1
    else:
      avg_in[i] = avg_in[i] + max_val
      count_in[i] = count_in[i] + 1
      entropy_in = entropy_in + entrpy
      count_entr_in = count_entr_in + 1


13500it [17:51, 12.60it/s]


In [None]:
# Per-classifier mean of the max softmax probability ("in" and "out" groups).
classifier_avg_in = [total / seen for total, seen in zip(avg_in, count_in)]
classifier_avg_out = [total / seen for total, seen in zip(avg_out, count_out)]

# Average of those per-classifier means over all classifiers.
treshold_value_in = 0.0
treshold_value_out = 0.0
for in_mean, out_mean in zip(classifier_avg_in, classifier_avg_out):
  treshold_value_in = treshold_value_in + in_mean
  treshold_value_out = treshold_value_out + out_mean

treshold_value_in = treshold_value_in/len(classifier_avg_in)
treshold_value_out = treshold_value_out/len(classifier_avg_out)


In [None]:
# Mean entropy of the "in" and "out" groups, used later as scalar references.
entropy_ref_in_6 = entropy_in/count_entr_in
entropy_ref_out_6 = entropy_out/ count_entr_out

# Same print order as before: reference vectors, their averages, entropy references.
for shown in (classifier_avg_in, classifier_avg_out,
              treshold_value_in, treshold_value_out,
              entropy_ref_in_6, entropy_ref_out_6):
  print(shown)

[0.820683627144744, 0.8438607627016803, 0.8141445354570945, 0.8576260032244026, 0.807254978346328, 0.8685143183258672, 0.836962711221228, 0.847462523351113, 0.7952074745483696]
[0.7621131336490313, 0.7909757884045442, 0.632596829354763, 0.6948917918403943, 0.7249059960842132, 0.7562177034815153, 0.688512085199356, 0.7844052521089713, 0.7896234942773978]
0.8324129927023143
0.7360268971555762
0.22202129013333982
0.33664362481059146


In [None]:
# Persist the reference vectors and entropy references.
# The context manager closes (and so flushes) the file as soon as the dump is
# done, instead of leaving the handle open until it is garbage-collected.
with open(os.path.join('reference_vector_in_out.pkl'), "wb") as open_file:
  pickle.dump((classifier_avg_in,classifier_avg_out,entropy_ref_in_6,entropy_ref_out_6), open_file)

In [None]:
# Reload the reference vectors / entropy references written by this notebook.
# The file is closed on leaving the `with` block rather than being left open.
with open(os.path.join('reference_vector_in_out.pkl'), "rb") as open_file:
  ref_vector_in_6,ref_vector_out_6,entropy_ref_in_6,entropy_ref_out_6 = pickle.load(open_file)

In [None]:
# Run every classifier on each test image and store one record per image:
#   [max softmax per classifier, predicted original label per classifier,
#    true label, ID/OOD flag (class 6 -> ood), mean entropy over classifiers]
max_sm_all_wt_6 = []
ood = 0
ind= 1
for data, label in tqdm(zip(test_imgs, test_lbls)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []
  
  if label[0] == 6:
    lbl = ood
  else:
    lbl = ind
  
  for i,anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i.
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    # Classifier i's outputs are indexed over the classes that remain after
    # removing labels[i], so that is the entry to drop when mapping back to the
    # original label. (Dropping index 0 for every classifier mislabels the
    # predictions of all classifiers except the first.)
    labels_removed_anmls = np.delete(labels,i)
    pred_lbl = labels_removed_anmls[pred_ind]   ### reverse mapping the actual label

    prediction_lbl.append(pred_lbl)

    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_6.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

10000it [13:34, 12.28it/s]


In [None]:
# Cache the per-image test records; the `with` block closes and flushes the file.
with open(os.path.join('predictions_on_test.pkl'), "wb") as open_file:
  pickle.dump(max_sm_all_wt_6, open_file)

In [None]:
# Reload the cached per-image test records; the file is closed after loading.
with open(os.path.join('predictions_on_test.pkl'), "rb") as open_file:
  max_sm_all_wt_6 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean ID/OOD decision)

In [None]:
from scipy.spatial import distance

# Flag values for the binary decisions below.
id = 1
ood = 0

# Similarity = 1 / (1 + Euclidean distance) to the "in" / "out" references,
# computed for the max-softmax vector and for the mean entropy.
sim_score_in, sim_score_out = [], []
sim_score_entr_in, sim_score_entr_out = [], []
sim_score_add = []
# y_true uses 1 for in-distribution; y_true_ood is the same flag inverted.
y_true, y_true_ood = [], []
y_pred, y_entr_pred = [], []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_6):

  sim_in = 1 / (1 + distance.euclidean(max_pred_each_cls, ref_vector_in_6))
  sim_out = 1 / (1 + distance.euclidean(max_pred_each_cls, ref_vector_out_6))
  sim_add = sim_in + sim_out

  sim_entr_in = 1 / (1 + distance.euclidean(avg_entr, entropy_ref_in_6))
  sim_entr_out = 1 / (1 + distance.euclidean(avg_entr, entropy_ref_out_6))

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_add)

  # A sample is called ID only when it is strictly closer to the "in" reference.
  prd = id if sim_in > sim_out else ood
  prd_e = id if sim_entr_in > sim_entr_out else ood

  # Inverted ground-truth flag (1 = OOD) for scores where higher means "more OOD".
  if lbl_id_or_ood == 1:
    label_true_ood = 0
  elif lbl_id_or_ood ==0:
    label_true_ood = 1

  y_pred.append(prd)
  y_entr_pred.append(prd_e)
  y_true.append(lbl_id_or_ood)
  y_true_ood.append(label_true_ood)

100%|██████████| 10000/10000 [00:01<00:00, 5907.24it/s]


In [None]:
from sklearn.metrics import roc_auc_score

# (caption, ground truth, score) triples, printed in order.
# Scores measuring closeness to the OOD reference are paired with the inverted
# ground truth y_true_ood.
auroc_rows = [
    ('Auroc with prediction as ID or OOD  ', y_true, y_pred),
    ('Auroc prediction with sim score to in distribution only  :', y_true, sim_score_in),
    ('Auroc prediction with sim score to ood distribution only  :', y_true_ood, sim_score_out),
    ('\nAuroc prediction with sim added score to for in and ood distribution only  :', y_true_ood, sim_score_add),
    ('\nAuroc with prediction as ID or OOD Entropy ', y_true, y_entr_pred),
    ('Auroc prediction with sim score to in distribution only Entropy  :', y_true, sim_score_entr_in),
    ('Auroc prediction with sim score to out distribution only Entropy  :', y_true_ood, sim_score_entr_out),
]
for caption, truth, score in auroc_rows:
  print(caption, roc_auc_score(truth, score))


Auroc with prediction as ID or OOD   0.6756111111111112
Auroc prediction with sim score to in distribution only  : 0.6704483333333333
Auroc prediction with sim score to ood distribution only  : 0.6093142222222222

Auroc prediction with sim added score to for in and ood distribution only  : 0.4343346666666667

Auroc with prediction as ID or OOD Entropy  0.7021666666666666
Auroc prediction with sim score to in distribution only Entropy  : 0.5846659999999999
Auroc prediction with sim score to out distribution only Entropy  : 0.6938365555555556


Decision Rule 

In [None]:
# Simple decision rule: no vote threshold (7 or 8) is applied, only the
# majority prediction across the classifiers is used.
# NOTE(review): the decision compares the majority vote with label_actual, i.e.
# the ground-truth class of the test sample, so the "prediction" depends on the
# true label -- confirm this is intended before reporting the score.

id = 1
ood = 0
y_true = []
y_pred=[]

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_6):
  majority_lbl = Counter(pred_lbl_each_cls).most_common(1)[0][0]
  y_pred.append(id if majority_lbl == label_actual else ood)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 161187.95it/s]

 Auroc with simple decision rule about majority_:   0.7355





In [None]:
id = 1
ood = 0

In [None]:
# Vote-count rule: a sample is called ID when the most frequent predicted label
# is shared by at least 7 (resp. 8) classifiers. The printed captions say "> 7"
# and "> 8", but the comparisons are >=.
y_true_dec_rule = []
y_pred_mr_than8_ = []
y_pred_mr_than7_ = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_6):
  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)

  y_pred_mr_than7_.append(id if top_votes >= 7 else ood)
  y_pred_mr_than8_.append(id if top_votes >= 8 else ood)
  # Class 6 is the held-out (OOD) class in this section.
  y_true_dec_rule.append(ood if label_actual == 6 else id)

print('\nAuroc for decision rule > 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_))
print('Auroc for decision rule > 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_))

100%|██████████| 10000/10000 [00:00<00:00, 26266.14it/s]


Auroc for decision rule > 7  0.6482222222222221
Auroc for decision rule > 8  0.552





In [None]:
# Combined rule: a sample is called ID only if it is closer to the "in"
# reference than to the "out" reference AND the vote count reaches the
# threshold (7 or 8). One variant uses the max-softmax reference vectors, the
# other the entropy references.
sim_score_in =[]
sim_score_entr_in =[]
y_true_ = []
y_pred_mr_than7_ = []
y_pred_mr_than8_ = []
y_entr_pred_mr_than7_ = []
y_entr_pred_mr_than8_ = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_6):

  sim_in = 1 / (1 + distance.euclidean(max_pred_each_cls, ref_vector_in_6))
  sim_out = 1 / (1 + distance.euclidean(max_pred_each_cls, ref_vector_out_6))

  sim_entr_in = 1 / (1 + distance.euclidean(avg_entr, entropy_ref_in_6))
  sim_entr_out = 1 / (1 + distance.euclidean(avg_entr, entropy_ref_out_6))

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)
  near_in = sim_in > sim_out
  near_in_entr = sim_entr_in > sim_entr_out

  y_pred_mr_than7_.append(id if (near_in and top_votes >= 7) else ood)
  y_pred_mr_than8_.append(id if (near_in and top_votes >= 8) else ood)
  y_entr_pred_mr_than7_.append(id if (near_in_entr and top_votes >= 7) else ood)
  y_entr_pred_mr_than8_.append(id if (near_in_entr and top_votes >= 8) else ood)

  # Class 6 is the held-out (OOD) class in this section.
  y_true_.append(ood if label_actual == 6 else id)


100%|██████████| 10000/10000 [00:01<00:00, 8438.38it/s]


In [None]:
# AUROC of the four combined-rule variants against the same ground truth.
for caption, decisions in (
    ('Decision Rule(>=7) + Reference Vector ', y_pred_mr_than7_),
    ('Decision Rule(>=8) + Reference Vector ', y_pred_mr_than8_),
    ('Decision Rule(>=7) + Entropy Reference Value ', y_entr_pred_mr_than7_),
    ('Decision Rule(>=8) + Entropy Reference Value ', y_entr_pred_mr_than8_),
):
  print(caption, roc_auc_score(y_true_, decisions))

Decision Rule(>=7) + Reference Vector  0.6501666666666667
Decision Rule(>=8) + Reference Vector  0.5483333333333333
Decision Rule(>=7) + Entropy Reference Value  0.6501111111111112
Decision Rule(>=8) + Entropy Reference Value  0.5485


**Method 2**
---



Normal weighted SVM




In [None]:
# Build the SVM training set: for every validation image and every classifier,
# store [temperature-scaled softmax vector, ID/OOD flag]. The flag is `ood`
# when the image's label equals the class tied to that classifier, else `id`.
id = 1
ood = 0

new_dataset = []

for img, label in tqdm(zip(val_imgs6, val_lbl6)):
    img = img.reshape([-1,32,32,3])
    for i,lbl in enumerate(labels):
      new_label = ood if label == lbl else id

      scaled_output = temp_scaling(model_dict[i].predict(img), temp_val[i])
      # apply_softmax returns a batch of one row; keep that row as a plain list.
      softmax_row = np.array(models[i].apply_softmax(scaled_output)).tolist()[0]
      new_dataset.append([softmax_row, new_label])
      

0it [00:00, ?it/s]



13500it [1:04:50,  3.47it/s]


In [None]:
# Build the frame from the [softmax vector, ID/OOD flag] pairs collected in
# new_dataset. With this line commented out the cell silently relied on a `df`
# left behind by another cell or session (or failed with NameError on a fresh
# kernel), so the extracted arrays did not necessarily belong to new_dataset.
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions = df['predictions'].to_numpy()
new_labels = df['label'].to_numpy()

In [None]:
df.to_pickle("predictions_dataset.pkl")

In [None]:
df = pd.read_pickle('predictions_dataset.pkl')
df

Unnamed: 0,predictions,label
0,"[0.00032780299079604447, 0.0002651762624736875...",1
1,"[4.9854173994390294e-05, 7.144168421291397e-07...",1
2,"[0.0003905177873093635, 2.873797939173528e-06,...",1
3,"[0.0014698188751935959, 0.0005127178737893701,...",1
4,"[0.0009138170280493796, 1.2465586678445106e-06...",1
...,...,...
121495,"[0.0010775388218462467, 6.393775038304739e-07,...",1
121496,"[2.8785655104002217e-06, 1.1480154604726067e-0...",1
121497,"[8.96109122550115e-05, 1.0216504620075284e-07,...",1
121498,"[0.0004994510090909898, 1.2335743804214871e-06...",1


In [None]:
from collections import Counter
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Separate the [prediction, label] pairs by flag: label 1 -> in_data, else out_data.
in_data = []
out_data = []

for prediction, flag in zip(predictions, new_labels):
  target = in_data if flag == 1 else out_data
  target.append([prediction, flag])

print(len(in_data),len(out_data))

# import random 
# in_data = random.sample(in_data,(len(out_data)))
# print(len(in_data))

108000 13500


In [None]:
in_data[1]

[[4.9854173994390294e-05,
  7.144168421291397e-07,
  2.2372459795860777e-07,
  4.902953421037637e-08,
  1.9500798771332484e-06,
  2.107005236950954e-08,
  0.9999465942382812,
  4.30821160080086e-07],
 1]

In [None]:
# Flatten the [features, flag] pairs into parallel lists, in_data first and
# out_data second.
# NOTE(review): `input` shadows the builtin and `labels` overwrites the class-id
# array used by earlier cells; both names are consumed by the split cell below,
# so they are kept as-is here.
input = []
labels = []
for subset in (in_data, out_data):
  for features, flag in subset:
    input.append(features)
    labels.append(flag)
# print(input,labels)
print(len(input),len(labels))

121500 121500


In [None]:
# 80/20 split of the softmax vectors, stratified on the ID/OOD flag so both
# parts keep the same class ratio.
# NOTE(review): no random_state is passed, so the split (and every score that
# follows) changes between runs -- consider fixing a seed.
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(input, labels, test_size=0.20,stratify = labels)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
# NOTE(review): this estimator does not appear to be fitted or used by the
# visible cells that follow (they build their own SVC inside a pipeline) --
# confirm whether this cell is still needed.
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight
# 'balanced' weights are inversely proportional to class frequency.
# Arguments are passed by keyword: current scikit-learn releases reject
# positional `classes` / `y`, while keywords work on old and new versions.
class_wts = compute_class_weight(class_weight='balanced', classes=np.unique(svm_train_lb), y=svm_train_lb)
# Returned weights follow the order of `classes` (0 = OOD flag, 1 = ID flag).
class_wts ={0:class_wts[0], 1:class_wts[1]}
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Standardise the softmax features, then fit an SVC using the class weights
# computed above to counter the ID/OOD imbalance.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',class_weight=class_wts))
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
pred = clf.predict(svm_test_ip)
len(pred) ### svm fitted on whole data with weights

24300

In [None]:
clf.score(svm_test_ip,svm_test_lb) 

0.7069958847736626

In [None]:
# For every test image: feed each classifier's temperature-scaled softmax
# vector to the fitted SVM, then take the majority of the nine SVM decisions.
y_true = []
y_pred = []
for img, label in tqdm(zip(test_imgs, test_lbls)):
    img= img.reshape([-1,32,32,3])

    svm_output =[]
    for i in range(0,9):
      scaled_output = temp_scaling(model_dict[i].predict(img), temp_val[i])
      softmax_batch = np.array(models[i].apply_softmax(scaled_output)).tolist()
      # Single sample as a (1, n_features) row for clf.predict.
      svm_features = np.array(softmax_batch[0]).reshape([1,-1])
      svm_output.append(clf.predict(svm_features)[0])

    y_pred.append(Counter(list(svm_output)).most_common()[0][0])
    # Class 6 is the held-out (OOD) class: flag 0; everything else is ID: flag 1.
    y_true.append(0 if label == 6 else 1)

0it [00:00, ?it/s]



10000it [1:13:13,  2.28it/s]


In [None]:
print(y_pred)
print(y_true)

[1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 

In [None]:
print(Counter(y_pred))
print(Counter(y_true))

Counter({1: 6775, 0: 3225})
Counter({1: 9000, 0: 1000})


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
roc_auc_score(y_true, y_pred)

0.6713888888888889

In [None]:
# Two-column frame of SVM decisions and ground truth (same column order as before).
df = pd.DataFrame({"y_pred": y_pred, "y_true": y_true})
df

In [None]:
# Save the final predictions and immediately read them back, so later cells
# work from the on-disk copy.
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
y_true = df['y_true'].to_numpy().tolist()
y_pred = df['y_pred'].to_numpy().tolist()
print(y_true)
print(y_pred)

[1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 

# 5 as anomaly

In [None]:
import os
print(os.getcwd())
# Switch to the working folder of the "class 5 held out" experiment; the pickle
# files below are read and written relative to it.
# NOTE(review): absolute Google Drive path -- only valid when the drive is
# mounted at /content/gdrive with this exact folder layout.
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 5/")
# os.chdir("/content/gdrive/My Drive/leave 8/")
print(os.listdir())

/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 5
['predictions_on_test.pkl', 'reference_vector_in_out.pkl']


In [None]:
# train_imgs5, val_imgs5 , train_lbl5, val_lbl5 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[5])
# print(train_imgs5.shape, val_imgs5.shape , train_lbl5.shape, val_lbl5.shape)
# open_file = open(os.path.join('Training_data.pkl'), "wb")
# pickle.dump((train_imgs5, val_imgs5 , train_lbl5, val_lbl5 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Load the cached train/validation split with class 5 removed.
# The `with` block closes the file after loading instead of leaving it open.
with open(os.path.join('Training_data.pkl'), "rb") as open_file:
  (train_imgs5, val_imgs5 , train_lbl5, val_lbl5 )= pickle.load(open_file)
print(train_imgs5.shape, val_imgs5.shape , train_lbl5.shape, val_lbl5.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
print( np.unique(train_lbl5),len(np.unique(train_lbl5)))

[0 1 2 3 4 6 7 8 9] 9


In [None]:
# One wrapper per remaining class, named '5anomaly:classifier<index>'.
models = [Model(name='5anomaly:classifier'+str(position))
          for position in range(len(np.unique(train_lbl5)))]

names = [wrapper.name for wrapper in models]
names

['5anomaly:classifier0',
 '5anomaly:classifier1',
 '5anomaly:classifier2',
 '5anomaly:classifier3',
 '5anomaly:classifier4',
 '5anomaly:classifier5',
 '5anomaly:classifier6',
 '5anomaly:classifier7',
 '5anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Map original class ids onto consecutive indices 0..K-1.

  Args:
    train_lbl: training labels, as a flat sequence or an (N, 1) column.
    val_lbl: validation labels, same form.
    unique_lbl: the K class ids that may occur; the id at position k is
      mapped to index k.

  Returns:
    Tuple (train_lbl, val_lbl) of 1-D numpy arrays holding the remapped indices.

  Raises:
    KeyError: if a label does not appear in unique_lbl.
  """
  print(unique_lbl)
  # Built from however many classes are present, not from exactly eight entries.
  index_map = {int(lbl): idx for idx, lbl in enumerate(unique_lbl)}
  # ravel() flattens (N, 1) label columns so int() receives true scalars;
  # calling int() on a size-1 array is deprecated in recent NumPy releases.
  train_lbl = np.array([index_map[int(x)] for x in np.asarray(train_lbl).ravel()])
  val_lbl = np.array([index_map[int(x)] for x in np.asarray(val_lbl).ravel()])

  return train_lbl,val_lbl

In [None]:
# Sorted class ids present in the training labels of this section (class 5 is
# absent); position i in this array is the class tied to classifier i.
labels = np.unique(train_lbl5)
labels

array([0, 1, 2, 3, 4, 6, 7, 8, 9], dtype=int32)

In [None]:
# for indx,i in enumerate(labels):
#   # if i == 9:
#     print(i)
#     train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(train_imgs5,train_lbl5,val_req= False, anomalous_class = [i])
#     val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(val_imgs5,val_lbl5,val_req= False, anomalous_class = [i])
#     unique_lbls = np.unique(train_lbls_wt_clas_i)
#     print(np.unique(train_lbls_wt_clas_i),np.unique(val_lbls__wt_clas_i))
#     (train_lbls_wt_clas_i,val_lbls__wt_clas_i) = remap_labels(train_lbls_wt_clas_i,val_lbls__wt_clas_i,unique_lbls)
#     models[indx].build_model(8)
#     models[indx].train(train_imgs_wt_clas_i, train_lbls_wt_clas_i, val_imgs_wt_clas_i, val_lbls__wt_clas_i)
#     models[indx].save_model('classifier'+str(indx))

In [None]:
# Evaluate every saved classifier on its own train/validation subset, i.e. the
# data with class labels[indx] removed and the labels remapped.
train_loss, train_acc = [], []
val_loss, val_acc = [], []

for indx, i in enumerate(labels):
  train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(
      train_imgs5, train_lbl5, val_req=False, anomalous_class=[i])
  val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(
      val_imgs5, val_lbl5, val_req=False, anomalous_class=[i])

  unique_lbls = np.unique(train_lbls_wt_clas_i)
  train_lbls_wt_clas_i, val_lbls__wt_clas_i = remap_labels(
      train_lbls_wt_clas_i, val_lbls__wt_clas_i, unique_lbls)

  model = models[indx].load_model('classifier'+str(indx))
  # evaluate() results are read as [loss, accuracy].
  train_ = model.evaluate(train_imgs_wt_clas_i, train_lbls_wt_clas_i)
  val_ = model.evaluate(val_imgs_wt_clas_i, val_lbls__wt_clas_i)

  train_loss.append(train_[0])
  train_acc.append(train_[1])
  val_loss.append(val_[0])
  val_acc.append(val_[1])

[1 2 3 4 6 7 8 9]
[0 2 3 4 6 7 8 9]
[0 1 3 4 6 7 8 9]
[0 1 2 4 6 7 8 9]
[0 1 2 3 6 7 8 9]
[0 1 2 3 4 7 8 9]
[0 1 2 3 4 6 8 9]
[0 1 2 3 4 6 7 9]
[0 1 2 3 4 6 7 8]


In [None]:
# Mean accuracy over the per-classifier results collected in train_acc / val_acc.
for caption, split_acc in (('training accuracy average :', train_acc),
                           ('validation accuracy average :', val_acc)):
  print(caption, np.average(split_acc))

training accuracy average : 0.8872023887104459
validation accuracy average : 0.7546481423907809


In [None]:
print(train_acc)
print(val_acc)

[0.8977500200271606, 0.7802857160568237, 0.9327142834663391, 0.8713928461074829, 0.9246071577072144, 0.8983928561210632, 0.8652142882347107, 0.9247857332229614, 0.8896785974502563]
[0.7725833058357239, 0.6758333444595337, 0.8018333315849304, 0.7527499794960022, 0.7957500219345093, 0.7585833072662354, 0.721666693687439, 0.7690833210945129, 0.7437499761581421]


In [None]:
# Reload every saved network; file names follow the 'classifier<index>' pattern.
classifiers = [wrapper.load_model('classifier'+str(position)) for position, wrapper in enumerate(models)]

In [None]:
classifiers

[<tensorflow.python.keras.engine.sequential.Sequential at 0x7f02a8151978>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f023ec1ecc0>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f023e9c5898>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f023365f7b8>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f0232c5ad30>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f023224c588>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f02298d86d8>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f0228eae160>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f022872f048>]

In [None]:
# Name of the second-to-last layer of each classifier (layers[-2:] is the last
# two layers, [0] picks the earlier one). Later cells read this layer's output
# and apply temperature scaling + softmax to it themselves.
layer_name_list = [classifier.layers[-2:][0].name for classifier in classifiers]
layer_name_list

['dense_1',
 'dense_3',
 'dense_5',
 'dense_7',
 'dense_9',
 'dense_11',
 'dense_13',
 'dense_15',
 'dense_17']

In [None]:
# Build, for every classifier, a model that shares its input but stops at the
# layer named in layer_name_list (the second-to-last layer).
#
# tf.keras.Model is referenced explicitly rather than via `from keras import Model`:
# that import rebinds the notebook-level name `Model`, which is the custom
# wrapper class defined near the top of the notebook and used to create the
# entries of `models`.
model_dict = {}

for i in range(len(models)):
  model_output_i = classifiers[i].get_layer(layer_name_list[i]).output
  m0 = tf.keras.Model(inputs=classifiers[i].input, outputs=model_output_i)
  model_dict[i] = m0

In [None]:
# For each classifier i, drop the validation samples of class labels[i] and
# store the truncated model's raw outputs on the remaining samples.
y_pred = []
for i, label in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs5, val_lbl5, val_req=False, anomalous_class=[label])
  print(i, np.unique(val_lbls_wt_clas_i))
  y_pred.append(model_dict[i].predict(val_imgs_wt_clas_i))

0 [1 2 3 4 6 7 8 9]
1 [0 2 3 4 6 7 8 9]
2 [0 1 3 4 6 7 8 9]
3 [0 1 2 4 6 7 8 9]
4 [0 1 2 3 6 7 8 9]
5 [0 1 2 3 4 7 8 9]
6 [0 1 2 3 4 6 8 9]
7 [0 1 2 3 4 6 7 9]
8 [0 1 2 3 4 6 7 8]


In [None]:
len(y_pred)

9

In [None]:
# temp_val = []
# for i,label in enumerate(labels):
#   # if i == 8:
#     val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(val_imgs5,val_lbl5,val_req= False, anomalous_class = [label])
#     print(np.unique(val_lbls_wt_clas_i),y_pred[i].shape)

#     unique_lbls = np.unique(val_lbls_wt_clas_i)
#     (val_lbls_wt_clas_i,val_lbls_wt_clas_i) = remap_labels(val_lbls_wt_clas_i,val_lbls_wt_clas_i,unique_lbls)
  
#     temp = temp_cal(y_pred[i],val_lbls_wt_clas_i,len(unique_lbls))
#     temp_val.append(temp)

In [None]:
temp_val 

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.9739809>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.3422043>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0955324>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.3424997>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.014806>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.168053>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.176198>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.218129>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.2586148>]

In [None]:
# Plain-float copy of the tf.Variable temperatures displayed in the previous
# cell's output (index i belongs to model_dict[i]), so the calibration cell can
# stay commented out.
temp_val = [1.9739809,2.3422043,2.0955324,2.3424997,2.014806,2.168053,2.176198,2.218129,2.2586148]

**Method 1**
---



In [None]:
# labels --> the non-anomalous classes; in this section (class 5 held out) labels are 0 1 2 3 4 6 7 8 9

In [None]:
from tqdm import tqdm

# Running sums and counts of the max softmax probability, one slot per classifier.
#   *_in  : the sample's label differs from the class tied to classifier i
#   *_out : the sample's label equals the class tied to classifier i
avg_in = [0] * 9
count_in = [0] * 9
avg_out = [0] * 9
count_out = [0] * 9

# Running sums and counts of the prediction entropy, pooled over all classifiers.
entropy_in = 0
count_entr_in = 0
entropy_out = 0
count_entr_out = 0

for img, lbl in tqdm(zip(val_imgs5, val_lbl5)):
  img = img.reshape([-1, 32, 32, 3])

  for i,anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i for this single image.
    pred = tf.nn.softmax(tf.math.divide(model_dict[i](img), temp_val[i]))
    max_val = np.max(pred)
    # Entropy taken in base = number of outputs.
    entrpy = entropy(pred[0], base=len(pred[0]))

    if anmls_lbl == lbl:
      avg_out[i] = avg_out[i] + max_val
      count_out[i] = count_out[i] + 1
      entropy_out = entropy_out + entrpy
      count_entr_out = count_entr_out + 1
    else:
      avg_in[i] = avg_in[i] + max_val
      count_in[i] = count_in[i] + 1
      entropy_in = entropy_in + entrpy
      count_entr_in = count_entr_in + 1


13500it [17:27, 12.89it/s]


In [None]:
# Per-classifier mean of the max softmax probability ("in" and "out" groups).
classifier_avg_in = [total / seen for total, seen in zip(avg_in, count_in)]
classifier_avg_out = [total / seen for total, seen in zip(avg_out, count_out)]

# Average of those per-classifier means over all classifiers.
treshold_value_in = 0.0
treshold_value_out = 0.0
for in_mean, out_mean in zip(classifier_avg_in, classifier_avg_out):
  treshold_value_in = treshold_value_in + in_mean
  treshold_value_out = treshold_value_out + out_mean

treshold_value_in = treshold_value_in/len(classifier_avg_in)
treshold_value_out = treshold_value_out/len(classifier_avg_out)


In [None]:
# Mean entropy of the "in" and "out" groups, used later as scalar references.
entropy_ref_in_5 = entropy_in/count_entr_in
entropy_ref_out_5 = entropy_out/ count_entr_out

# Same print order as before: reference vectors, their averages, entropy references.
for shown in (classifier_avg_in, classifier_avg_out,
              treshold_value_in, treshold_value_out,
              entropy_ref_in_5, entropy_ref_out_5):
  print(shown)

[0.7720946137135227, 0.7005797019029657, 0.8017076351493597, 0.7774761314975719, 0.7931171158204476, 0.761193291897575, 0.730984258826822, 0.7782378639231126, 0.7568129632373651]
[0.6662297526995341, 0.6162412576874097, 0.6330355843206247, 0.6654160855710507, 0.5933033785025279, 0.6574322979450226, 0.6484679195582866, 0.7214648718237877, 0.6707891456186771]
0.763578175107638
0.6524866993029912
0.31529655332504736
0.4452300682476704


In [None]:
# Persist the reference vectors and entropy references.
# The context manager closes (and so flushes) the file as soon as the dump is
# done, instead of leaving the handle open until it is garbage-collected.
with open(os.path.join('reference_vector_in_out.pkl'), "wb") as open_file:
  pickle.dump((classifier_avg_in,classifier_avg_out,entropy_ref_in_5,entropy_ref_out_5), open_file)

In [None]:
# Reload the reference vectors / entropy references written by this notebook.
# The file is closed on leaving the `with` block rather than being left open.
with open(os.path.join('reference_vector_in_out.pkl'), "rb") as open_file:
  ref_vector_in_5,ref_vector_out_5,entropy_ref_in_5,entropy_ref_out_5 = pickle.load(open_file)

In [None]:
# Run every classifier on each test image and store one record per image:
#   [max softmax per classifier, predicted original label per classifier,
#    true label, ID/OOD flag (class 5 -> ood), mean entropy over classifiers]
max_sm_all_wt_5 = []
ood = 0
ind= 1
for data, label in tqdm(zip(test_imgs, test_lbls)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []
  
  if label[0] == 5:
    lbl = ood
  else:
    lbl = ind
  
  for i,anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i.
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    # Classifier i's outputs are indexed over the classes that remain after
    # removing labels[i], so that is the entry to drop when mapping back to the
    # original label. (Dropping index 0 for every classifier mislabels the
    # predictions of all classifiers except the first.)
    labels_removed_anmls = np.delete(labels,i)
    pred_lbl = labels_removed_anmls[pred_ind]   ### reverse mapping the actual label

    prediction_lbl.append(pred_lbl)

    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_5.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

10000it [13:07, 12.69it/s]


In [None]:
# Cache the per-image test records; the `with` block closes and flushes the file.
with open(os.path.join('predictions_on_test.pkl'), "wb") as open_file:
  pickle.dump(max_sm_all_wt_5, open_file)

In [None]:
# Reload the cached per-image test records; the file is closed after loading.
with open(os.path.join('predictions_on_test.pkl'), "rb") as open_file:
  max_sm_all_wt_5 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean ID/OOD decision)

In [None]:
# Method 1 scoring. For each test record, compare
#   * the vector of per-classifier max-softmax values with the ID / OOD reference vectors
#   * the mean entropy with the ID / OOD entropy reference values
# and turn each distance d into a similarity 1 / (1 + d).
# `distance` is already imported in the notebook's first cell.
id = 1     # in-distribution flag (NOTE: shadows the builtin id(); name kept to match the other cells)
ood = 0    # out-of-distribution flag
sim_score_in =[]
sim_score_out = []
sim_score_entr_in =[]
sim_score_entr_out = []
sim_score_add = []
y_true = []
y_pred=[]
y_entr_pred=[]
y_true_ood = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_5):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_5)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_5)

  # The entropy values are scalars, so the Euclidean distance is just the
  # absolute difference. distance.euclidean expects 1-D vectors and raises
  # on scalar input in current SciPy releases.
  dist_entr_in = abs(avg_entr - entropy_ref_in_5)
  dist_entr_out = abs(avg_entr - entropy_ref_out_5)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)
  sim_add = sim_in + sim_out

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_add)

  # Hard decisions: whichever reference is closer wins.
  prd = id if sim_in > sim_out else ood
  prd_e = id if sim_entr_in > sim_entr_out else ood

  y_pred.append(prd)
  y_entr_pred.append(prd_e)
  y_true.append(lbl_id_or_ood)
  # Inverted ground truth (1 = OOD) for scores where higher means "more OOD".
  # The stored flag is always 0 or 1, so this can never be left unbound.
  y_true_ood.append(1 - lbl_id_or_ood)

100%|██████████| 10000/10000 [00:01<00:00, 6480.65it/s]


In [None]:
# AUROC for the Method 1 (reference-vector / entropy) scores computed in the previous cell.
# y_true uses 1 = in-distribution; y_true_ood is the inverted ground truth (1 = OOD).
from sklearn.metrics import roc_auc_score  # also imported in the first cell of the notebook
# y_pred / y_entr_pred are hard 0/1 decisions, so their AUROC reflects a single operating point.
print('Auroc with prediction as ID or OOD  ',roc_auc_score(y_true, y_pred))
print('Auroc prediction with sim score to in distribution only  :',roc_auc_score(y_true,sim_score_in))
print('Auroc prediction with sim score to ood distribution only  :',roc_auc_score(y_true_ood,sim_score_out))

# NOTE(review): the summed score is evaluated against the OOD-positive labels — confirm this is the intended orientation.
print('\nAuroc prediction with sim added score to for in and ood distribution only  :',roc_auc_score(y_true_ood,sim_score_add))

print('\nAuroc with prediction as ID or OOD Entropy ',roc_auc_score(y_true, y_entr_pred))
print('Auroc prediction with sim score to in distribution only Entropy  :',roc_auc_score(y_true,sim_score_entr_in))
print('Auroc prediction with sim score to out distribution only Entropy  :',roc_auc_score(y_true_ood,sim_score_entr_out))


Auroc with prediction as ID or OOD   0.6762777777777778
Auroc prediction with sim score to in distribution only  : 0.5872691111111111
Auroc prediction with sim score to ood distribution only  : 0.6941405555555556

Auroc prediction with sim added score to for in and ood distribution only  : 0.5809113333333333

Auroc with prediction as ID or OOD Entropy  0.6817777777777778
Auroc prediction with sim score to in distribution only Entropy  : 0.5268941111111112
Auroc prediction with sim score to out distribution only Entropy  : 0.7051186666666667


Decision Rule 

In [None]:
# Simple decision rule: no vote-count threshold (7 or 8) is applied here; the
# label predicted by most classifiers is taken as the ensemble prediction.

id = 1
ood = 0
y_true = []
y_pred=[]

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_5):
  most_common = Counter(pred_lbl_each_cls).most_common(1)[0][0]

  # NOTE(review): this compares the majority vote with the ground-truth label,
  # so the "detector" needs the true class at test time — confirm that is intended.
  pred = id if most_common == label_actual else ood
  y_pred.append(pred)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 175916.38it/s]

 Auroc with simple decision rule about majority_:   0.7371666666666666





In [None]:
# Integer flags used by the decision-rule cells below: 1 = in-distribution, 0 = out-of-distribution.
# NOTE: `id` shadows Python's builtin id() for the rest of the session.
id = 1
ood = 0

In [None]:
# Vote-count decision rule: an image is called in-distribution when at least
# 7 (resp. 8) of the classifiers agree on the same predicted label.
y_true_dec_rule = []
y_pred_mr_than8_ = []
y_pred_mr_than7_ = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_5):
  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  max_votes = np.max(counts)   # size of the largest agreeing group

  y_pred_mr_than7_.append(id if max_votes >= 7 else ood)
  y_pred_mr_than8_.append(id if max_votes >= 8 else ood)

  # Use the ID/OOD flag stored with each record (1 = ID, 0 = OOD) instead of
  # re-deriving it from a hard-coded anomalous class number.
  y_true_dec_rule.append(lbl_id_or_ood)

# The thresholds are inclusive, so the labels say ">=" (they previously said ">").
print('\nAuroc for decision rule >= 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_))
print('Auroc for decision rule >= 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_))

100%|██████████| 10000/10000 [00:00<00:00, 27787.28it/s]


Auroc for decision rule > 7  0.6338333333333332
Auroc for decision rule > 8  0.5507222222222221





In [None]:
# Combined rule: an image is in-distribution only if BOTH
#   (a) it is closer to the ID reference than to the OOD reference
#       (max-softmax vector, or mean entropy for the *_entr_* variants), and
#   (b) at least 7 (resp. 8) classifiers agree on the predicted label.
sim_score_in =[]
sim_score_entr_in =[]
y_true_ = []
y_pred_mr_than7_ = []
y_pred_mr_than8_ = []
y_entr_pred_mr_than7_ = []
y_entr_pred_mr_than8_ = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_5):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_5)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_5)

  # Scalars: use the absolute difference. distance.euclidean expects 1-D
  # vectors and raises on scalar input in current SciPy releases.
  dist_entr_in = abs(avg_entr - entropy_ref_in_5)
  dist_entr_out = abs(avg_entr - entropy_ref_out_5)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  max_votes = np.max(counts)                       # size of the largest agreeing group
  closer_to_in = sim_in > sim_out                  # reference-vector criterion
  closer_to_in_entr = sim_entr_in > sim_entr_out   # entropy criterion

  y_pred_mr_than7_.append(id if (closer_to_in and max_votes >= 7) else ood)
  y_pred_mr_than8_.append(id if (closer_to_in and max_votes >= 8) else ood)
  y_entr_pred_mr_than7_.append(id if (closer_to_in_entr and max_votes >= 7) else ood)
  y_entr_pred_mr_than8_.append(id if (closer_to_in_entr and max_votes >= 8) else ood)

  # Ground truth comes from the flag stored with the record (1 = ID, 0 = OOD),
  # not from a hard-coded anomalous class number.
  y_true_.append(lbl_id_or_ood)


100%|██████████| 10000/10000 [00:01<00:00, 5978.98it/s]


In [None]:
# Report AUROC for each combined decision rule computed in the previous cell.
for title, decisions in (
    ('Decision Rule(>=7) + Reference Vector ', y_pred_mr_than7_),
    ('Decision Rule(>=8) + Reference Vector ', y_pred_mr_than8_),
    ('Decision Rule(>=7) + Entropy Reference Value ', y_entr_pred_mr_than7_),
    ('Decision Rule(>=8) + Entropy Reference Value ', y_entr_pred_mr_than8_),
):
  print(title, roc_auc_score(y_true_, decisions))

Decision Rule(>=7) + Reference Vector  0.6316666666666666
Decision Rule(>=8) + Reference Vector  0.5475000000000001
Decision Rule(>=7) + Entropy Reference Value  0.6271111111111111
Decision Rule(>=8) + Entropy Reference Value  0.5459999999999999


**Method 2**
---



Normal weighted SVM




In [None]:
# Build the SVM training set from temperature-scaled softmax outputs.
# Every validation image is passed through every classifier; the sample is
# labelled OOD for the classifier whose left-out class equals the image's class
# and ID for all other classifiers.
id = 1
ood = 0

new_dataset = []

for img, label in tqdm(zip(val_imgs5, val_lbl5)):
    img = img.reshape([-1,32,32,3])
    for i,lbl in enumerate(labels):
      new_label = ood if label == lbl else id

      raw_output = model_dict[i].predict(img)
      scaled_output = temp_scaling(raw_output,temp_val[i])
      softmax_rows = np.array(models[i].apply_softmax(scaled_output)).tolist()
      # One image per call, so row 0 is the only softmax vector.
      new_dataset.append([softmax_rows[0],new_label])
      

0it [00:00, ?it/s]



13500it [1:03:49,  3.53it/s]


In [None]:
# Wrap the (softmax vector, ID/OOD label) pairs in a DataFrame. The constructor
# was commented out, so on a fresh kernel `df` was undefined at this point.
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions = df['predictions'].to_numpy()
new_labels = df['label'].to_numpy()

In [None]:
# Cache the SVM dataset in the current working directory.
df.to_pickle("predictions_dataset.pkl")

In [None]:
# Reload the cached SVM dataset and display it (columns: predictions, label).
df = pd.read_pickle('predictions_dataset.pkl')
df

Unnamed: 0,predictions,label
0,"[0.06053653359413147, 0.5988858342170715, 0.00...",0
1,"[0.9278675317764282, 0.01654912158846855, 0.00...",1
2,"[0.8851527571678162, 0.015081945806741714, 0.0...",1
3,"[0.6913420557975769, 0.003346933051943779, 0.0...",1
4,"[0.9483387470245361, 0.0012119452003389597, 0....",1
...,...,...
121495,"[0.9882175326347351, 2.1458537958096713e-05, 0...",1
121496,"[0.632042646408081, 0.0014252845430746675, 0.1...",1
121497,"[0.8195347189903259, 0.0004496020555961877, 0....",1
121498,"[0.9395357966423035, 0.0002134643727913499, 0....",1


In [None]:
# Class balance of the SVM dataset (label 0 = OOD, 1 = ID).
from collections import Counter  # also imported in the first cell of the notebook
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Partition the (softmax vector, label) pairs: label 1 goes to in_data,
# everything else to out_data.
in_data = [[vec, flag] for vec, flag in zip(predictions, new_labels) if flag == 1]
out_data = [[vec, flag] for vec, flag in zip(predictions, new_labels) if not flag == 1]

print(len(in_data),len(out_data))

# import random 
# in_data = random.sample(in_data,(len(out_data)))
# print(len(in_data))

108000 13500


In [None]:
# Inspect one in-distribution record: [softmax vector, label].
in_data[1]

[[0.8851527571678162,
  0.015081945806741714,
  0.0006530193495564163,
  0.029048869386315346,
  0.0007205545553006232,
  0.0028667955193668604,
  0.06240762025117874,
  0.00406847707927227],
 1]

In [None]:
# Flatten the records into parallel feature / label lists, in-distribution
# samples first, then out-of-distribution samples.
# NOTE(review): `input` shadows the builtin, and `labels` replaces the array of
# class ids that earlier cells iterate over; both names are kept because the
# train/test split cell reads them.
input = [record[0] for record in in_data] + [record[0] for record in out_data]
labels = [record[1] for record in in_data] + [record[1] for record in out_data]
print(len(input),len(labels))

121500 121500


In [None]:
# Stratified 80/20 split of the SVM dataset. A fixed random_state makes the
# split (and every metric derived from it) reproducible across kernel restarts.
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(
    input, labels, test_size=0.20, stratify=labels, random_state=42)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
# NOTE(review): this estimator is never fitted in the cells that follow (the
# fitted pipeline is `clf`), and the assignment rebinds the global name `model`.
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight
# 'balanced' weights are inversely proportional to class frequency.
# `classes` and `y` must be passed by keyword: current scikit-learn releases
# reject the positional form.
svm_classes = np.unique(svm_train_lb)
class_wts = compute_class_weight('balanced', classes=svm_classes, y=svm_train_lb)
# Map each class id to its weight (works for any number of classes).
class_wts = {int(cls): wt for cls, wt in zip(svm_classes, class_wts)}
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
# Weighted SVM: standardise the softmax features, then fit an SVC with the
# class weights computed above, using the training split only.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',class_weight=class_wts))
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
# Predict ID/OOD for the held-out 20% split.
pred = clf.predict(svm_test_ip)
len(pred) ### SVM was fitted on the training split (svm_train_ip) with class weights

24300

In [None]:
# Mean accuracy on the held-out split.
# NOTE(review): an earlier note here quoted scores of 82 (equal classes) and 92
# (weighted classes); the recorded output of this cell is ~0.67 — confirm which is current.
clf.score(svm_test_ip,svm_test_lb)

0.6694650205761317

In [None]:
# Method 2 inference: for every test image, feed each classifier's
# temperature-scaled softmax vector to the weighted SVM and take the majority
# of the per-classifier ID/OOD votes as the final decision.
y_true = []
y_pred = []
for img, label in tqdm(zip(test_imgs, test_lbls), total=len(test_imgs)):
    img= img.reshape([-1,32,32,3])

    ensemble_softmax = []
    for i in range(len(model_dict)):   # one entry per classifier (was hard-coded to 9)
      y_p = model_dict[i].predict(img)
      temp_new_y_pred = temp_scaling(y_p,temp_val[i])
      new_softmax_y_pred = np.array(models[i].apply_softmax(temp_new_y_pred))
      ensemble_softmax.append(new_softmax_y_pred[0])

    # Classify all softmax vectors of this image in a single SVM call instead
    # of one call per classifier; the per-row predictions are unchanged.
    svm_output = clf.predict(np.asarray(ensemble_softmax, dtype=np.float64)).tolist()

    majority_vote = Counter(svm_output).most_common(1)[0][0]
    y_pred.append(majority_vote)
    # Class 5 is the held-out (anomalous) class: 0 = OOD, 1 = ID.
    y_true.append(0 if label[0] == 5 else 1)

0it [00:00, ?it/s]



10000it [51:19,  3.25it/s]


In [None]:
# Show only the first few decisions; printing all 10,000 entries floods the
# notebook output without adding information.
print(y_pred[:20])
print(y_true[:20])

[1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 

In [None]:
# Distribution of predicted vs. true ID (1) / OOD (0) flags on the test set.
print(Counter(y_pred))
print(Counter(y_true))

Counter({1: 6209, 0: 3791})
Counter({1: 9000, 0: 1000})


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
# y_pred holds hard 0/1 majority votes, so this AUROC reflects a single operating point.
roc_auc_score(y_true, y_pred)

0.6405

In [None]:
### Predictions with the weighted SVM (repeats the counts printed a few cells above).
print(Counter(y_pred)) 
print(Counter(y_true))

Counter({1: 6209, 0: 3791})
Counter({1: 9000, 0: 1000})


In [None]:
# Put predictions and ground truth side by side in one frame.
# NOTE: this rebinds `df`, which previously held the SVM training dataset.
df1 = pd.Series(y_pred, name="y_pred").to_frame()
df2 = pd.Series(y_true, name="y_true").to_frame()
df = df1.join(df2)
df

Unnamed: 0,y_pred,y_true
0,1,1
1,1,1
2,0,1
3,0,1
4,1,1
...,...,...
9995,0,1
9996,1,1
9997,0,0
9998,0,1


In [None]:
# Cache the final predictions, then read them back from the cache.
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
# Restore the plain Python lists from the cached frame.
y_true = df['y_true'].to_numpy().tolist()
y_pred = df['y_pred'].to_numpy().tolist()
# Preview only; printing all 10,000 entries floods the notebook output.
print(y_true[:20])
print(y_pred[:20])

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 

# 4 as anomaly

In [None]:
import os
print(os.getcwd())
# Switch the working directory to the "leave 4" folder so the relative pickle
# paths used below resolve there.
# NOTE(review): hard-coded absolute Google Drive path; every later relative path depends on this chdir.
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 4/")
# os.chdir("/content/gdrive/My Drive/leave 6/")
print(os.listdir())

/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 4
['predictions_on_test.pkl', 'reference_vector_in_out.pkl']


In [None]:
# train_imgs4, val_imgs4 , train_lbl4, val_lbl4 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[4])
# print(train_imgs4.shape, val_imgs4.shape , train_lbl4.shape, val_lbl4.shape)
# open_file = open(os.path.join('Training_data.pkl'), "wb")
# pickle.dump((train_imgs4, val_imgs4 , train_lbl4, val_lbl4 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Load the cached train/validation split with class 4 removed. The context
# manager closes the file handle once the pickle is read.
# NOTE: pickle.load executes arbitrary code; only load files this notebook produced.
with open('Training_data.pkl', "rb") as open_file:
  (train_imgs4, val_imgs4 , train_lbl4, val_lbl4 )= pickle.load(open_file)
print(train_imgs4.shape, val_imgs4.shape , train_lbl4.shape, val_lbl4.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Distinct class ids present in the training labels, and how many there are.
print( np.unique(train_lbl4),len(np.unique(train_lbl4)))

[0 1 2 3 5 6 7 8 9] 9


In [None]:
# One Model wrapper per class present in the training labels.
models = [
    Model(name='4anomaly:classifier'+str(i))
    for i in range(len(np.unique(train_lbl4)))
]

names = [m.name for m in models]
names

['4anomaly:classifier0',
 '4anomaly:classifier1',
 '4anomaly:classifier2',
 '4anomaly:classifier3',
 '4anomaly:classifier4',
 '4anomaly:classifier5',
 '4anomaly:classifier6',
 '4anomaly:classifier7',
 '4anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Map original class ids onto contiguous indices 0..len(unique_lbl)-1.

  Args:
    train_lbl: training labels; a flat sequence or an (N, 1) array of class ids.
    val_lbl: validation labels, same layout as `train_lbl`.
    unique_lbl: the class ids to keep, in the order that defines the new
      indices (the callers pass np.unique(...) output).

  Returns:
    (train_lbl, val_lbl) as 1-D numpy arrays of remapped indices.

  Raises:
    KeyError: if a label is not contained in `unique_lbl`.
  """
  print(unique_lbl)
  # Build the mapping for however many classes are present; it used to be
  # hard-coded to exactly eight entries.
  index_map = {int(lbl): idx for idx, lbl in enumerate(unique_lbl)}
  train_lbl = np.array([index_map[int(x)] for x in np.asarray(train_lbl).reshape(-1)])
  val_lbl = np.array([index_map[int(x)] for x in np.asarray(val_lbl).reshape(-1)])

  return train_lbl,val_lbl

In [None]:
# Class ids present in the training labels; classifier index i in the loops
# below is paired with labels[i] as its left-out class.
labels = np.unique(train_lbl4)
labels

array([0, 1, 2, 3, 5, 6, 7, 8, 9], dtype=int32)

In [None]:
# for indx,i in enumerate(labels):
#   # if i == 9:
      
#     train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(train_imgs4,train_lbl4,val_req= False, anomalous_class = [i])
#     val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(val_imgs4,val_lbl4,val_req= False, anomalous_class = [i])
#     unique_lbls = np.unique(train_lbls_wt_clas_i)
#     (train_lbls_wt_clas_i,val_lbls__wt_clas_i) = remap_labels(train_lbls_wt_clas_i,val_lbls__wt_clas_i,unique_lbls)
#     print(np.unique(train_lbls_wt_clas_i),np.unique(val_lbls__wt_clas_i))
#     models[indx].build_model(8)
#     models[indx].train(train_imgs_wt_clas_i, train_lbls_wt_clas_i, val_imgs_wt_clas_i, val_lbls__wt_clas_i)
#     models[indx].save_model('classifier'+str(indx))

In [None]:
# Evaluate every saved leave-one-class-out classifier on its own train and
# validation subsets (the classifier's left-out class removed, labels remapped).
train_loss, train_acc = [], []
val_loss, val_acc = [], []

for indx,i in enumerate(labels):
    train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(
        train_imgs4,train_lbl4,val_req= False, anomalous_class = [i])
    val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(
        val_imgs4,val_lbl4,val_req= False, anomalous_class = [i])

    unique_lbls = np.unique(train_lbls_wt_clas_i)
    train_lbls_wt_clas_i, val_lbls__wt_clas_i = remap_labels(
        train_lbls_wt_clas_i,val_lbls__wt_clas_i,unique_lbls)

    model = models[indx].load_model('classifier'+str(indx))
    train_metrics = model.evaluate(train_imgs_wt_clas_i, train_lbls_wt_clas_i)
    val_metrics = model.evaluate(val_imgs_wt_clas_i, val_lbls__wt_clas_i)

    # Element 0 is stored as loss, element 1 as accuracy
    # (assumes evaluate() returns them in that order — TODO confirm).
    train_loss.append(train_metrics[0])
    train_acc.append(train_metrics[1])
    val_loss.append(val_metrics[0])
    val_acc.append(val_metrics[1])

[1 2 3 5 6 7 8 9]
[0 2 3 5 6 7 8 9]
[0 1 3 5 6 7 8 9]
[0 1 2 5 6 7 8 9]
[0 1 2 3 6 7 8 9]
[0 1 2 3 5 7 8 9]
[0 1 2 3 5 6 8 9]
[0 1 2 3 5 6 7 9]
[0 1 2 3 5 6 7 8]


In [None]:
# Mean accuracy across the classifiers evaluated above.
print('training accuracy average :',np.average(train_acc))
print('validation accuracy average :',np.average(val_acc))

training accuracy average : 0.8973134954770406
validation accuracy average : 0.7629351748360528


In [None]:
# Per-classifier accuracies, in classifier-index order.
print(train_acc)
print(val_acc)

[0.8592143058776855, 0.8716785907745361, 0.9228571653366089, 0.928678572177887, 0.9114999771118164, 0.9005714058876038, 0.8671071529388428, 0.899071455001831, 0.915142834186554]
[0.7327499985694885, 0.7445833086967468, 0.7896666526794434, 0.8026666641235352, 0.7802500128746033, 0.7680000066757202, 0.7337499856948853, 0.7619166374206543, 0.7528333067893982]


In [None]:
# Load each saved classifier through its wrapper's load_model (defined outside this section).
classifiers = [models[i].load_model('classifier'+str(i)) for i in range(len(models))]

In [None]:
# Display the loaded classifier objects.
classifiers

[<tensorflow.python.keras.engine.sequential.Sequential at 0x7fa4705bb910>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fa4704d2f50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fa4704cd7d0>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fa4704bc850>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fa4704b2cd0>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fa470520750>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fa41663d510>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fa41668c250>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fa40bad5e10>]

In [None]:
# Name of the second-to-last layer of each classifier (first element of the last two layers).
# Presumably this is the logits layer before the final activation — confirm against build_model.
layer_name_list = [classifier.layers[-2:][0].name for classifier in classifiers]
layer_name_list

['dense_1',
 'dense_3',
 'dense_5',
 'dense_7',
 'dense_9',
 'dense_11',
 'dense_13',
 'dense_15',
 'dense_17']

In [None]:
# Build one sub-model per classifier that stops at the layer named in
# layer_name_list, so calling it returns that layer's output for an input batch.
# tf.keras.Model is referenced explicitly: `from keras import Model` would
# overwrite the notebook's own `Model` wrapper class for every later cell and
# mix standalone keras with the tf.keras models loaded above.
model_dict = {}

for i in range(len(models)):
  model_output_i = classifiers[i].get_layer(layer_name_list[i]).output
  model_dict[i] = tf.keras.Model(inputs=classifiers[i].input, outputs=model_output_i)

In [None]:
# Run each sub-model over the validation images that exclude its own
# left-out class; y_pred[i] holds the outputs of classifier i.
y_pred = []
for i,label  in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs4,val_lbl4,val_req= False, anomalous_class = [label])
  print(label,np.unique(val_lbls_wt_clas_i))
  y_pred.append(model_dict[i].predict(val_imgs_wt_clas_i))

0 [1 2 3 5 6 7 8 9]
1 [0 2 3 5 6 7 8 9]
2 [0 1 3 5 6 7 8 9]
3 [0 1 2 5 6 7 8 9]
5 [0 1 2 3 6 7 8 9]
6 [0 1 2 3 5 7 8 9]
7 [0 1 2 3 5 6 8 9]
8 [0 1 2 3 5 6 7 9]
9 [0 1 2 3 5 6 7 8]


In [None]:
# Sanity check: one output array per classifier.
len(y_pred)

9

In [None]:
# Fit one temperature per classifier on its validation outputs
# (temp_cal is defined outside this section).
temp_val = []
for i,label in enumerate(labels):
    val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
        val_imgs4,val_lbl4,val_req= False, anomalous_class = [label])
    print(np.unique(val_lbls_wt_clas_i),y_pred[i].shape)

    unique_lbls = np.unique(val_lbls_wt_clas_i)
    # The validation labels are passed in both label slots; keep the second result.
    _, val_lbls_wt_clas_i = remap_labels(val_lbls_wt_clas_i,val_lbls_wt_clas_i,unique_lbls)

    temp_val.append(temp_cal(y_pred[i],val_lbls_wt_clas_i,len(unique_lbls)))

[1 2 3 5 6 7 8 9] (12000, 8)
[1 2 3 5 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.2976980209350586
[0 2 3 5 6 7 8 9] (12000, 8)
[0 2 3 5 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.0789387226104736
[0 1 3 5 6 7 8 9] (12000, 8)
[0 1 3 5 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.0154008865356445
[0 1 2 5 6 7 8 9] (12000, 8)
[0 1 2 5 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.1836836338043213
[0 1 2 3 6 7 8 9] (12000, 8)
[0 1 2 3 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.12571120262146
[0 1 2 3 5 7 8 9] (12000, 8)
[0 1 2 3 5 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 1.9480950832366943
[0 1 2 3 5 6 8 9] (12000, 8)
[0 1 2 3 5 6 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.204376220703125
[0 1 2 3 5 6 7 9] (12000, 8)
[0 1 2 3 5 6 7 9]
Temperature Initial value: 1.0
Temperature Final value: 1.8560916185379028
[0 1 2 3 5 6 7 8] (12000, 8

In [None]:
# Display the fitted temperatures.
temp_val

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.297698>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0789387>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.015401>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1836836>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1257112>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.9480951>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.2043762>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.8560916>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.2226255>]

In [None]:
# Replace the fitted temperature objects with plain floats. The values match the
# numbers displayed by the previous cell's recorded output.
# NOTE(review): these go stale if the classifiers or temperatures are re-fitted.
temp_val = [2.297698,2.0789387,2.015401,2.1836836,2.1257112,1.9480951,2.2043762,1.8560916,2.2226255]

**Method 1**
---



In [None]:
# labels --> the non-anomalous classes; in this section (class 4 held out) labels are 0 1 2 3 5 6 7 8 9

In [None]:
# Accumulate, over the validation set, the running sums needed for the
# reference vectors:
#   *_in  : image's class is NOT the classifier's left-out class (in-distribution)
#   *_out : image's class IS the classifier's left-out class (out-of-distribution)
# avg_* / count_* are per classifier; the entropy sums are pooled over classifiers.
# tqdm is already imported in the notebook's first cell.
entropy_in = 0
count_entr_in = 0
avg_in = [0] * 9
count_in = [0] * 9

entropy_out = 0
count_entr_out = 0
avg_out = [0] * 9
count_out = [0] * 9

for img, lbl in tqdm(zip(val_imgs4, val_lbl4)):
  img = img.reshape([-1, 32, 32, 3])

  for i,anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i for this image.
    pred = tf.nn.softmax(tf.math.divide(model_dict[i](img), temp_val[i]))
    max_val = np.max(pred)
    # Entropy normalised by using the number of outputs as log base.
    entrpy = entropy(pred[0], base=len(pred[0]))

    if anmls_lbl == lbl:
      avg_out[i] = avg_out[i] + max_val
      count_out[i] = count_out[i] + 1
      entropy_out = entropy_out + entrpy
      count_entr_out = count_entr_out + 1
    else:
      avg_in[i] = avg_in[i] + max_val
      count_in[i] = count_in[i] + 1
      entropy_in = entropy_in + entrpy
      count_entr_in = count_entr_in + 1


13500it [18:04, 12.44it/s]


In [None]:
# Per-classifier mean max-softmax for ID and OOD inputs (the reference vectors).
classifier_avg_in = [avg_in[k]/count_in[k] for k in range(9)]
classifier_avg_out = [avg_out[k]/count_out[k] for k in range(9)]

# Scalar thresholds: the mean of each reference vector.
treshold_value_in = 0.0
treshold_value_out = 0.0
for mean_in, mean_out in zip(classifier_avg_in, classifier_avg_out):
  treshold_value_in = treshold_value_in + mean_in
  treshold_value_out = treshold_value_out + mean_out

treshold_value_in = treshold_value_in/len(classifier_avg_in)
treshold_value_out = treshold_value_out/len(classifier_avg_out)


In [None]:
# Show the reference vectors and their scalar means.
for shown in (classifier_avg_in, classifier_avg_out, treshold_value_in, treshold_value_out):
  print(shown)

# Mean normalised entropy over all ID / OOD (image, classifier) pairs.
entropy_ref_in_4 = entropy_in/count_entr_in
entropy_ref_out_4 = entropy_out/ count_entr_out

for shown in (entropy_ref_in_4, entropy_ref_out_4):
  print(shown)

[0.7546551184045772, 0.748217892413338, 0.7889474377719065, 0.804927050717175, 0.7720610727916162, 0.7625493102421363, 0.7479394470738868, 0.7589491735200087, 0.7631102531502644]
[0.6837335515121619, 0.7016880629161993, 0.6085633652408918, 0.6306739911735058, 0.6988095740477244, 0.6540117044548194, 0.5654309450387954, 0.7046142220298449, 0.6789801613092422]
0.7668174173427676
0.6585006197470206
0.3098283695286689
0.43893798281883956


In [None]:
# Persist the reference vectors and entropy references. The context manager
# closes (and therefore flushes) the file before the next cell reads it back.
with open('reference_vector_in_out.pkl', "wb") as open_file:
  pickle.dump((classifier_avg_in,classifier_avg_out,entropy_ref_in_4,entropy_ref_out_4), open_file)

In [None]:
# Reload the reference vectors / entropy references written by this notebook.
# NOTE: pickle.load executes arbitrary code; only load files this notebook produced.
with open('reference_vector_in_out.pkl', "rb") as open_file:
  ref_vector_in_4,ref_vector_out_4,entropy_ref_in_4,entropy_ref_out_4 = pickle.load(open_file)

In [None]:
# For every test image, run all leave-one-class-out classifiers and record:
#   [max softmax per classifier, predicted original label per classifier,
#    true label, ID/OOD flag (1 = ID, 0 = OOD), mean normalised entropy]
max_sm_all_wt_4 = []
ood = 0
ind= 1
for data, label in tqdm(zip(test_imgs, test_lbls), total=len(test_imgs)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []

  # Class 4 is the held-out (anomalous) class in this section.
  lbl = ood if label[0] == 4 else ind

  for i,anmls_lbl in enumerate(labels):
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])   # temperature scaling
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    # Classifier i was trained with labels[i] removed and the remaining labels
    # remapped in sorted order, so its output index maps back into `labels`
    # with entry i removed. (The previous code always removed entry 0, which
    # is only right for i == 0.)
    labels_removed_anmls = np.delete(labels, i)
    pred_lbl = labels_removed_anmls[pred_ind]   ### reverse mapping the actual label

    prediction_lbl.append(pred_lbl)

    # Entropy normalised to [0, 1] by using the number of outputs as log base.
    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_4.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

10000it [13:47, 12.09it/s]


In [None]:
# Cache the per-image ensemble predictions; the `with` block guarantees the
# file is flushed and closed before it is read back.
with open('predictions_on_test.pkl', "wb") as open_file:
  pickle.dump(max_sm_all_wt_4, open_file)

In [None]:
# Reload the cached per-image ensemble predictions.
# NOTE: pickle.load executes arbitrary code; only load files this notebook produced.
with open('predictions_on_test.pkl', "rb") as open_file:
  max_sm_all_wt_4 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean ID/OOD decision)

In [None]:
# Method 1 scoring. For each test record, compare
#   * the vector of per-classifier max-softmax values with the ID / OOD reference vectors
#   * the mean entropy with the ID / OOD entropy reference values
# and turn each distance d into a similarity 1 / (1 + d).
# `distance` is already imported in the notebook's first cell.
id = 1     # in-distribution flag (NOTE: shadows the builtin id(); name kept to match the other cells)
ood = 0    # out-of-distribution flag
sim_score_in =[]
sim_score_out = []
sim_score_entr_in =[]
sim_score_entr_out = []
sim_score_add = []
y_true = []
y_pred=[]
y_entr_pred=[]
y_true_ood = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_4):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_4)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_4)

  # The entropy values are scalars, so the Euclidean distance is just the
  # absolute difference. distance.euclidean expects 1-D vectors and raises
  # on scalar input in current SciPy releases.
  dist_entr_in = abs(avg_entr - entropy_ref_in_4)
  dist_entr_out = abs(avg_entr - entropy_ref_out_4)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)
  sim_add = sim_in + sim_out

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_add)

  # Hard decisions: whichever reference is closer wins.
  prd = id if sim_in > sim_out else ood
  prd_e = id if sim_entr_in > sim_entr_out else ood

  y_pred.append(prd)
  y_entr_pred.append(prd_e)
  y_true.append(lbl_id_or_ood)
  # Inverted ground truth (1 = OOD) for scores where higher means "more OOD".
  # The stored flag is always 0 or 1, so this can never be left unbound.
  y_true_ood.append(1 - lbl_id_or_ood)

100%|██████████| 10000/10000 [00:01<00:00, 5933.43it/s]


In [None]:
from sklearn.metrics import roc_auc_score  # already imported at the top of the notebook
# AUROC of the reference-vector method (Method 1). `y_pred` / `y_entr_pred` are hard
# 0/1 decisions, so those two AUROC values each come from a single operating point.
print('Auroc with prediction as ID or OOD  ',roc_auc_score(y_true, y_pred))
print('Auroc prediction with sim score to in distribution only  :',roc_auc_score(y_true,sim_score_in))
# Scores oriented towards OOD are evaluated against the flipped labels (OOD = 1).
print('Auroc prediction with sim score to ood distribution only  :',roc_auc_score(y_true_ood,sim_score_out))

print('\nAuroc prediction with sim added score to for in and ood distribution only  :',roc_auc_score(y_true_ood,sim_score_add))

# Same three evaluations using the entropy-based similarities.
print('\nAuroc with prediction as ID or OOD Entropy ',roc_auc_score(y_true, y_entr_pred))
print('Auroc prediction with sim score to in distribution only Entropy  :',roc_auc_score(y_true,sim_score_entr_in))
print('Auroc prediction with sim score to out distribution only Entropy  :',roc_auc_score(y_true_ood,sim_score_entr_out))


Auroc with prediction as ID or OOD   0.7322777777777778
Auroc prediction with sim score to in distribution only  : 0.6921683333333333
Auroc prediction with sim score to ood distribution only  : 0.6640006666666667

Auroc prediction with sim added score to for in and ood distribution only  : 0.48793288888888886

Auroc with prediction as ID or OOD Entropy  0.7400555555555556
Auroc prediction with sim score to in distribution only Entropy  : 0.6809873333333333
Auroc prediction with sim score to out distribution only Entropy  : 0.6387094444444444


Decision Rule 

In [None]:
# Simple decision rule: no 7-of-9 / 8-of-9 agreement threshold, just the majority vote.
# An image is called in-distribution when the most frequent ensemble label equals
# its true class label.
# NOTE(review): this rule reads the ground-truth class (`label_actual`), so it is
# not usable as a detector on unlabeled data - confirm this is intended.

id = 1
ood = 0
y_true = []
y_pred=[]

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_4):
  count = Counter(pred_lbl_each_cls)
  most_common = count.most_common(1)[0][0]  # label with the most votes
  pred = id if most_common == label_actual else ood
  y_pred.append(pred)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 173608.17it/s]

 Auroc with simple decision rule about majority_:   0.7436666666666667





In [None]:
# Binary codes used by the decision-rule cells below: 1 = in-distribution, 0 = out-of-distribution.
# NOTE(review): `id` shadows the Python builtin of the same name.
id = 1
ood = 0

In [None]:
# Agreement-based decision rule: an image is called in-distribution when at least
# 7 (resp. 8) of the leave-one-out classifiers vote for the same class.
# Relies on `id` / `ood` set in the previous cell.
y_true_dec_rule = []
y_pred_mr_than8_ = []
y_pred_mr_than7_ = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_4):
  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)  # size of the largest voting bloc

  y_pred_mr_than7_.append(id if top_votes >= 7 else ood)
  y_pred_mr_than8_.append(id if top_votes >= 8 else ood)

  # Class 4 is the held-out (anomalous) class in this section.
  y_true_dec_rule.append(ood if label_actual == 4 else id)

# The labels now say ">=" so they match the thresholds actually applied above.
print('\nAuroc for decision rule >= 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_))
print('Auroc for decision rule >= 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_))

100%|██████████| 10000/10000 [00:00<00:00, 25389.60it/s]



Auroc for decision rule > 7  0.6421111111111112
Auroc for decision rule > 8  0.5592222222222223


In [None]:
# Combined rule: an image is in-distribution only if it is (a) closer to the ID
# reference than to the OOD reference and (b) at least 7 / 8 classifiers agree on
# its label. Evaluated once with the max-softmax reference vectors and once with
# the entropy reference values.
sim_score_in =[]
sim_score_entr_in =[]
y_true_ = []
y_pred_mr_than7_ = []
y_pred_mr_than8_ = []
y_entr_pred_mr_than7_ = []
y_entr_pred_mr_than8_ = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_4):
  # Similarity = 1 / (1 + Euclidean distance) to each reference.
  sim_in = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_in_4))
  sim_out = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_out_4))
  sim_entr_in = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_in_4))
  sim_entr_out = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_out_4))

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  largest_bloc = np.max(counts)            # votes received by the most popular label
  closer_to_in = sim_in > sim_out          # reference-vector test
  entr_closer_to_in = sim_entr_in > sim_entr_out  # entropy-reference test

  y_pred_mr_than7_.append(id if closer_to_in and largest_bloc >= 7 else ood)
  y_pred_mr_than8_.append(id if closer_to_in and largest_bloc >= 8 else ood)
  y_entr_pred_mr_than7_.append(id if entr_closer_to_in and largest_bloc >= 7 else ood)
  y_entr_pred_mr_than8_.append(id if entr_closer_to_in and largest_bloc >= 8 else ood)

  # Class 4 is the held-out (anomalous) class in this section.
  y_true_.append(ood if label_actual == 4 else id)


100%|██████████| 10000/10000 [00:01<00:00, 6810.17it/s]


In [None]:
# AUROC of the combined rules; all four prediction lists hold hard 0/1 decisions.
print('Decision Rule(>=7) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than7_))
print('Decision Rule(>=8) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than8_))
print('Decision Rule(>=7) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than7_))
print('Decision Rule(>=8) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than8_))

Decision Rule(>=7) + Reference Vector  0.645
Decision Rule(>=8) + Reference Vector  0.5580555555555555
Decision Rule(>=7) + Entropy Reference Value  0.6416666666666667
Decision Rule(>=8) + Entropy Reference Value  0.5568888888888889


**Method 2**
---



Normal weighted SVM




In [None]:
#########################  after applying temperature scaling and softmax  ###########################################
######################### the y_pred used is In + ood distribution for the ensemble classifier ############################
# Builds the SVM training set: one row per (validation image, classifier) pair,
# holding that classifier's temperature-scaled softmax vector and a 0/1 label.
id = 1
ood = 0

new_dataset = []

for img, label in tqdm(zip(val_imgs4, val_lbl4)):
    img = img.reshape([-1,32,32,3])  # add a batch dimension of 1
    for i,lbl in enumerate(labels):
      # The image is OOD for classifier i exactly when its class is the one at labels[i].
      if label == lbl:
        new_label = ood
      else :
        new_label = id

      m = model_dict[i]
      y_p = m.predict(img)
      # temp_scaling / apply_softmax are defined earlier in the notebook;
      # presumably they divide by the temperature and apply softmax - TODO confirm.
      temp_new_y_pred = temp_scaling(y_p,temp_val[i])
      new_softmax_y_pred = models[i].apply_softmax(temp_new_y_pred) 
      new_softmax_y_pred = np.array(new_softmax_y_pred).tolist()  
      new_softmax_y_pred = new_softmax_y_pred[0]  # first (only) row of the batch
      new_dataset.append([new_softmax_y_pred,new_label])
      

0it [00:00, ?it/s]



13500it [1:04:33,  3.49it/s]


In [None]:
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions = df['predictions'].to_numpy()
new_labels = df['label'].to_numpy()

In [None]:
df.to_pickle("predictions_dataset.pkl")

In [None]:
df = pd.read_pickle('predictions_dataset.pkl')
df

Unnamed: 0,predictions,label
0,"[0.0010170280002057552, 0.8080684542655945, 0....",1
1,"[0.20009845495224, 0.6804748773574829, 0.04488...",1
2,"[0.25030115246772766, 0.0033070615027099848, 0...",0
3,"[0.007537683006376028, 0.00013322258018888533,...",1
4,"[0.005520546343177557, 0.001355655724182725, 0...",1
...,...,...
121495,"[0.001107115182094276, 0.0005691215046681464, ...",0
121496,"[0.014249563217163086, 0.0007176174549385905, ...",1
121497,"[0.00885818898677826, 2.966395641124109e-06, 0...",1
121498,"[0.0250957440584898, 0.00041510944720357656, 0...",1


In [None]:
from collections import Counter
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Partition the (softmax vector, label) rows by label: 1 goes to in_data, anything else to out_data.
in_data = []
out_data = []

for softmax_vec, row_label in zip(predictions, new_labels):
  bucket = in_data if row_label == 1 else out_data
  bucket.append([softmax_vec, row_label])

print(len(in_data),len(out_data))

# import random 
# in_data = random.sample(in_data,(len(out_data)))
# print(len(in_data))

108000 13500


In [None]:
in_data[1]

[[0.20009845495224,
  0.6804748773574829,
  0.04488324373960495,
  0.004525261931121349,
  0.008939871564507484,
  0.0017065599095076323,
  0.05924014374613762,
  0.00013152870815247297],
 1]

In [None]:
# Flatten the two partitions into parallel feature / label lists (ID rows first, then OOD rows).
# NOTE(review): `input` shadows the builtin, and `labels` overwrites the array of class
# labels used by the ensemble cells; re-running those cells after this one will misbehave.
input = [row[0] for row in in_data] + [row[0] for row in out_data]
labels = [row[1] for row in in_data] + [row[1] for row in out_data]
# print(input,labels)
print(len(input),len(labels))

121500 121500


In [None]:
# Stratified 80/20 split so both parts keep the same ID/OOD ratio.
# NOTE(review): no random_state is passed, so the split (and every downstream number)
# changes on each run.
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(input, labels, test_size=0.20,stratify = labels)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight
# 'balanced' weights are inversely proportional to class frequency, which
# counteracts the ID/OOD imbalance in the SVM training labels.
# `classes` and `y` are passed by keyword: newer scikit-learn releases reject them
# as positional arguments, and the keyword form also works on older releases.
svm_classes = np.unique(svm_train_lb)
class_wts = compute_class_weight('balanced', classes=svm_classes, y=svm_train_lb)
# The weights come back in the order of `svm_classes`; key them by class label.
class_wts = dict(zip(svm_classes.tolist(), class_wts))
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Class-weighted SVC on standardized softmax vectors. The scaler is fitted inside
# the pipeline, so `clf.predict` applies the same scaling at test time.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',class_weight=class_wts))
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
pred = clf.predict(svm_test_ip)
len(pred) ### svm fitted on whole data with weights

24300

In [None]:
clf.score(svm_test_ip,svm_test_lb) 

0.6525514403292181

In [None]:
# Evaluate the SVM on the full test set: each of the 9 classifiers' softmax
# vectors is classified ID/OOD by the SVM, and the image gets the majority decision.
y_true = []
y_pred = []
for img, label in tqdm(zip(test_imgs, test_lbls)):
    img= img.reshape([-1,32,32,3])  # add a batch dimension of 1
    
    svm_output =[]
    # NOTE(review): 9 is hard-coded; presumably because `labels` was rebound to the
    # SVM label list in an earlier cell - confirm it matches len(model_dict).
    for i in range(0,9):
      m = model_dict[i]
      y_p = m.predict(img)
      temp_new_y_pred = temp_scaling(y_p,temp_val[i])
      new_softmax_y_pred = models[i].apply_softmax(temp_new_y_pred) 
      new_softmax_y_pred = np.array(new_softmax_y_pred).tolist()  
      # Reshape to (1, n_features), the 2-D input the sklearn pipeline expects.
      new_softmax_y_pred = np.array(new_softmax_y_pred[0]).reshape([1,-1])
      # print(new_softmax_y_pred)
      a = clf.predict(new_softmax_y_pred)
      # print(a)

      svm_output.append(a[0])
    # print(svm_output)
 
    # Majority vote over the 9 binary SVM decisions (an odd count, so no ties).
    a = Counter(list(svm_output))
    b = a.most_common()[0][0]
    # print(b)
    y_pred.append(b)
    # Ground truth: class 4 is the held-out (OOD = 0) class, everything else is ID = 1.
    if not label == 4:
      y_true.append(1)
    else:
      y_true.append(0)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
5001it [26:03,  3.16it/s][A
5002it [26:03,  3.16it/s][A
5003it [26:03,  3.11it/s][A
5004it [26:04,  3.08it/s][A
5005it [26:04,  3.06it/s][A
5006it [26:04,  3.06it/s][A
5007it [26:05,  3.11it/s][A
5008it [26:05,  3.13it/s][A
5009it [26:05,  3.14it/s][A
5010it [26:05,  3.12it/s][A
5011it [26:06,  3.14it/s][A
5012it [26:06,  3.13it/s][A
5013it [26:06,  3.13it/s][A
5014it [26:07,  3.17it/s][A
5015it [26:07,  3.18it/s][A
5016it [26:07,  3.20it/s][A
5017it [26:08,  3.14it/s][A
5018it [26:08,  3.14it/s][A
5019it [26:08,  3.14it/s][A
5020it [26:09,  3.15it/s][A
5021it [26:09,  3.12it/s][A
5022it [26:09,  3.12it/s][A
5023it [26:10,  3.14it/s][A
5024it [26:10,  3.15it/s][A
5025it [26:10,  3.12it/s][A
5026it [26:11,  3.11it/s][A
5027it [26:11,  3.15it/s][A
5028it [26:11,  3.13it/s][A
5029it [26:12,  3.16it/s][A
5030it [26:12,  3.15it/s][A
5031it [26:12,  3.11it/s][A
5032it [26:12,  3.15it/s][A
5033it 

In [None]:
print(y_pred)
print(y_true)

[0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 

In [None]:
print(Counter(y_pred))
print(Counter(y_true))

Counter({1: 5831, 0: 4169})
Counter({1: 9000, 0: 1000})


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
roc_auc_score(y_true, y_pred)

0.6561666666666666

In [None]:
df1= pd.DataFrame(y_pred, columns=["y_pred"])
df2= pd.DataFrame(y_true, columns=["y_true"])
df = pd.concat([df1, df2], axis=1)
df

Unnamed: 0,y_pred,y_true
0,0,1
1,1,1
2,0,1
3,1,1
4,1,1
...,...,...
9995,0,1
9996,1,1
9997,0,1
9998,1,1


In [None]:
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
y_true = df['y_true'].to_numpy().tolist()
y_pred = df['y_pred'].to_numpy().tolist()
print(y_true)
print(y_pred)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 

# 3 as anomaly

In [None]:
import os
print(os.getcwd())
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 3/")
# os.chdir("/content/gdrive/My Drive/leave 6/")
print(os.listdir())

/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 3
['predictions_on_test.pkl', 'reference_vector_in_out.pkl']


In [None]:
# train_imgs3, val_imgs3 , train_lbl3, val_lbl3 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[3])
# print(train_imgs3.shape, val_imgs3.shape , train_lbl3.shape, val_lbl3.shape)
# open_file = open(os.path.join('Training_data.pkl'), "wb")
# pickle.dump((train_imgs3, val_imgs3 , train_lbl3, val_lbl3 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Load the cached train/validation split for the "leave class 3 out" experiment.
# The context manager closes the file handle once the data is read.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook produced.
with open(os.path.join('Training_data.pkl'), "rb") as open_file:
  (train_imgs3, val_imgs3 , train_lbl3, val_lbl3 ) = pickle.load(open_file)
print(train_imgs3.shape, val_imgs3.shape , train_lbl3.shape, val_lbl3.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
print( np.unique(train_lbl3),len(np.unique(train_lbl3)))

[0 1 2 4 5 6 7 8 9] 9


In [None]:
# One wrapper object per remaining class; classifier k will be the one that leaves out
# the k-th remaining class.
# NOTE(review): `Model` must be the notebook's own wrapper class here; a later cell runs
# `from keras import Model`, which rebinds the name - run this cell before that one.
models = [
  Model(name='3anomaly:classifier'+str(i))
  for i in range(len(np.unique(train_lbl3)))
]

names = [m.name for m in models]
names

['3anomaly:classifier0',
 '3anomaly:classifier1',
 '3anomaly:classifier2',
 '3anomaly:classifier3',
 '3anomaly:classifier4',
 '3anomaly:classifier5',
 '3anomaly:classifier6',
 '3anomaly:classifier7',
 '3anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Remap class labels to contiguous indices 0..K-1.

  The label at position k of `unique_lbl` is mapped to k. Works for any number
  of classes (the previous version only handled exactly eight).

  Args:
    train_lbl: training labels, any array-like of integer labels (e.g. shape (N,) or (N, 1)).
    val_lbl: validation labels, same convention as `train_lbl`.
    unique_lbl: sequence of the distinct labels, in the order that defines the new indices.

  Returns:
    Tuple `(train_lbl, val_lbl)` of 1-D numpy arrays holding the remapped indices.

  Raises:
    KeyError: if a label in `train_lbl` / `val_lbl` does not occur in `unique_lbl`.
  """
  print(unique_lbl)
  index_map = {int(lbl): idx for idx, lbl in enumerate(unique_lbl)}
  # ravel() lets both (N,) and (N, 1) label arrays be handled one scalar at a time.
  train_lbl = np.array([index_map[int(x)] for x in np.ravel(train_lbl)])
  val_lbl = np.array([index_map[int(x)] for x in np.ravel(val_lbl)])

  return train_lbl,val_lbl

In [None]:
labels = np.unique(train_lbl3)
labels

array([0, 1, 2, 4, 5, 6, 7, 8, 9], dtype=int32)

In [None]:
# for indx,i in enumerate(labels):
#   # if i == 9:
      
#     train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(train_imgs3,train_lbl3,val_req= False, anomalous_class = [i])
#     val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(val_imgs3,val_lbl3,val_req= False, anomalous_class = [i])
#     unique_lbls = np.unique(train_lbls_wt_clas_i)
#     print('before remapping',np.unique(train_lbls_wt_clas_i),np.unique(val_lbls__wt_clas_i))
#     (train_lbls_wt_clas_i,val_lbls__wt_clas_i) = remap_labels(train_lbls_wt_clas_i,val_lbls__wt_clas_i,unique_lbls)
#     print(np.unique(train_lbls_wt_clas_i),np.unique(val_lbls__wt_clas_i))
#     models[indx].build_model(8)
#     models[indx].train(train_imgs_wt_clas_i, train_lbls_wt_clas_i, val_imgs_wt_clas_i, val_lbls__wt_clas_i)
#     models[indx].save_model('classifier'+str(indx))

In [None]:
# Per-classifier evaluation results, one entry per leave-one-out classifier.
train_loss = []
train_acc = []
val_loss = []
val_acc = []

for indx,i in enumerate(labels):
  # if i == 9:
      
    # Classifier `indx` additionally leaves out class `i`: drop that class from both
    # splits, then remap the remaining labels to contiguous indices.
    train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(train_imgs3,train_lbl3,val_req= False, anomalous_class = [i])
    val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(val_imgs3,val_lbl3,val_req= False, anomalous_class = [i])
    unique_lbls = np.unique(train_lbls_wt_clas_i)
    (train_lbls_wt_clas_i,val_lbls__wt_clas_i) = remap_labels(train_lbls_wt_clas_i,val_lbls__wt_clas_i,unique_lbls)
    # print(np.unique(train_lbls_wt_clas_i),np.unique(val_lbls__wt_clas_i))
    # Load the previously trained classifier instead of retraining it.
    model = models[indx].load_model('classifier'+str(indx))
    # Assumes evaluate() returns [loss, accuracy] in that order - TODO confirm against
    # the metrics the saved model was compiled with.
    train_ = model.evaluate(train_imgs_wt_clas_i, train_lbls_wt_clas_i)
    val_ = model.evaluate(val_imgs_wt_clas_i, val_lbls__wt_clas_i)
    train_loss.append(train_[0])
    train_acc.append(train_[1])
    val_loss.append(val_[0])
    val_acc.append(val_[1])

[1 2 4 5 6 7 8 9]
[0 2 4 5 6 7 8 9]
[0 1 4 5 6 7 8 9]
[0 1 2 5 6 7 8 9]
[0 1 2 4 6 7 8 9]
[0 1 2 4 5 7 8 9]
[0 1 2 4 5 6 8 9]
[0 1 2 4 5 6 7 9]
[0 1 2 4 5 6 7 8]


In [None]:
print('training accuracy average :',np.average(train_acc))
print('validation accuracy average :',np.average(val_acc))

training accuracy average : 0.8986031810442606
validation accuracy average : 0.7764074073897468


In [None]:
print(train_acc)
print(val_acc)

[0.8471428751945496, 0.874750018119812, 0.9120000004768372, 0.9027857184410095, 0.8771785497665405, 0.9272857308387756, 0.916357159614563, 0.9272500276565552, 0.9026785492897034]
[0.7361666560173035, 0.749916672706604, 0.796583354473114, 0.7860833406448364, 0.7684166431427002, 0.7887499928474426, 0.796833336353302, 0.7796666622161865, 0.7852500081062317]


In [None]:
classifiers = [models[i].load_model('classifier'+str(i)) for i in range(len(models))]

In [None]:
classifiers

[<tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b27dc990>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b11be050>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b6301d10>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b11eaa50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b4e8f650>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b2708a50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8afd83f10>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b1086410>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8af221ed0>]

In [None]:
# Name of the first of each classifier's last two layers (i.e. the second-to-last layer);
# these layers are used as the output of the truncated models built in the next cell.
layer_name_list = [classifier.layers[-2:][0].name for classifier in classifiers]
layer_name_list

['dense_3',
 'dense_5',
 'dense_7',
 'dense_9',
 'dense_11',
 'dense_13',
 'dense_15',
 'dense_17',
 'dense_19']

In [None]:
# For every classifier, build a truncated model that maps the input image to the
# output of the layer named in `layer_name_list` (its second-to-last layer).
# `tf.keras.Model` is referenced directly instead of `from keras import Model`:
# that import rebound the name `Model` and shadowed the notebook's own `Model`
# wrapper class, breaking any later `Model(name=...)` call. The classifiers are
# tf.keras models, so this is also the matching class.
model_dict = {}

for i in range(len(models)):
  model_output_i = classifiers[i].get_layer(layer_name_list[i]).output
  m0 = tf.keras.Model(inputs=classifiers[i].input, outputs=model_output_i)
  model_dict[i] = m0

In [None]:
# Raw outputs of each truncated model on its own in-distribution validation subset
# (validation images with class `label` removed). Input to temperature calibration.
# NOTE: `y_pred` is reused here for a list of per-classifier output arrays.
y_pred = []
for i,label  in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(val_imgs3,val_lbl3,val_req= False, anomalous_class = [label])
  m = model_dict[i]
  print(label,np.unique(val_lbls_wt_clas_i))
  # m.summary()
  y_p = m.predict(val_imgs_wt_clas_i)
  y_pred.append(y_p)

0 [1 2 4 5 6 7 8 9]
1 [0 2 4 5 6 7 8 9]
2 [0 1 4 5 6 7 8 9]
4 [0 1 2 5 6 7 8 9]
5 [0 1 2 4 6 7 8 9]
6 [0 1 2 4 5 7 8 9]
7 [0 1 2 4 5 6 8 9]
8 [0 1 2 4 5 6 7 9]
9 [0 1 2 4 5 6 7 8]


In [None]:
len(y_pred)

9

In [None]:
# One calibrated temperature per classifier, computed from the outputs collected above.
temp_val = []
for i,label in enumerate(labels):
  # if i == 8:
    val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(val_imgs3,val_lbl3,val_req= False, anomalous_class = [label])
    print(np.unique(val_lbls_wt_clas_i),y_pred[i].shape)

    unique_lbls = np.unique(val_lbls_wt_clas_i)
    # remap_labels expects (train, val); the validation labels are passed for both
    # slots and both (identical) results are bound to the same name.
    (val_lbls_wt_clas_i,val_lbls_wt_clas_i) = remap_labels(val_lbls_wt_clas_i,val_lbls_wt_clas_i,unique_lbls)
  
    # temp_cal is defined earlier in the notebook; presumably it fits the temperature
    # that best calibrates these outputs against the labels - TODO confirm.
    temp = temp_cal(y_pred[i],val_lbls_wt_clas_i,len(unique_lbls))
    temp_val.append(temp)

In [None]:
temp_val

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.276033>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1995564>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.147684>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1348383>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.267925>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.042225>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.9683046>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.090059>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.8560942>]

In [None]:
temp_val = [2.276033,2.1995564,2.147684,2.1348383,2.267925,2.042225,1.9683046,2.090059,1.8560942]

**Method 1**
---



In [None]:
# labels --> the non-anomalous classes; in this section (class 3 held out) labels are 0 1 2 4 5 6 7 8 9

In [None]:
from tqdm import tqdm

# Running sums / counts of the max softmax value (per classifier) and of the
# normalized entropy (pooled over classifiers), kept separately for
# "in" cases (image class was seen by classifier i) and
# "out" cases (image class is the one classifier i left out).
entropy_in = 0
count_entr_in = 0
avg_in = [0] * 9
count_in = [0] * 9

entropy_out = 0
count_entr_out = 0
avg_out = [0] * 9
count_out = [0] * 9

for img, lbl in tqdm(zip(val_imgs3, val_lbl3)):
  img = img.reshape([-1, 32, 32, 3])

  for i,anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i; identical for both cases.
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])
    pred = tf.nn.softmax(logits)
    max_val = np.max(pred)
    # Entropy with base = number of outputs, i.e. normalized to [0, 1].
    entrpy = entropy(pred[0], base=len(pred[0]))

    if anmls_lbl == lbl:
      # Classifier i never saw this class: accumulate into the "out" statistics.
      avg_out[i] += max_val
      count_out[i] += 1
      entropy_out += entrpy
      count_entr_out += 1
    else:
      avg_in[i] += max_val
      count_in[i] += 1
      entropy_in += entrpy
      count_entr_in += 1


In [None]:
# Reference vectors: mean max-softmax value of each of the 9 classifiers on
# in-distribution and out-of-distribution validation images.
classifier_avg_in = [avg_in[k] / count_in[k] for k in range(9)]
classifier_avg_out = [avg_out[k] / count_out[k] for k in range(9)]

# Scalar thresholds: the plain mean of each reference vector.
treshold_value_in = 0.0
treshold_value_out = 0.0
for i in range(9):
  treshold_value_in += classifier_avg_in[i]
  treshold_value_out += classifier_avg_out[i]

treshold_value_in /= len(classifier_avg_in)
treshold_value_out /= len(classifier_avg_out)


In [None]:
# Show the per-classifier reference vectors and their scalar means.
print(classifier_avg_in)
print(classifier_avg_out)

print(treshold_value_in)
print(treshold_value_out)

# Entropy reference values: mean normalized entropy over all "in" / "out" cases.
entropy_ref_in_3 = entropy_in/count_entr_in
entropy_ref_out_3 = entropy_out/ count_entr_out

print(entropy_ref_in_3)
print(entropy_ref_out_3)

[0.7501892118851344, 0.7596409913860261, 0.7978991115652024, 0.7912408508012693, 0.7847143651681642, 0.7859532431041201, 0.7874377566153804, 0.7789744241833687, 0.7772557054969171]
[0.7246537010570367, 0.7046174917419752, 0.6460616075396538, 0.598983429312706, 0.6575559097131093, 0.6473199038902918, 0.6608945380151272, 0.6965498741964499, 0.7161806341807048]
0.779256184467287
0.672535232183006
0.2956545846560194
0.4244816606140079


In [None]:
# Cache the reference vectors and entropy references. The context manager flushes
# and closes the file so the reload cell reads a complete pickle.
with open(os.path.join('reference_vector_in_out.pkl'), "wb") as open_file:
  pickle.dump((classifier_avg_in,classifier_avg_out,entropy_ref_in_3,entropy_ref_out_3), open_file)

In [None]:
# Reload the cached reference vectors / entropy references.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook produced.
with open(os.path.join('reference_vector_in_out.pkl'), "rb") as open_file:
  ref_vector_in_3,ref_vector_out_3,entropy_ref_in_3,entropy_ref_out_3 = pickle.load(open_file)

In [None]:
# Run every leave-one-out classifier on each test image and record, per image:
#   [max softmax per classifier, predicted original label per classifier,
#    true class, ID/OOD flag, entropy averaged over the classifiers].
max_sm_all_wt_3 = []
ood = 0
ind= 1
for data, label in tqdm(zip(test_imgs, test_lbls)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []
  
  # Class 3 is the held-out (anomalous) class in this section.
  if label[0] == 3:
    lbl = ood
  else:
    lbl = ind
  
  for i,anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i.
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    # Reverse-map the output index to the original class label. Classifier i was
    # trained without labels[i], with the remaining classes remapped to 0..7 in
    # sorted order, so its label set is `labels` minus position i. The previous
    # code always deleted position 0, which mislabeled every classifier with i > 0.
    labels_removed_anmls = np.delete(labels, i)
    pred_lbl = labels_removed_anmls[pred_ind]

    prediction_lbl.append(pred_lbl)

    # Entropy with base = number of outputs, i.e. normalized to [0, 1].
    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_3.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

In [None]:
# Persist the per-image ensemble predictions. The context manager flushes and
# closes the handle, so the reload cell below never reads a half-written file.
with open(os.path.join('predictions_on_test.pkl'), "wb") as open_file:
  pickle.dump(max_sm_all_wt_3, open_file)

In [None]:
# Reload the cached predictions written by the previous cell.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook produced.
with open(os.path.join('predictions_on_test.pkl'), "rb") as open_file:
  max_sm_all_wt_3 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean ID/OOD decision)

In [None]:
from scipy.spatial import distance

# Binary codes: 1 = in-distribution (ID), 0 = out-of-distribution (OOD).
# NOTE(review): `id` shadows the Python builtin for the rest of the notebook.
id = 1
ood = 0
# Per-image similarity scores, hard decisions and ground truth, filled below.
sim_score_in, sim_score_out = [], []
sim_score_entr_in, sim_score_entr_out = [], []
sim_score_add = []
y_true, y_true_ood = [], []
y_pred, y_entr_pred = [], []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_3):
  # Similarity = 1 / (1 + Euclidean distance) of the max-softmax vector to each reference vector.
  sim_in = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_in_3))
  sim_out = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_out_3))
  sim_add = sim_in + sim_out

  # Same construction on the scalar average entropy.
  sim_entr_in = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_in_3))
  sim_entr_out = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_out_3))

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_add)

  # Hard decisions: ID only when strictly closer to the ID reference (ties go to OOD).
  prd = id if sim_in > sim_out else ood
  prd_e = id if sim_entr_in > sim_entr_out else ood

  # Flipped ground truth (OOD = 1) for the OOD-oriented scores. There is
  # deliberately no else branch: any flag other than 0/1 reuses the previous value.
  if lbl_id_or_ood == 1:
    label_true_ood = 0
  elif lbl_id_or_ood == 0:
    label_true_ood = 1

  y_pred.append(prd)
  y_entr_pred.append(prd_e)
  y_true.append(lbl_id_or_ood)
  y_true_ood.append(label_true_ood)

100%|██████████| 10000/10000 [00:01<00:00, 6372.50it/s]


In [None]:
from sklearn.metrics import roc_auc_score  # already imported at the top of the notebook
# AUROC of the reference-vector method (Method 1). `y_pred` / `y_entr_pred` are hard
# 0/1 decisions, so those two AUROC values each come from a single operating point.
print('Auroc with prediction as ID or OOD  ',roc_auc_score(y_true, y_pred))
print('Auroc prediction with sim score to in distribution only  :',roc_auc_score(y_true,sim_score_in))
# Scores oriented towards OOD are evaluated against the flipped labels (OOD = 1).
print('Auroc prediction with sim score to ood distribution only  :',roc_auc_score(y_true_ood,sim_score_out))

print('\nAuroc prediction with sim added score to for in and ood distribution only  :',roc_auc_score(y_true_ood,sim_score_add))

# Same three evaluations using the entropy-based similarities.
print('\nAuroc with prediction as ID or OOD Entropy ',roc_auc_score(y_true, y_entr_pred))
print('Auroc prediction with sim score to in distribution only Entropy  :',roc_auc_score(y_true,sim_score_entr_in))
print('Auroc prediction with sim score to out distribution only Entropy  :',roc_auc_score(y_true_ood,sim_score_entr_out))

Auroc with prediction as ID or OOD   0.6999444444444445
Auroc prediction with sim score to in distribution only  : 0.6861731111111111
Auroc prediction with sim score to ood distribution only  : 0.6236944444444443

Auroc prediction with sim added score to for in and ood distribution only  : 0.45428322222222217

Auroc with prediction as ID or OOD Entropy  0.7076111111111111
Auroc prediction with sim score to in distribution only Entropy  : 0.637313888888889
Auroc prediction with sim score to out distribution only Entropy  : 0.6358385555555555


Decision Rule 

In [None]:
# Simple decision rule: no 7-of-9 / 8-of-9 agreement threshold, just the majority vote.
# An image is called in-distribution when the most frequent ensemble label equals
# its true class label.
# NOTE(review): this rule reads the ground-truth class (`label_actual`), so it is
# not usable as a detector on unlabeled data - confirm this is intended.

id = 1
ood = 0
y_true = []
y_pred=[]

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_3):
  count = Counter(pred_lbl_each_cls)
  most_common = count.most_common(1)[0][0]  # label with the most votes
  pred = id if most_common == label_actual else ood
  y_pred.append(pred)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  


100%|██████████| 10000/10000 [00:00<00:00, 182791.80it/s]

 Auroc with simple decision rule about majority_:   0.7336666666666667





In [None]:
id = 1
ood = 0

In [None]:
# Agreement-based decision rule: an image is called in-distribution when at least
# 7 (resp. 8) of the leave-one-out classifiers vote for the same class.
# Relies on `id` / `ood` set in the previous cell.
y_true_dec_rule = []
y_pred_mr_than8_3 = []
y_pred_mr_than7_3 = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_3):
  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)  # size of the largest voting bloc

  y_pred_mr_than7_3.append(id if top_votes >= 7 else ood)
  y_pred_mr_than8_3.append(id if top_votes >= 8 else ood)

  # Class 3 is the held-out (anomalous) class in this section.
  y_true_dec_rule.append(ood if label_actual == 3 else id)

# The labels now say ">=" so they match the thresholds actually applied above.
print('\nAuroc for decision rule >= 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_3))
print('Auroc for decision rule >= 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_3))

100%|██████████| 10000/10000 [00:00<00:00, 26643.02it/s]


Auroc for decision rule > 7  0.6318888888888888
Auroc for decision rule > 8  0.5469444444444445





In [None]:
# Combined rule: an image is in-distribution only if it is (a) closer to the ID
# reference than to the OOD reference and (b) at least 7 / 8 classifiers agree on
# its label. Evaluated once with the max-softmax reference vectors and once with
# the entropy reference values.
sim_score_in =[]
sim_score_entr_in =[]
y_true_ = []
y_pred_mr_than7_3 = []
y_pred_mr_than8_3 = []
y_entr_pred_mr_than7_3 = []
y_entr_pred_mr_than8_3 = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_3):
  # Similarity = 1 / (1 + Euclidean distance) to each reference.
  sim_in = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_in_3))
  sim_out = 1 / (1 + distance.euclidean(max_pred_each_cls,ref_vector_out_3))
  sim_entr_in = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_in_3))
  sim_entr_out = 1 / (1 + distance.euclidean(avg_entr,entropy_ref_out_3))

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  largest_bloc = np.max(counts)            # votes received by the most popular label
  closer_to_in = sim_in > sim_out          # reference-vector test
  entr_closer_to_in = sim_entr_in > sim_entr_out  # entropy-reference test

  y_pred_mr_than7_3.append(id if closer_to_in and largest_bloc >= 7 else ood)
  y_pred_mr_than8_3.append(id if closer_to_in and largest_bloc >= 8 else ood)
  y_entr_pred_mr_than7_3.append(id if entr_closer_to_in and largest_bloc >= 7 else ood)
  y_entr_pred_mr_than8_3.append(id if entr_closer_to_in and largest_bloc >= 8 else ood)

  # Class 3 is the held-out (anomalous) class in this section.
  y_true_.append(ood if label_actual == 3 else id)


100%|██████████| 10000/10000 [00:01<00:00, 8360.61it/s]


In [None]:
# AUROC of the combined rules; all four prediction lists hold hard 0/1 decisions.
print('Decision Rule(>=7) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than7_3))
print('Decision Rule(>=8) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than8_3))
print('Decision Rule(>=7) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than7_3))
print('Decision Rule(>=8) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than8_3))

Decision Rule(>=7) + Reference Vector  0.6315000000000001
Decision Rule(>=8) + Reference Vector  0.5456666666666667
Decision Rule(>=7) + Entropy Reference Value  0.6277777777777778
Decision Rule(>=8) + Entropy Reference Value  0.5435555555555556


**Method 2**
---



Normal weighted SVM




In [None]:
#########################  after applying temperature scaling and softmax  ###########################################
######################### the y_pred used is In + ood distribution for the ensemble classifier ############################
# Builds the SVM training set: one row per (validation image, classifier) pair,
# holding that classifier's temperature-scaled softmax vector and a 0/1 label.
id = 1
ood = 0

new_dataset = []

for img, label in tqdm(zip(val_imgs3, val_lbl3)):
    img = img.reshape([-1,32,32,3])  # add a batch dimension of 1
    for i,lbl in enumerate(labels):
      # The image is OOD for classifier i exactly when its class is the one at labels[i].
      if label == lbl:
        new_label = ood
      else :
        new_label = id

      m = model_dict[i]
      y_p = m.predict(img)
      # temp_scaling / apply_softmax are defined earlier in the notebook;
      # presumably they divide by the temperature and apply softmax - TODO confirm.
      temp_new_y_pred = temp_scaling(y_p,temp_val[i])
      new_softmax_y_pred = models[i].apply_softmax(temp_new_y_pred) 
      new_softmax_y_pred = np.array(new_softmax_y_pred).tolist()  
      new_softmax_y_pred = new_softmax_y_pred[0]  # first (only) row of the batch
      new_dataset.append([new_softmax_y_pred,new_label])
      

0it [00:00, ?it/s]



13500it [1:01:15,  3.67it/s]


In [None]:
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions = df['predictions'].to_numpy()
new_labels = df['label'].to_numpy()

In [None]:
df.to_pickle("predictions_dataset.pkl")

In [None]:
df = pd.read_pickle('predictions_dataset.pkl')
df

Unnamed: 0,predictions,label
0,"[0.00020086750737391412, 0.16259394586086273, ...",1
1,"[0.0008949937182478607, 0.015803581103682518, ...",1
2,"[0.0002254879509564489, 3.738254292784404e-07,...",1
3,"[0.00021570987883023918, 2.9290036763995886e-0...",1
4,"[3.1819021387491375e-05, 4.748289484268753e-06...",1
...,...,...
121495,"[0.018406489863991737, 0.00011706836812663823,...",1
121496,"[0.004625416826456785, 0.0001379979366902262, ...",1
121497,"[0.0027103694155812263, 2.0260331439203583e-05...",1
121498,"[0.000491984304971993, 6.979791214689612e-05, ...",1


In [None]:
from collections import Counter
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Partition the [vector, flag] rows: flag 1 goes to in_data, everything else to out_data.
paired_rows = [[vec, flag] for vec, flag in zip(predictions, new_labels)]
in_data = [row for row in paired_rows if row[1] == 1]
out_data = [row for row in paired_rows if not row[1] == 1]

print(len(in_data),len(out_data))

# import random 
# in_data = random.sample(in_data,(len(out_data)))
# print(len(in_data))

108000 13500


In [None]:
in_data[1]

[[0.0008949937182478607,
  0.015803581103682518,
  0.018093254417181015,
  0.05366850271821022,
  0.8415220975875854,
  0.06980808824300766,
  2.1229579942882992e-05,
  0.0001882278302218765],
 1]

In [None]:
# Flatten the [vector, flag] rows into two parallel lists, in-distribution rows first.
# NOTE: `input` shadows the builtin, and `labels` rebinds the name that earlier
# cells iterate over as the list of class ids.
ordered_rows = in_data + out_data
input = [row[0] for row in ordered_rows]
labels = [row[1] for row in ordered_rows]
print(len(input),len(labels))

121500 121500


In [None]:
# Stratified 80/20 split of the SVM dataset. random_state is pinned so the split
# (and every score derived from it) is identical on each run.
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(
    input, labels, test_size=0.20, stratify=labels, random_state=42)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights are inversely proportional to class frequency. The arguments
# are passed by keyword because recent scikit-learn releases no longer accept
# them positionally.
svm_classes = np.unique(svm_train_lb)
svm_class_weights = compute_class_weight(class_weight='balanced', classes=svm_classes, y=svm_train_lb)
# Map each class id to its weight (for the 0/1 flags: {0: w0, 1: w1}).
class_wts = dict(zip(svm_classes.tolist(), svm_class_weights.tolist()))
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardise the features, then fit a class-weighted SVC on the training split.
weighted_svc = SVC(gamma='auto', class_weight=class_wts)
clf = make_pipeline(StandardScaler(), weighted_svc)
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
pred = clf.predict(svm_test_ip)
len(pred) ### svm fitted on whole data with weights

24300

In [None]:
clf.score(svm_test_ip,svm_test_lb) 

0.6651028806584363

In [None]:
# Score the test set: the softmax vector of each of the nine classifiers is
# classified by the SVM, and the most frequent SVM output becomes the prediction.
y_true = []
y_pred = []
for sample, sample_cls in tqdm(zip(test_imgs, test_lbls)):
    batch_of_one = sample.reshape([-1, 32, 32, 3])

    svm_votes = []
    for clf_idx in range(9):
        raw_scores = model_dict[clf_idx].predict(batch_of_one)
        scaled_scores = temp_scaling(raw_scores, temp_val[clf_idx])
        probs = models[clf_idx].apply_softmax(scaled_scores)
        feature_row = np.array(np.array(probs).tolist()[0]).reshape([1, -1])
        svm_votes.append(clf.predict(feature_row)[0])

    majority_vote = Counter(svm_votes).most_common()[0][0]
    y_pred.append(majority_vote)
    # Ground truth: class 3 is the anomaly (0); every other class is in-distribution (1).
    y_true.append(0 if sample_cls == 3 else 1)

10000it [49:07,  3.39it/s]


In [None]:
print(y_pred)
print(y_true)

[0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 

In [None]:
print(Counter(y_pred))
print(Counter(y_true))

Counter({1: 6111, 0: 3889})
Counter({1: 9000, 0: 1000})


In [None]:
# NOTE(review): y_pred holds hard 0/1 majority votes rather than continuous scores,
# so this AUROC is computed from a single operating point.
from sklearn.metrics import roc_auc_score  ##### weighted SVM
roc_auc_score(y_true, y_pred)

0.7322777777777778

In [None]:
df1= pd.DataFrame(y_pred, columns=["y_pred"])
df2= pd.DataFrame(y_true, columns=["y_true"])
df = pd.concat([df1, df2], axis=1)
df

Unnamed: 0,y_pred,y_true
0,0,0
1,1,1
2,0,1
3,1,1
4,1,1
...,...,...
9995,0,1
9996,0,0
9997,1,1
9998,0,1


In [None]:
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
y_true = df['y_true'].to_numpy().tolist()
y_pred = df['y_pred'].to_numpy().tolist()
print(y_true)
print(y_pred)

[0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 

# 2 as anomaly

In [None]:
import os
print(os.getcwd())
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 2/")
# os.chdir("/content/gdrive/My Drive/leave 6/")
print(os.listdir())

/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 0
['predictions_on_test.pkl', 'reference_vector_in_out.pkl']


In [None]:
# Drop class 2 (the anomaly for this section) and obtain the train/validation arrays.
train_imgs2, val_imgs2 , train_lbl2, val_lbl2 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[2])
print(train_imgs2.shape, val_imgs2.shape , train_lbl2.shape, val_lbl2.shape)
# Cache the split; the context manager closes (and therefore flushes) the file
# as soon as the dump finishes.
with open(os.path.join('Training_data.pkl'), "wb") as open_file:
    pickle.dump((train_imgs2, val_imgs2 , train_lbl2, val_lbl2 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Reload the cached split; the file is closed as soon as it has been read.
# NOTE: pickle.load executes arbitrary code from the file, so only load trusted files.
with open(os.path.join('Training_data.pkl'), "rb") as open_file:
    (train_imgs2, val_imgs2 , train_lbl2, val_lbl2 ) = pickle.load(open_file)
print(train_imgs2.shape, val_imgs2.shape , train_lbl2.shape, val_lbl2.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
print( np.unique(train_lbl2),len(np.unique(train_lbl2)))

[0 1 3 4 5 6 7 8 9] 9


In [None]:
# One Model wrapper per class that remains after the anomaly was removed.
models = [Model(name='2anomaly:classifier'+str(i)) for i in range(len(np.unique(train_lbl2)))]

names = [wrapper.name for wrapper in models]
names

['2anomaly:classifier0',
 '2anomaly:classifier1',
 '2anomaly:classifier2',
 '2anomaly:classifier3',
 '2anomaly:classifier4',
 '2anomaly:classifier5',
 '2anomaly:classifier6',
 '2anomaly:classifier7',
 '2anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Map original class ids onto contiguous indices 0..K-1.

  Args:
    train_lbl: training labels; each entry is a scalar or a single-element array.
    val_lbl: validation labels, same format.
    unique_lbl: the distinct class ids; the id at position k is mapped to k.

  Returns:
    (train_lbl, val_lbl) as 1-D NumPy arrays of remapped indices.

  Raises:
    KeyError: if a label does not occur in unique_lbl.
  """
  print(unique_lbl)
  # Built from however many classes are supplied instead of assuming exactly eight.
  index_map = {int(lbl): idx for idx, lbl in enumerate(unique_lbl)}
  # np.ravel flattens (N, 1) label columns so every entry converts cleanly to int.
  train_lbl = np.array([index_map[int(x)] for x in np.ravel(train_lbl)])
  val_lbl = np.array([index_map[int(x)] for x in np.ravel(val_lbl)])

  return train_lbl,val_lbl

In [None]:
labels = np.unique(train_lbl2)
labels

array([0, 1, 3, 4, 5, 6, 7, 8, 9], dtype=int32)

In [None]:
# Train one classifier per known class, each time leaving that class out of the data.
for indx, held_out_cls in enumerate(labels):
    train_x, train_y = remove_anomalous_class(train_imgs2, train_lbl2, val_req=False, anomalous_class=[held_out_cls])
    val_x, val_y = remove_anomalous_class(val_imgs2, val_lbl2, val_req=False, anomalous_class=[held_out_cls])
    unique_lbls = np.unique(train_y)
    print('before remapping', unique_lbls, np.unique(val_y))

    # Shift the surviving class ids onto a contiguous index range.
    train_y, val_y = remap_labels(train_y, val_y, unique_lbls)
    print(np.unique(train_y), np.unique(val_y))

    classifier = models[indx]
    classifier.build_model(8)
    classifier.train(train_x, train_y, val_x, val_y)
    classifier.save_model('classifier'+str(indx))

In [None]:
# Reload every saved classifier and record the first two values returned by
# evaluate() (stored as loss and accuracy) on its own train and validation subsets.
train_loss, train_acc = [], []
val_loss, val_acc = [], []

for indx, held_out_cls in enumerate(labels):
    train_x, train_y = remove_anomalous_class(train_imgs2, train_lbl2, val_req=False, anomalous_class=[held_out_cls])
    val_x, val_y = remove_anomalous_class(val_imgs2, val_lbl2, val_req=False, anomalous_class=[held_out_cls])
    unique_lbls = np.unique(train_y)
    train_y, val_y = remap_labels(train_y, val_y, unique_lbls)

    model = models[indx].load_model('classifier'+str(indx))
    train_metrics = model.evaluate(train_x, train_y)
    val_metrics = model.evaluate(val_x, val_y)

    train_loss.append(train_metrics[0])
    train_acc.append(train_metrics[1])
    val_loss.append(val_metrics[0])
    val_acc.append(val_metrics[1])

[1 3 4 5 6 7 8 9]
[0 3 4 5 6 7 8 9]
[0 1 4 5 6 7 8 9]
[0 1 3 5 6 7 8 9]
[0 1 3 4 6 7 8 9]
[0 1 3 4 5 7 8 9]
[0 1 3 4 5 6 8 9]
[0 1 3 4 5 6 7 9]
[0 1 3 4 5 6 7 8]


In [None]:
print('training accuracy average :',np.average(train_acc))
print('validation accuracy average :',np.average(val_acc))

training accuracy average : 0.8838134871588813
validation accuracy average : 0.7596296270688375


In [None]:
print(train_acc)
print(val_acc)

[0.9018928408622742, 0.8645356893539429, 0.926892876625061, 0.8718571662902832, 0.8947857022285461, 0.8243571519851685, 0.8860714435577393, 0.8934999704360962, 0.8904285430908203]
[0.7605833411216736, 0.737416684627533, 0.8061666488647461, 0.7605833411216736, 0.784500002861023, 0.7133333086967468, 0.7668333053588867, 0.7538333535194397, 0.7534166574478149]


In [None]:
classifiers = [models[i].load_model('classifier'+str(i)) for i in range(len(models))]

In [None]:
classifiers

[<tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b27dc990>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b11be050>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b6301d10>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b11eaa50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b4e8f650>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b2708a50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8afd83f10>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8b1086410>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7fd8af221ed0>]

In [None]:
layer_name_list = [classifier.layers[-2:][0].name for classifier in classifiers]
layer_name_list

['dense_1',
 'dense_3',
 'dense_5',
 'dense_7',
 'dense_9',
 'dense_11',
 'dense_13',
 'dense_15',
 'dense_17']

In [None]:
# For each trained classifier, build a sub-model that stops at the layer named in
# layer_name_list.
# tf.keras.Model is referenced through `tf` rather than via `from keras import Model`:
# that import rebinds the notebook's own `Model` class, so any later
# `Model(name=...)` cell would construct a Keras model instead of the wrapper.
model_dict = {}

for i in range(len(models)):
  model_output_i = classifiers[i].get_layer(layer_name_list[i]).output
  m0 = tf.keras.Model(inputs=classifiers[i].input, outputs=model_output_i)
  model_dict[i] = m0

In [None]:
# Collect each classifier's outputs on the validation images that remain after
# the class paired with it is removed.
y_pred = []
for clf_idx, held_out_cls in enumerate(labels):
    val_x, val_y = remove_anomalous_class(val_imgs2, val_lbl2, val_req=False, anomalous_class=[held_out_cls])
    print(held_out_cls, np.unique(val_y))
    y_pred.append(model_dict[clf_idx].predict(val_x))

0 [1 3 4 5 6 7 8 9]
1 [0 3 4 5 6 7 8 9]
3 [0 1 4 5 6 7 8 9]
4 [0 1 3 5 6 7 8 9]
5 [0 1 3 4 6 7 8 9]
6 [0 1 3 4 5 7 8 9]
7 [0 1 3 4 5 6 8 9]
8 [0 1 3 4 5 6 7 9]
9 [0 1 3 4 5 6 7 8]


In [None]:
len(y_pred)

9

In [None]:
# Fit one temperature per classifier from its validation outputs.
temp_val = []
for clf_idx, held_out_cls in enumerate(labels):
    val_x, val_y = remove_anomalous_class(val_imgs2, val_lbl2, val_req=False, anomalous_class=[held_out_cls])
    unique_lbls = np.unique(val_y)
    print(unique_lbls, y_pred[clf_idx].shape)

    # remap_labels returns a (train, val) pair; the same labels are passed in both
    # slots here and the second result is kept.
    remapped_pair = remap_labels(val_y, val_y, unique_lbls)
    val_y = remapped_pair[1]

    temp_val.append(temp_cal(y_pred[clf_idx], val_y, len(unique_lbls)))

[1 3 4 5 6 7 8 9] (12000, 8)
[1 3 4 5 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.1629583835601807
[0 3 4 5 6 7 8 9] (12000, 8)
[0 3 4 5 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.194932222366333
[0 1 4 5 6 7 8 9] (12000, 8)
[0 1 4 5 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.058931827545166
[0 1 3 5 6 7 8 9] (12000, 8)
[0 1 3 5 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.0847392082214355
[0 1 3 4 6 7 8 9] (12000, 8)
[0 1 3 4 6 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.1468346118927
[0 1 3 4 5 7 8 9] (12000, 8)
[0 1 3 4 5 7 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.2035582065582275
[0 1 3 4 5 6 8 9] (12000, 8)
[0 1 3 4 5 6 8 9]
Temperature Initial value: 1.0
Temperature Final value: 2.090282917022705
[0 1 3 4 5 6 7 9] (12000, 8)
[0 1 3 4 5 6 7 9]
Temperature Initial value: 1.0
Temperature Final value: 2.0590269565582275
[0 1 3 4 5 6 7 8] (12000, 8)
[

In [None]:
temp_val

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1629584>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1949322>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0589318>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0847392>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1468346>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.2035582>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.090283>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.059027>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0701303>]

In [None]:
temp_val = [2.1629584,2.1949322,2.0589318,2.0847392,2.1468346,2.2035582,2.090283,2.059027,2.0701303]

**Method 1**
---



In [None]:
# labels --> the non-anomalous classes; in this section (class 2 held out) labels are 0 1 3 4 5 6 7 8 9

In [None]:
from tqdm import tqdm

# Running totals, kept separately for "in" pairs (image class differs from the
# class paired with the classifier) and "out" pairs (image class equals it).
entropy_in = 0
count_entr_in = 0
avg_in = [0] * 9
count_in = [0] * 9

entropy_out = 0
count_entr_out = 0
avg_out = [0] * 9
count_out = [0] * 9

for sample, sample_cls in tqdm(zip(val_imgs2, val_lbl2)):
  batch_of_one = sample.reshape([-1, 32, 32, 3])

  for clf_idx, held_out_cls in enumerate(labels):
    # Temperature-scaled softmax of this classifier for the image.
    scaled_logits = tf.math.divide(model_dict[clf_idx](batch_of_one), temp_val[clf_idx])
    pred = tf.nn.softmax(scaled_logits)
    top_prob = np.max(pred)
    # Entropy with the number of outputs as the log base.
    entrpy = entropy(pred[0], base=len(pred[0]))

    if held_out_cls == sample_cls:
      avg_out[clf_idx] += top_prob
      count_out[clf_idx] += 1
      entropy_out += entrpy
      count_entr_out += 1
    else:
      avg_in[clf_idx] += top_prob
      count_in[clf_idx] += 1
      entropy_in += entrpy
      count_entr_in += 1


13500it [17:28, 12.87it/s]


In [None]:
# Per-classifier mean of the top softmax probability for "in" and "out" pairs.
classifier_avg_in = [total / seen for total, seen in zip(avg_in, count_in)]
classifier_avg_out = [total / seen for total, seen in zip(avg_out, count_out)]

# Average the per-classifier means into a single value per group.
treshold_value_in = 0.0
for mean_in in classifier_avg_in:
  treshold_value_in += mean_in
treshold_value_in /= len(classifier_avg_in)

treshold_value_out = 0.0
for mean_out in classifier_avg_out:
  treshold_value_out += mean_out
treshold_value_out /= len(classifier_avg_out)


In [None]:
print(classifier_avg_in)
print(classifier_avg_out)

print(treshold_value_in)
print(treshold_value_out)

entropy_ref_in_2 = entropy_in/count_entr_in
entropy_ref_out_2 = entropy_out/ count_entr_out

print(entropy_ref_in_2)
print(entropy_ref_out_2)

[0.7651020913943648, 0.7465623073515792, 0.7940352811714013, 0.7636011504183213, 0.7836516065374016, 0.7222282663484414, 0.7719387381449342, 0.7555050459342698, 0.7568711580000818]
[0.6354738906125228, 0.6920262730518977, 0.6391327868103981, 0.6238533188501993, 0.7351547516187033, 0.6568210136195024, 0.627321061750253, 0.7526426495114962, 0.7084719334244728]
0.7621661828111994
0.6745441865832718
0.3147462503491272
0.4216787373580428


In [None]:
# Persist the reference vectors and entropy references; the context manager
# closes (and flushes) the file once the dump completes.
with open(os.path.join('reference_vector_in_out.pkl'), "wb") as open_file:
    pickle.dump((classifier_avg_in,classifier_avg_out,entropy_ref_in_2,entropy_ref_out_2), open_file)

In [None]:
# Reload the reference values; the file is closed as soon as it has been read.
# NOTE: pickle.load executes arbitrary code from the file, so only load trusted files.
with open(os.path.join('reference_vector_in_out.pkl'), "rb") as open_file:
    ref_vector_in_2,ref_vector_out_2,entropy_ref_in_2,entropy_ref_out_2 = pickle.load(open_file)

In [None]:
# Run every test image through all leave-one-out classifiers and store, per image:
# [max softmax per classifier, predicted class id per classifier, true class id,
#  ID/OOD flag, entropy averaged over the classifiers].
max_sm_all_wt_2 = []
ood = 0
ind= 1
for data, label in tqdm(zip(test_imgs, test_lbls)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []

  # Class 2 is the anomaly in this section.
  if label[0] == 2:
    lbl = ood
  else:
    lbl = ind

  for i,anmls_lbl in enumerate(labels):
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])   # temperature scaling
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    # Classifier i was trained without labels[i], so its output index k corresponds
    # to the k-th entry of `labels` once position i (not position 0) is removed.
    labels_removed_anmls = np.delete(labels,i)
    pred_lbl = labels_removed_anmls[pred_ind]   ### reverse mapping the actual label

    prediction_lbl.append(pred_lbl)

    # Entropy with the number of outputs as the log base.
    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_2.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

10000it [13:28, 12.36it/s]


In [None]:
# Persist the per-image test predictions; the context manager closes (and flushes)
# the file once the dump completes.
with open(os.path.join('predictions_on_test.pkl'), "wb") as open_file:
    pickle.dump(max_sm_all_wt_2, open_file)

In [None]:
# Reload the per-image test predictions; the file is closed as soon as it is read.
# NOTE: pickle.load executes arbitrary code from the file, so only load trusted files.
with open(os.path.join('predictions_on_test.pkl'), "rb") as open_file:
    max_sm_all_wt_2 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean distance to the ID/OOD references)

In [None]:
from scipy.spatial import distance

# For every test image, compare its per-classifier max-softmax vector and its mean
# entropy with the in/out reference values, and turn the distances into
# similarity scores 1 / (1 + distance).
id = 1
ood = 0
sim_score_in =[]
sim_score_out = []
sim_score_entr_in =[]
sim_score_entr_out = []
sim_score_add = []
y_true = []
y_pred=[]
y_entr_pred=[]
y_true_ood = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_2):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_2)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_2)

  # The entropy values are scalars; their Euclidean distance is the absolute
  # difference. Computing it directly avoids passing 0-d inputs to
  # distance.euclidean, which newer SciPy releases reject as not 1-D.
  dist_entr_in = abs(avg_entr - entropy_ref_in_2)
  dist_entr_out = abs(avg_entr - entropy_ref_out_2)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)
  sim_add = sim_in + sim_out

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_add)

  # Hard decisions: whichever reference is closer wins.
  prd = id if sim_in > sim_out else ood
  prd_e = id if sim_entr_in > sim_entr_out else ood

  # Inverted ground truth (1 = OOD), used when scoring similarity to the OOD reference.
  # Always assigned, so a value from the previous iteration can never be reused.
  label_true_ood = 0 if lbl_id_or_ood == 1 else 1

  y_pred.append(prd)
  y_entr_pred.append(prd_e)
  y_true.append(lbl_id_or_ood)
  y_true_ood.append(label_true_ood)

100%|██████████| 10000/10000 [00:01<00:00, 6467.80it/s]


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
print('Auroc with prediction as ID or OOD  ',roc_auc_score(y_true, y_pred))
print('Auroc prediction with sim score to in distribution only  :',roc_auc_score(y_true,sim_score_in))
print('Auroc prediction with sim score to ood distribution only  :',roc_auc_score(y_true_ood,sim_score_out))

print('\nAuroc prediction with sim added score to for in and ood distribution only  :',roc_auc_score(y_true_ood,sim_score_add))

print('\nAuroc with prediction as ID or OOD Entropy ',roc_auc_score(y_true, y_entr_pred))
print('Auroc prediction with sim score to in distribution only Entropy  :',roc_auc_score(y_true,sim_score_entr_in))
print('Auroc prediction with sim score to out distribution only Entropy  :',roc_auc_score(y_true_ood,sim_score_entr_out))

Auroc with prediction as ID or OOD   0.7003888888888888
Auroc prediction with sim score to in distribution only  : 0.6860114444444445
Auroc prediction with sim score to ood distribution only  : 0.5822528888888888

Auroc prediction with sim added score to for in and ood distribution only  : 0.4381471111111111

Auroc with prediction as ID or OOD Entropy  0.7168888888888889
Auroc prediction with sim score to in distribution only Entropy  : 0.6481305555555557
Auroc prediction with sim score to out distribution only Entropy  : 0.5901391111111112


Decision Rule 

In [None]:
############################### simple decision rule: no 7-or-8 agreement threshold, just the majority prediction ##################
# NOTE(review): the majority vote is compared with label_actual (the ground-truth
# class), so the "prediction" depends on the true label — confirm this is intended
# before reporting the resulting AUROC as a detection score.

id = 1
ood = 0
y_true = []
y_pred=[]

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_2):
  # Most frequent label among the per-classifier predictions.
  count = Counter(pred_lbl_each_cls)
  most_common= count.most_common(1)[0][0]

  if most_common == label_actual:
    pred = id
  else:
    pred = ood 
  y_pred.append(pred)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 186483.96it/s]

 Auroc with simple decision rule about majority_:   0.7400555555555556





In [None]:
id = 1
ood = 0

In [None]:
# Agreement rule: an image is called in-distribution when at least 7 (or at least 8)
# of the classifiers predict the same label.
y_true_dec_rule = []
y_pred_mr_than8_2 = []
y_pred_mr_than7_2 = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_2):
  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)   # size of the largest group of agreeing classifiers

  y_pred_mr_than7_2.append(id if top_votes >= 7 else ood)
  y_pred_mr_than8_2.append(id if top_votes >= 8 else ood)

  # Class 2 is the anomaly in this section.
  y_true_dec_rule.append(ood if label_actual == 2 else id)

# The printed labels now state the thresholds the code applies (>=, not >).
print('\nAuroc for decision rule >= 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_2))
print('Auroc for decision rule >= 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_2))

100%|██████████| 10000/10000 [00:00<00:00, 25748.72it/s]



Auroc for decision rule > 7  0.6101111111111112
Auroc for decision rule > 8  0.5323888888888889


In [None]:
# Combined rule: an image is called in-distribution only when the reference test
# (vector- or entropy-based) says "in" AND at least 7 / 8 classifiers agree.
sim_score_in =[]
sim_score_entr_in =[]
y_true_ = []
y_pred_mr_than7_2 = []
y_pred_mr_than8_2 = []
y_entr_pred_mr_than7_2 = []
y_entr_pred_mr_than8_2 = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_2):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_2)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_2)

  # Scalars: the Euclidean distance is the absolute difference. Computing it
  # directly avoids passing 0-d inputs to distance.euclidean, which newer SciPy
  # releases reject as not 1-D.
  dist_entr_in = abs(avg_entr - entropy_ref_in_2)
  dist_entr_out = abs(avg_entr - entropy_ref_out_2)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)   # size of the largest group of agreeing classifiers
  vector_says_id = sim_in > sim_out
  entropy_says_id = sim_entr_in > sim_entr_out

  y_pred_mr_than7_2.append(id if (vector_says_id and top_votes >= 7) else ood)
  y_pred_mr_than8_2.append(id if (vector_says_id and top_votes >= 8) else ood)
  y_entr_pred_mr_than7_2.append(id if (entropy_says_id and top_votes >= 7) else ood)
  y_entr_pred_mr_than8_2.append(id if (entropy_says_id and top_votes >= 8) else ood)

  # Class 2 is the anomaly in this section.
  y_true_.append(ood if label_actual == 2 else id)


100%|██████████| 10000/10000 [00:01<00:00, 7026.84it/s]


In [None]:
print('Decision Rule(>=7) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than7_2))
print('Decision Rule(>=8) + Reference Vector ',roc_auc_score(y_true_,y_pred_mr_than8_2))
print('Decision Rule(>=7) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than7_2))
print('Decision Rule(>=8) + Entropy Reference Value ',roc_auc_score(y_true_,y_entr_pred_mr_than8_2))

Decision Rule(>=7) + Reference Vector  0.6089444444444444
Decision Rule(>=8) + Reference Vector  0.5316111111111111
Decision Rule(>=7) + Entropy Reference Value  0.609111111111111
Decision Rule(>=8) + Entropy Reference Value  0.5326111111111111


**Method 2**
---



Normal weighted SVM




In [None]:
# Build the SVM dataset from the validation split, after applying temperature
# scaling and softmax: every (image, classifier) pair contributes one row holding
# the classifier's probability vector and an ID/OOD flag.
id = 1
ood = 0

new_dataset = []

for sample, sample_cls in tqdm(zip(val_imgs2, val_lbl2)):
    batch_of_one = sample.reshape([-1, 32, 32, 3])
    for clf_idx, held_out_cls in enumerate(labels):
        # A row is flagged OOD when the image's class equals the class paired
        # with this classifier in `labels`.
        flag = ood if sample_cls == held_out_cls else id

        raw_scores = model_dict[clf_idx].predict(batch_of_one)
        scaled_scores = temp_scaling(raw_scores, temp_val[clf_idx])
        probs = models[clf_idx].apply_softmax(scaled_scores)
        new_dataset.append([np.array(probs).tolist()[0], flag])
      

0it [00:00, ?it/s]



13500it [1:02:18,  3.61it/s]


In [None]:
# Build the frame from the rows collected in new_dataset. With this line commented
# out, `df` still referred to whichever frame an earlier cell had left behind.
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions = df['predictions'].to_numpy()
new_labels = df['label'].to_numpy()

In [None]:
df.to_pickle("predictions_dataset.pkl")

In [None]:
df = pd.read_pickle('predictions_dataset.pkl')
df

Unnamed: 0,predictions,label
0,"[0.005744569469243288, 0.5289509892463684, 0.0...",1
1,"[0.021808115765452385, 0.2936646640300751, 0.3...",1
2,"[0.01396484300494194, 0.033244773745536804, 0....",1
3,"[0.001782756531611085, 0.005255561787635088, 0...",1
4,"[3.571277920855209e-05, 0.0017310472903773189,...",0
...,...,...
121495,"[0.00010092231241287664, 3.296816430520266e-05...",1
121496,"[0.0007023076759651303, 0.000256382510997355, ...",1
121497,"[2.6772531782626174e-05, 5.97183607169427e-05,...",1
121498,"[0.0008424821426160634, 5.2488558139884844e-05...",1


In [None]:
from collections import Counter
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Partition the [vector, flag] rows: flag 1 goes to in_data, everything else to out_data.
paired_rows = [[vec, flag] for vec, flag in zip(predictions, new_labels)]
in_data = [row for row in paired_rows if row[1] == 1]
out_data = [row for row in paired_rows if not row[1] == 1]

print(len(in_data),len(out_data))

# import random 
# in_data = random.sample(in_data,(len(out_data)))
# print(len(in_data))

108000 13500


In [None]:
in_data[1]

[[0.021808115765452385,
  0.2936646640300751,
  0.30976080894470215,
  0.035158995538949966,
  0.12291385978460312,
  0.07378963381052017,
  0.06677697598934174,
  0.07612696290016174],
 1]

In [None]:
# Flatten the [vector, flag] rows into two parallel lists, in-distribution rows first.
# NOTE: `input` shadows the builtin, and `labels` rebinds the name that earlier
# cells iterate over as the list of class ids.
ordered_rows = in_data + out_data
input = [row[0] for row in ordered_rows]
labels = [row[1] for row in ordered_rows]
print(len(input),len(labels))

121500 121500


In [None]:
# Stratified 80/20 split of the SVM dataset. random_state is pinned so the split
# (and every score derived from it) is identical on each run.
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(
    input, labels, test_size=0.20, stratify=labels, random_state=42)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights are inversely proportional to class frequency. The arguments
# are passed by keyword because recent scikit-learn releases no longer accept
# them positionally.
svm_classes = np.unique(svm_train_lb)
svm_class_weights = compute_class_weight(class_weight='balanced', classes=svm_classes, y=svm_train_lb)
# Map each class id to its weight (for the 0/1 flags: {0: w0, 1: w1}).
class_wts = dict(zip(svm_classes.tolist(), svm_class_weights.tolist()))
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardise the features, then fit a class-weighted SVC on the training split.
weighted_svc = SVC(gamma='auto', class_weight=class_wts)
clf = make_pipeline(StandardScaler(), weighted_svc)
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
pred = clf.predict(svm_test_ip)
len(pred) ### svm fitted on whole data with weights

24300

In [None]:
clf.score(svm_test_ip,svm_test_lb) 

0.649753086419753

In [None]:
# Score the test set: the softmax vector of each of the nine classifiers is
# classified by the SVM, and the most frequent SVM output becomes the prediction.
y_true = []
y_pred = []
for sample, sample_cls in tqdm(zip(test_imgs, test_lbls)):
    batch_of_one = sample.reshape([-1, 32, 32, 3])

    svm_votes = []
    for clf_idx in range(9):
        raw_scores = model_dict[clf_idx].predict(batch_of_one)
        scaled_scores = temp_scaling(raw_scores, temp_val[clf_idx])
        probs = models[clf_idx].apply_softmax(scaled_scores)
        feature_row = np.array(np.array(probs).tolist()[0]).reshape([1, -1])
        svm_votes.append(clf.predict(feature_row)[0])

    majority_vote = Counter(svm_votes).most_common()[0][0]
    y_pred.append(majority_vote)
    # Ground truth: class 2 is the anomaly (0); every other class is in-distribution (1).
    y_true.append(0 if sample_cls == 2 else 1)

0it [00:00, ?it/s]



10000it [49:52,  3.34it/s]


In [None]:
print(y_pred)
print(y_true)

[0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 

In [None]:
print(Counter(y_pred))
print(Counter(y_true))

Counter({1: 5977, 0: 4023})
Counter({1: 9000, 0: 1000})


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
roc_auc_score(y_true, y_pred)

0.6848333333333334

In [None]:
df1= pd.DataFrame(y_pred, columns=["y_pred"])
df2= pd.DataFrame(y_true, columns=["y_true"])
df = pd.concat([df1, df2], axis=1)
df

Unnamed: 0,y_pred,y_true
0,0,1
1,1,1
2,0,1
3,1,1
4,1,1
...,...,...
9995,1,1
9996,0,1
9997,1,1
9998,0,1


In [None]:
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
y_true = df['y_true'].to_numpy().tolist()
y_pred = df['y_pred'].to_numpy().tolist()
print(y_true)
print(y_pred)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

# 1 as anomaly

In [None]:
# Switch the working directory to the Drive folder for the "class 1 held out" experiment,
# so the relative pickle/model paths used by the following cells resolve inside it.
# NOTE(review): absolute Colab/Drive path - requires the Drive mount from the top of the notebook.
import os
print(os.getcwd())
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 1/")
# Alternative location kept for reference:
# os.chdir("/content/gdrive/My Drive/leave 1/")
print(os.listdir())

/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 1
['reference_vector_in_out.pkl', 'predictions_on_test.pkl']


In [None]:
# Hold out class 1 as the anomaly. remove_anomalous_class is defined elsewhere in the notebook;
# presumably it drops every sample of the listed classes and, with val_req=True, also returns a
# validation split (see the shapes printed below).
train_imgs1, val_imgs1 , train_lbl1, val_lbl1 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[1])
print(train_imgs1.shape, val_imgs1.shape , train_lbl1.shape, val_lbl1.shape)
# The context manager flushes and closes the file, so the next cell never reads a
# partially written pickle from a still-open write handle.
with open('Training_data.pkl', "wb") as open_file:
  pickle.dump((train_imgs1, val_imgs1 , train_lbl1, val_lbl1 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Reload the cached train/validation split; the handle is closed as soon as the tuple is read.
# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote itself.
with open('Training_data.pkl', "rb") as open_file:
  (train_imgs1, val_imgs1 , train_lbl1, val_lbl1 )= pickle.load(open_file)
print(train_imgs1.shape, val_imgs1.shape , train_lbl1.shape, val_lbl1.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
print( np.unique(train_lbl1),len(np.unique(train_lbl1)))

[0 2 3 4 5 6 7 8 9] 9


In [None]:
# One wrapper per remaining class: classifier k will be trained with the k-th remaining class held out.
n_classifiers = len(np.unique(train_lbl1))
models = [Model(name='1anomaly:classifier'+str(k)) for k in range(n_classifiers)]

names = [wrapper.name for wrapper in models]
names

['1anomaly:classifier0',
 '1anomaly:classifier1',
 '1anomaly:classifier2',
 '1anomaly:classifier3',
 '1anomaly:classifier4',
 '1anomaly:classifier5',
 '1anomaly:classifier6',
 '1anomaly:classifier7',
 '1anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Re-index raw class ids onto contiguous indices 0..K-1.

  The k-th entry of ``unique_lbl`` is mapped to index k. Works for any number
  of classes (the previous version was hard-wired to exactly eight).

  Args:
    train_lbl: training labels, either flat or shaped (N, 1); flattened before mapping.
    val_lbl: validation labels, same layout as ``train_lbl``.
    unique_lbl: the distinct class ids, in the order that defines the new indices.

  Returns:
    (train_lbl, val_lbl) as 1-D numpy arrays of remapped indices.

  Raises:
    KeyError: if a label does not appear in ``unique_lbl``.
  """
  print(unique_lbl)
  # Plain-int keys so lookups do not depend on the numpy dtype of the labels.
  index_map = {int(lbl): idx for idx, lbl in enumerate(unique_lbl)}
  # np.ravel turns (N, 1) label columns into scalars, avoiding int() on 1-element arrays.
  train_lbl = np.array([index_map[int(x)] for x in np.ravel(train_lbl)])
  val_lbl = np.array([index_map[int(x)] for x in np.ravel(val_lbl)])

  return train_lbl,val_lbl

In [None]:
labels = np.unique(train_lbl1)
labels

array([0, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)

In [None]:
# Train one leave-one-class-out classifier per remaining class and save it as 'classifier<indx>'.
for indx, held_out in enumerate(labels):
  # Remove the held-out class from both the training and the validation split.
  train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(
      train_imgs1, train_lbl1, val_req=False, anomalous_class=[held_out])
  val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(
      val_imgs1, val_lbl1, val_req=False, anomalous_class=[held_out])

  unique_lbls = np.unique(train_lbls_wt_clas_i)
  print('before remapping', unique_lbls, np.unique(val_lbls__wt_clas_i))

  # Re-index the 8 surviving class ids to 0..7 so they match the 8-way output layer.
  train_lbls_wt_clas_i, val_lbls__wt_clas_i = remap_labels(
      train_lbls_wt_clas_i, val_lbls__wt_clas_i, unique_lbls)
  print(np.unique(train_lbls_wt_clas_i), np.unique(val_lbls__wt_clas_i))

  wrapper = models[indx]
  wrapper.build_model(8)
  wrapper.train(train_imgs_wt_clas_i, train_lbls_wt_clas_i, val_imgs_wt_clas_i, val_lbls__wt_clas_i)
  wrapper.save_model(f'classifier{indx}')

In [None]:
# Reload every saved classifier and record its loss/accuracy on its own train and validation subsets.
train_loss, train_acc = [], []
val_loss, val_acc = [], []

for indx, held_out in enumerate(labels):
  train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(
      train_imgs1, train_lbl1, val_req=False, anomalous_class=[held_out])
  val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(
      val_imgs1, val_lbl1, val_req=False, anomalous_class=[held_out])

  unique_lbls = np.unique(train_lbls_wt_clas_i)
  train_lbls_wt_clas_i, val_lbls__wt_clas_i = remap_labels(
      train_lbls_wt_clas_i, val_lbls__wt_clas_i, unique_lbls)

  model = models[indx].load_model(f'classifier{indx}')
  # evaluate() results are indexed as [loss, accuracy] below.
  train_ = model.evaluate(train_imgs_wt_clas_i, train_lbls_wt_clas_i)
  val_ = model.evaluate(val_imgs_wt_clas_i, val_lbls__wt_clas_i)

  train_loss.append(train_[0])
  train_acc.append(train_[1])
  val_loss.append(val_[0])
  val_acc.append(val_[1])

[2 3 4 5 6 7 8 9]
[0 3 4 5 6 7 8 9]
[0 2 4 5 6 7 8 9]
[0 2 3 5 6 7 8 9]
[0 2 3 4 6 7 8 9]
[0 2 3 4 5 7 8 9]
[0 2 3 4 5 6 8 9]
[0 2 3 4 5 6 7 9]
[0 2 3 4 5 6 7 8]


In [None]:
print('training accuracy average :',np.average(train_acc))
print('validation accuracy average :',np.average(val_acc))

training accuracy average : 0.8946269883049859
validation accuracy average : 0.7434814771016439


In [None]:
print(train_acc)
print(val_acc)

[0.9001071453094482, 0.9038571715354919, 0.9236785769462585, 0.9003571271896362, 0.9088571667671204, 0.9160714149475098, 0.9018571376800537, 0.8357499837875366, 0.8611071705818176]
[0.7425833344459534, 0.7494999766349792, 0.7873333096504211, 0.753166675567627, 0.7649999856948853, 0.7593333125114441, 0.734333336353302, 0.6930000185966492, 0.7070833444595337]


In [None]:
classifiers = [models[i].load_model('classifier'+str(i)) for i in range(len(models))]

In [None]:
classifiers

In [None]:
# Name of the second-to-last layer of each classifier (`layers[-2:][0]`); the next cell
# taps this layer's output, which later cells divide by a temperature and feed to softmax.
layer_name_list = [classifier.layers[-2:][0].name for classifier in classifiers]
layer_name_list

In [None]:
# Build one truncated model per classifier that outputs the layer named in layer_name_list
# (the second-to-last layer), keyed by classifier index.
#
# tf.keras.Model is referenced explicitly instead of `from keras import Model`: that import
# rebinds the global name `Model`, shadowing this notebook's own `Model` wrapper class, so a
# later `Model(name=...)` would build a bare Keras model without build_model/train/save_model.
model_dict = {}

for i in range(len(models)):
  model_output_i = classifiers[i].get_layer(layer_name_list[i]).output
  m0 = tf.keras.Model(inputs=classifiers[i].input, outputs=model_output_i)
  model_dict[i] = m0

In [None]:
# Collect, for every classifier, the truncated-model outputs on the validation samples
# of the classes it was trained on (its own held-out class removed).
y_pred = []
for i, label in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs1, val_lbl1, val_req=False, anomalous_class=[label])
  print(label, np.unique(val_lbls_wt_clas_i))
  y_pred.append(model_dict[i].predict(val_imgs_wt_clas_i))

0 [2 3 4 5 6 7 8 9]
2 [0 3 4 5 6 7 8 9]
3 [0 2 4 5 6 7 8 9]
4 [0 2 3 5 6 7 8 9]
5 [0 2 3 4 6 7 8 9]
6 [0 2 3 4 5 7 8 9]
7 [0 2 3 4 5 6 8 9]
8 [0 2 3 4 5 6 7 9]
9 [0 2 3 4 5 6 7 8]


In [None]:
len(y_pred)

9

In [None]:
# Fit one softmax temperature per classifier on its validation outputs (temp_cal is defined elsewhere).
temp_val = []
for i, label in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs1, val_lbl1, val_req=False, anomalous_class=[label])
  print(np.unique(val_lbls_wt_clas_i), y_pred[i].shape)

  unique_lbls = np.unique(val_lbls_wt_clas_i)
  # Only the remapped validation labels are needed; they are passed for both arguments.
  val_lbls_wt_clas_i = remap_labels(val_lbls_wt_clas_i, val_lbls_wt_clas_i, unique_lbls)[1]

  temp_val.append(temp_cal(y_pred[i], val_lbls_wt_clas_i, len(unique_lbls)))

In [None]:
temp_val

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.103368>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.070417>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.9260864>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0121164>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.9074483>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0800858>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.1147597>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.238107>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.9419119>]

In [None]:
# Hard-coded copy of the nine temperatures displayed by the previous cell, stored as plain
# floats instead of tf.Variable objects (one entry per classifier, in `labels` order).
temp_val = [2.103368,2.070417,1.9260864,2.0121164,1.9074483,2.0800858,2.1147597,2.238107,1.9419119]

**Method 1**
---



In [None]:
# labels --> the non-anomalous classes; in this section (class 1 held out) labels are 0 2 3 4 5 6 7 8 9

In [None]:
from tqdm import tqdm

# Accumulators for the reference statistics, one slot per classifier.
# "in"  : the sample's class differs from the class classifier i held out.
# "out" : the sample's class is exactly the class classifier i held out.
entropy_in = 0
count_entr_in = 0
avg_in = [0] * 9
count_in = [0] * 9

entropy_out = 0
count_entr_out = 0
avg_out = [0] * 9
count_out = [0] * 9

for img, lbl in tqdm(zip(val_imgs1, val_lbl1)):
  img = img.reshape([-1, 32, 32, 3])

  for i, anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i for this single image.
    logits = tf.math.divide(model_dict[i](img), temp_val[i])
    pred = tf.nn.softmax(logits)
    max_val = np.max(pred)
    # Entropy normalised by using the number of outputs as the log base.
    entrpy = entropy(pred[0], base=len(pred[0]))

    if anmls_lbl == lbl:
      avg_out[i] += max_val
      count_out[i] += 1
      entropy_out += entrpy
      count_entr_out += 1
    else:
      avg_in[i] += max_val
      count_in[i] += 1
      entropy_in += entrpy
      count_entr_in += 1


13500it [18:05, 12.43it/s]


In [None]:
# Per-classifier mean of the max softmax value, separately for the "in" and "out" accumulators.
classifier_avg_in = [avg_in[k] / count_in[k] for k in range(9)]
classifier_avg_out = [avg_out[k] / count_out[k] for k in range(9)]

# Average of those per-classifier means across all classifiers.
treshold_value_in = 0.0
treshold_value_out = 0.0
for mean_in, mean_out in zip(classifier_avg_in, classifier_avg_out):
  treshold_value_in += mean_in
  treshold_value_out += mean_out

treshold_value_in /= len(classifier_avg_in)
treshold_value_out /= len(classifier_avg_out)


In [None]:
# Show the reference vectors and their averages, then derive and show the mean entropies.
for report_value in (classifier_avg_in, classifier_avg_out, treshold_value_in, treshold_value_out):
  print(report_value)

entropy_ref_in_1 = entropy_in / count_entr_in
entropy_ref_out_1 = entropy_out / count_entr_out

print(entropy_ref_in_1, entropy_ref_out_1, sep="\n")

[0.7400516476569077, 0.749979900645713, 0.784873060942938, 0.7528750002558032, 0.75979439342767, 0.7540563285549482, 0.7382084313842158, 0.7146394664570689, 0.7063492819120487]
[0.6708516939878464, 0.553871576766173, 0.6163456805149714, 0.5613724065522353, 0.6399187765816847, 0.6055145018001398, 0.6509682759642601, 0.6806689092417558, 0.6199357890387376]
0.7445363901374793
0.6221608456053115
0.34301294911797536
0.4924766236319091


In [None]:
# Persist the reference statistics; the context manager flushes and closes the file so the
# following cell can safely read it back.
with open('reference_vector_in_out.pkl', "wb") as open_file:
  pickle.dump((classifier_avg_in,classifier_avg_out,entropy_ref_in_1,entropy_ref_out_1), open_file)

In [None]:
# Reload the reference statistics under the names used by the scoring cells below.
# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote itself.
with open('reference_vector_in_out.pkl', "rb") as open_file:
  ref_vector_in_1,ref_vector_out_1,entropy_ref_in_1,entropy_ref_out_1 = pickle.load(open_file)

In [None]:
# For every test image, record per-classifier max softmax, predicted original class id,
# the true class, the ID/OOD ground truth (class 1 is the anomaly here) and the mean entropy.
# NOTE(review): the reverse label mapping below was fixed, so pickles and scores produced
# by earlier runs of this cell are stale and must be regenerated.
max_sm_all_wt_1 = []
ood = 0
ind= 1
for data, label in tqdm(zip(test_imgs, test_lbls)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []

  lbl = ood if label[0] == 1 else ind

  for i,anmls_lbl in enumerate(labels):
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    # Classifier i was trained with labels[i] removed and the rest re-indexed in sorted
    # order, so the reverse mapping must drop position i (not always position 0).
    labels_removed_anmls = np.delete(labels, i)
    pred_lbl = labels_removed_anmls[pred_ind]   ### reverse mapping the actual label

    prediction_lbl.append(pred_lbl)

    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_1.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

10000it [13:58, 11.93it/s]


In [None]:
# Cache the per-sample test records; closing the file guarantees the data is fully written.
with open('predictions_on_test.pkl', "wb") as open_file:
  pickle.dump(max_sm_all_wt_1, open_file)

In [None]:
# Reload the cached per-sample test records.
# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote itself.
with open('predictions_on_test.pkl', "rb") as open_file:
  max_sm_all_wt_1 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean distance to the ID/OOD references)

In [None]:
from scipy.spatial import distance

# Score each test record by its similarity (1 / (1 + Euclidean distance)) to the "in" and
# "out" reference statistics, once on the max-softmax vector and once on the mean entropy.
id = 1    # label used for in-distribution
ood = 0   # label used for out-of-distribution
sim_score_in =[]
sim_score_out = []
sim_score_entr_in =[]
sim_score_entr_out = []
sim_score_add = []
y_true = []
y_pred=[]
y_entr_pred=[]
y_true_ood = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_1):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_1)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_1)

  # The entropies are scalars; distance.euclidean is documented for 1-D inputs, so wrap
  # them as 1-element vectors (the distance is still |a - b|).
  dist_entr_in = distance.euclidean(np.atleast_1d(avg_entr),np.atleast_1d(entropy_ref_in_1))
  dist_entr_out = distance.euclidean(np.atleast_1d(avg_entr),np.atleast_1d(entropy_ref_out_1))

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)
  sim_add = sim_in + sim_out

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_add)

  # Hard decisions: whichever reference the sample is closer to.
  prd = id if sim_in > sim_out else ood
  prd_e = id if sim_entr_in > sim_entr_out else ood

  # Flipped ground truth (1 = OOD) used to score the "similarity to out" columns.
  # Always assigned, so a value from a previous iteration can never be reused.
  label_true_ood = 0 if lbl_id_or_ood == 1 else 1

  y_pred.append(prd)
  y_entr_pred.append(prd_e)
  y_true.append(lbl_id_or_ood)
  y_true_ood.append(label_true_ood)

100%|██████████| 10000/10000 [00:01<00:00, 6791.55it/s]


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM

# (caption, ground truth, score) triples, reported in order; the y_true_ood rows use the
# flipped ground truth (1 = OOD).
auroc_rows = (
    ('Auroc with prediction as ID or OOD  ', y_true, y_pred),
    ('Auroc prediction with sim score to in distribution only  :', y_true, sim_score_in),
    ('Auroc prediction with sim score to ood distribution only  :', y_true_ood, sim_score_out),
    ('\nAuroc prediction with sim added score to for in and ood distribution only  :', y_true_ood, sim_score_add),
    ('\nAuroc with prediction as ID or OOD Entropy ', y_true, y_entr_pred),
    ('Auroc prediction with sim score to in distribution only Entropy  :', y_true, sim_score_entr_in),
    ('Auroc prediction with sim score to out distribution only Entropy  :', y_true_ood, sim_score_entr_out),
)
for caption, truth, scores in auroc_rows:
  print(caption, roc_auc_score(truth, scores))

Auroc with prediction as ID or OOD   0.48661111111111105
Auroc prediction with sim score to in distribution only  : 0.47037777777777784
Auroc prediction with sim score to ood distribution only  : 0.499166

Auroc prediction with sim added score to for in and ood distribution only  : 0.5164263333333334

Auroc with prediction as ID or OOD Entropy  0.46338888888888885
Auroc prediction with sim score to in distribution only Entropy  : 0.4036217777777778
Auroc prediction with sim score to out distribution only Entropy  : 0.5162133333333333


Decision Rule 

In [None]:
# Simple decision rule: no 7-or-8 vote threshold, just the majority prediction of the classifiers.
# NOTE(review): the rule compares the majority vote with label_actual (the true class), so the
# "prediction" depends on the ground truth - confirm this is intended before reporting the score.

id = 1
ood = 0
y_true = []
y_pred=[]

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_1):
  votes = Counter(pred_lbl_each_cls)
  most_common = votes.most_common(1)[0][0]

  pred = id if most_common == label_actual else ood
  y_pred.append(pred)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 183822.83it/s]

 Auroc with simple decision rule about majority_:   0.7402222222222222





In [None]:
id = 1
ood = 0

In [None]:
# Vote-threshold rule: a sample is called in-distribution when at least 7 (resp. 8) of the
# classifiers agree on the same predicted class. Relies on `id` / `ood` from the cell above.
y_true_dec_rule = []
y_pred_mr_than8_1 = []
y_pred_mr_than7_1 = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_1):
  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)   # size of the largest agreeing group

  y_pred_mr_than7_1.append(id if top_votes >= 7 else ood)
  y_pred_mr_than8_1.append(id if top_votes >= 8 else ood)

  # Class 1 is the held-out anomaly in this section.
  y_true_dec_rule.append(ood if label_actual == 1 else id)

# Captions now state the thresholds the code actually applies (>=, not >).
print('\nAuroc for decision rule >= 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_1))
print('Auroc for decision rule >= 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_1))

100%|██████████| 10000/10000 [00:00<00:00, 24811.20it/s]


Auroc for decision rule > 7  0.4351666666666667
Auroc for decision rule > 8  0.43516666666666665





In [None]:
# Combined rule: a sample is in-distribution only if it is closer to the "in" reference
# (max-softmax vector, or mean entropy) AND at least 7 / 8 classifiers agree on one class.
# Relies on `id` / `ood` defined in an earlier cell.
sim_score_in =[]
sim_score_entr_in =[]
y_true_ = []
y_pred_mr_than7_1 = []
y_pred_mr_than8_1 = []
y_entr_pred_mr_than7_1 = []
y_entr_pred_mr_than8_1 = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_1):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_1)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_1)

  # The entropies are scalars; distance.euclidean is documented for 1-D inputs, so wrap
  # them as 1-element vectors (the distance is still |a - b|).
  dist_entr_in = distance.euclidean(np.atleast_1d(avg_entr),np.atleast_1d(entropy_ref_in_1))
  dist_entr_out = distance.euclidean(np.atleast_1d(avg_entr),np.atleast_1d(entropy_ref_out_1))

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)              # size of the largest agreeing group
  vector_says_id = sim_in > sim_out
  entropy_says_id = sim_entr_in > sim_entr_out

  y_pred_mr_than7_1.append(id if (vector_says_id and top_votes >= 7) else ood)
  y_pred_mr_than8_1.append(id if (vector_says_id and top_votes >= 8) else ood)
  y_entr_pred_mr_than7_1.append(id if (entropy_says_id and top_votes >= 7) else ood)
  y_entr_pred_mr_than8_1.append(id if (entropy_says_id and top_votes >= 8) else ood)

  # Class 1 is the held-out anomaly in this section.
  y_true_.append(ood if label_actual == 1 else id)


100%|██████████| 10000/10000 [00:01<00:00, 8493.12it/s]


In [None]:
# AUROC of each combined rule against the ID/OOD ground truth built in the previous cell.
for caption, decisions in (
    ('Decision Rule(>=7) + Reference Vector ', y_pred_mr_than7_1),
    ('Decision Rule(>=8) + Reference Vector ', y_pred_mr_than8_1),
    ('Decision Rule(>=7) + Entropy Reference Value ', y_entr_pred_mr_than7_1),
    ('Decision Rule(>=8) + Entropy Reference Value ', y_entr_pred_mr_than8_1),
):
  print(caption, roc_auc_score(y_true_, decisions))

Decision Rule(>=7) + Reference Vector  0.4371111111111111
Decision Rule(>=8) + Reference Vector  0.43505555555555553
Decision Rule(>=7) + Entropy Reference Value  0.4344444444444444
Decision Rule(>=8) + Entropy Reference Value  0.43466666666666665


**Method 2**
---



Normal weighted SVM




In [None]:
# Build the SVM training set from the validation images: one row per (image, classifier)
# pair holding the temperature-scaled softmax vector of that classifier.
# A row is labelled OOD when the image's class is the one that classifier held out, ID otherwise.
id = 1
ood = 0

new_dataset = []

for img, label in tqdm(zip(val_imgs1, val_lbl1)):
    img = img.reshape([-1,32,32,3])
    for i, lbl in enumerate(labels):
      new_label = ood if label == lbl else id

      m = model_dict[i]
      y_p = m.predict(img)
      temp_new_y_pred = temp_scaling(y_p, temp_val[i])
      # apply_softmax output is converted to a plain list; [0] picks the single row of the batch.
      softmax_rows = np.array(models[i].apply_softmax(temp_new_y_pred)).tolist()
      new_softmax_y_pred = softmax_rows[0]
      new_dataset.append([new_softmax_y_pred, new_label])
      

0it [00:00, ?it/s]



13500it [1:04:39,  3.48it/s]


In [None]:
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions = df['predictions'].to_numpy()
new_labels = df['label'].to_numpy()

In [None]:
df.to_pickle("predictions_dataset.pkl")

In [None]:
df = pd.read_pickle('predictions_dataset.pkl')
df

Unnamed: 0,predictions,label
0,"[0.0006968142115511, 0.0016072661383077502, 6....",1
1,"[0.1779852658510208, 0.05228806287050247, 0.00...",1
2,"[0.3944475054740906, 0.027863575145602226, 0.0...",1
3,"[0.22997534275054932, 0.0039938073605299, 0.00...",1
4,"[0.007046738173812628, 0.0001994134217966348, ...",1
...,...,...
121495,"[0.00494624488055706, 0.2894105017185211, 0.00...",1
121496,"[0.003907484468072653, 0.5780871510505676, 0.0...",1
121497,"[0.003526237327605486, 0.360741525888443, 0.00...",1
121498,"[0.010601880960166454, 0.7933622598648071, 0.0...",1


In [None]:
from collections import Counter
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Separate the SVM rows by label: 1 -> in-distribution rows, anything else -> out-of-distribution rows.
in_data, out_data = [], []

for row_idx in range(len(predictions)):
  row = [predictions[row_idx], new_labels[row_idx]]
  target = in_data if new_labels[row_idx] == 1 else out_data
  target.append(row)

print(len(in_data),len(out_data))

108000 13500


In [None]:
in_data[1]

[[0.1779852658510208,
  0.05228806287050247,
  0.0022531861905008554,
  0.02192901261150837,
  0.001126228366047144,
  0.02660069800913334,
  0.6810447573661804,
  0.03677280247211456],
 1]

In [None]:
# Flatten the [features, label] rows into parallel lists, in-distribution rows first.
# NOTE(review): this rebinds the global `labels` (until now the array of non-anomalous class
# ids) and shadows the builtin `input`; cells that need the class ids must not run after this.
input = [row[0] for row in in_data] + [row[0] for row in out_data]
labels = [row[1] for row in in_data] + [row[1] for row in out_data]
print(len(input),len(labels))

121500 121500


In [None]:
# Stratified 80/20 split of the SVM rows. A fixed random_state makes the partition - and
# every SVM score derived from it - reproducible across kernel restarts.
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(input, labels, test_size=0.20,stratify = labels, random_state=42)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight
# 'balanced' weights compensate for the ID/OOD imbalance of the SVM rows.
# Keyword arguments are used because current scikit-learn rejects the positional form.
svm_classes = np.unique(svm_train_lb)
class_wts = compute_class_weight(class_weight='balanced', classes=svm_classes, y=svm_train_lb)
# Map each class id to its weight without assuming the classes are exactly 0 and 1.
class_wts = {int(cls): wt for cls, wt in zip(svm_classes, class_wts)}
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardise the softmax features, then fit a class-weighted SVC on the training rows.
feature_scaler = StandardScaler()
weighted_svc = SVC(gamma='auto', class_weight=class_wts)
clf = make_pipeline(feature_scaler, weighted_svc)
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
pred = clf.predict(svm_test_ip)
len(pred) ### svm fitted on whole data with weights

24300

In [None]:
clf.score(svm_test_ip,svm_test_lb) 

0.561604938271605

In [None]:
# Classify every test image: each of the 9 classifiers produces a temperature-scaled softmax
# vector, the SVM labels that vector ID (1) or OOD (0), and the majority of the 9 SVM
# decisions becomes the prediction. Ground truth: class 1 is the anomaly (0), the rest are 1.
y_true = []
y_pred = []
for img, label in tqdm(zip(test_imgs, test_lbls)):
    img = img.reshape([-1,32,32,3])

    svm_output = []
    for i in range(0,9):
      y_p = model_dict[i].predict(img)
      temp_new_y_pred = temp_scaling(y_p, temp_val[i])
      softmax_rows = np.array(models[i].apply_softmax(temp_new_y_pred)).tolist()
      # The SVM expects a 2-D (1, n_features) input for a single sample.
      svm_features = np.array(softmax_rows[0]).reshape([1,-1])
      svm_output.append(clf.predict(svm_features)[0])

    majority_vote = Counter(list(svm_output)).most_common()[0][0]
    y_pred.append(majority_vote)
    y_true.append(0 if label == 1 else 1)

In [None]:
print(y_pred)
print(y_true)

[0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 

In [None]:
print(Counter(y_pred))
print(Counter(y_true))

Counter({0: 5175, 1: 4825})
Counter({1: 9000, 0: 1000})


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
roc_auc_score(y_true, y_pred)

0.4763888888888889

In [None]:
df1= pd.DataFrame(y_pred, columns=["y_pred"])
df2= pd.DataFrame(y_true, columns=["y_true"])
df = pd.concat([df1, df2], axis=1)
df

In [None]:
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
y_true = df['y_true'].to_numpy().tolist()
y_pred = df['y_pred'].to_numpy().tolist()
print(y_true)
print(y_pred)

[1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 

# 0 as anomaly

In [None]:
# Switch the working directory to the Drive folder for the "class 0 held out" experiment,
# so the relative pickle/model paths used by the following cells resolve inside it.
# NOTE(review): absolute Colab/Drive path - requires the Drive mount from the top of the notebook.
import os
print(os.getcwd())
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 0/")
# Alternative location kept for reference:
# os.chdir("/content/gdrive/My Drive/leave 0/")
print(os.listdir())

/content/gdrive/My Drive/Colab Notebooks/Method 2/CIFAR/Leave one class out/leave 0
['predictions_on_test.pkl', 'reference_vector_in_out.pkl']


In [None]:
# Hold out class 0 as the anomaly. remove_anomalous_class is defined elsewhere in the notebook;
# presumably it drops every sample of the listed classes and, with val_req=True, also returns a
# validation split (see the shapes printed below).
train_imgs0, val_imgs0 , train_lbl0, val_lbl0 = remove_anomalous_class(train_imgs,train_lbls,val_req=True,anomalous_class=[0])
# Report the shapes of THIS section's label arrays (previously printed the class-1 section's).
print(train_imgs0.shape, val_imgs0.shape , train_lbl0.shape, val_lbl0.shape)
# The context manager flushes and closes the file, so the next cell never reads a
# partially written pickle from a still-open write handle.
with open('Training_data.pkl', "wb") as open_file:
  pickle.dump((train_imgs0, val_imgs0 , train_lbl0, val_lbl0 ), open_file)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
# Reload the cached train/validation split; the handle is closed as soon as the tuple is read.
# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote itself.
with open('Training_data.pkl', "rb") as open_file:
  (train_imgs0, val_imgs0 , train_lbl0, val_lbl0 )= pickle.load(open_file)
print(train_imgs0.shape, val_imgs0.shape , train_lbl0.shape, val_lbl0.shape)

(31500, 32, 32, 3) (13500, 32, 32, 3) (31500, 1) (13500, 1)


In [None]:
print( np.unique(train_lbl0),len(np.unique(train_lbl0)))

[1 2 3 4 5 6 7 8 9] 9


In [None]:
# One wrapper per remaining class: classifier k will be trained with the k-th remaining class held out.
n_classifiers = len(np.unique(train_lbl0))
models = [Model(name='0anomaly:classifier'+str(k)) for k in range(n_classifiers)]

names = [wrapper.name for wrapper in models]
names

['0anomaly:classifier0',
 '0anomaly:classifier1',
 '0anomaly:classifier2',
 '0anomaly:classifier3',
 '0anomaly:classifier4',
 '0anomaly:classifier5',
 '0anomaly:classifier6',
 '0anomaly:classifier7',
 '0anomaly:classifier8']

In [None]:
def remap_labels(train_lbl,val_lbl,unique_lbl):
  """Re-index raw class ids onto contiguous indices 0..K-1.

  The k-th entry of ``unique_lbl`` is mapped to index k. Works for any number
  of classes (the previous version was hard-wired to exactly eight).

  Args:
    train_lbl: training labels, either flat or shaped (N, 1); flattened before mapping.
    val_lbl: validation labels, same layout as ``train_lbl``.
    unique_lbl: the distinct class ids, in the order that defines the new indices.

  Returns:
    (train_lbl, val_lbl) as 1-D numpy arrays of remapped indices.

  Raises:
    KeyError: if a label does not appear in ``unique_lbl``.
  """
  print(unique_lbl)
  # Plain-int keys so lookups do not depend on the numpy dtype of the labels.
  index_map = {int(lbl): idx for idx, lbl in enumerate(unique_lbl)}
  # np.ravel turns (N, 1) label columns into scalars, avoiding int() on 1-element arrays.
  train_lbl = np.array([index_map[int(x)] for x in np.ravel(train_lbl)])
  val_lbl = np.array([index_map[int(x)] for x in np.ravel(val_lbl)])

  return train_lbl,val_lbl

In [None]:
labels = np.unique(train_lbl0)
labels

array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)

In [None]:
# Train one leave-one-class-out classifier per remaining class and save it as 'classifier<indx>'.
for indx, held_out in enumerate(labels):
  # Remove the held-out class from both the training and the validation split.
  train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(
      train_imgs0, train_lbl0, val_req=False, anomalous_class=[held_out])
  val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(
      val_imgs0, val_lbl0, val_req=False, anomalous_class=[held_out])

  unique_lbls = np.unique(train_lbls_wt_clas_i)
  print('before remapping', unique_lbls, np.unique(val_lbls__wt_clas_i))

  # Re-index the 8 surviving class ids to 0..7 so they match the 8-way output layer.
  train_lbls_wt_clas_i, val_lbls__wt_clas_i = remap_labels(
      train_lbls_wt_clas_i, val_lbls__wt_clas_i, unique_lbls)
  print(np.unique(train_lbls_wt_clas_i), np.unique(val_lbls__wt_clas_i))

  wrapper = models[indx]
  wrapper.build_model(8)
  wrapper.train(train_imgs_wt_clas_i, train_lbls_wt_clas_i, val_imgs_wt_clas_i, val_lbls__wt_clas_i)
  wrapper.save_model(f'classifier{indx}')

In [None]:
# Reload every saved classifier and record its loss/accuracy on its own train and validation subsets.
train_loss, train_acc = [], []
val_loss, val_acc = [], []

for indx, held_out in enumerate(labels):
  train_imgs_wt_clas_i, train_lbls_wt_clas_i = remove_anomalous_class(
      train_imgs0, train_lbl0, val_req=False, anomalous_class=[held_out])
  val_imgs_wt_clas_i, val_lbls__wt_clas_i = remove_anomalous_class(
      val_imgs0, val_lbl0, val_req=False, anomalous_class=[held_out])

  unique_lbls = np.unique(train_lbls_wt_clas_i)
  train_lbls_wt_clas_i, val_lbls__wt_clas_i = remap_labels(
      train_lbls_wt_clas_i, val_lbls__wt_clas_i, unique_lbls)

  model = models[indx].load_model(f'classifier{indx}')
  # evaluate() results are indexed as [loss, accuracy] below.
  train_ = model.evaluate(train_imgs_wt_clas_i, train_lbls_wt_clas_i)
  val_ = model.evaluate(val_imgs_wt_clas_i, val_lbls__wt_clas_i)

  train_loss.append(train_[0])
  train_acc.append(train_[1])
  val_loss.append(val_[0])
  val_acc.append(val_[1])

[2 3 4 5 6 7 8 9]
[1 3 4 5 6 7 8 9]
[1 2 4 5 6 7 8 9]
[1 2 3 5 6 7 8 9]
[1 2 3 4 6 7 8 9]
[1 2 3 4 5 7 8 9]
[1 2 3 4 5 6 8 9]
[1 2 3 4 5 6 7 9]
[1 2 3 4 5 6 7 8]


In [None]:
print('training accuracy average :',np.average(train_acc))
print('validation accuracy average :',np.average(val_acc))

training accuracy average : 0.8877936469184028
validation accuracy average : 0.7474629680315653


In [None]:
print(train_acc)
print(val_acc)

[0.8431785702705383, 0.9316071271896362, 0.879964292049408, 0.8980357050895691, 0.8899285793304443, 0.9235000014305115, 0.907714307308197, 0.8244642615318298, 0.8917499780654907]
[0.7147499918937683, 0.7868333458900452, 0.7604166865348816, 0.7646666765213013, 0.7594166398048401, 0.7522500157356262, 0.7570833563804626, 0.6903333067893982, 0.7414166927337646]


In [None]:
classifiers = [models[i].load_model('classifier'+str(i)) for i in range(len(models))]

In [None]:
classifiers

In [None]:
# Name of the second-to-last layer of each classifier (`layers[-2:][0]`); the next cell
# taps this layer's output, which later cells divide by a temperature and feed to softmax.
layer_name_list = [classifier.layers[-2:][0].name for classifier in classifiers]
layer_name_list

['dense_19',
 'dense_21',
 'dense_23',
 'dense_25',
 'dense_27',
 'dense_29',
 'dense_31',
 'dense_33',
 'dense_35']

In [None]:
# Build one truncated model per classifier that stops at the layer named in
# layer_name_list (the second-to-last layer), so later cells can apply their own
# temperature scaling and softmax to that layer's output.
#
# tf.keras.Model is referenced directly instead of `from keras import Model`:
# that import rebinds the name `Model`, shadowing this notebook's own `Model`
# wrapper class, and mixes the standalone keras package with the tf.keras
# layers the classifiers are built from.
model_dict = {}

for i, (classifier, layer_name) in enumerate(zip(classifiers, layer_name_list)):
  model_output_i = classifier.get_layer(layer_name).output
  model_dict[i] = tf.keras.Model(inputs=classifier.input, outputs=model_output_i)

In [None]:
# For every classifier, run its truncated model on the validation images that
# remain once class `label` has been removed, and keep the raw outputs.
y_pred = []
for clf_idx, label in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs0, val_lbl0, val_req=False, anomalous_class=[label])
  # Sanity print: removed class, followed by the classes still present.
  print(label, np.unique(val_lbls_wt_clas_i))
  y_pred.append(model_dict[clf_idx].predict(val_imgs_wt_clas_i))

1 [2 3 4 5 6 7 8 9]
2 [1 3 4 5 6 7 8 9]
3 [1 2 4 5 6 7 8 9]
4 [1 2 3 5 6 7 8 9]
5 [1 2 3 4 6 7 8 9]
6 [1 2 3 4 5 7 8 9]
7 [1 2 3 4 5 6 8 9]
8 [1 2 3 4 5 6 7 9]
9 [1 2 3 4 5 6 7 8]


In [None]:
# One output array per classifier is expected here.
len(y_pred)

9

In [None]:
# One value per classifier from temp_cal; later cells divide each classifier's
# output by its entry before the softmax (i.e. a temperature).
temp_val = []
for clf_idx, label in enumerate(labels):
  val_imgs_wt_clas_i, val_lbls_wt_clas_i = remove_anomalous_class(
      val_imgs0, val_lbl0, val_req=False, anomalous_class=[label])
  print(np.unique(val_lbls_wt_clas_i), y_pred[clf_idx].shape)

  unique_lbls = np.unique(val_lbls_wt_clas_i)
  # remap_labels takes and returns a pair of label arrays; only validation
  # labels exist here, so the same array fills both slots and the second
  # result is the one kept.
  # NOTE(review): presumably maps labels onto 0..len(unique_lbls)-1 -- confirm.
  _first_copy, val_lbls_wt_clas_i = remap_labels(
      val_lbls_wt_clas_i, val_lbls_wt_clas_i, unique_lbls)

  temp_val.append(temp_cal(y_pred[clf_idx], val_lbls_wt_clas_i, len(unique_lbls)))

In [None]:
# Display the per-classifier values returned by temp_cal.
temp_val

In [None]:
# Hard-coded temperatures, one per classifier, replacing whatever the previous
# cells computed.
# NOTE(review): presumably copied from an earlier temp_cal run so calibration
# need not be repeated -- confirm they still match the saved checkpoints.
temp_val = [2.1473336,2.0425196,2.33733,1.9314604,2.2554636,2.1953905,1.987695,2.2827978,2.1055408]

**Method 1**
---



In [None]:
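# labels --> the in-distribution class ids, one per classifier (here 1 2 3 4 5 6 7 8 9); labels[i] is the class treated as anomalous for classifier i, and class 0 is the OOD class at test time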

In [None]:
from tqdm import tqdm

# Running totals over the validation set. For classifier i an image counts as
# "out" when its class equals labels[i] (the class held out for that
# classifier) and as "in" otherwise.
#   avg_* / count_*            per-classifier sum / count of the max softmax probability
#   entropy_* / count_entr_*   pooled sum / count of the softmax entropy
entropy_in = 0
count_entr_in = 0
avg_in = [0] * 9
count_in = [0] * 9

entropy_out = 0
count_entr_out = 0
avg_out = [0] * 9
count_out = [0] * 9

for img, lbl in tqdm(zip(val_imgs0, val_lbl0)):
  img = img.reshape([-1, 32, 32, 3])

  for i, anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i for this single image.
    logits = tf.math.divide(model_dict[i](img), temp_val[i])
    pred = tf.nn.softmax(logits)
    max_val = np.max(pred)
    # Entropy taken in base "number of outputs", so it is normalised to [0, 1].
    entrpy = entropy(pred[0], base=len(pred[0]))

    if anmls_lbl == lbl:
      avg_out[i] = avg_out[i] + max_val
      count_out[i] = count_out[i] + 1
      entropy_out = entropy_out + entrpy
      count_entr_out = count_entr_out + 1
    else:
      avg_in[i] = avg_in[i] + max_val
      count_in[i] = count_in[i] + 1
      entropy_in = entropy_in + entrpy
      count_entr_in = count_entr_in + 1


13500it [18:26, 12.20it/s]


In [None]:
# Per-classifier mean of the max softmax probability, separately for "in" and
# "out" validation images (these lists are pickled below as reference vectors).
classifier_avg_in = [total / n for total, n in zip(avg_in, count_in)]
classifier_avg_out = [total / n for total, n in zip(avg_out, count_out)]

# Grand mean of each reference vector across the classifiers.
treshold_value_in = sum(classifier_avg_in, 0.0) / len(classifier_avg_in)
treshold_value_out = sum(classifier_avg_out, 0.0) / len(classifier_avg_out)


In [None]:
# Pooled mean entropies: the scalar entropy references for "in" and "out" inputs.
entropy_ref_in_0 = entropy_in/count_entr_in
entropy_ref_out_0 = entropy_out/ count_entr_out

# Print order: reference vectors, their grand means, then the entropy references.
for summary in (classifier_avg_in, classifier_avg_out,
                treshold_value_in, treshold_value_out,
                entropy_ref_in_0, entropy_ref_out_0):
  print(summary)

[0.7206484871295591, 0.7867828972277542, 0.7844558049291372, 0.7564162949336072, 0.7682750959657133, 0.7644602392415205, 0.7483412800952792, 0.7228208382924398, 0.7465326943707963]
[0.7099376080334187, 0.5763494877020517, 0.6247952837944031, 0.5916481250027815, 0.6737140421370665, 0.5855842731992403, 0.600825888812542, 0.6103627229531606, 0.7364417767425379]
0.7554148480206453
0.6344065787085781
0.32329126212238524
0.4682570079314511


In [None]:
# Persist the reference vectors and entropy references. The context manager
# closes (and therefore flushes) the file, so the pickle is complete on disk
# before any later cell reads it back.
with open(os.path.join('reference_vector_in_out.pkl'), "wb") as open_file:
  pickle.dump((classifier_avg_in,classifier_avg_out,entropy_ref_in_0,entropy_ref_out_0), open_file)

In [None]:
# Reload the reference vectors / entropy references written by this notebook.
# The context manager releases the file handle once the tuple is loaded.
# pickle.load executes arbitrary code: only use it on files this notebook wrote.
with open(os.path.join('reference_vector_in_out.pkl'), "rb") as open_file:
  ref_vector_in_0,ref_vector_out_0,entropy_ref_in_0,entropy_ref_out_0 = pickle.load(open_file)

In [None]:
# Score every test image with all classifiers. One record is stored per image:
#   [max softmax per classifier, predicted class id per classifier,
#    true class id, ID/OOD flag (class 0 is OOD), mean normalised entropy]
max_sm_all_wt_0 = []
ood = 0
ind= 1
for data, label in tqdm(zip(test_imgs, test_lbls)):

  entr = 0
  img = data.reshape([-1, 32, 32, 3])
  prediction_sftmx = []
  prediction_lbl = []

  lbl = ood if label[0] == 0 else ind

  for i,anmls_lbl in enumerate(labels):
    # Temperature-scaled softmax of classifier i for this single image.
    logits = model_dict[i](img)
    logits = tf.math.divide(logits, temp_val[i])
    pred = tf.nn.softmax(logits)

    prediction_sftmx.append(np.max(pred))

    pred_ind = tf.argmax(pred[0]).numpy()
    # Reverse-map the output index to the original class id. Classifier i has no
    # output for labels[i], so that entry (position i, not always position 0)
    # must be the one removed before indexing.
    labels_removed_anmls = np.delete(labels, i)
    pred_lbl = labels_removed_anmls[pred_ind]

    prediction_lbl.append(pred_lbl)

    # Entropy in base "number of outputs", i.e. normalised to [0, 1].
    entr = entr + entropy(pred[0], base=len(pred[0]))

  max_sm_all_wt_0.append([prediction_sftmx,prediction_lbl,label[0],lbl,entr/len(labels)])

10000it [13:53, 12.00it/s]


In [None]:
# Cache the per-image test records; the context manager closes and flushes the
# file so the pickle is complete before it is read back.
with open(os.path.join('predictions_on_test.pkl'), "wb") as open_file:
  pickle.dump(max_sm_all_wt_0, open_file)

In [None]:
# Reload the cached per-image test records, releasing the file handle afterwards.
# pickle.load executes arbitrary code: only use it on files this notebook wrote.
with open(os.path.join('predictions_on_test.pkl'), "rb") as open_file:
  max_sm_all_wt_0 = pickle.load(open_file)

Euclidean distance and entropy (similarity score + Euclidean ID/OOD decision)

In [None]:
from scipy.spatial import distance

# Compare each test record with the "in" and "out" references and turn the
# distances into similarity scores 1 / (1 + distance).
#   sim_score_in / sim_score_out            similarity of the max-softmax vector
#   sim_score_entr_in / sim_score_entr_out  similarity of the mean entropy
#   y_pred / y_entr_pred                    hard decision: ID when closer to the "in" reference
#   y_true / y_true_ood                     ground truth with ID = 1, and with OOD = 1
id = 1
ood = 0
sim_score_in =[]
sim_score_out = []
sim_score_entr_in =[]
sim_score_entr_out = []
sim_score_add = []
y_true = []
y_pred=[]
y_entr_pred=[]
y_true_ood = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_0):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_0)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_0)

  # The entropy values are scalars, so their Euclidean distance is simply the
  # absolute difference. distance.euclidean expects 1-D vectors and newer SciPy
  # releases reject scalar input.
  dist_entr_in = abs(avg_entr - entropy_ref_in_0)
  dist_entr_out = abs(avg_entr - entropy_ref_out_0)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)
  sim_add = sim_in + sim_out

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_out.append(sim_out)
  sim_score_entr_in.append(sim_entr_in)
  sim_score_entr_out.append(sim_entr_out)
  sim_score_add.append(sim_add)

  prd = id if sim_in > sim_out else ood
  prd_e = id if sim_entr_in > sim_entr_out else ood

  # Flip the 0/1 flag so OOD becomes the positive class. Computed directly so a
  # value can never be carried over from the previous iteration.
  label_true_ood = 1 - lbl_id_or_ood

  y_pred.append(prd)
  y_entr_pred.append(prd_e)
  y_true.append(lbl_id_or_ood)
  y_true_ood.append(label_true_ood)

100%|██████████| 10000/10000 [00:01<00:00, 6830.34it/s]


In [None]:
from sklearn.metrics import roc_auc_score

# (caption, ground truth, score) triples, printed in order. Scores against the
# "out" reference are evaluated with OOD as the positive class (y_true_ood).
auroc_reports = [
  ('Auroc with prediction as ID or OOD  ', y_true, y_pred),
  ('Auroc prediction with sim score to in distribution only  :', y_true, sim_score_in),
  ('Auroc prediction with sim score to ood distribution only  :', y_true_ood, sim_score_out),
  ('\nAuroc prediction with sim added score to for in and ood distribution only  :', y_true_ood, sim_score_add),
  ('\nAuroc with prediction as ID or OOD Entropy ', y_true, y_entr_pred),
  ('Auroc prediction with sim score to in distribution only Entropy  :', y_true, sim_score_entr_in),
  ('Auroc prediction with sim score to out distribution only Entropy  :', y_true_ood, sim_score_entr_out),
]
for caption, truth, scores in auroc_reports:
  print(caption, roc_auc_score(truth, scores))

Auroc with prediction as ID or OOD   0.6044444444444445
Auroc prediction with sim score to in distribution only  : 0.5343593333333333
Auroc prediction with sim score to ood distribution only  : 0.625537

Auroc prediction with sim added score to for in and ood distribution only  : 0.5683738888888888

Auroc with prediction as ID or OOD Entropy  0.5938888888888889
Auroc prediction with sim score to in distribution only Entropy  : 0.4896664444444444
Auroc prediction with sim score to out distribution only Entropy  : 0.6146422222222222


Decision Rule 

In [None]:
# Simple decision rule: no 7-or-8 vote threshold, just the majority prediction
# across the classifiers.
# NOTE(review): the prediction below is ID only when the majority class equals
# `label_actual`, i.e. it is derived from the ground-truth class of the sample.
# That information is not available at inference time, so this score measures
# ensemble classification accuracy rather than OOD detection -- confirm intent.

id = 1
ood = 0
y_true = []
y_pred=[]

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_0):
  # Most frequently predicted class id among the classifiers.
  count = Counter(pred_lbl_each_cls)
  most_common= count.most_common(1)[0][0]

  if most_common == label_actual:
    pred = id
  else:
    pred = ood 
  y_pred.append(pred)
  y_true.append(lbl_id_or_ood)

print(' Auroc with simple decision rule about majority_:  ',roc_auc_score(y_true, y_pred))
  

100%|██████████| 10000/10000 [00:00<00:00, 178446.85it/s]

 Auroc with simple decision rule about majority_:   0.7371666666666666





In [None]:
# Binary flags used by the decision-rule cells: 1 = in-distribution, 0 = out-of-distribution.
# Note: `id` shadows the Python builtin of the same name for the rest of the session.
id = 1
ood = 0

In [None]:
# Vote-count decision rule: an image is called ID when at least 7 (resp. 8) of
# the classifiers predict the same class, otherwise OOD.
y_true_dec_rule = []
y_pred_mr_than8_0 = []
y_pred_mr_than7_0 = []
for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_0):
  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  # Size of the largest group of agreeing classifiers.
  top_votes = np.max(counts)

  y_pred_mr_than7_0.append(id if top_votes >= 7 else ood)
  y_pred_mr_than8_0.append(id if top_votes >= 8 else ood)

  # Class 0 is the OOD class; every other class is in-distribution.
  y_true_dec_rule.append(ood if label_actual == 0 else id)

# Captions state the thresholds actually applied above (>=, not >).
print('\nAuroc for decision rule >= 7 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than7_0))
print('Auroc for decision rule >= 8 ',roc_auc_score(y_true_dec_rule,y_pred_mr_than8_0))

100%|██████████| 10000/10000 [00:00<00:00, 19492.29it/s]


Auroc for decision rule > 7  0.5191111111111111
Auroc for decision rule > 8  0.5608888888888889





In [None]:
# Combined rule: an image is called ID only when it is closer to the "in"
# reference than to the "out" reference AND at least 7 (resp. 8) classifiers
# agree on the predicted class. Two variants: the reference is either the
# max-softmax vector or the mean entropy.
sim_score_in =[]
sim_score_entr_in =[]
y_true_ = []
y_pred_mr_than7_0 = []
y_pred_mr_than8_0 = []
y_entr_pred_mr_than7_0 = []
y_entr_pred_mr_than8_0 = []

for max_pred_each_cls,pred_lbl_each_cls,label_actual,lbl_id_or_ood,avg_entr in tqdm(max_sm_all_wt_0):

  dist_in = distance.euclidean(max_pred_each_cls,ref_vector_in_0)
  dist_out = distance.euclidean(max_pred_each_cls,ref_vector_out_0)

  # Scalars: the Euclidean distance is the absolute difference. distance.euclidean
  # expects 1-D vectors and newer SciPy releases reject scalar input.
  dist_entr_in = abs(avg_entr - entropy_ref_in_0)
  dist_entr_out = abs(avg_entr - entropy_ref_out_0)

  sim_in = 1 / (1 + dist_in)
  sim_out = 1/ (1 + dist_out)

  sim_entr_in = 1 / (1 + dist_entr_in)
  sim_entr_out = 1/ (1 + dist_entr_out)

  sim_score_in.append(sim_in)
  sim_score_entr_in.append(sim_entr_in)

  values, counts = np.unique(pred_lbl_each_cls, return_counts=True)
  top_votes = np.max(counts)
  closer_to_in = sim_in > sim_out
  closer_to_in_entr = sim_entr_in > sim_entr_out

  y_pred_mr_than7_0.append(id if (closer_to_in and top_votes >= 7) else ood)
  y_pred_mr_than8_0.append(id if (closer_to_in and top_votes >= 8) else ood)
  y_entr_pred_mr_than7_0.append(id if (closer_to_in_entr and top_votes >= 7) else ood)
  y_entr_pred_mr_than8_0.append(id if (closer_to_in_entr and top_votes >= 8) else ood)

  # Class 0 is the OOD class; every other class is in-distribution.
  y_true_.append(ood if label_actual == 0 else id)


100%|██████████| 10000/10000 [00:01<00:00, 8541.04it/s]


In [None]:
# Report the four combined-rule variants in a fixed order.
combined_reports = (
  ('Decision Rule(>=7) + Reference Vector ', y_pred_mr_than7_0),
  ('Decision Rule(>=8) + Reference Vector ', y_pred_mr_than8_0),
  ('Decision Rule(>=7) + Entropy Reference Value ', y_entr_pred_mr_than7_0),
  ('Decision Rule(>=8) + Entropy Reference Value ', y_entr_pred_mr_than8_0),
)
for caption, decisions in combined_reports:
  print(caption, roc_auc_score(y_true_, decisions))

Decision Rule(>=7) + Reference Vector  0.5360555555555555
Decision Rule(>=8) + Reference Vector  0.5606666666666666
Decision Rule(>=7) + Entropy Reference Value  0.5383333333333333
Decision Rule(>=8) + Entropy Reference Value  0.5596111111111111


**Method 2**
---



Normal weighted SVM




In [None]:
# Method 2 training data, built after temperature scaling and softmax.
# Every validation image yields one row per classifier: the classifier's
# probability list plus a flag that is OOD (0) when the image's class is the one
# held out for that classifier and ID (1) otherwise.
id = 1
ood = 0

new_dataset = []

for img, label in tqdm(zip(val_imgs0, val_lbl0)):
  img = img.reshape([-1,32,32,3])
  for i, lbl in enumerate(labels):
    new_label = ood if label == lbl else id

    raw_output = model_dict[i].predict(img)
    scaled_output = temp_scaling(raw_output, temp_val[i])
    # Convert to plain Python lists and keep the single row of the batch.
    probs = np.array(models[i].apply_softmax(scaled_output)).tolist()
    new_dataset.append([probs[0], new_label])
      

0it [00:00, ?it/s]



13500it [1:03:54,  3.52it/s]


In [None]:
# Tabulate the rows, then pull both columns back out as NumPy arrays.
df = pd.DataFrame(new_dataset, columns=["predictions", "label"])
predictions, new_labels = (df[column].to_numpy() for column in ("predictions", "label"))

In [None]:
# Cache the (predictions, label) table so the hour-long loop above need not be re-run.
df.to_pickle("predictions_dataset.pkl")

In [None]:
# Reload the cached table and display it.
# NOTE(review): `predictions` / `new_labels` are not re-derived here; on a fresh
# kernel they must be rebuilt from `df` before the following cells run.
df = pd.read_pickle('predictions_dataset.pkl')
df

In [None]:
# Class balance of the SVM dataset (0 = OOD rows, 1 = ID rows).
from collections import Counter
Counter(new_labels)

Counter({0: 13500, 1: 108000})

In [None]:
# Partition the [probabilities, flag] rows by flag: 1 goes to in_data,
# everything else to out_data.
in_data = []
out_data = []

for probs, flag in zip(predictions, new_labels):
  bucket = in_data if flag == 1 else out_data
  bucket.append([probs, flag])

print(len(in_data),len(out_data))

# Disabled experiment: down-sample in_data to the size of out_data.
# import random 
# in_data = random.sample(in_data,(len(out_data)))
# print(len(in_data))

108000 13500


In [None]:
# Inspect one in-distribution row: [probability list, flag].
in_data[1]

[[7.27373335394077e-05,
  0.027628222480416298,
  0.4475102424621582,
  0.09674197435379028,
  0.026312189176678658,
  0.40081822872161865,
  0.0008246548823080957,
  9.183258225675672e-05],
 1]

In [None]:
# Flatten both partitions into parallel feature / target lists, ID rows first.
# NOTE(review): `labels` rebinds the list of class ids that earlier cells iterate
# over, and `input` shadows the builtin; re-running those earlier cells after
# this one will misbehave. Renaming requires the same change in the
# train_test_split cell that consumes these names.
ordered_rows = in_data + out_data
input = [row[0] for row in ordered_rows]
labels = [row[1] for row in ordered_rows]
print(len(input),len(labels))

121500 121500


In [None]:
# Stratified 80/20 split of the SVM dataset. A fixed random_state makes the
# split -- and every score derived from it -- reproducible across kernel restarts.
svm_train_ip, svm_test_ip, svm_train_lb, svm_test_lb = train_test_split(
    input, labels, test_size=0.20, stratify=labels, random_state=42)
print(len(svm_train_ip),len(svm_test_ip),len(svm_train_lb))

97200 24300 97200


In [None]:
from sklearn.svm import SVC
# NOTE(review): this estimator is never fitted in the cells shown; the pipeline
# `clf` built further down is the one that gets trained. The assignment also
# rebinds `model`, which an earlier cell used for a loaded classifier.
model = SVC(gamma='scale')

In [None]:
from sklearn.utils.class_weight import compute_class_weight
# 'balanced' weights counteract the ID/OOD imbalance of the training split.
# Arguments are passed by keyword: current scikit-learn releases reject the
# positional form, while older releases accept keywords as well.
svm_classes = np.unique(svm_train_lb)
class_wts = compute_class_weight(class_weight='balanced', classes=svm_classes, y=svm_train_lb)
# Map each class id to its weight, in the dict form SVC(class_weight=...) accepts.
class_wts = {int(cls): weight for cls, weight in zip(svm_classes, class_wts)}
class_wts

{0: 4.5, 1: 0.5625}

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardise the probability features, then fit a class-weighted SVC on them.
svm_steps = (StandardScaler(), SVC(gamma='auto', class_weight=class_wts))
clf = make_pipeline(*svm_steps)
clf.fit(svm_train_ip, svm_train_lb)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200,
                     class_weight={0: 4.5, 1: 0.5625}, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto',
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [None]:
# Hard 0/1 predictions of the fitted pipeline on the held-out 20% split.
# Note: this rebinds `pred`, which earlier cells used for softmax outputs.
pred = clf.predict(svm_test_ip)
len(pred) ### svm fitted on whole data with weights

24300

In [None]:
# Plain accuracy on the held-out split (not adjusted for the class imbalance).
clf.score(svm_test_ip,svm_test_lb) 

0.613127572016461

In [None]:
# Score the test set with Method 2: each of the 9 truncated classifiers yields a
# temperature-scaled softmax vector, the SVM labels every vector as ID (1) or
# OOD (0), and the most frequent SVM label becomes the image's prediction.
y_true = []
y_pred = []
for img, label in tqdm(zip(test_imgs, test_lbls)):
  img = img.reshape([-1,32,32,3])

  svm_output = []
  for i in range(0,9):
    raw_output = model_dict[i].predict(img)
    scaled_output = temp_scaling(raw_output, temp_val[i])
    # Plain-list round trip, then back to a single-row 2-D array for the SVM.
    probs = np.array(models[i].apply_softmax(scaled_output)).tolist()
    features = np.array(probs[0]).reshape([1,-1])
    svm_output.append(clf.predict(features)[0])

  majority_vote = Counter(list(svm_output)).most_common()[0][0]
  y_pred.append(majority_vote)

  # Class 0 is the OOD class; every other class counts as in-distribution.
  y_true.append(0 if label == 0 else 1)

10000it [51:20,  3.25it/s]


In [None]:
# Dumps every prediction and every ground-truth flag (one entry per test image).
# NOTE(review): this floods the notebook output; consider a slice or Counter().
print(y_pred)
print(y_true)

[0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 

In [None]:
# Predicted vs. actual ID/OOD balance, one Counter per line.
print(Counter(y_pred), Counter(y_true), sep='\n')

Counter({1: 5633, 0: 4367})
Counter({1: 9000, 0: 1000})


In [None]:
from sklearn.metrics import roc_auc_score  ##### weighted SVM
# y_pred holds hard 0/1 votes rather than continuous scores, so this ROC has a
# single operating point and the value equals the balanced accuracy
# (mean of the true-positive and true-negative rates).
roc_auc_score(y_true, y_pred)

0.5679444444444445

In [None]:
# Side-by-side table of the final prediction and the ground truth per test image.
df1 = pd.DataFrame({"y_pred": y_pred})
df2 = pd.DataFrame({"y_true": y_true})
df = pd.concat((df1, df2), axis="columns")
df

In [None]:
# Persist the final predictions, then read them straight back so the following
# cells work from the cached file.
df.to_pickle("final_predictions.pkl")
df = pd.read_pickle('final_predictions.pkl')

In [None]:
# Restore the plain Python lists from the cached table and print them in full.
y_true = df['y_true'].tolist()
y_pred = df['y_pred'].tolist()
print(y_true, y_pred, sep='\n')

[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 