In [None]:
import glob
import os
import tensorflow as tf
from PIL import Image
from tqdm import tqdm
from joblib import Parallel, delayed
import numpy as np

2023-04-27 14:48:47.738216: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
os.listdir("/gpfsscratch/rech/zpf/uyf36me/validation_patches/")

In [3]:
main_path="/gpfsscratch/rech/zpf/uyf36me/validation_patches/"

label_0=os.path.join(main_path,"label_0/**.png") #basic
label_1=os.path.join(main_path,"label_1/**.png") #proof
label_2=os.path.join(main_path,"label_2/**.png") #theorem
label_3=os.path.join(main_path,"label_3/**.png") #overlap

vals=[len(glob.glob(label_0)),len(glob.glob(label_1)),len(glob.glob(label_2)),len(glob.glob(label_3))]
print(vals)
print("ratios in the data --")

for val in vals:
    print(val/sum(vals))
    

[314504, 125526, 85803, 3470]
ratios in the data --
0.5941851831559617
0.2371533885128178
0.1621056370358755
0.006555791295345011


In [4]:
import cv2
def white_padding_and_scaling(default_shape,file_loc,overwrite=False):
    """
    2- adds white padding wherever necessary
    3- takes bitwise NOT transformation this esentially inverts the image sets black -0 as background while 
    255 is set as foreground
    4- if overwrite true then makes a new file with '_t' suffix 
    """
    try:
        img_array=cv2.imread(file_loc)
        shape=img_array.shape
    except:
        print("error in white padding--",file_loc)
        return

    padding_height=0
    padding_width=0
    crop_width=False
    crop_height=False

    if(shape[0]<=default_shape[0]): #if img is small in width then we need padding then 
        padding_height=default_shape[0]-shape[0]
    else:
        crop_height=True
        padding_height=0
    if(shape[1]<=default_shape[1]):
        padding_width=default_shape[1]-shape[1]
    else:
        crop_width=True
        padding_width=0
    if(padding_width>0 or padding_height>0):
        colour_fill=(255,255,255) #colour to pad this is white
        new_array=cv2.copyMakeBorder(img_array, 0,padding_height , 0, padding_width, cv2.BORDER_CONSTANT,value=colour_fill)
    else:
        new_array=img_array[0:default_shape[0], 0:default_shape[1]]

    if(crop_width==True):
        new_array=new_array[0:default_shape[0], 0:default_shape[1]]
    if(crop_height==True):
        new_array=new_array[0:default_shape[0], 0:default_shape[1]]


    new_array=cv2.bitwise_not(new_array)
    if(overwrite==True):
        new_name=file_loc.replace(".png","_t.png")
        #print(new_name)
        cv2.imwrite(new_name,new_array)
        os.remove(file_loc)
        return

    return new_array
       
            

In [5]:
#generate dataset


path="/gpfsscratch/rech/zpf/uyf36me/validation_patches/**/**.png"

png_files=glob.glob(path)

            
filtered_files=list(filter(lambda x: not x.endswith("_t.png"),png_files))
print(len(filtered_files))

bad_files=list(filter(lambda x:  x.endswith("_t_t.png"),png_files))
print(len(bad_files))


print("--running transformations")
image_shapes=(400,1400)
n_jobs=-2
#res=Parallel(n_jobs=n_jobs,backend="threading",verbose=2)(delayed(white_padding_and_scaling)
                                           #(default_shape=image_shapes,file_loc=fname,overwrite=True) for fname in tqdm(filtered_files))


0
0
--running transformations


In [6]:
strategy=tf.distribute.MirroredStrategy()
devices=strategy.num_replicas_in_sync

print("no of devices: {}".format(strategy.num_replicas_in_sync))

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
no of devices: 4


In [7]:
image_shapes=(400,1400)
batch_per_gpu=4
batch_size=batch_per_gpu*devices

sub_sample_validation_dataset=tf.keras.preprocessing.image_dataset_from_directory(
    directory="/gpfsscratch/rech/zpf/uyf36me/validation_patches",
    image_size=image_shapes,
    batch_size=batch_size,
    seed=2,
    labels='inferred',
    label_mode='categorical',
    shuffle=False #<<<<<<<<change this when training
    )

sub_sample_validation_dataset=sub_sample_validation_dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

Found 529303 files belonging to 4 classes.


In [9]:
labels=None
for x, y in tqdm(sub_sample_validation_dataset):
    if(labels is None):
        labels=y
    else:
        labels=np.concatenate([labels,y])
        
#ground truth        
y_true=np.argmax(labels,axis=1)

100%|██████████| 33082/33082 [16:57<00:00, 32.51it/s]


In [10]:
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from tensorflow_addons.optimizers import AdamW ,LAMB

def evaluate_f1_for_tf_model(model_path,validation_dataset,y_true,show_confusion_report=True):
    
    #460h cpu for 28K images
    # 4 A100 can do the job in

    class_names=["Basic","Proof","Theorem","Overlap"]

    # Wrap the loaded model inside the strategy scope to distribute it across the GPUs
    with strategy.scope():
        model = tf.keras.models.load_model(model_path)

    #show model arch
    print(model.summary())
    
    
    #generating predictions
    predictions=model.predict(validation_dataset)
    
    #generating predictions
    y_pred = np.argmax(predictions, axis=1)
    
    if(show_confusion_report is True):
        print('Confusion Matrix')
        print(classification_report(y_true, y_pred, target_names=class_names))
        
    return f1_score(y_true,y_pred,average="macro")
    
#"EfficientNetB0.h5","EfficientNetB0_max.h5","EfficientNetB0_avg.h5",
        #"EfficientNetB4_avg.h5","efficientnetv2s_avg.h5",
models=["new_models/r_efficientnetv2m_avg9.h5"]

for model in models:
    _f1_score=evaluate_f1_for_tf_model(model_path=model,validation_dataset=sub_sample_validation_dataset,y_true=y_true)
    print(f"f1 score of the {model} is {_f1_score}")
    


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

2023-04-27 09:09:17.055429: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:784] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 529303
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
attr {
  key: "replicate_on_split"
  value {
    b: false
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_STRING
        }
      }
    }
  }
}



Confusion Matrix


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       Basic       0.69      0.91      0.79    314504
       Proof       0.61      0.22      0.33    125526
     Theorem       0.78      0.62      0.69     85803
     Overlap       0.00      0.00      0.00      3470

    accuracy                           0.69    529303
   macro avg       0.52      0.44      0.45    529303
weighted avg       0.68      0.69      0.66    529303

f1 score of the new_models/r_efficientnetv2m_avg9.h5 is 0.44971657487054234


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from tensorflow_addons.optimizers import AdamW ,LAMB
#460h cpu for 28K images
# 4 A100 can do the job in

class_names=["Basic","Proof","Theorem","Overlap"]
model_path="efficientnetv2s_avg.h5"

# Wrap the loaded model inside the strategy scope to distribute it across the GPUs
with strategy.scope():
    model = tf.keras.models.load_model(model_path)

print(model.summary())


In [None]:
predictions=model.predict(sub_sample_validation_dataset)

In [None]:
predictions.shape

In [None]:
y_pred.shape

In [None]:
y_true=np.argmax(labels,axis=1)
y_true.shape

In [26]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

y_pred = np.argmax(predictions, axis=1)
print('Confusion Matrix')
print(confusion_matrix(y_true, y_pred))
print('Classification Report')
target_names = ['Basic', 'Proofs', 'Theorems','Overlaps']
print(classification_report(y_true, y_pred, target_names=target_names))

Confusion Matrix
[[123291   1248  34516     16]
 [ 47168    987  16129      0]
 [  1812     35   5401     18]
 [   955      6    222      1]]
Classification Report
              precision    recall  f1-score   support

       Basic       0.71      0.78      0.74    159071
      Proofs       0.43      0.02      0.03     64284
    Theorems       0.10      0.74      0.17      7266
    Overlaps       0.03      0.00      0.00      1184

    accuracy                           0.56    231805
   macro avg       0.32      0.38      0.24    231805
weighted avg       0.61      0.56      0.52    231805



In [None]:
#evaluate model on larger dataset to see performance difference
#use F1 score to measure the impact
#decide the pooling part
#do big arch lead to bad generalization
#flops vs accuracy