# Experiment: Model with 3 classes (clean, damaged, dirt)

## Unbalanced classes

In [None]:
#load images
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import layers, optimizers, callbacks
import matplotlib.pyplot as plt 
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from PIL import Image
import os
%matplotlib inline

In [None]:
batch_size = 32
current_path = os.getcwd()
print(current_path)

train_data_dir = os.path.join(current_path,"..","raw_data/exp2-training-data-balanced/")
test_data_dir = os.path.join(current_path,"..","raw_data/exp2-val-data-balanced/")

# Training set: one sub-directory per class. label_mode="int" yields integer
# class indices, which is what SparseCategoricalCrossentropy expects.
# Shuffling stays enabled (Keras default) and is seeded for reproducibility.
train_data = image_dataset_from_directory(
    train_data_dir,
    label_mode="int",
    seed=123,
    image_size=(225, 225),
    batch_size=batch_size,
)

# Validation set: shuffling is turned OFF on purpose. A shuffled dataset is
# re-shuffled on every iteration, so arrays collected in two separate
# `for x, y in val_data` loops (labels in one, predictions in another) would
# not line up sample by sample. With shuffle=False the files are read in
# sorted order on every pass; `seed` is then unused but kept for symmetry.
val_data = image_dataset_from_directory(
    test_data_dir,
    label_mode="int",
    seed=123,
    shuffle=False,
    image_size=(225, 225),
    batch_size=batch_size,
)

#train_data = train_ds[0]
#val_data = train_ds[1]

In [None]:
# Class names reported by the training dataset; the position of a name in this
# list is the integer label used for that class. Reused later as `target_names`.
classes1 = train_data.class_names
classes1

In [None]:
# Class names reported by the validation dataset. Displayed so they can be
# compared by eye with `classes1`; they should be identical and in the same order.
classes2 = val_data.class_names
classes2

In [None]:
import os
# Inspect the "dirt" class folder of the validation data.
# NOTE: this rebinds `test_data_dir` to the class sub-folder.
current_path = os.getcwd()
print(current_path)
test_data_dir = os.path.join(current_path,"..", "raw_data/exp2-val-data-balanced/dirt")
print(test_data_dir)

# Directory whose regular files are listed below
directory = test_data_dir

# Keep plain files only (sub-directories are filtered out)
file_list = list(
    filter(
        lambda name: os.path.isfile(os.path.join(directory, name)),
        os.listdir(directory),
    )
)

# One file name per line
for file in file_list:
    print(file)


### Model

In [None]:
# We'll use a model with the same convolutional layers, but we'll add Augmentation layers before that
#
# Architecture: rescaling -> augmentation -> 4 x (Conv2D + MaxPooling2D)
# -> Flatten -> Dense(64) -> Dropout -> Dense(3, softmax).

model_multi = Sequential()

# Scale inputs by 1/255 (assumes 8-bit pixel values, giving a [0, 1] range).
# Being the first layer, this is the only place where the input shape is declared.
model_multi.add(layers.Rescaling(1./255, input_shape = (225, 225, 3)))

# Data Augmentation Layers

model_multi.add(layers.RandomFlip("horizontal"))
model_multi.add(layers.RandomZoom(0.1))
model_multi.add(layers.RandomTranslation(0.2, 0.2))
model_multi.add(layers.RandomRotation(0.1))

# Convolutional Layers
#
# Four identical conv/pool stages that differ only in the number of filters.
# `input_shape` is deliberately NOT repeated here: Sequential only reads it
# from the first layer, so passing it to inner layers has no effect and is
# misleading.
for n_filters in (32, 32, 64, 128):
    model_multi.add(
        layers.Conv2D(
            filters=n_filters,
            kernel_size=(3, 3),
            activation="relu",
            padding="same",
        )
    )
    model_multi.add(layers.MaxPooling2D(pool_size=(2, 2), padding="same"))

# Classification head
model_multi.add(layers.Flatten())
model_multi.add(layers.Dense(64, activation="relu"))
model_multi.add(layers.Dropout(0.5))

# One softmax output per class (3 classes)
model_multi.add(layers.Dense(3, activation="softmax"))

In [None]:
# Print the layer stack of model_multi (layer types, output shapes, parameter counts).
model_multi.summary()

In [None]:
# Adam optimiser with a learning rate of 0.001
adam = optimizers.Adam(learning_rate=1e-3)

# Sparse categorical cross-entropy: the datasets deliver integer class labels
# (label_mode="int"), not one-hot vectors.
model_multi.compile(
    optimizer=adam,
    loss=SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

In [None]:
# Base name of the checkpoint file written during training
MODEL = "model_multiclass_clean_damage_dirt_230904_self_balanced"

# Save the model to "<MODEL>.h5" whenever val_loss improves
modelCheckpoint = callbacks.ModelCheckpoint(
    MODEL + ".h5",
    monitor="val_loss",
    verbose=0,
    save_best_only=True,
)

# Multiply the learning rate by 0.1 after 3 epochs without val_loss improvement
LRreducer = callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.1,
    patience=3,
    verbose=1,
    min_lr=0,
)

# Stop after 10 epochs without val_loss improvement and restore the best weights
EarlyStopper = callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    verbose=0,
    restore_best_weights=True,
)

In [None]:
#new weights - remove snow images, and macro pic of bird
# Number of images per class
clean, damage, dirt = 133, 133, 133
total = sum((clean, damage, dirt))
print(f'total: {total}')

# Each class weight is the inverse of that class's share of the data set
clean_weight, damage_weight, dirt_weight = (
    (count / total) ** -1 for count in (clean, damage, dirt)
)

print(f"clean weight: {clean_weight}")
print(f"damage weight: {damage_weight}")
print(f"dirt weight: {dirt_weight}")

In [None]:
%%time
#'clean', 'damage', 'dirt'
# Train for at most 30 epochs; the callbacks checkpoint the best model, lower
# the learning rate on plateaus and stop early when val_loss stops improving.
#
# The class weights come from the inverse-frequency values computed in the
# previous cell instead of being typed in by hand, so they cannot drift out
# of sync when the per-class image counts change.
history_multi = model_multi.fit(
    train_data,
    epochs=30,
    validation_data=val_data,
    callbacks=[modelCheckpoint, LRreducer, EarlyStopper],
    class_weight={
        0: clean_weight,
        1: damage_weight,
        2: dirt_weight,
    },
)


In [None]:
#improvement = (0.51 - (1/3))/(1/3)
#improvement

In [None]:
def plot_history(history):
    """Draw training and validation curves for loss and accuracy.

    Creates a 1x2 figure: the left panel shows "loss"/"val_loss", the right
    panel shows "accuracy"/"val_accuracy", both against ``history.epoch``.

    Args:
        history: Object exposing ``epoch`` and a ``history`` mapping with the
            keys "loss", "val_loss", "accuracy" and "val_accuracy" (for
            example the value returned by ``model.fit``).
    """
    fig, axes = plt.subplots(1, 2, figsize=(15,5))
    epochs = history.epoch
    curves = history.history

    # (metric key, short name used in the legend) for each panel, left to right
    panels = (("loss", "loss"), ("accuracy", "acc"))
    for axis, (metric, short_name) in zip(axes, panels):
        axis.set_title(metric)
        axis.plot(epochs, curves[metric], label=f"Train {short_name}")
        axis.plot(epochs, curves[f"val_{metric}"], label=f"Validation {short_name}")
        axis.legend()

In [None]:
# Plot the loss and accuracy curves recorded while fitting model_multi.
plot_history(history_multi)

### Helper functions

In [None]:
def find_index_of_max_element(input_list):
    """Return the position of the largest element of ``input_list``.

    When the maximum occurs several times, the first position is returned.
    Raises ``ValueError`` for an empty sequence.
    """
    positions = range(len(input_list))
    # max() keeps the first candidate among equals, i.e. the lowest index
    return max(positions, key=lambda pos: input_list[pos])

In [None]:
def predictImage_multiclass(url, model, class_names=None):
    """Classify a single image and print the per-class probabilities.

    Args:
        url: Despite the name, an image object accepted by ``img_to_array``
            (the commented examples in this notebook pass the result of
            ``load_img(..., target_size=(225, 225))``), not a path or URL.
        model: Object whose ``predict`` method returns one row of class
            probabilities per input image.
        class_names: Class names ordered by class index. Defaults to
            ``classes1``, the names reported by the training dataset.
            Previously the function read a global ``class_names`` that is
            only defined much later in the notebook, so calling it in
            notebook order raised ``NameError``.

    Returns:
        The raw output of ``model.predict`` for the one-image batch.
    """
    if class_names is None:
        class_names = classes1

    # Add the batch dimension expected by the model: (1, 225, 225, 3)
    img = img_to_array(url)
    img = img.reshape((-1, 225, 225, 3))
    res = model.predict(img)

    names_of_classes = class_names
    print("Probabilities: ")
    print(f"{names_of_classes}")
    print(f"{res[0]}")
    print(f"Result: {names_of_classes[find_index_of_max_element(res[0].tolist())]}")
    return res

In [None]:
# img_clean_1 = load_img(f"raw_data/training-data/clean/Cleaan (4).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_clean_1,model_multi)
# plt.imshow(img_clean_1);

In [None]:
# img_clean_1 = load_img(f"raw_data/training-data/clean/Cleaan (12).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_clean_1,model_multi)
# plt.imshow(img_clean_1);

In [None]:
# img_clean_1 = load_img(f"raw_data/training-data/clean/Cleaan (21).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_clean_1,model_multi)
# plt.imshow(img_clean_1);

### Snow

In [None]:
# img_snow = load_img(f"raw_data/training-data/solar/Solar (3).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_snow,model_multi)
# plt.imshow(img_snow);

In [None]:
# img_snow = load_img(f"raw_data/training-data/solar/Solar (12).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_snow,model_multi)
# plt.imshow(img_snow);

In [None]:
# img_snow = load_img(f"raw_data/training-data/solar/Solar (33).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_snow,model_multi)
# plt.imshow(img_snow);

### Damage

In [None]:
# img_electrical = load_img(f"raw_data/training-data/electrical/Electrical (29).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_electrical,model_multi)
# plt.imshow(img_electrical);

In [None]:
# img_physical = load_img(f"raw_data/training-data/physical_damaged/Physical-damaged (37).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_physical,model_multi)
# plt.imshow(img_physical);

In [None]:
# img_physical = load_img(f"raw_data/training-data-exp/damage/Electrical (23).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_physical,model_multi)
# plt.imshow(img_physical);

### Bird or dust


In [None]:
## pics bird 37, 55, 59 --> macro pics of bird droppings --> predicted as the wrong class
# pic 65
# img_physical = load_img(f"raw_data/training-data/bird/Bird (65).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_physical,model_multi)
# plt.imshow(img_physical);

In [None]:
# img_physical = load_img(f"raw_data/training-data/bird/Bird (5).jpeg", target_size=(225, 225))
# predictImage_multiclass(img_physical,model_multi)
# plt.imshow(img_physical);

In [None]:
#img_physical = load_img(f"raw_data/training-data-exp/dirt/Bird (47).jpeg", target_size=(225, 225))
#predictImage_multiclass(img_physical,model_multi)
#plt.imshow(img_physical);

In [None]:
def predictImage_multiclass(url, model, class_names=None):
    """Classify a single image and print the per-class probabilities.

    Args:
        url: Despite the name, an image object accepted by ``img_to_array``
            (e.g. the result of ``load_img(..., target_size=(225, 225))``),
            not a path or URL.
        model: Object whose ``predict`` method returns one row of class
            probabilities per input image.
        class_names: Optional class names ordered by class index. When
            omitted, the original hard-coded names
            ``['clean', 'damaged', 'dirty']`` are used.
            NOTE(review): confirm these match the order of the dataset's
            ``class_names``; pass that list explicitly to be safe.

    Returns:
        The raw output of ``model.predict`` for the one-image batch.
    """
    if class_names is None:
        names_of_classes = ['clean','damaged','dirty']
    else:
        names_of_classes = list(class_names)

    # Add the batch dimension expected by the model: (1, 225, 225, 3)
    img = img_to_array(url)
    img = img.reshape((-1, 225, 225, 3))
    res = model.predict(img)

    print("Probabilities: ")
    print(f"{names_of_classes}")
    print(f"{res[0]}")
    print(f"Result: {names_of_classes[find_index_of_max_element(res[0].tolist())]}")
    return res

In [None]:
import numpy as np
from tensorflow.math import confusion_matrix
from sklearn.metrics import classification_report


# Collect predicted and true class indices for the whole validation set.
# Per-batch results are gathered in lists and concatenated once at the end
# instead of re-allocating the full arrays on every batch.
batch_predictions = []
batch_labels = []
for x, y in val_data:
    # Images and labels of one batch are consumed together, so predictions
    # and labels stay aligned within this loop.
    class_probabilities = model_multi.predict(x)
    batch_predictions.append(np.argmax(class_probabilities, axis=-1))
    # The labels are already integer class indices (label_mode="int");
    # applying argmax to them, as the earlier version did, is meaningless.
    batch_labels.append(y.numpy())

predictions = np.concatenate(batch_predictions)
labels = np.concatenate(batch_labels)

report = classification_report(labels, predictions, target_names=classes1)

print(report)

In [None]:
# Confusion matrix of the validation predictions (rows: true class, columns: predicted class).
confusion_matrix(labels=labels, predictions=predictions).numpy()


## Test data

In [None]:
# batch_size = 8
# test_data_dir = "raw_data/exp-test-data/"

# test_data = image_dataset_from_directory(
#   test_data_dir,
#   #labels = "inferred",
#   label_mode = "int",  
#   seed=123,
#   image_size=(225, 225),
#   batch_size=batch_size,
# ) 


In [None]:
# predictions = np.array([])
# print(predictions.shape)
# labels =  np.array([])
# for x, y in test_data:
    
#   pred = model_multi.predict(x)
#   pred = np.argmax(pred, axis=-1)
#   print(f"pred {pred.shape}")
#   predictions = np.concatenate([predictions, pred ])
#   label =  np.argmax(y.numpy(), axis=-1)
#   print(f"label shape {label.shape}")
#   labels = np.concatenate([labels, y])  #np.argmax(y.numpy()

# report = classification_report(labels, predictions, target_names=classes1)

# print(report)

In [None]:
# NOTE(review): the test-set cells above are commented out, so `labels` and
# `predictions` here are presumably still the validation-set arrays — confirm
# before reading this matrix as a test-set result.
confusion_matrix(labels=labels, predictions=predictions).numpy()

In [None]:
import numpy as np
from tensorflow.math import confusion_matrix
from sklearn.metrics import classification_report

In [None]:
# Single pass over the validation set collecting, for every image, the full
# probability vector and the true class index.
batch_probabilities = []
batch_labels = []
for x, y in val_data:
    batch_probabilities.append(model_multi.predict(x))
    batch_labels.append(y.numpy())

# One row per image, one column per class. Stacking the batches along axis 0
# keeps this layout; the earlier np.append call flattened everything into a
# 1-D vector three times longer than `labels`.
reshaped_array = np.concatenate(batch_probabilities, axis=0)
labels = np.concatenate(batch_labels)
predictions = np.argmax(reshaped_array, axis=-1)

# Probability of the "clean" class only, one value per image.
# Assumes "clean" is class index 0 — TODO confirm against `classes1`.
probabilities_clean = reshaped_array[:, 0]

report = classification_report(labels, predictions, target_names=classes1)

print(report)


In [None]:
from sklearn.metrics import precision_recall_curve
import pandas as pd

In [None]:
# precision_recall_curve handles binary targets only, so the curve is computed
# one-vs-rest for the "clean" class: target 1 = clean, 0 = any other class,
# scored with the predicted probability of "clean".
# Assumes "clean" is class index 0 — TODO confirm against `classes1`.
clean_index = 0
is_clean = (labels == clean_index).astype(int)
clean_scores = reshaped_array[:, clean_index]

precision, recall, threshold = precision_recall_curve(is_clean, clean_scores)

# precision and recall have one more entry than threshold; drop the last
# point so the three columns have equal length.
scores = pd.DataFrame({'threshold':threshold,
                       'precision': precision[:-1],
                       'recall':recall[:-1]}) # Store in a dataframe
scores

In [None]:
# Precision-recall curve: recall on the x axis, precision on the y axis.
plt.plot(scores['recall'],scores['precision'])
plt.ylabel('precision')
plt.xlabel('recall')

In [None]:
# Smallest threshold among the rows whose precision is at least 0.4
new_threshold = scores.loc[scores['precision'] >= 0.4, 'threshold'].min()
new_threshold

In [None]:
def custom_predict(X, custom_threshold, model):
    """Turn multi-class probabilities into a 0/1 decision with a custom cut-off.

    For every row of ``model.predict(X)`` the value ``1 - (last column)`` is
    computed, i.e. the combined probability of all classes except the last
    one. The result is 1 where that value is strictly greater than
    ``custom_threshold`` and 0 otherwise.

    Args:
        X: Batch of inputs passed unchanged to ``model.predict``.
        custom_threshold: Cut-off applied to ``1 - P(last class)``.
        model: Object whose ``predict`` returns a 2-D array of per-class
            probabilities (one row per sample).

    Returns:
        1-D integer array of 0/1 decisions, one per sample.
    """
    class_probabilities = model.predict(X)
    # Complement of the last class's probability
    not_last_class = 1 - class_probabilities[:, -1]
    above_threshold = not_last_class > custom_threshold
    return above_threshold.astype(int)

# Thresholded 0/1 predictions for the whole validation set.
# NOTE: val_data is iterated again here. The result only lines up with a
# `labels` array collected in an earlier loop if the dataset yields its
# samples in the same order on every pass (i.e. it is not shuffled).
thresholded_batches = []
for x, y in val_data:
    updated_preds = custom_predict(X=x, custom_threshold=0.6, model=model_multi) # Update prediction
    # custom_predict already returns a 1-D array. np.squeeze is avoided on
    # purpose: it would turn a final batch holding a single image into a 0-d
    # array, which np.concatenate rejects.
    thresholded_batches.append(np.atleast_1d(updated_preds))

pred_thres = np.concatenate(thresholded_batches)

In [None]:
# Number of collected true labels; should equal the number of thresholded predictions below.
labels.shape

In [None]:
# Number of thresholded predictions; should equal the number of labels above.
pred_thres.shape

In [None]:
# custom_predict outputs 1 when an image is judged NOT to belong to the last
# class and 0 otherwise. The 3-class labels are reduced to the same binary
# target before comparing; using the raw class indices would treat
# "prediction 1" as "class 1", which it is not.
last_class = len(classes1) - 1
confusion_matrix(labels=(labels != last_class).astype(int), predictions=pred_thres).numpy()

In [None]:
# custom_predict outputs 1 when an image is judged NOT to belong to the last
# class and 0 otherwise, so the 3-class labels are reduced to that same
# binary target. The two report rows are named after the last class in
# `classes1` (0) and its complement (1); `class_names` is not used because it
# is only defined later in the notebook.
last_class = len(classes1) - 1
binary_labels = (labels != last_class).astype(int)
binary_names = [classes1[last_class], f"not {classes1[last_class]}"]

# labels=[0, 1] keeps both rows even if one outcome never occurs
report = classification_report(
    binary_labels,
    pred_thres,
    labels=[0, 1],
    target_names=binary_names,
)

print(report)

In [None]:
from sklearn.metrics import classification_report
import numpy as np
print(f'labels shape: {labels.shape}')

# One decision threshold per class, in class-index order
thresholds = [0.8, 0.8, 0.8]

# Per-class probabilities, one row per validation image
probs = reshaped_array
print(len(reshaped_array[:,0]))

# Independent one-vs-rest decision per class: 1 where the class probability
# reaches that class's threshold. A row can hold several 1s or none.
adjusted_predictions = (probs >= np.asarray(thresholds)).astype(int)

# One-hot encoding of the true labels, same shape as adjusted_predictions
labels_matrix = (
    np.asarray(labels)[:, None] == np.arange(probs.shape[1])
).astype(int)
print(f'adjust pred: {adjusted_predictions.shape}')

# Real class names from the training dataset instead of generic placeholders
class_names = list(classes1)

print(f'labels_matrix shape: {labels_matrix.shape}')

# Binary classification report (class vs. rest) for each class
for c, class_name in enumerate(class_names):
    print(f"Metrics for {class_name}:")
    class_labels = labels_matrix[:, c]  # 1 where the image truly belongs to this class
    class_predictions = adjusted_predictions[:, c]  # 1 where the threshold was reached
    report = classification_report(class_labels, class_predictions)
    print(report)


In [None]:
# Quick check of argmax on a toy probability vector: the largest value (0.4)
# sits at index 1.
prediction_array = np.array([0.3, 0.4,0.3])
pred = prediction_array.argmax(axis=-1)
pred