In [20]:
import os
from PIL import Image
import numpy as np
import random
import shutil

In [21]:
# define directories for train and validation sets
root_dir = '/projectnb/ds340/projects/Samuolis_Parikh_Image_Data/'

train_dir = root_dir +"resized_images/train"
validation_dir = root_dir + "resized_images/validation"

train_target = train_dir +"/baldeagle"
train_nontarget = train_dir +"/nonbaldeagle"

val_target = validation_dir +"/baldeagle"
val_nontarget = validation_dir +"/nonbaldeagle"

In [22]:
def load_images_from_folders(folder1, folder2, img_size = (224,224)):
    images = []
    labels = []
    
    # Load images from the first folder
    for filename in os.listdir(folder1):
        img_path = os.path.join(folder1, filename)
        try:
            with Image.open(img_path) as img:
                img = img.convert('RGB')
                img = img.resize(img_size)
                images.append(np.array(img))  # Convert image to array
                labels.append(1)  # Class label for folder1
        except Exception as e:
            print(f"Could not load image {filename} from {folder1}: {e}")

    # Load images from the second folder
    for filename in os.listdir(folder2):
        img_path = os.path.join(folder2, filename)
        try:
            with Image.open(img_path) as img:
                img = img.convert('RGB')
                img = img.resize(img_size)
                images.append(np.array(img))
                labels.append(0)  # Class label for folder2
        except Exception as e:
            print(f"Could not load image {filename} from {folder2}: {e}")

    # convert lists to NumPy arrays
    
    images = np.array(images)
    labels = np.array(labels)
    
    return images, labels

images_train, label_train = load_images_from_folders(train_target, train_nontarget)
images_val, label_val = load_images_from_folders(val_target, val_nontarget)

print(images_train.shape, label_train.shape, type(images_train))
print(images_train.min(), images_train.max())  # expected: 0 255, later will normalize

(5200, 224, 224, 3) (5200,) <class 'numpy.ndarray'>
0 255


In [23]:
def change_labels(labels, percentage):
    random.seed(340)
    label_one_indices = np.where(labels == 1)[0]
    
    n = int(len(label_one_indices) * (percentage / 100))
    
    indices_to_change = np.random.choice(label_one_indices, size=n, replace=False)
    
    labels[indices_to_change] = 0
    
    return labels, indices_to_change

# for example, change 20% of label 1s to label 0
percentage = 0  
# changed_indices
# label_train, changed_indices = change_labels(label_train, percentage)

In [24]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
tf.keras.utils.set_random_seed(340)
#tf.config.experimental.enable_op_determinism()

In [25]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
]
epochs = 15
# restore best weights make the model be the one that was the best instead of last one
# patience changed from 4-->3

In [26]:
# reload Images
images_train, label_train = load_images_from_folders(train_target, train_nontarget)
percentage = 20  
# changed_indices
label_train, changed_indices = change_labels(label_train, percentage)

In [27]:
#Remake Models
#mutliple inputs taken from chat and https://pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/
# we have full confidence if it is a 1, the lower the number the more confident you are in the 0 class
# .999999 vs .00004

from tensorflow.keras.layers import Input, Dropout, Concatenate
confidence_init = np.array([.35 if x<.5 else 1 for x in label_train]).reshape(5200,1)

base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

for layer in base_model.layers:
    layer.trainable = True


# add new fully connected layers for binary classification
image_input = base_model.input
x = base_model.output
x = Flatten()(x)

additional_input = Input(shape=(1,), name="additional_input") #shape is just 1 feature for the confidence 
y = Dense(64, activation='relu')(additional_input) 
y = Dropout(0.1)(y) # when .5, the additional input was too powerful, the prediction vals were always either to close to 1 or 0, we try to make the additional input less important than the images

combined = Concatenate()([x, y]) # 2 channels
combined = Dense(256, activation='relu')(combined)
combined = Dense(1, activation='sigmoid')(combined)  # sigmoid for binary 

model = Model(inputs=[image_input, additional_input], outputs=combined)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'], jit_compile=False)

# train model
#val_confidence = np.array([.6 if x<.5 else 1 for x in label_val]).reshape(200,1)

history = model.fit(
    [images_train, confidence_init],
    label_train,
    batch_size = 32,
    epochs=epochs,  # adjust this for more epochs as needed
    validation_data=([images_val, label_val.reshape(-1,1)], label_val),
    callbacks = callbacks
)

Epoch 1/15
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 137ms/step - accuracy: 0.7777 - loss: 49.7898 - val_accuracy: 1.0000 - val_loss: 0.0458
Epoch 2/15
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 136ms/step - accuracy: 0.9982 - loss: 0.0537 - val_accuracy: 1.0000 - val_loss: 0.0044
Epoch 3/15
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 136ms/step - accuracy: 1.0000 - loss: 0.0097 - val_accuracy: 1.0000 - val_loss: 0.0013
Epoch 4/15
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 136ms/step - accuracy: 1.0000 - loss: 0.0043 - val_accuracy: 1.0000 - val_loss: 7.4632e-04
Epoch 5/15
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 137ms/step - accuracy: 1.0000 - loss: 0.0023 - val_accuracy: 1.0000 - val_loss: 3.6104e-04
Epoch 6/15
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 136ms/step - accuracy: 1.0000 - loss: 0.0013 - val_accuracy: 1.0000 - val_loss: 3.8361e-

In [28]:
#accracy isnt what we care about rn, we care more about the confidence score and actual classigications
preds = model.predict([images_train, confidence_init])

[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 41ms/step


In [29]:
changed_indices.sort()
changed_indices

array([   0,    7,   10,   15,   18,   25,   33,   34,   36,   43,   45,
         46,   47,   51,   53,   55,   59,   71,   72,   73,   74,   82,
         91,   93,   95,  105,  106,  107,  120,  123,  124,  130,  134,
        138,  145,  146,  148,  149,  150,  158,  160,  169,  193,  201,
        204,  207,  224,  228,  235,  239,  245,  249,  255,  260,  266,
        269,  273,  283,  290,  292,  293,  298,  304,  306,  307,  311,
        313,  315,  322,  326,  329,  336,  340,  341,  349,  351,  352,
        353,  357,  362,  364,  368,  370,  377,  378,  385,  386,  387,
        388,  392,  400,  403,  409,  410,  414,  417,  423,  428,  432,
        436,  438,  455,  458,  460,  465,  470,  477,  480,  482,  486,
        499,  511,  530,  533,  534,  537,  546,  547,  553,  564,  566,
        572,  573,  574,  578,  598,  601,  604,  619,  623,  634,  656,
        665,  666,  672,  674,  675,  677,  684,  687,  688,  689,  698,
        710,  720,  723,  724,  731,  734,  738,  7

In [30]:
np.where(label_train > .5)[0].shape #260 eagles missing, instead more eagles are getting flipped to noneagles

(1040,)

In [37]:
preds
preds.reshape(5200,)
print(np.where(preds.reshape(5200,) >.5)[0].shape)
#find indices of those 17 indexes that were incorrectly flipped and flip them to correct label?
misclassified_indices = np.where((label_train> .5) & (preds <= .5))[0]
print(misclassified_indices)

(1040,)
[   0    0    0 ... 5199 5199 5199]


In [13]:
print(preds[changed_indices].mean()) #preds for eagles labeled as 0, could we try flipping back top/highest value n percent of these back into eagles?
print(preds[1300:].mean()) #noneagles labeled as noneagles
from scipy.stats import ttest_ind
tstat, pval = ttest_ind(preds[changed_indices], preds[1300:])
print(pval)

0.00012634016
6.2458086e-05
[1.55298927e-113]


In [17]:


# Assuming:
# - true_labels: True binary labels (0 for "Not Eagle", 1 for "Eagle")
# - preds: Model predictions (binary, 0 for "Not Eagle", 1 for "Eagle")
# - confidences: Confidence scores for class 1 ("Eagle") from the model output

# 1. Identify misclassified labels (true label = 1 but predicted as 0)
misclassified_indices = np.where((label_train== 1) & (preds == 0))[0]

# 2. Extract confidence scores for these misclassified labels
misclassified_confidences = confidence_init[misclassified_indices]

# 3. Rank these misclassified labels by confidence in descending order
ranked_indices = misclassified_indices[np.argsort(-misclassified_confidences)]  # Sort by confidence

# 4. Define percentage or count for flipping
# Use a count instead of a percentage since the total misclassifications are small
max_flips = 20  # Define how many misclassified labels you want to flip at most
flip_indices = ranked_indices[:max_flips]

# 5. Flip the selected indices back to the correct label
preds[flip_indices] = 1

# Optional: Output for debugging or validation
print(f"Total misclassified labels: {len(misclassified_indices)}")
print(f"Number of labels flipped: {len(flip_indices)}")
print(f"Indices flipped: {flip_indices}")


Total misclassified labels: 0
Number of labels flipped: 0
Indices flipped: []


In [35]:
import numpy as np
from scipy.stats import ttest_ind

# Assume `scores` is an array of prediction scores for all samples
# Higher scores indicate stronger confidence in the prediction
# `true_labels` is the ground truth, and `preds` are the predictions
misclassified_indices = np.where(preds != label_train)[0]
correctly_classified_indices = np.where(preds == label_train)[0]

# Scores for misclassified and correctly classified samples
misclassified_scores = confidence_init[misclassified_indices]
correctly_classified_scores = confidence_init[correctly_classified_indices]

# Perform a two-sample t-test
t_stat, p_value = ttest_ind(misclassified_scores, correctly_classified_scores, equal_var=False)

# Set a significance level (e.g., alpha = 0.05)
alpha = 0.05
if p_value < alpha:
    # Find the minimum score among misclassified samples that is significant
    threshold = np.min(misclassified_scores)
else:
    # No statistically significant difference; do not flip any labels
    threshold = float('inf')

# Select indices to flip based on the threshold
flip_indices = misclassified_indices[misclassified_scores >= threshold]

# Flip the selected indices back to the correct label
preds[flip_indices] = true_labels[flip_indices]

# Optional: Output for debugging or validation
print(f"Total misclassified labels: {len(misclassified_indices)}")
print(f"Number of labels flipped: {len(flip_indices)}")
print(f"Indices flipped: {flip_indices}")
print(f"T-statistic: {t_stat}, P-value: {p_value}, Threshold: {threshold}")


  t_stat, p_value = ttest_ind(misclassified_scores, correctly_classified_scores, equal_var=False)


IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed