In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#from tensorflow.keras.utils import to_categorical

In [2]:
train_dataset_path = "/Users/zhiyumo/Downloads/Alzheimer's MRI/Train"
test_dataset_path = "/Users/zhiyumo/Downloads/Alzheimer's MRI/Test"

In [3]:
image_size = (180, 180)
batch_size = 32

In [4]:
datagen = ImageDataGenerator(rescale=1.0/255.0)

# Load the training data
train_generator = datagen.flow_from_directory(
    train_dataset_path,
    target_size=image_size,
    batch_size=batch_size, 
    color_mode='grayscale',    
    class_mode='categorical',    # For multi-class classification
    shuffle=False                 
)

# Load the testing data
test_generator = datagen.flow_from_directory(
    test_dataset_path,
    target_size=image_size,
    batch_size=batch_size,
    color_mode='grayscale',
    class_mode='categorical',
    shuffle=False                
)


Found 33984 images belonging to 3 classes.
Found 6388 images belonging to 3 classes.


In [5]:
num_classes = len(train_generator.class_indices)

# Class labels mapping (useful for reference later)
class_labels = {v: k for k, v in train_generator.class_indices.items()}

# Number of training samples and testing samples
num_train_samples = train_generator.samples
num_test_samples = test_generator.samples

class_counts = dict(zip(train_generator.class_indices.keys(), np.bincount(train_generator.classes)))
print("Class counts:", class_counts)

Class counts: {'Mild': 8960, 'Moderate': 15424, 'Non': 9600}


In [6]:
X_train = []
y_train = []

# Iterate over the generator and concatenate data and labels
for images, labels in train_generator:
    X_train.append(images)
    y_train.append(labels)
    if len(X_train) * batch_size >= len(train_generator.filenames):
        break

# Concatenate the data into numpy arrays
X_train = np.concatenate(X_train, axis=0)
y_train = np.concatenate(y_train, axis=0)

# Flatten the images into 1D vectors
X_train = X_train.reshape(X_train.shape[0], -1)

In [7]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

X_train shape: (33984, 32400)
y_train shape: (33984, 3)


In [8]:
X_test = []
y_test = []

# Iterate over the generator and concatenate data and labels
for images, labels in test_generator:
    X_test.append(images)
    y_test.append(labels)
    if len(X_test) * batch_size >= len(test_generator.filenames):
        break

# Concatenate the data into numpy arrays
X_test = np.concatenate(X_test, axis=0)
y_test = np.concatenate(y_test, axis=0)

# Flatten the images into 1D vectors
X_test = X_test.reshape(X_test.shape[0], -1)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs')

# Train the model
logreg.fit(X_train, np.argmax(y_train, axis=1))

# Make predictions on the training data
y_train_pred = logreg.predict(X_train)

# Calculate accuracy on the training data
train_accuracy = accuracy_score(np.argmax(y_train, axis=1), y_train_pred)
print("Training Accuracy:", train_accuracy)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training Accuracy: 0.7033309792843692


In [None]:
from sklearn.metrics import f1_score
train_f1_score = f1_score(np.argmax(y_train, axis=1), y_train_pred, average='weighted')

In [None]:
train_f1_score

0.6972335007787336

In [None]:
y_test_pred = logreg.predict(X_test)

# Calculate F1-score on the test data
test_f1_score = f1_score(np.argmax(y_test,axis=1), y_test_pred, average='weighted')
print("Test F1-score:", test_f1_score)

Test F1-score: 0.7511591421638016


In [None]:
import joblib
model_filename = "logistic_regression_model.pkl"
joblib.dump(logreg, model_filename)

['logistic_regression_model.pkl']

In [9]:
adversarial_features = np.concatenate((X_train, X_test))
adversarial_labels = np.concatenate((np.zeros(len(X_train)), np.ones(len(X_test))))


In [10]:
from sklearn.ensemble import RandomForestClassifier
adv_model = RandomForestClassifier(n_estimators=100, random_state=42)
adv_model.fit(adversarial_features, adversarial_labels)

In [11]:
adversarial_weights = adv_model.predict_proba(X_train)[:, 1]
adversarial_weights = 1.0 - adversarial_weights  


In [12]:
train_adv_labels = adv_model.predict(X_train)
test_adv_labels = adv_model.predict(X_test)

In [14]:
adversarial_weights = 1.0 - train_adv_labels