<a href="https://colab.research.google.com/github/sarahayek98/Alzheimer-detection-using-3-machine-learning-algorithms-/blob/main/KNN1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import packages

In [1]:
from google.colab import drive
drive.mount('/content/drive')


import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from skimage.io import imread
from skimage.transform import resize

Mounted at /content/drive


Mount Google Drive to access uploaded files

In [2]:
# Set the path to your dataset
path_to_dataset = '/content/drive/MyDrive/Colab Notebooks/Alzheimer Dataset/Alzheimer Dataset'

Efficient Dataset Loading and Preprocessing Function

In [3]:
# Function to load and preprocess the dataset
def load_and_preprocess_dataset(folder):
    data = []
    labels = []

    for class_folder in os.listdir(os.path.join(path_to_dataset, folder)):
        class_path = os.path.join(path_to_dataset, folder, class_folder)

        for image_file in os.listdir(class_path):
            image_path = os.path.join(class_path, image_file)
            image = imread(image_path, as_gray=True)
            image = resize(image, (100, 100))  # Adjust the size as needed
            flattened_image = image.flatten()

            data.append(flattened_image)
            labels.append(class_folder)

    return np.array(data), np.array(labels)

In [4]:
# Load and preprocess the training dataset
train_data, train_labels = load_and_preprocess_dataset('train')


In [5]:
# Load and preprocess the testing dataset
test_data, test_labels = load_and_preprocess_dataset('test')

In [6]:
# Encode labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
test_labels_encoded = label_encoder.transform(test_labels)


In [7]:
# Normalize pixel values
scaler = StandardScaler()
train_data_normalized = scaler.fit_transform(train_data)
test_data_normalized = scaler.transform(test_data)


In [8]:
# Split the data into training and testing sets
X_train, X_val, y_train, y_val = train_test_split(
    train_data_normalized, train_labels_encoded, test_size=0.2, random_state=42
)


In [9]:
# Train the KNN model
knn_model = KNeighborsClassifier(n_neighbors=5)  # You can experiment with different values of k
knn_model.fit(X_train, y_train)


Evaluating Model Performance on Validation Set






In [10]:
from skimage.transform import rotate

def augment_data(data, labels):
    augmented_data = []
    augmented_labels = []

    for i in range(len(data)):
        augmented_data.append(data[i])
        augmented_labels.append(labels[i])

        # Add rotated versions of the images
        rotated_image = rotate(data[i].reshape(100, 100), angle=45, mode='reflect').flatten()
        augmented_data.append(rotated_image)
        augmented_labels.append(labels[i])

    return np.array(augmented_data), np.array(augmented_labels)

# Augment the training data
augmented_train_data, augmented_train_labels = augment_data(X_train, y_train)

# Train the KNN model with the augmented data
knn_model.fit(augmented_train_data, augmented_train_labels)

In [11]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter tuning for KNN
param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_k = grid_search.best_params_['n_neighbors']

# Train the KNN model with the best hyperparameters
knn_model = KNeighborsClassifier(n_neighbors=best_k)
knn_model.fit(X_train, y_train)

In [12]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
from skimage.transform import rotate
import numpy as np

# Function to perform data augmentation
def augment_data(data, labels):
    augmented_data = []
    augmented_labels = []

    for i in range(len(data)):
        augmented_data.append(data[i])
        augmented_labels.append(labels[i])

        # Add rotated versions of the images
        rotated_image = rotate(data[i].reshape(100, 100), angle=45, mode='reflect').flatten()
        augmented_data.append(rotated_image)
        augmented_labels.append(labels[i])

    return np.array(augmented_data), np.array(augmented_labels)

# Augment the training data
augmented_train_data, augmented_train_labels = augment_data(X_train, y_train)

# Hyperparameter tuning for KNN
param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=stratified_kfold)
grid_search.fit(augmented_train_data, augmented_train_labels)

# Get the best hyperparameters
best_k = grid_search.best_params_['n_neighbors']

# Train the KNN model with the best hyperparameters
knn_model = KNeighborsClassifier(n_neighbors=best_k)
knn_model.fit(augmented_train_data, augmented_train_labels)

# Predict on the validation set
y_pred = knn_model.predict(X_val)

# Evaluate the model
validation_accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy after Hyperparameter Tuning and Data Augmentation: {validation_accuracy}")

# Predict on the test set
test_predictions = knn_model.predict(test_data_normalized)

# Evaluate the model on the test set
test_accuracy = accuracy_score(test_labels_encoded, test_predictions)
print(f"Test Accuracy after Hyperparameter Tuning and Data Augmentation: {test_accuracy}")

Validation Accuracy after Hyperparameter Tuning and Data Augmentation: 0.933378196500673
Test Accuracy after Hyperparameter Tuning and Data Augmentation: 0.5168100078186083


In [13]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Train a Support Vector Machine (SVM) model
svm_model = SVC(kernel='linear', C=1.0)
svm_model.fit(train_data_normalized, train_labels_encoded)

# Evaluate SVM model on test set
svm_test_predictions = svm_model.predict(test_data_normalized)
svm_test_accuracy = accuracy_score(test_labels_encoded, svm_test_predictions)
print(f"SVM Test Accuracy: {svm_test_accuracy}")

# Train a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(train_data_normalized, train_labels_encoded)

# Evaluate Random Forest model on test set
rf_test_predictions = rf_model.predict(test_data_normalized)
rf_test_accuracy = accuracy_score(test_labels_encoded, rf_test_predictions)
print(f"Random Forest Test Accuracy: {rf_test_accuracy}")


SVM Test Accuracy: 0.5027365129007036
Random Forest Test Accuracy: 0.6286161063330727


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Define the hyperparameters grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize the Random Forest classifier
rf_classifier = RandomForestClassifier(random_state=42)

# Perform grid search cross-validation
grid_search = GridSearchCV(rf_classifier, param_grid, cv=5, n_jobs=-1)
grid_search.fit(train_data_normalized, train_labels_encoded)

# Get the best hyperparameters
best_params = grid_search.best_params_
best_rf_classifier = grid_search.best_estimator_

# Evaluate the best model on the test set
best_rf_test_predictions = best_rf_classifier.predict(test_data_normalized)
best_rf_test_accuracy = accuracy_score(test_labels_encoded, best_rf_test_predictions)
print(f"Best Random Forest Test Accuracy: {best_rf_test_accuracy}")
print(f"Best Hyperparameters: {best_params}")


In [None]:
# Predict on the validation set
y_pred = knn_model.predict(X_val)

# Evaluate the model
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy}")

Assessing Model Performance on Test Set

In [None]:
# Predict on the test set
test_predictions = knn_model.predict(test_data_normalized)

# Evaluate the model on the test set
test_accuracy = accuracy_score(test_labels_encoded, test_predictions)
print(f"Test Accuracy: {test_accuracy}")

Optimizing KNN Hyperparameters with GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report

# Hyperparameter tuning for KNN using GridSearchCV
param_grid = {'n_neighbors': [3, 5, 7, 9]}
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_k = grid_search.best_params_['n_neighbors']

Training KNN Model with Optimized Hyperparameters and Assessing Performance

In [None]:
# Train the KNN model with the best hyperparameters
knn_model = KNeighborsClassifier(n_neighbors=best_k)
knn_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = knn_model.predict(X_val)

# Evaluate the model with the best hyperparameters
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy with Best k ({best_k}): {accuracy}")

# Predict on the test set
test_predictions = knn_model.predict(test_data_normalized)

# Evaluate the model on the test set
test_accuracy = accuracy_score(test_labels_encoded, test_predictions)
print(f"Test Accuracy: {test_accuracy}")


Analyzing Model Performance with Confusion Matrix and Classification Report

In [None]:
# Display confusion matrix and classification report
conf_matrix = confusion_matrix(test_labels_encoded, test_predictions)
class_report = classification_report(test_labels_encoded, test_predictions, target_names=label_encoder.classes_)

print("Confusion Matrix:")
print(conf_matrix)

print("\nClassification Report:")
print(class_report)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Assuming y_test and y_pred are the true labels and predicted labels, respectively
conf_matrix_knn = confusion_matrix(test_labels_encoded, test_predictions)

# Calculate True Positive (TP), True Negative (TN), False Positive (FP), False Negative (FN)
TP = conf_matrix_knn[1, 1]
TN = conf_matrix_knn[0, 0]
FP = conf_matrix_knn[0, 1]
FN = conf_matrix_knn[1, 0]

# Calculate sensitivity (recall)
sensitivity_knn = TP / (TP + FN)

# Calculate specificity
specificity_knn = TN / (TN + FP)

# Display the results
print(f'Sensitivity (Recall): {sensitivity_knn:.2f}')
print(f'Specificity: {specificity_knn:.2f}')

# Plot the confusion matrix using seaborn
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix_knn, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.title('KNN Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()


In [None]:
from skimage.io import imread
from skimage.transform import resize

# Load and preprocess the new image
new_image_path = '/content/drive/MyDrive/example data/example data/moderateDem0.jpg'
new_image = imread(new_image_path, as_gray=True)
new_image_resized = resize(new_image, (100, 100)).flatten()  # Resize and flatten the image

# Normalize the pixel values using the same scaler used during training
new_image_normalized = scaler.transform([new_image_resized])

# Predict the class using the trained KNN model
new_image_prediction = knn_model.predict(new_image_normalized)

# Decode the predicted label
predicted_class = label_encoder.classes_[new_image_prediction[0]]

print(f'The predicted class for the new image is: {predicted_class}')


In [None]:
from skimage.io import imread
from skimage.transform import resize

# Load and preprocess the new image
new_image_path = '/content/drive/MyDrive/example data/example data/MRI.jpg'
new_image = imread(new_image_path, as_gray=True)
new_image_resized = resize(new_image, (100, 100)).flatten()  # Resize and flatten the image

# Normalize the pixel values using the same scaler used during training
new_image_normalized = scaler.transform([new_image_resized])

# Predict the class using the trained KNN model
new_image_prediction = knn_model.predict(new_image_normalized)

# Decode the predicted label
predicted_class = label_encoder.classes_[new_image_prediction[0]]

print(f'The predicted class for the new image is: {predicted_class}')


Class Prediction for New Image Using Trained Model

In [None]:
from skimage.io import imread
from skimage.transform import resize

# Load and preprocess the new image
new_image_path = '/content/drive/MyDrive/Colab Notebooks/Alzheimer Dataset/Alzheimer Dataset/train/NonDemented/nonDem2.jpg'
new_image = imread(new_image_path, as_gray=True)
new_image_resized = resize(new_image, (100, 100)).flatten()  # Resize and flatten the image

# Normalize the pixel values using the same scaler used during training
new_image_normalized = scaler.transform([new_image_resized])

# Predict the class using the trained KNN model
new_image_prediction = knn_model.predict(new_image_normalized)

# Decode the predicted label
predicted_class = label_encoder.classes_[new_image_prediction[0]]

print(f'The predicted class for the new image is: {predicted_class}')


Classifying New Image Using Trained KNN Model

In [None]:
from skimage.io import imread
from skimage.transform import resize

# Load and preprocess the new image
new_image_path = '/content/drive/MyDrive/example data/example data/nd.png'
new_image = imread(new_image_path, as_gray=True)
new_image_resized = resize(new_image, (100, 100)).flatten()  # Resize and flatten the image

# Normalize the pixel values using the same scaler used during training
new_image_normalized = scaler.transform([new_image_resized])

# Predict the class using the trained KNN model
new_image_prediction = knn_model.predict(new_image_normalized)

# Decode the predicted label
predicted_class = label_encoder.classes_[new_image_prediction[0]]

print(f'The predicted class for the new image is: {predicted_class}')


Data Loading and Preprocessing Function

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
from skimage.io import imread
from skimage.transform import resize
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

# Function to load and preprocess the dataset
def load_and_preprocess_dataset(folder):
    data = []
    labels = []

    for class_folder in os.listdir(os.path.join(path_to_dataset, folder)):
        class_path = os.path.join(path_to_dataset, folder, class_folder)

        for image_file in os.listdir(class_path):
            image_path = os.path.join(class_path, image_file)
            image = imread(image_path, as_gray=True)
            image = resize(image, (100, 100))  # Adjust the size as needed
            flattened_image = image.flatten()

            data.append(flattened_image)
            labels.append(class_folder)

    return np.array(data), np.array(labels)

# Load and preprocess the dataset
path_to_dataset = '/content/drive/MyDrive/Colab Notebooks/Alzheimer Dataset/Alzheimer Dataset'
train_data, train_labels = load_and_preprocess_dataset('train')
test_data, test_labels = load_and_preprocess_dataset('test')

# Encode labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
test_labels_encoded = label_encoder.transform(test_labels)

# Normalize pixel values
scaler = StandardScaler()
train_data_normalized = scaler.fit_transform(train_data)
test_data_normalized = scaler.transform(test_data)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    train_data_normalized, train_labels_encoded, test_size=0.2, random_state=42
)

# Apply data augmentation to the training set
augmented_data = []
augmented_labels = []

for i in range(len(X_train)):
    augmented_data.append(X_train[i])
    augmented_labels.append(y_train[i])

    # Add rotated versions of the images
    rotated_image = np.rot90(X_train[i].reshape(100, 100)).flatten()
    augmented_data.append(rotated_image)
    augmented_labels.append(y_train[i])

# Create a KNN model
knn_model = KNeighborsClassifier(n_neighbors=5)

# Fit the KNN model on the training data
knn_model.fit(np.array(augmented_data), np.array(augmented_labels))

# Use StratifiedKFold for cross-validation
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation
cv_scores = cross_val_score(knn_model, train_data_normalized, train_labels_encoded, cv=stratified_kfold)

# Print the cross-validation scores
print("Cross-Validation Scores:", cv_scores)
print("Mean Accuracy:", np.mean(cv_scores))
print("Standard Deviation of Accuracy:", np.std(cv_scores))

# Predict on the test set
test_predictions = knn_model.predict(test_data_normalized)

# Evaluate the model on the test set
test_accuracy = accuracy_score(test_labels_encoded, test_predictions)
print(f"Test Accuracy: {test_accuracy}")

# Compare training and test accuracies
training_accuracy = np.mean(cv_scores)
print(f"Training Accuracy: {training_accuracy}")

# Check for overfitting by comparing training and test accuracies
overfitting_indicator = training_accuracy - test_accuracy
if overfitting_indicator < 0:
    print("The model is potentially overfitting.")
else:
    print("The model is generalizing well to the test set.")


Image Classification Using Trained KNN Model

In [None]:
from skimage.io import imread
from skimage.transform import resize

# Load and preprocess the new image
new_image_path = '/content/drive/MyDrive/example data/example data/nd.png'
new_image = imread(new_image_path, as_gray=True)
new_image_resized = resize(new_image, (100, 100)).flatten()  # Resize and flatten the image

# Normalize the pixel values using the same scaler used during training
new_image_normalized = scaler.transform([new_image_resized])

# Predict the class using the trained KNN model
new_image_prediction = knn_model.predict(new_image_normalized)

# Decode the predicted label
predicted_class = label_encoder.classes_[new_image_prediction[0]]

print(f'The predicted class for the new image is: {predicted_class}')


Image Classification with Trained KNN Model

In [None]:
from skimage.io import imread
from skimage.transform import resize

# Load and preprocess the new image
new_image_path = '/content/drive/MyDrive/example data/example data/vmd.png'
new_image = imread(new_image_path, as_gray=True)
new_image_resized = resize(new_image, (100, 100)).flatten()  # Resize and flatten the image

# Normalize the pixel values using the same scaler used during training
new_image_normalized = scaler.transform([new_image_resized])

# Predict the class using the trained KNN model
new_image_prediction = knn_model.predict(new_image_normalized)

# Decode the predicted label
predicted_class_index = new_image_prediction[0]
predicted_class = label_encoder.classes_[predicted_class_index]

print(f'The predicted class for the new image is: {predicted_class}')




Enhancing KNN Performance with GridSearchCV and Data Augmentation

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from skimage.transform import rotate

# Function to perform data augmentation
def augment_data(data, labels):
    augmented_data = []
    augmented_labels = []

    for i in range(len(data)):
        augmented_data.append(data[i])
        augmented_labels.append(labels[i])

        # Add rotated versions of the images
        rotated_image = rotate(data[i].reshape(100, 100), angle=45, mode='reflect').flatten()
        augmented_data.append(rotated_image)
        augmented_labels.append(labels[i])

    return np.array(augmented_data), np.array(augmented_labels)

# Hyperparameter tuning for KNN
param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=stratified_kfold)
grid_search.fit(train_data_normalized, train_labels_encoded)

# Get the best hyperparameters
best_k = grid_search.best_params_['n_neighbors']

# Perform data augmentation
augmented_train_data, augmented_train_labels = augment_data(X_train, y_train)

# Train the KNN model with the best hyperparameters
knn_model = KNeighborsClassifier(n_neighbors=best_k)
knn_model.fit(augmented_train_data, augmented_train_labels)

# Predict on the validation set
y_pred = knn_model.predict(X_val)

# Evaluate the model
validation_accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy after Hyperparameter Tuning and Data Augmentation: {validation_accuracy}")

# Predict on the test set
test_predictions = knn_model.predict(test_data_normalized)

# Evaluate the model on the test set
test_accuracy = accuracy_score(test_labels_encoded, test_predictions)
print(f"Test Accuracy after Hyperparameter Tuning and Data Augmentation: {test_accuracy}")


In [None]:
from skimage.io import imread
from skimage.transform import resize

# Load and preprocess the new image
new_image_path = '/content/drive/MyDrive/example data/example data/vmd.png'
new_image = imread(new_image_path, as_gray=True)
new_image_resized = resize(new_image, (100, 100)).flatten()  # Resize and flatten the image

# Normalize the pixel values using the same scaler used during training
new_image_normalized = scaler.transform([new_image_resized])

# Predict the class using the trained KNN model
new_image_prediction = knn_model.predict(new_image_normalized)

# Decode the predicted label
predicted_class = label_encoder.classes_[new_image_prediction[0]]

print(f'The predicted class for the new image is: {predicted_class}')


In [None]:
from skimage.io import imread
from skimage.transform import resize

# Load and preprocess the new image
new_image_path = '/content/drive/MyDrive/example data/example data/md.png'
new_image = imread(new_image_path, as_gray=True)
new_image_resized = resize(new_image, (100, 100)).flatten()  # Resize and flatten the image

# Normalize the pixel values using the same scaler used during training
new_image_normalized = scaler.transform([new_image_resized])

# Predict the class using the trained KNN model
new_image_prediction = knn_model.predict(new_image_normalized)

# Decode the predicted label
predicted_class = label_encoder.classes_[new_image_prediction[0]]

print(f'The predicted class for the new image is: {predicted_class}')


In [None]:
import pickle

# Save the trained model
with open('knn_model.pkl', 'wb') as file:
    pickle.dump(knn_model, file)

# Save the label encoder
with open('label_encoder.pkl', 'wb') as file:
    pickle.dump(label_encoder, file)

# Save the scaler
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

print("Model, label encoder, and scaler saved successfully.")

In [None]:
import os
import zipfile

def create_zip_file():
    # Create a list of files to download
    files_to_download = ['knn_model.pkl', 'label_encoder.pkl', 'scaler.pkl']

    # Create a zip file with the specified files
    zip_filename = 'saved_model.zip'
    with zipfile.ZipFile(zip_filename, 'w') as zip_file:
        for file in files_to_download:
            if os.path.exists(file):
                zip_file.write(file)
            else:
                print(f'File {file} not found.')

    # Serve the zip file for download
    with open(zip_filename, 'rb') as zip_file:
        zip_data = zip_file.read()

    # Set the appropriate headers for file download
    headers = {
        'Content-Disposition': f'attachment; filename="{zip_filename}"',
        'Content-Type': 'application/zip',
        'Content-Length': len(zip_data)
    }

    # Return the zip file data for download
    return zip_data, headers

In [None]:
from joblib import dump

# Save the trained KNN model
dump(knn_model, 'knn_model.joblib')


In [None]:
from google.colab import files

# Download the saved model file
files.download('knn_model.joblib')


In [None]:
from joblib import dump

# Save the label encoder
dump(label_encoder, 'label_encoder.joblib')


In [None]:
from google.colab import files

# Download the saved label encoder file
files.download('label_encoder.joblib')
