<a href="https://colab.research.google.com/github/omtriguero/Artificial-Intelligence-Projects/blob/main/Final_project_COMP_605_001_Introduction_to_Artificial_Intelligence.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**FINAL PROJECT-COMP-605-001 Introduction to Artificial Intelligence.**

**X-Ray AI Analysis System**

  *Group members:*
*   Oscar Mauricio Trigueros





**Step 1: Select a real-world dataset**

>• Find an interesting dataset:

Chest X-ray database: https://www.kaggle.com/code/mohamedgobara/chest-x-ray-classification-highest-accuracy

**Step 2: Load the libraries.**


In [None]:
import os
import zipfile

import cv2
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

**Step 3: Define a function to load the train and test images from their class folders.**


In [None]:
# Function to load images from folder
def load_images_from_folder(folder_path):
    """Load every readable image under the NORMAL and PNEUMONIA subfolders.

    Args:
        folder_path: Directory expected to contain ``NORMAL`` and/or
            ``PNEUMONIA`` subdirectories holding the image files.

    Returns:
        A ``(images, labels)`` tuple of two equal-length lists. ``images``
        holds the value ``cv2.imread`` returned for each readable file and
        ``labels`` holds 0 for NORMAL and 1 for PNEUMONIA. NORMAL entries
        come first; within a class, entries follow sorted filename order.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist.
        ValueError: If no readable image was found in any subfolder.
    """
    # Class folder name -> integer label. The dict's insertion order is also
    # the order in which the class folders are visited.
    label_mapping = {'NORMAL': 0, 'PNEUMONIA': 1}

    # Check if the folder exists
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"Folder '{folder_path}' not found.")

    images = []
    labels = []

    for subfolder, label in label_mapping.items():
        subfolder_path = os.path.join(folder_path, subfolder)

        # A missing class folder is reported and skipped rather than treated
        # as fatal. isdir (instead of exists) also skips a stray regular file
        # with that name, which would otherwise make os.listdir raise.
        if not os.path.isdir(subfolder_path):
            print(f"Subfolder '{subfolder}' not found in '{folder_path}'. Skipping...")
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and therefore any later seeded split) reproducible.
        for filename in sorted(os.listdir(subfolder_path)):
            img = cv2.imread(os.path.join(subfolder_path, filename))

            # imread yields None for files it cannot decode; skip those.
            if img is not None:
                images.append(img)
                labels.append(label)

    # labels grows in lockstep with images, so one emptiness check covers both.
    if not images:
        raise ValueError(f"No images found in '{folder_path}'")

    return images, labels

**Step 4: Define variables for image extraction.**

In [None]:
# Path to dataset ZIP file in "Records"
zip_file_path = '/content/chest_xray_a.zip'  # Replace with the actual path in "Records"

# Extraction directory
extract_dir = '/content/chest_xray_a/'

# Create the extraction directory; exist_ok=True makes this a no-op when the
# directory is already there, so the cell can be re-run safely
os.makedirs(extract_dir, exist_ok=True)

# Extract the ZIP file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

# Path to train and test folders. The paths assume the archive unpacks into a
# nested 'chest_xray_a' directory containing 'train' and 'test'.
train_folder_path = os.path.join(extract_dir, 'chest_xray_a', 'train')
test_folder_path = os.path.join(extract_dir, 'chest_xray_a', 'test')

# Load images and labels from train and test folders
train_images, train_labels = load_images_from_folder(train_folder_path)
test_images, test_labels = load_images_from_folder(test_folder_path)


**Step 5: Preprocess images and resize them to a fixed size.**

In [None]:

def preprocess_and_resize_images(images, target_size=(128, 128)):
    """Resize every image to a fixed size and scale its values by 1/255.

    Args:
        images: Iterable of image arrays accepted by ``cv2.resize``.
        target_size: Size tuple forwarded to ``cv2.resize`` (OpenCV reads it
            as ``(width, height)``). Defaults to ``(128, 128)``.

    Returns:
        A float32 NumPy array stacking the resized images, with every value
        divided by 255.0.
    """
    resized = [cv2.resize(img, target_size) for img in images]
    # Build the batch as float32 instead of letting the division promote it
    # to float64: this halves the memory of the stacked array, and float32 is
    # the default float type Keras works in.
    return np.asarray(resized, dtype=np.float32) / 255.0

# Preprocess and resize train and test images.
# NOTE: each call rebinds the name of its own input, so the images as loaded
# from disk are no longer reachable afterwards, and running this cell a second
# time would divide the already-scaled values by 255 again. Re-run the loading
# cell first if this cell has to be executed again.
train_images = preprocess_and_resize_images(train_images)
test_images = preprocess_and_resize_images(test_images)

# Convert the label lists to NumPy arrays
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)


**Step 6: Check that images and labels are loaded properly and split the data.**

In [None]:

print(f"Number of train images: {len(train_images)}")
print(f"Number of train labels: {len(train_labels)}")
print(f"Number of test images: {len(test_images)}")
print(f"Number of test labels: {len(test_labels)}")

# Fail fast if images and labels ever get out of step
assert len(train_images) == len(train_labels), "train images/labels length mismatch"
assert len(test_images) == len(test_labels), "test images/labels length mismatch"

# Split dataset into training and validation sets.
# stratify keeps the NORMAL/PNEUMONIA proportion the same in both parts, so the
# validation metrics are not skewed by an unlucky draw from imbalanced classes.
X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels,
                                                  test_size=0.2,
                                                  random_state=42,
                                                  stratify=train_labels)


Number of train images: 2310
Number of train labels: 2310
Number of test images: 392
Number of test labels: 392


**Step 7: Defining the CNN model**

In [None]:
# Create the CNN model
def create_cnn_model(input_shape):
    """Assemble the (uncompiled) binary-classification CNN.

    The network is three Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, followed by Flatten, Dropout(0.5),
    Dense(128, relu), Dropout(0.5) and a single sigmoid output unit.

    Args:
        input_shape: Forwarded as ``input_shape`` to the first Conv2D layer.

    Returns:
        The ``Sequential`` model built from those layers.
    """
    stack = []

    # Convolutional stages: only the first convolution is given the input shape.
    for stage, filters in enumerate((32, 64, 128)):
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        stack.append(Conv2D(filters, (3, 3), activation='relu', **first_layer_kwargs))
        stack.append(MaxPooling2D((2, 2)))

    # Classifier head, with dropout before and after the hidden dense layer.
    stack.append(Flatten())
    stack.append(Dropout(0.5))
    stack.append(Dense(128, activation='relu'))
    stack.append(Dropout(0.5))
    stack.append(Dense(1, activation='sigmoid'))  # Sigmoid activation for binary classification

    return Sequential(stack)

**Step 8: Train and evaluate the model.**



In [None]:
# Seed Python, NumPy and TensorFlow before the model is built so that weight
# initialisation and batch shuffling are repeatable on Restart & Run All.
# (Some GPU kernels can still introduce small run-to-run differences.)
SEED = 42
set_random_seed(SEED)

# Training hyper-parameters
EPOCHS = 7
BATCH_SIZE = 32

# Create the CNN model
input_shape = (128, 128, 3)  # Input shape based on image size and channels
model = create_cnn_model(input_shape)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model, reporting validation loss/accuracy after every epoch
history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))

# Evaluate the model on the test set
loss, accuracy = model.evaluate(test_images, test_labels)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Test Loss: 0.5689267516136169
Test Accuracy: 0.8494898080825806


In [None]:
# Predictions
y_pred_probs = model.predict(test_images)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()  # Convert probabilities to binary predictions

# Display predicted and true labels
print("Predicted Labels:", y_pred)
print("True Labels:", test_labels)

# Classification Report.
# labels/target_names make the rows read NORMAL / PNEUMONIA instead of 0 / 1
# (same mapping the loader uses: NORMAL -> 0, PNEUMONIA -> 1); passing labels
# explicitly keeps both rows even if one class is absent from the test set.
print("Classification Report:")
print(classification_report(test_labels, y_pred,
                            labels=[0, 1],
                            target_names=['NORMAL', 'PNEUMONIA']))



Predicted Labels: [1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 0 1 1 0 0 1 1 0 1 1 1 0 0 0
 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 1 0 1 0 1 1 1
 0 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 0 0 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
True Labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 

**Sources:**


*   https://www.kaggle.com/code/aviadl/starter-chest-x-ray-images-pneumonia-d498de7c-3
*   https://www.kaggle.com/code/aviadl/

*   https://www.kaggle.com/code/kylewillemse345/starter-chest-x-ray-images-pneumonia-9ad0b8f3-9
*   https://www.kaggle.com/code/phantrngnhn/chest-x-ray-pneumonia-diagnosis-with-cnn







