<a href="https://colab.research.google.com/github/tednological/AIClubDiscordBot/blob/main/BrainTumors.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split

# --- Dataset configuration ---------------------------------------------------
# Point these two paths at your own image folders before running this cell.
tumor_dir = 'path/to/your/tumor_images'          # brain scans that show a tumor
non_tumor_dir = 'path/to/your/non_tumor_images'  # brain scans without a tumor

# Every image is resized to this (width, height) so all samples share one shape.
target_size = (128, 128)

# Parallel accumulators filled by process_images(): data[i] holds the pixel
# array of sample i and labels[i] holds its class label.
data, labels = [], []

def process_images(directory, label):
    """
    Loads every image found directly inside `directory` into the global lists.

    Each file whose name ends in .png, .jpg or .jpeg (case-insensitive) is
    opened, converted to greyscale (Pillow mode 'L'), resized to the global
    `target_size` and appended to `data` as a NumPy array; `label` is appended
    to `labels` at the same position.

    Files are visited in sorted name order, so the order of the collected
    samples does not depend on the filesystem's arbitrary listing order.

    Args:
        directory: Path of the folder to scan (sub-folders are not searched).
        label: Class label recorded for every image loaded from `directory`.

    Raises:
        OSError: If `directory` cannot be listed (for example, it does not
            exist). Failures on individual files are reported with print()
            and skipped rather than raised.
    """
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # dataset order (and any seeded split made from it) reproducible.
    for filename in sorted(os.listdir(directory)):
        # Only consider common image file extensions.
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        img_path = os.path.join(directory, filename)
        try:
            # The context manager releases the file handle as soon as the
            # pixels have been read instead of leaving that to the garbage
            # collector.
            with Image.open(img_path) as img:
                # Greyscale first, then resize, then convert to a NumPy array.
                img_array = np.array(img.convert('L').resize(target_size))
        except Exception as e:
            # Best-effort loading: one unreadable file must not abort the run.
            print(f"Could not process {img_path}: {e}")
            continue

        # Append to both lists together so they always stay index-aligned.
        data.append(img_array)
        labels.append(label)

# Load both classes into the shared lists: 1 = tumor, 0 = no tumor.
process_images(tumor_dir, 1)
process_images(non_tumor_dir, 0)

# One row per image: the pixel array in 'image', its class in 'label'.
df = pd.DataFrame(dict(image=data, label=labels))

# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# tumor / no-tumor ratio the same in both parts, and the fixed seed makes the
# split repeatable.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label'],
    random_state=42,
)

# Report how many samples ended up in each part.
print(f"Number of training samples: {len(train_df)}")
print(f"Number of testing samples: {len(test_df)}")
