In [130]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import kagglehub
from torch.utils.data import Dataset,DataLoader
import os
from torchvision.datasets import ImageFolder
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
from torchvision.transforms import transforms
from PIL import Image

In [131]:
download_path = kagglehub.dataset_download("navoneel/brain-mri-images-for-brain-tumor-detection")
img_size = (256, 256)
output_csv_file = "brain_mri_pixels.csv"

In [132]:
def img_to_csv(download_path, img_size, output_csv):

    data = []
    valid_class = ['no', 'yes']

    temp_class = os.listdir(download_path)
    class_labels = []

    for item in temp_class:
        if item in valid_class:
            class_labels.append(item)

    print(f"Classes to process: {class_labels}")        

    # This loop gathers data from ALL class folders
    for label_name in class_labels:
        class_path = os.path.join(download_path, label_name)
        
        # This check is good, but you can remove it since you pre-filter `class_labels`
        # if not os.path.isdir(class_path):
        #     continue

        label = 1 if label_name.lower() == 'yes' else 0
        print(f"\nProcessing class '{label_name}' with assigned label: {label}")
    
        img_files = os.listdir(class_path)

        for img_name in tqdm(img_files, desc=f"Label: {label_name}"):
            img_path = os.path.join(class_path, img_name)

            try:
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

                if img is None:
                    print(f"Warning: Could not read image {img_path}, skipping.")
                    continue

                resized_img = cv2.resize(img, img_size)
                flattened_img = resized_img.flatten()
                row = np.insert(flattened_img, 0, label)
                data.append(row)

            except Exception as e:
                print(f"Error processing image {img_path}: {e}")    

    # --- MOVED CODE BLOCK ---
    # This block now runs AFTER the loop above has finished for ALL classes.
    
    # Check if any data was collected before proceeding
    if not data:
        print("Error: No data was collected. Cannot create DataFrame.")
        return None # Return None or an empty DataFrame

    # 1. Define columns (only need to do this once)
    num_pixels = img_size[0] * img_size[1]
    columns = ['label'] + [f"pixel_{i+1}" for i in range(num_pixels)] 

    # 2. Create DataFrame from the complete 'data' list
    df = pd.DataFrame(data, columns=columns)

    # 3. Shuffle it
    df = df.sample(frac=1).reset_index(drop=True)

    try:
        print(f"Saving DataFrame to {output_csv}")
        # 4. Save it
        df.to_csv(output_csv, index=False)

    except Exception as e:
        print(f"Error saving file to {output_csv}: {e}")

    # 5. Return the complete DataFrame
    return df

In [133]:
df = img_to_csv(download_path, img_size, output_csv_file)

Classes to process: ['no', 'yes']

Processing class 'no' with assigned label: 0


Label: no: 100%|██████████| 98/98 [00:00<00:00, 946.20it/s]
Label: no: 100%|██████████| 98/98 [00:00<00:00, 946.20it/s]



Processing class 'yes' with assigned label: 1


Label: yes: 100%|██████████| 155/155 [00:00<00:00, 1188.19it/s]



Saving DataFrame to brain_mri_pixels.csv


In [135]:
df.shape

(253, 65537)

In [136]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"using device: {device}")

using device: cuda


In [137]:
torch.manual_seed(42)

<torch._C.Generator at 0x1f82b33f750>

In [139]:
x = df.iloc[:,1:].values
y = df.iloc[:,0].values

In [140]:
x_train,x_test,y_train,y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [141]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [142]:
custom_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor()
])

In [None]:
class CustomDataset(Dataset):
    def __init__(self,features, labels, transform):

        super().__init__()
        self.features = features
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.features)

    def __getitem__(self, index):

        img = self.features[index].reshape(256,256)

        img = np.stack([img]*3, axis = -1)

        img = img.astype(np.uint8)

        img = Image.fromarray(img)

        img = self.transform(img)

        return img, torch.tensor(self.labels[index], dtype = torch.long)


In [144]:
train_dataset = CustomDataset(x_train, y_train, transform = custom_transform)
test_dataset = CustomDataset(x_test, y_test, transform = custom_transform)

In [145]:
len(train_dataset)

202

In [146]:
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True, pin_memory = True)
test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = True, pin_memory = True)

In [147]:
class MyNN(nn.Module):
    def __init__(self, input_features):
        super().__init__()

        self.features = nn.Sequential(
            nn.Conv2d(input_features, 32, kernel_size = 3, padding = 'same'),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size = 2, stride = 2),

            nn.Conv2d(32, 64, kernel_size = 3, padding = 'same'),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size = 2, stride = 2)

        )

        self.classifier = nn.Sequential(
            nn.Flatten(),

            nn.Linear(64*7*7,128),
            nn.ReLU(),
            nn.Dropout(p = 0.3),

            nn.Linear(128,64),
            nn.ReLU(),
            nn.Dropout(p = 0.3),

            nn.Linear(64,2)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x) 
        return x   

In [151]:
model = MyNN(3)
model = model.to(device)

In [152]:
epochs = 10
learning_rate = 0.001
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [153]:
for epoch in range(epochs):
    total_epoch_loss = 0

    for batch_features, batch_labels in train_loader:

        batch_features,batch_labels = batch_features.to(device),batch_labels.to(device)

        output = model(batch_features)
        loss = loss_fn(output,batch_labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_epoch_loss += loss.item()

    avg_loss = total_epoch_loss/len(train_loader) 
    print(f"Epoch: {epoch + 1}, Loss: {avg_loss}")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x200704 and 3136x128)