# Brain Tumor Project

This is the notebook where the project takes place.

### Data Checking and Preprocessing 

In [19]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import cv2
import os

# Set to True to run the dataset sanity checks further down in this cell.
DATA_CHECKING = False


# --- Project configuration ---
# 1. Category names (one per class).
CATEGORIES = [
    "glioma_tumor",
    "meningioma_tumor",
    "no_tumor",
    "pituitary_tumor",
]

# 2. Dataset root, located inside the current working directory.
DIR_PATH = "{}/brain_tumor_dataset".format(os.getcwd())

# 3. Training and testing sub-directories (both keep their trailing slash).
TRAINING_PATH = "{}/Training/".format(DIR_PATH)
TESTING_PATH = "{}/Testing/".format(DIR_PATH)

# 4. Side length, in pixels, used when resizing the training images.
IMG_SIZE = 150


# Check the data (optional; skipped unless DATA_CHECKING is True).
if DATA_CHECKING:
    # Walk the dataset directory and print every directory and file path, so
    # it is easy to check that all four category folders are present.
    for dirname, _, filenames in os.walk(DIR_PATH):
        print(dirname)
        for filename in filenames:
            print(os.path.join(dirname, filename))

    # Show the training images one after another.
    # Note the `break` at the end of the outer loop: only the first category
    # in CATEGORIES is displayed.
    img_array = None
    for category in CATEGORIES:
        path = os.path.join(TRAINING_PATH, category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            print(img_path)
            candidate = cv2.imread(img_path)
            if candidate is None:
                # cv2.imread signals an unreadable file by returning None
                # instead of raising, so skip it explicitly.
                print("failed to read", img_path)
                continue
            img_array = candidate
            # OpenCV loads colour images as BGR; matplotlib expects RGB.
            plt.imshow(cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB))
            # The axes must be hidden before plt.show(); afterwards the call
            # would apply to a new, empty figure.
            plt.axis("off")
            plt.show()
        break

    # Preview the last successfully read image after resizing it to
    # IMG_SIZE x IMG_SIZE. It is converted to a single channel first because
    # matplotlib ignores `cmap` for 3-channel data.
    if img_array is not None:
        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        new_array = cv2.cvtColor(new_array, cv2.COLOR_BGR2GRAY)
        plt.imshow(new_array, cmap="gray")
        plt.axis("off")

In [41]:
training_data = []

def populate_training_data():
    """Fill the module-level ``training_data`` list from the training folders.

    For every category in ``CATEGORIES`` the images found under
    ``TRAINING_PATH/<category>`` are read as grayscale, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and appended as ``[image, category_index]``,
    where ``category_index`` is the category's position in ``CATEGORIES``.

    Files that cannot be read or resized are reported with their path and
    skipped; they never abort the run.

    Returns:
        None. The result is stored in ``training_data``.
    """
    # Start from an empty list so that calling this function again does not
    # append every sample a second time.
    training_data.clear()

    for category_index, category in enumerate(CATEGORIES):
        new_path = os.path.join(TRAINING_PATH, category)

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for img in sorted(os.listdir(new_path)):
            img_path = os.path.join(new_path, img)
            try:
                img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img_array is None:
                    # cv2.imread returns None for unreadable files.
                    print("failed", img_path, "could not be read as an image")
                    continue
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except Exception as e:
                # Best effort: report the offending file and keep going.
                print("failed", img_path, e)
                continue
            training_data.append([new_array, category_index])

# This takes the files in the training path and places them in this order [image, int]
# where image = the image data, and int is the category it's in.
# [glioma_tumor = 0, meningioma_tumor = 1, no_tumor = 2, pituitary_tumor = 3]
populate_training_data()


In [48]:
# Split the [image, label] pairs in training_data into parallel lists.
X = [features for features, _ in training_data]
y = [label for _, label in training_data]

# Stack the images into one array with an explicit single-channel axis,
# giving shape (n_samples, IMG_SIZE, IMG_SIZE, 1). Using IMG_SIZE (rather than
# a literal 150) keeps this in step with the resize applied when loading.
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

# Scale the 8-bit pixel values into the range [0, 1].
X = X / 255.0

In [51]:
# Convert the integer labels to one-hot vectors, one column per category.
# `to_categorical` is imported from the public `keras.utils` namespace; the
# private `keras.utils.np_utils` module is not available in newer Keras.
# NOTE: run this once per freshly built `y`; applying it to an already
# one-hot encoded array would encode it a second time.
from keras.utils import to_categorical
y = to_categorical(y, num_classes=len(CATEGORIES))

In [53]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples; the fixed random_state seeds the shuffle.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the shape of each split. The captions that say "test" describe the
# held-out validation arrays (X_val / Y_val).
for caption, split in (
    ("x_train shape", X_train),
    ("x_test shape", X_val),
    ("y_train shape", Y_train),
    ("y_test shape", Y_val),
):
    print(caption, split.shape)

x_train shape (2296, 150, 150, 1)
x_test shape (574, 150, 150, 1)
y_train shape (2296, 4)
y_test shape (574, 4)


In [72]:
from sklearn.metrics import confusion_matrix
import itertools
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Activation
import pickle

## Convolutional Neural Network 
You take an image and convert it into pixel data, i.e. a grid of values. You then slide a convolutional window (e.g. a 3x3 window) over that grid. Each window position is simplified down into a single value, and the resulting grid of simplified values is then simplified again in the same way by the next layer.

Say, for example, that we use convolution and max pooling: you look at your window and take the values from it. From these values you take the maximum value, which gives you the next feature layer. Makes sense, right?

Basically, what's happening is that the network is slowly extracting features. The lower layers extract edges, then lines, then larger and larger structures. Does that make sense? Okay. Moving on.

In [74]:
# Using a Sequential model: layers are applied in the order they are added.
model = Sequential()

# First convolutional block: 64 filters with a 5x5 window.
# Only the first layer declares the input shape; it is taken from the training
# data so it always matches the preprocessing (IMG_SIZE x IMG_SIZE x 1).
model.add(Conv2D(64, (5, 5), input_shape=X_train.shape[1:]))
model.add(Activation('relu'))  # you could pass activation/pooling in whatever order
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second convolutional block: 64 filters with a 3x3 window.
# No input_shape here: it only has meaning on the first layer of the model.
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))  # you could pass activation/pooling in whatever order
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten the feature maps, one hidden dense layer, then a
# softmax with one unit per one-hot label column.
model.add(Flatten())
model.add(Dense(64, activation="relu"))
model.add(Dense(Y_train.shape[1], activation="softmax"))


model.compile(optimizer='adam', loss="categorical_crossentropy", metrics=["accuracy"])
epochs = 5
batch_size = 40

# Train, evaluating on the held-out validation split after every epoch.
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_val, Y_val))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fa4f3f4fa58>

In [79]:
# Save the trained model to a file in the current working directory.
filename = "finalized_brain_tumor_model.h5"
model.save(filepath=filename)