In [1]:
import cv2 as cv
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
import pickle

In [2]:
# Function to load image data
def load_data(dir_list, image_size):
    data = []
    label = []
    image_width, image_height = image_size
    
    for directory in dir_list:
        for filename in os.listdir(directory):
            image = cv.imread(os.path.join(directory, filename), 0)
            image = cv.resize(image, dsize=(image_width, image_height), interpolation=cv.INTER_CUBIC)
            image = image / 255.0
            image = image.astype(np.float32)
            data.append(image.flatten())  # Flatten the image to use with Random Forest
            if directory == 'yes':
                label.append(1)  # Tumor
            else:
                label.append(0)  # No tumor
    data_array = np.array(data)
    label = np.array(label) 
    return data_array, label



In [3]:
# Load data
x, y = load_data(['yes', 'no'], (256, 256))

In [4]:
# Split data
def split_data(test_size=0.2, log=True):
    x_train, x_test_val, y_train, y_test_val = train_test_split(x, y, test_size=test_size, shuffle=True)
    if log:
        print('Input Shape:')
        print(x_train.shape, y_train.shape)
        print(x_test_val.shape, y_test_val.shape)
    x_test = x_test_val
    x_val = x_test_val
    y_test = y_test_val
    y_val = y_test_val
    if log:
        print('Output Shape:')
        print(x_train.shape, y_train.shape)
        print(x_test.shape, y_test.shape)
        print(x_val.shape, y_val.shape)
    return x_train, x_test, y_train, y_test, x_val, y_val

x_train, x_test, y_train, y_test, x_val, y_val = split_data()
validation_dataset = (x_val, y_val)


Input Shape:
(2400, 65536) (2400,)
(600, 65536) (600,)
Output Shape:
(2400, 65536) (2400,)
(600, 65536) (600,)
(600, 65536) (600,)


In [5]:

# Random Forest Model
class RandomForest:
    def __init__(self, n_estimators=100):
        self.n_estimators = n_estimators
        self.trees = []
        
    def train(self, X, y):
        for _ in range(self.n_estimators):
            tree = DecisionTreeClassifier(max_depth=None)  # Use decision tree classifier as base estimator
            tree.fit(X, y)  # Fit decision tree to data
            self.trees.append(tree)
    
    def predict(self, X):
        predictions = np.zeros((X.shape[0], len(self.trees)))
        for idx, tree in enumerate(self.trees):
            predictions[:, idx] = tree.predict(X)
        return np.mean(predictions, axis=1)  # Voting or averaging of predictions

In [6]:
# Train Random Forest Model
rf_model = RandomForest(n_estimators=100)
rf_model.train(x_train, y_train)

In [7]:
# Save Random Forest model
def save_model(model, filename):
    with open(filename, 'wb') as f:
        pickle.dump(model, f)

# Save the trained Random Forest model
save_model(rf_model, 'random_forest_model.pkl')