In [1]:
import cv2 as cv
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeRegressor
import pickle

In [2]:
# Function to load image data
def load_data(dir_list, image_size):
    data = []
    label = []
    image_width, image_height = image_size
    
    for directory in dir_list:
        for filename in os.listdir(directory):
            image = cv.imread(os.path.join(directory, filename), 0)
            image = cv.resize(image, dsize=(image_width, image_height), interpolation=cv.INTER_CUBIC)
            image = image / 255.0
            image = image.astype(np.float32)
            data.append(image.flatten())  # Flatten the image to use with Gradient Boosting
            if directory == 'yes':
                label.append(1)  # Tumor
            else:
                label.append(0)  # No tumor
    data_array = np.array(data)
    label = np.array(label) 
    return data_array, label


In [3]:
# Load data
x, y = load_data(['yes', 'no'], (256, 256))

In [4]:
# Split data
def split_data(test_size=0.2, log=True):
    x_train, x_test_val, y_train, y_test_val = train_test_split(x, y, test_size=test_size, shuffle=True)
    if log:
        print('Input Shape:')
        print(x_train.shape, y_train.shape)
        print(x_test_val.shape, y_test_val.shape)
    x_test = x_test_val
    x_val = x_test_val
    y_test = y_test_val
    y_val = y_test_val
    if log:
        print('Output Shape:')
        print(x_train.shape, y_train.shape)
        print(x_test.shape, y_test.shape)
        print(x_val.shape, y_val.shape)
    return x_train, x_test, y_train, y_test, x_val, y_val

x_train, x_test, y_train, y_test, x_val, y_val = split_data()
validation_dataset = (x_val, y_val)


Input Shape:
(2400, 65536) (2400,)
(600, 65536) (600,)
Output Shape:
(2400, 65536) (2400,)
(600, 65536) (600,)
(600, 65536) (600,)


In [5]:
# Gradient Boosting Model
class GradientBoosting:
    def __init__(self, n_estimators=100, learning_rate=0.1):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.estimators = []
        self.predictions = []
        
    def train(self, X, y):
        # Initialize predictions
        self.predictions = np.zeros(len(y))
        
        for _ in range(self.n_estimators):
            residuals = y - self.predictions  # Compute residuals
            tree = DecisionTreeRegressor(max_depth=3)  # Initialize decision tree regressor
            tree.fit(X, residuals)  # Fit decision tree to residuals
            self.estimators.append(tree)  # Add decision tree to list of estimators
            predictions = tree.predict(X)  # Make predictions using current tree
            self.predictions += self.learning_rate * predictions  # Update predictions with weighted predictions
    
    def predict(self, X):
        predictions = np.zeros(len(X))
        for tree in self.estimators:
            predictions += self.learning_rate * tree.predict(X)
        return np.sign(predictions)


In [6]:
# Train Gradient Boosting Model
gb_model = GradientBoosting(n_estimators=100, learning_rate=0.1)
gb_model.train(x_train, y_train)


In [7]:
# Save Gradient Boosting model
def save_model(model, filename):
    with open(filename, 'wb') as f:
        pickle.dump(model, f)

# Save the trained Gradient Boosting model
save_model(gb_model, 'gradient_boosting_model.pkl')