In [1]:

import numpy as np
import pandas as pd
import os
import csv
import cv2
import matplotlib.pyplot as plt
import pickle
import joblib
from sklearn.svm import SVC
from skimage.feature import hog
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score 
from sklearn.metrics import accuracy_score,f1_score, classification_report, confusion_matrix , accuracy_score, precision_score, recall_score, f1_score, roc_curve ,roc_auc_score,ConfusionMatrixDisplay
     

Path Definition: Paths for four folders are defined, each containing images of different categories of brain scans: glioma tumor, meningioma tumor, no tumor, and pituitary tumor.

In [2]:
# Dataset folders, one per class, given relative to the working directory.
folder1, folder2, folder3, folder4 = (
    "glioma_tumor",
    "meningioma_tumor",
    "no_tumor",
    "pituitary_tumor",
)


#### Feature Extraction Using HOG


Feature Extraction Using HOG (Histogram of Oriented Gradients): The core of this script lies in extracting features from the images using the Histogram of Oriented Gradients (HOG) method. This process is repeated for each folder (each tumor type). The steps for feature extraction include:

Reading each image from the dataset.
Resizing images to a standard size (200x200 pixels) to ensure uniformity.
Converting images to grayscale, which is a common preprocessing step in image processing to reduce complexity while retaining essential features.
Applying a median filter with a 3x3 kernel to reduce noise in the images, which helps in highlighting important features while suppressing irrelevant details.
Extracting HOG features from the preprocessed images. HOG is a feature descriptor that is particularly effective for object detection in computer vision. It works by counting occurrences of gradient orientation in localized portions of an image.
Data Structuring and Storage: After extracting the HOG descriptors, they are stored in a pandas DataFrame, with each row representing the HOG feature vector of an image. A 'Class' column is added to this DataFrame to label the data according to the type of tumor (or absence thereof) it represents. This labeling is crucial for supervised learning tasks.

Saving the Data: The feature vectors along with their labels are saved into .npy files for each category of brain scans. This file format is efficient for storing and accessing large arrays, making it suitable for machine learning tasks.

In [None]:
# Extract one HOG descriptor per readable image in the glioma folder and save
# the descriptors, together with their class id, to "glioma.npy".
hog_descs = []
count = 0
resize = (200, 200)  # common size so every descriptor has the same length

# Sorted so the row order of the saved array is identical on every run
# (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(folder1)):
    img = cv2.imread(os.path.join(folder1, filename))
    # cv2.imread returns None for files it cannot decode; skip those.
    if img is None:
        continue

    img1 = cv2.resize(img, resize)

    # Grayscale, then a 3x3 median filter to suppress noise
    gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    median_img = cv2.medianBlur(gray, 3)

    # visualize is left at its default (False): the rendered HOG image was
    # never used, so only the descriptor is computed.
    fd = hog(median_img, orientations=6, pixels_per_cell=(4, 4), transform_sqrt=True, cells_per_block=(1, 1))
    hog_descs.append(fd)

    # Shape is reported as a column vector, as in the recorded output.
    print("descriptor shape ", count, " : ", fd.reshape(-1, 1).shape)
    count += 1

if not hog_descs:
    raise RuntimeError("No readable images found in folder: {}".format(folder1))

# One row per image, one column per HOG feature
df = pd.DataFrame(hog_descs)

# Add a column holding the class id (0 = glioma)
i = 0
df["Class"] = i

# Store the descriptors and the class column in a numpy file
np.save("glioma.npy", df.to_numpy())

descriptor shape  0  :  (15000, 1)
descriptor shape  1  :  (15000, 1)
descriptor shape  2  :  (15000, 1)
descriptor shape  3  :  (15000, 1)
descriptor shape  4  :  (15000, 1)
descriptor shape  5  :  (15000, 1)
descriptor shape  6  :  (15000, 1)
descriptor shape  7  :  (15000, 1)
descriptor shape  8  :  (15000, 1)
descriptor shape  9  :  (15000, 1)
descriptor shape  10  :  (15000, 1)
descriptor shape  11  :  (15000, 1)
descriptor shape  12  :  (15000, 1)
descriptor shape  13  :  (15000, 1)
descriptor shape  14  :  (15000, 1)
descriptor shape  15  :  (15000, 1)
descriptor shape  16  :  (15000, 1)
descriptor shape  17  :  (15000, 1)
descriptor shape  18  :  (15000, 1)
descriptor shape  19  :  (15000, 1)
descriptor shape  20  :  (15000, 1)
descriptor shape  21  :  (15000, 1)
descriptor shape  22  :  (15000, 1)
descriptor shape  23  :  (15000, 1)
descriptor shape  24  :  (15000, 1)
descriptor shape  25  :  (15000, 1)
descriptor shape  26  :  (15000, 1)
descriptor shape  27  :  (15000, 1)
de

In [None]:
#Folder 2 (Meningioma Tumor)
# Extract one HOG descriptor per readable image in the meningioma folder and
# save the descriptors, together with their class id, to "meningioma.npy".
hog_descs = []
count = 0
resize = (200, 200)  # common size so every descriptor has the same length

# Sorted so the row order of the saved array is identical on every run
# (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(folder2)):
    img = cv2.imread(os.path.join(folder2, filename))
    # cv2.imread returns None for files it cannot decode; skip those.
    if img is None:
        continue

    img1 = cv2.resize(img, resize)

    # Grayscale, then a 3x3 median filter to suppress noise
    gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    median_img = cv2.medianBlur(gray, 3)

    # visualize is left at its default (False): the rendered HOG image was
    # never used, so only the descriptor is computed.
    fd = hog(median_img, orientations=6, pixels_per_cell=(4, 4), transform_sqrt=True, cells_per_block=(1, 1))
    hog_descs.append(fd)

    # Shape is reported as a column vector, same text as before.
    print("descriptor shape ", count, " : ", fd.reshape(-1, 1).shape)
    count += 1

if not hog_descs:
    raise RuntimeError("No readable images found in folder: {}".format(folder2))

# One row per image, one column per HOG feature
df = pd.DataFrame(hog_descs)

# Add a column holding the class id (1 = meningioma)
i = 1
df["Class"] = i

# Store the descriptors and the class column in a numpy file
np.save("meningioma.npy", df.to_numpy())

In [None]:
#Folder 3 (No tumor)
# Extract one HOG descriptor per readable image in the no-tumor folder and
# save the descriptors, together with their class id, to "notumor.npy".
hog_descs = []
count = 0
resize = (200, 200)  # common size so every descriptor has the same length

# Sorted so the row order of the saved array is identical on every run
# (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(folder3)):
    img = cv2.imread(os.path.join(folder3, filename))
    # cv2.imread returns None for files it cannot decode; skip those.
    if img is None:
        continue

    img1 = cv2.resize(img, resize)

    # Grayscale, then a 3x3 median filter to suppress noise
    gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    median_img = cv2.medianBlur(gray, 3)

    # visualize is left at its default (False): the rendered HOG image was
    # never used, so only the descriptor is computed.
    fd = hog(median_img, orientations=6, pixels_per_cell=(4, 4), transform_sqrt=True, cells_per_block=(1, 1))
    hog_descs.append(fd)

    # Shape is reported as a column vector, same text as before.
    print("descriptor shape ", count, " : ", fd.reshape(-1, 1).shape)
    count += 1

if not hog_descs:
    raise RuntimeError("No readable images found in folder: {}".format(folder3))

# One row per image, one column per HOG feature
df = pd.DataFrame(hog_descs)

# Add a column holding the class id (2 = no tumor)
i = 2
df["Class"] = i

# Store the descriptors and the class column in a numpy file
np.save("notumor.npy", df.to_numpy())

In [None]:
# Extract one HOG descriptor per readable image in the pituitary folder and
# save the descriptors, together with their class id, to "pituitary.npy".
folder4 = "pituitary_tumor"
hog_descs = []
count = 0
resize = (200, 200)  # common size so every descriptor has the same length

# Sorted so the row order of the saved array is identical on every run
# (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(folder4)):
    img = cv2.imread(os.path.join(folder4, filename))
    # cv2.imread returns None for files it cannot decode; skip those.
    if img is None:
        continue

    img1 = cv2.resize(img, resize)

    # Grayscale, then a 3x3 median filter to suppress noise
    gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    median_img = cv2.medianBlur(gray, 3)

    # visualize is left at its default (False): the rendered HOG image was
    # never used, so only the descriptor is computed.
    fd = hog(median_img, orientations=6, pixels_per_cell=(4, 4), transform_sqrt=True, cells_per_block=(1, 1))
    hog_descs.append(fd)

    # Shape is reported as a column vector, same text as before.
    print("descriptor shape ", count, " : ", fd.reshape(-1, 1).shape)
    count += 1

if not hog_descs:
    raise RuntimeError("No readable images found in folder: {}".format(folder4))

# One row per image, one column per HOG feature
df = pd.DataFrame(hog_descs)

# Add a column holding the class id (3 = pituitary)
i = 3
df["Class"] = i

# Store the descriptors and the class column in a numpy file
np.save("pituitary.npy", df.to_numpy())

Loading the HOG Features: The code begins by loading the HOG (Histogram of Oriented Gradients) feature arrays for different classes of brain scans from saved .npy files. These classes include 'glioma', 'meningioma', 'no tumor', and 'pituitary tumor'. Each .npy file contains the HOG feature vectors for the respective class of images, where each row corresponds to the HOG descriptor of an individual image, and the last column represents the class label.

Concatenating Arrays: The individual arrays for each tumor type are concatenated into a single array using np.concatenate(). This operation combines the feature vectors from all the classes along the first axis (rows), resulting in a single array where each row still corresponds to a feature vector from one of the images, and the rows are ordered by the sequence in which the arrays were concatenated.

In [None]:

# Read back the per-class feature arrays written by the extraction cells
# (glioma, meningioma, no tumor, pituitary).
glioma_array, meningioma_array, notumor_array, pitutary_array = (
    np.load(npy_path)
    for npy_path in ('glioma.npy', 'meningioma.npy', 'notumor.npy', 'pituitary.npy')
)

# Stack the four arrays row-wise; rows stay grouped by class in this order.
concatenated_array = np.concatenate(
    [glioma_array, meningioma_array, notumor_array, pitutary_array],
    axis=0,
)

# Show (rows, columns) of the combined array
print(concatenated_array.shape)

Feature and Label Separation:

The features (x) and labels (y) are separated from the concatenated array. The features consist of the HOG descriptors, and the labels represent the class of each image.
For x, all columns except the last one are selected (up to 15000 features), which means each row in x contains the HOG feature vector for a corresponding image.
For y, the last column is selected, which contains the class labels for each image. Because the labels were saved in the same array as the floating-point features, they are cast back to an integer dtype with astype so that they are in a suitable format for classification algorithms.
Printing Shapes: Finally, the shapes of x and y are printed to verify their structures. x should have a shape where the number of rows equals the total number of images and the number of columns equals the number of features (up to 15000 in this case). y should be a 1-dimensional array with a length equal to the total number of images, where each element is a class label.


In [None]:

# Split the combined array into features and labels.
# The label is always the last column, so everything before it is a feature;
# slicing with :-1 avoids hard-coding the descriptor length (15000 here).
x = concatenated_array[:, :-1]
# np.int was removed in NumPy 1.24; the builtin int is the supported spelling.
y = concatenated_array[:, -1].astype(int)


Shape of the Concatenated Array: The shape of the concatenated array is printed to provide insight into the data structure. The number of rows in this array equals the total number of images across all classes, and the number of columns corresponds to the number of features plus one for the class label.

In [None]:
# Report the dimensions of the feature matrix x, then of the label vector y.
for arr in (x, y):
    print(arr.shape)
     

#### PCA

This code segment employs Principal Component Analysis (PCA), a dimensionality reduction technique, to analyze and visualize the variance explained by the components extracted from the HOG (Histogram of Oriented Gradients) features of brain scans. Here's a breakdown of each step and its purpose:

PCA Object Creation: A PCA object is instantiated without specifying the number of components. By default PCA then keeps all components, i.e. min(n_samples, n_features) of them — the smaller of the number of images and the number of features in the dataset (x) — aiming to capture the entirety of the variance in the data.

Fitting the PCA Model: The PCA model is fit to the HOG features stored in x. This process involves calculating the eigenvalues and eigenvectors of the covariance matrix of x, which are used to determine the principal components. These components are the directions in the feature space that maximize the variance of the projected data.

Cumulative Explained Variance Ratio: The explained variance ratio of each principal component is calculated. This ratio indicates the proportion of the dataset's total variance that is captured by each principal component. The cumulative sum of these ratios (cumulative_var_ratio) is then computed to understand how much of the total variance is explained by the first n components combined.

Explained Variance Ratio Output: The explained variance ratio for each principal component is printed. These values give insight into the importance of each component—higher values mean a component captures more of the data's variance.

Variance Plot: A line plot is created to visualize the cumulative explained variance ratio against the number of components. This plot is crucial for determining the number of components needed to capture a significant portion of the variance in the data. The x-axis represents the number of components, and the y-axis represents the cumulative explained variance ratio.

The xlabel 'Number of Components' indicates the principal components on the x-axis.
The ylabel 'Cumulative Explained Variance Ratio' indicates the proportion of the dataset's total variance explained by the first n components on the y-axis.
The title 'cumulative variance plot.' labels the plot for clarity.
The purpose of this analysis is to understand the dimensionality of the HOG feature space and to identify how many principal components are necessary to capture most of the variance in the data. This is often used to reduce the number of features before applying machine learning models, improving computational efficiency and potentially reducing overfitting by eliminating noise and less informative features.








In [None]:
from sklearn.decomposition import PCA

# Fit a PCA that keeps every component, to inspect how the variance is spread.
pca = PCA()
pca.fit(x)

# Running total of the explained variance ratio, component by component
cumulative_var_ratio = np.cumsum(pca.explained_variance_ratio_)
print(pca.explained_variance_ratio_)

# Plot against 1..n rather than the default 0-based index, so the x value is
# really the number of components whose variance has been accumulated.
component_counts = range(1, len(cumulative_var_ratio) + 1)
plt.plot(component_counts, cumulative_var_ratio)
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance Ratio')
plt.title('cumulative variance plot.')
plt.show()

PCA Transformation:

pca.transform(x): This line applies the PCA transformation to the HOG feature matrix x. PCA is a dimensionality reduction technique that identifies the axes (principal components) along which the variance in the data is maximized. By projecting the data onto these axes, it's possible to reduce the number of dimensions (features) while retaining most of the variance (information) in the data. The result, B, is the transformed dataset where each row represents an image and columns represent the principal components. The number of columns in B is equal to the number of components in the PCA model, which was fit to the data earlier.
Converting to DataFrame:

B = pd.DataFrame(B): This line converts the transformed feature matrix B into a pandas DataFrame. This is often done for ease of data manipulation and to utilize pandas' functionality for data analysis, as DataFrames are a more flexible and intuitive structure for tabular data.
Fitting PCA with a Specified Number of Components:

pca = PCA(n_components=100): Here, a new PCA object is created with a specified number of components (n_components=100). This means that when this PCA model is fit and applied to the data, it will reduce the dimensionality of the data to 100 principal components, regardless of the original number of features.
pca.fit(x): This line fits the new PCA model to the HOG features (x). The PCA model learns the 100 principal components that capture the most variance in the data. This fitting process involves computing the eigenvectors (principal components) and eigenvalues (explained variance) of the data's covariance matrix.
The purpose of these operations is to reduce the dimensionality of the feature set to make subsequent machine learning models more efficient and potentially more effective. By reducing the number of features to a set of principal components that capture the majority of the variance in the data, it's possible to speed up training times, reduce the risk of overfitting, and possibly improve the generalization performance of the models. The choice of 100 components is a balance between retaining enough information (variance) and reducing the feature space to a manageable size.

In [None]:

# Project the HOG features with the currently fitted PCA model and wrap the
# result in a DataFrame (one row per image, one column per component).
B = pd.DataFrame(pca.transform(x))
B

In [None]:
# Refit PCA keeping only the first 100 principal components.
# random_state is fixed because, for a truncated fit on data this large,
# scikit-learn may select its randomized SVD solver; without a seed the
# components (and everything trained on them) could differ between runs.
pca = PCA(n_components=100, random_state=42)
pca.fit(x)

Saving the PCA Model
The PCA model that was previously fit to the HOG features is serialized (saved) to disk using Python's pickle module, allowing it to be loaded and used later without retraining. The file is named 'PCA_model.sav'.
Transforming Features Using PCA and Preparing Final Data

In [None]:
# Save the fitted PCA model to disk with pickle.
filename = 'PCA_model.sav'
# The context manager closes the file handle even if pickling fails;
# the previous bare open() call left it open.
with open(filename, 'wb') as model_file:
    pickle.dump(pca, model_file)

Transforming Features Using PCA and Preparing Final Data
The HOG features in x are transformed using the previously trained PCA model to reduce dimensionality. This transformed feature set is converted into a pandas DataFrame, B.

In [None]:

# Project the HOG features with the currently fitted PCA model and wrap the
# result in a DataFrame (one row per image, one column per component).
B = pd.DataFrame(pca.transform(x))
B

The class labels stored in y are concatenated to B as the final column, ensuring each row in B now consists of the principal components followed by the corresponding class label. This forms the complete dataset for training machine learning models.

In [None]:
# Concatenate the class ids as the last column.
# Only the principal-component columns of B are kept before concatenating, so
# re-running this cell cannot stack a second label column onto the frame
# (which would otherwise end up among the features read back from the CSV).
B = pd.concat([B.iloc[:, :pca.n_components_], pd.DataFrame(y)], axis=1)
B

This final dataset is then saved to a CSV file named 'Final_HOG_Feature.csv' without headers or indexes, making it suitable for loading as raw data.

In [None]:
# Write the PCA features plus the class column to disk as a bare CSV
# (no header row, no index column).
csv_data1 = B.to_csv(
    path_or_buf='Final_HOG_Feature.csv',
    index=False,
    header=False,
    mode='w',
)

Loading and Preprocessing the Final Dataset
The saved CSV file is loaded into a pandas DataFrame, train_data, with no header as the CSV doesn't contain column names.

The script checks for any missing values (NaN) in the data. Handling missing values is crucial to prevent errors during model training.

In [None]:
# Reload the final feature file; it has no header row, so columns are numbered.
train_data = pd.read_csv('Final_HOG_Feature.csv', header=None)

# True if any cell anywhere in the frame is NaN
train_data.isna().to_numpy().any()

The features (principal components) and labels are separated into X and Y, respectively. X contains all columns except the last one, which is assumed to be the class label column. Y is the last column, containing the class labels.

In [None]:
# Feature matrix: every column except column 100, which is used as the label.
X = train_data.drop(columns=[100])

In [None]:
# Display the feature matrix.
X

In [None]:

# Label vector: column 100 of the reloaded data.
Y = train_data.loc[:, 100]

In [None]:
# Display the label vector.
Y

In [None]:
 ##Applying Classifiers With K Fold Cross Validation

#### Applying Models

Logistic Regression

In [None]:
#Logistic Regression

# 5-fold cross-validation of Logistic Regression on the PCA features.
k = 5
# The rows are ordered by class (the per-class arrays were concatenated one
# after another), so unshuffled folds would test on blocks dominated by a
# single class. Stratified, shuffled folds keep the class mix in every fold,
# and the fixed seed makes the split reproducible.
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
model_LR = LogisticRegression(solver='liblinear')

acc_score = []

for train_index, test_index in kf.split(X, Y):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    # The fold indices are positions, so index the labels positionally too.
    y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

    model_LR.fit(X_train, y_train)
    pred_values = model_LR.predict(X_test)

    # accuracy_score expects (y_true, y_pred)
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

avg_acc_score = sum(acc_score) / len(acc_score)

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))

# Refit on the full dataset before persisting; otherwise the saved model is
# whatever the last fold produced, trained on only part of the data.
model_LR.fit(X, Y)
joblib.dump(model_LR, "LG_HOG.sav")

Random Forest

In [None]:
#Random Forest

# 5-fold cross-validation of a Random Forest on the PCA features.
k = 5
# The rows are ordered by class, so unshuffled folds would test on blocks
# dominated by a single class. Stratified, shuffled folds keep the class mix
# in every fold, and the fixed seed makes the split reproducible.
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
# Seeded so the reported accuracies can be reproduced.
model_RF = RandomForestClassifier(random_state=42)

acc_score = []

for train_index, test_index in kf.split(X, Y):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    # The fold indices are positions, so index the labels positionally too.
    y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

    model_RF.fit(X_train, y_train)
    pred_values = model_RF.predict(X_test)

    # accuracy_score expects (y_true, y_pred)
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

avg_acc_score = sum(acc_score) / len(acc_score)

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))

KNN Classifier

In [None]:
# 5-fold cross-validation of a K-Nearest-Neighbours classifier on the PCA features.
k = 5
# The rows are ordered by class, so unshuffled folds would test on blocks
# dominated by a single class. Stratified, shuffled folds keep the class mix
# in every fold, and the fixed seed makes the split reproducible.
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
model_knn = KNeighborsClassifier()

acc_score = []

for train_index, test_index in kf.split(X, Y):
    # Features are handed to the classifier as NumPy arrays
    X_train, X_test = X.iloc[train_index, :].to_numpy(), X.iloc[test_index, :].to_numpy()
    # The fold indices are positions, so index the labels positionally too.
    y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

    model_knn.fit(X_train, y_train)
    pred_values = model_knn.predict(X_test)

    # accuracy_score expects (y_true, y_pred)
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

avg_acc_score = sum(acc_score) / len(acc_score)

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))


In [None]:
!pip install xgboost

XGBoost

In [None]:
import xgboost as xgb

In [None]:
 #XGBoost

# 5-fold cross-validation of XGBoost on the PCA features.
k = 5
# Stratified folds keep the class mix in every fold; the fixed seed makes the
# shuffle, and therefore the reported accuracies, reproducible.
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

model_xgb = xgb.XGBClassifier()

acc_score = []
# Predictions and true labels pooled over all folds
y_pred_list = []
y_test_list = []

for train_index, test_index in kf.split(X, Y):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    # The fold indices are positions, so index the labels positionally too.
    y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

    model_xgb.fit(X_train, y_train)

    y_pred = model_xgb.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    acc_score.append(acc)

    y_pred_list.extend(y_pred)
    y_test_list.extend(y_test)

avg_acc_score = sum(acc_score) / len(acc_score)

print('Accuracy of each fold - {}'.format(acc_score))
print('Average accuracy: {}'.format(avg_acc_score))

# Refit on the full dataset before persisting; otherwise the saved model is
# whatever the last fold produced, trained on only part of the data.
model_xgb.fit(X, Y)
joblib.dump(model_xgb, "xgb.sav")

AdaBoost

In [None]:
#AdaBoost

# 5-fold cross-validation of AdaBoost on the PCA features.
k = 5
# The rows are ordered by class, so unshuffled folds would test on blocks
# dominated by a single class. Stratified, shuffled folds keep the class mix
# in every fold, and the fixed seed makes the split reproducible.
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
# Seeded so the reported accuracies can be reproduced.
model_AB = AdaBoostClassifier(random_state=42)

acc_score = []

for train_index, test_index in kf.split(X, Y):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    # The fold indices are positions, so index the labels positionally too.
    y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

    model_AB.fit(X_train, y_train)
    pred_values = model_AB.predict(X_test)

    # accuracy_score expects (y_true, y_pred)
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

avg_acc_score = sum(acc_score) / len(acc_score)

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))

SVM

In [None]:
#SVM

# 5-fold cross-validation of a linear-kernel SVM on the PCA features.
k = 5
# The rows are ordered by class, so unshuffled folds would test on blocks
# dominated by a single class. Stratified, shuffled folds keep the class mix
# in every fold, and the fixed seed makes the split reproducible.
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
model_SVM = SVC(kernel='linear')

acc_score = []

for train_index, test_index in kf.split(X, Y):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    # The fold indices are positions, so index the labels positionally too.
    y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

    model_SVM.fit(X_train, y_train)
    pred_values = model_SVM.predict(X_test)

    # accuracy_score expects (y_true, y_pred)
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

avg_acc_score = sum(acc_score) / len(acc_score)

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))

Decision Tree

In [None]:
#Decision Tree

# 5-fold cross-validation of a Decision Tree on the PCA features.
k = 5
# The rows are ordered by class, so unshuffled folds would test on blocks
# dominated by a single class. Stratified, shuffled folds keep the class mix
# in every fold, and the fixed seed makes the split reproducible.
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
# Seeded so the reported accuracies can be reproduced.
model_DT = DecisionTreeClassifier(random_state=42)

acc_score = []

for train_index, test_index in kf.split(X, Y):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    # The fold indices are positions, so index the labels positionally too.
    y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

    model_DT.fit(X_train, y_train)
    pred_values = model_DT.predict(X_test)

    # accuracy_score expects (y_true, y_pred)
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

avg_acc_score = sum(acc_score) / len(acc_score)

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))

Gradient Boosting

In [None]:
#Gradient Boosting

# 5-fold cross-validation of Gradient Boosting on the PCA features.
k = 5
# The rows are ordered by class, so unshuffled folds would test on blocks
# dominated by a single class. Stratified, shuffled folds keep the class mix
# in every fold, and the fixed seed makes the split reproducible.
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
# Seeded so the reported accuracies can be reproduced.
model_GBM = GradientBoostingClassifier(random_state=42)

acc_score = []

for train_index, test_index in kf.split(X, Y):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    # The fold indices are positions, so index the labels positionally too.
    y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

    model_GBM.fit(X_train, y_train)
    pred_values = model_GBM.predict(X_test)

    # accuracy_score expects (y_true, y_pred)
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

avg_acc_score = sum(acc_score) / len(acc_score)

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))

In [None]:
##Results
# Classify two sample scans with the persisted PCA and XGBoost models.

# Message printed for each class id, as assigned when the features were built:
# Glioma = 0, Meningioma = 1, No tumor = 2, Pituitary = 3
CLASS_MESSAGES = {
    0: "Given Image is of Glioma Tumor",
    1: "Given Image is of Meningioma Tumor",
    2: "Given Image is of No Tumor",
    3: "Given Image is of Pituitary Tumor",
}


def classify_scan(image_path, pca_model, classifier):
    """Show one image, run it through the HOG + PCA pipeline and classify it.

    The preprocessing mirrors the feature-extraction cells: resize to
    200 x 200, grayscale, 3x3 median filter, HOG with the same parameters.

    Parameters:
        image_path: path of the image file to classify.
        pca_model: fitted PCA model used to reduce the HOG descriptor.
        classifier: fitted classifier applied to the reduced descriptor.

    Returns:
        The predicted class id as an int.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals a missing or unreadable file by returning None.
    if img is None:
        raise FileNotFoundError("Could not read image: {}".format(image_path))

    # OpenCV loads images as BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()

    # Resize to 200 x 200, grayscale, then 3x3 median filter
    resized = cv2.resize(img, (200, 200))
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    median_img = cv2.medianBlur(gray, 3)

    # HOG descriptor; the visualisation image is not needed here.
    fd = hog(median_img, orientations=6, pixels_per_cell=(4, 4), transform_sqrt=True, cells_per_block=(1, 1))
    print("descriptor shape : ", fd.shape)

    # The models expect one row per sample, so reshape the 1-D descriptor.
    fd_pca = pca_model.transform(fd.reshape(1, -1))

    y_pred = classifier.predict(fd_pca)
    print(y_pred)

    # predict returns an array with one entry; compare on the scalar class id.
    label = int(y_pred[0])
    print(CLASS_MESSAGES.get(label, "Unknown class id: {}".format(label)))
    return label


# Load the PCA model and the XGBoost model (chosen for its highest accuracy)
# once, and reuse them for both images.
# NOTE(review): both files are unpickled, which can execute arbitrary code;
# only load files produced by this notebook.
pca = joblib.load('PCA_model.sav')
loaded_model = joblib.load("xgb.sav")

print("Pituitary Tumor")
classify_scan("pituitary.jpg", pca, loaded_model)

print("Glioma Tumor")
classify_scan("glioma.jpg", pca, loaded_model)
     