In [1]:
import pandas as pd
import numpy as np
import cv2
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

In [2]:
# Define the paths to the image folders
benign_path = "C:\\Users\\Manas\\Desktop\\OBJECT DETECTION\\BT\\LBP_extraction\\glioma\\"
malignant_path = "C:\\Users\\Manas\\Desktop\\OBJECT DETECTION\\BT\\LBP_extraction\\meningioma\\"
normal_path = "C:\\Users\\Manas\\Desktop\\OBJECT DETECTION\\BT\\LBP_extraction\\pituitary\\"
no_tumor_path = "C:\\Users\\Manas\\Desktop\\OBJECT DETECTION\\BT\\LBP_extraction\\notumor\\"

In [3]:
# Load the images and extract features
def extract_features(image_path):
    # Read the image
    img = cv2.imread(image_path)
    # Resize the image to a fixed size
    img = cv2.resize(img, (64,64))
    # Convert the image to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Flatten the image into a 1-dimensional array
    features = gray.flatten()
    return features


In [4]:
# Create a list of all image paths and labels
images = []
labels = []

# Loop through the glioma images and add them to the list
for file_name in os.listdir(benign_path):
    if file_name.endswith(".jpg") or file_name.endswith(".png"):
        image_path = os.path.join(benign_path, file_name)
        features = extract_features(image_path)
        images.append(features)
        labels.append("glioma")
        
        
        
# Loop through the meningioma images and add them to the list
for file_name in os.listdir(malignant_path):
    if file_name.endswith(".jpg") or file_name.endswith(".png"):
        image_path = os.path.join(malignant_path, file_name)
        features = extract_features(image_path)
        images.append(features)
        labels.append("meningioma")

# Loop through the pituitary images and add them to the list
for file_name in os.listdir(normal_path):
    if file_name.endswith(".jpg"):
        image_path = os.path.join(normal_path, file_name)
        features = extract_features(image_path)
        images.append(features)
        labels.append("pituitary")

# Loop through the no tumor images and add them to the list
for file_name in os.listdir(no_tumor_path):
    if file_name.endswith(".jpg"):
        image_path = os.path.join(no_tumor_path, file_name)
        features = extract_features(image_path)
        images.append(features)
        labels.append("no tumor")

In [5]:
# Create a Pandas DataFrame from the images and labels
data = pd.DataFrame(images)
data["label"] = labels


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7023 entries, 0 to 7022
Columns: 4097 entries, 0 to label
dtypes: object(1), uint8(4096)
memory usage: 27.5+ MB


In [7]:
# Split the data into training and testing sets
X = data.drop("label", axis=1)
y = data["label"]



X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)



# Apply feature scaling using StandardScaler
scaler = StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
#Create the KNN classifier and fit it to the training data
knn = KNeighborsClassifier(n_neighbors=1, metric='manhattan', algorithm='kd_tree')
knn.fit(X_train, y_train)



In [10]:
# Predict the labels for the test data
y_pred = knn.predict(X_test)

In [11]:
# Print the classification report
print(confusion_matrix(y_test,y_pred))

[[319  23   5   1]
 [ 51 247  19   1]
 [  3   1 390   3]
 [  1   2  13 326]]


In [12]:
print(classification_report(y_test, y_pred, digits = 5))

              precision    recall  f1-score   support

      glioma    0.85294   0.91667   0.88366       348
  meningioma    0.90476   0.77673   0.83587       318
    no tumor    0.91335   0.98237   0.94660       397
   pituitary    0.98489   0.95322   0.96880       342

    accuracy                        0.91246      1405
   macro avg    0.91399   0.90725   0.90873      1405
weighted avg    0.91386   0.91246   0.91135      1405



In [13]:
from sklearn.model_selection import train_test_split,cross_val_score
"""X = data.drop("label", axis=1)
y = data["label"]"""

scores = cross_val_score(knn, X, y, cv=5)

In [14]:
print("Average Accuracy: ", np.mean(scores))

Average Accuracy:  0.834115643154789


In [15]:
print(scores)

[0.92170819 0.74448399 0.84199288 0.85327635 0.80911681]
