In [1]:
# importing libraries
import numpy as np
import cv2
import pandas as pd
import os
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# loading in images and turing it into an array to use as features, collecting labels
path = os.getcwd()

path = os.path.join(path, 'dataset')
df_labels = pd.read_csv(os.path.join(path, 'label.csv'))
features = []
labels = []

classes = {'meningioma_tumor': 0, 'no_tumor': 1, 'glioma_tumor': 1, 'pituitary_tumor': 1}

for row in df_labels.iterrows():
    image_name = row[1][0]
    label = row[1][1]
    # opening and flattening image
    img = cv2.imread(os.path.join(path, 'image/' + image_name))
    # images are gray scale so there is useless data using RGB
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_flat = grayscale.flatten()
    # appending to lists for use
    features.append(img_flat)
    labels.append(classes.get(label))

In [3]:
# splitting into train and test sets
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.1)

print('Train size: ', len(x_train))
print('Test size: ', len(x_test))

Train size:  2700
Test size:  300


In [4]:
# creating and training SVM on training data
model = SVC(C=1, kernel='linear', gamma='auto')

model.fit(x_train, y_train)

SVC(C=1, gamma='auto', kernel='linear')

In [5]:
# getting accuracy metrics and printing
pred = model.predict(x_test)
score = accuracy_score(y_test, pred)
print('accuracy: ', score)

accuracy:  0.8433333333333334


In [7]:
# experimenting with dimensionality reduction
path = os.getcwd()

path = os.path.join(path, 'dataset')
df_labels = pd.read_csv(os.path.join(path, 'label.csv'))
features = []
labels = []

classes = {'meningioma_tumor': 0, 'no_tumor': 1, 'glioma_tumor': 1, 'pituitary_tumor': 1}

for row in df_labels.iterrows():
    image_name = row[1][0]
    label = row[1][1]
    # opening and flattening image
    img = cv2.imread(os.path.join(path, 'image/' + image_name))
    # images are gray scale so there is useless data using RGB
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_flat = grayscale.flatten()
    # appending to lists for use
    features.append(img_flat)
    labels.append(classes.get(label))

# splitting into train and test sets
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)

In [9]:
from sklearn.preprocessing import StandardScaler

scalar = StandardScaler()

# scaling data to perform pca on it
scalar.fit(x_train)

# applying scale to xtrain and xtest
x_train = scalar.transform(x_train)
x_test = scalar.transform(x_test)

In [12]:
from sklearn.decomposition import PCA

# creating pca
pca = PCA(.95)

# fitting pca on training data
pca.fit(x_train)

# applying to train and test
x_train = pca.transform(x_train)
x_test = pca.transform(x_test)

In [13]:
# creating and training SVM on training data
model_dimension_reduction = SVC(C=1, kernel='linear', gamma='auto')

model_dimension_reduction.fit(x_train, y_train)

SVC(C=1, gamma='auto', kernel='linear')

In [15]:
# getting accuracy metrics and printing
pred = model_dimension_reduction.predict(x_test)
score = accuracy_score(y_test, pred)
print('accuracy: ', score)

accuracy:  0.815
