In [None]:
import os
import cv2
import numpy as np
from sklearn.utils import shuffle
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from skimage.feature import hog
from sklearn.preprocessing import StandardScaler

In [None]:
# Root folder of each dataset split (paths look like a Colab-mounted Google
# Drive -- adjust if the data lives elsewhere).
train_dir, test_dir, valid_dir = (
    '/content/drive/MyDrive/Data/train',
    '/content/drive/MyDrive/Data/test',
    '/content/drive/MyDrive/Data/valid',
)

# Training split: one sub-folder per class.
adeno_dir, large_dir, normal_dir, squamous_dir = (
    os.path.join(train_dir, class_folder)
    for class_folder in (
        'adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib',
        'large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa',
        'normal',
        'squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa',
    )
)

# Test split: note the class folders use shorter names than train/valid.
adeno_test_dir, large_test_dir, normal_test_dir, squamous_test_dir = (
    os.path.join(test_dir, class_folder)
    for class_folder in (
        'adenocarcinoma',
        'large.cell.carcinoma',
        'normal',
        'squamous.cell.carcinoma',
    )
)

# Validation split: same class-folder names as the training split.
adeno_valid_dir, large_valid_dir, normal_valid_dir, squamous_valid_dir = (
    os.path.join(valid_dir, class_folder)
    for class_folder in (
        'adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib',
        'large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa',
        'normal',
        'squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa',
    )
)

In [None]:
img_size = 400  # side length in pixels; every image is resized to img_size x img_size


def _load_flat_gray(img_file):
  """Read one image file and return it as a flattened grayscale row.

  Returns an array of shape (1, img_size * img_size), or None when OpenCV
  cannot decode the path (cv2.imread returns None for directories, hidden
  checkpoint folders, and other non-image files instead of raising).
  """
  img = cv2.imread(img_file)
  if img is None:
    return None
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  img = cv2.resize(img, (img_size, img_size))
  return img.flatten().reshape(1, -1)


def processing(adeno_dir, large_dir, normal_dir, squamous_dir):
  """Load every image of the four class folders as binary-labelled samples.

  Args:
    adeno_dir: folder with adenocarcinoma images (label 1, cancer).
    large_dir: folder with large-cell-carcinoma images (label 1, cancer).
    normal_dir: folder with healthy images (label 0).
    squamous_dir: folder with squamous-cell-carcinoma images (label 1, cancer).

  Returns:
    (X, Y): X is a list of (1, img_size * img_size) grayscale arrays and Y
    the list of matching 0/1 labels. Samples are emitted folder by folder in
    the order adeno, large, normal, squamous.

  Entries that OpenCV cannot read are skipped rather than crashing the whole
  load, and each folder is listed in sorted order so the result does not
  depend on the arbitrary order returned by os.listdir.
  """
  X = []
  Y = []
  labelled_dirs = (
      (adeno_dir, 1),     # cancer
      (large_dir, 1),     # cancer
      (normal_dir, 0),    # healthy
      (squamous_dir, 1),  # cancer
  )
  for class_dir, label in labelled_dirs:
    for img_name in sorted(os.listdir(class_dir)):
      row = _load_flat_gray(os.path.join(class_dir, img_name))
      if row is None:
        continue  # not a decodable image
      X.append(row)
      Y.append(label)

  return X, Y

In [None]:
class Acc_Score():
  """Hand-rolled binary-classification metrics from actual/predicted labels.

  Usage: build the object, call ``metrics()`` once to fill the
  tp/fp/fn/tn counters, then query ``precision()``, ``recall()``,
  ``f1_score()``, ``simple_acc()`` or ``confusion_matrix()``.

  Args:
    y_act: sequence of ground-truth labels.
    y_pred: sequence of predicted labels, same length as ``y_act``.
    pos_label: value treated as the positive class (default 1).
    neg_label: value treated as the negative class (default 0).

  ``precision`` and ``recall`` are methods; they no longer replace
  themselves with a float after the first call.
  """

  def __init__(self, y_act, y_pred, pos_label=1, neg_label=0):
    self.y_pred = y_pred
    self.y_act = y_act
    # metrics() compares against these; they were previously never defined.
    self.pos_label = pos_label
    self.neg_label = neg_label
    self.tp_count = 0
    self.fp_count = 0
    self.fn_count = 0
    self.tn_count = 0
    self.roc_auc = 0.0

  def f1_score(self):
    """Return the harmonic mean of precision and recall (0.0 if both are 0)."""
    precision = self.precision()
    recall = self.recall()
    if precision + recall == 0:
      return 0.0
    return 2 * (precision * recall) / (precision + recall)

  def roc_auc_score(self, fpr, tpr):
    """Return the area under the ROC curve via the trapezoidal rule.

    Args:
      fpr: false-positive rates of the curve points, in curve order.
      tpr: true-positive rates at the same points.

    The area is recomputed from scratch on every call (and stored in
    ``self.roc_auc``) so repeated calls do not accumulate.
    """
    area = 0.0
    for i in range(1, len(fpr)):
      area += (tpr[i] + tpr[i-1]) * (fpr[i] - fpr[i-1]) / 2
    self.roc_auc = area

    return self.roc_auc

  def simple_acc(self):
    """Return (tp + tn) / number of samples, or 0.0 when there are no samples."""
    all_samples = len(self.y_act)
    if all_samples == 0:
      return 0.0
    true_samples = self.tp_count + self.tn_count

    return true_samples / all_samples

  def precision(self):
    """Return tp / (tp + fp), or 0.0 when nothing was predicted positive."""
    predicted_pos = self.tp_count + self.fp_count
    if predicted_pos == 0:
      return 0.0
    return self.tp_count / predicted_pos

  def recall(self):
    """Return tp / (tp + fn), or 0.0 when there are no actual positives."""
    actual_pos = self.tp_count + self.fn_count
    if actual_pos == 0:
      return 0.0
    return self.tp_count / actual_pos

  def metrics(self):
    """Count tp/tn/fp/fn over all samples and return ``(tp_count, fn_count)``.

    Counters are reset first, so calling this more than once is safe.
    Samples whose actual label is neither ``pos_label`` nor ``neg_label``
    are ignored.

    Raises:
      ValueError: if ``y_act`` and ``y_pred`` differ in length.
    """
    if len(self.y_act) != len(self.y_pred):
      raise ValueError("y_act and y_pred must have the same length")

    self.tp_count = self.fp_count = self.fn_count = self.tn_count = 0

    for actual, predicted in zip(self.y_act, self.y_pred):
      correct = actual == predicted
      if actual == self.pos_label:
        if correct:
          self.tp_count += 1
        else:
          self.fn_count += 1  # actual positive, predicted otherwise
      elif actual == self.neg_label:
        if correct:
          self.tn_count += 1
        else:
          self.fp_count += 1  # actual negative, predicted otherwise

    return self.tp_count, self.fn_count

  def confusion_matrix(self):
    """Return ``[[tp, fp], [fn, tn]]`` as a NumPy array.

    Rows are the predicted class and columns the actual class (positive
    first); note this is not scikit-learn's ``[[tn, fp], [fn, tp]]`` layout.
    """
    cm = np.array([[self.tp_count, self.fp_count], [self.fn_count, self.tn_count]])
    return cm

In [None]:
# Training split: load images and 0/1 labels from the four class folders.
X, y = processing(adeno_dir, large_dir, normal_dir, squamous_dir)

X = np.array(X)
y = np.array(y)

# processing() yields one (1, n_pixels) row per image; collapse the stack to
# a 2-D (n_samples, n_pixels) feature matrix.
X = X.reshape(X.shape[0], -1)
assert X.shape[0] == y.shape[0], "features and labels are misaligned"

# Samples are loaded class by class, so shuffle them. The fixed seed keeps
# the order (and everything trained on it) reproducible across runs.
X, y = shuffle(X, y, random_state=42)

613 613


In [None]:
# print(y)
# cv2.imwrite('img-[9].png',X[9])
# for i in y:
#   print(i)

True

In [None]:
# Test split: load images and 0/1 labels from the four class folders.
X_test, y_test = processing(adeno_test_dir, large_test_dir, normal_test_dir, squamous_test_dir)

X_test = np.array(X_test)
y_test = np.array(y_test)

# processing() yields one (1, n_pixels) row per image; collapse the stack to
# a 2-D (n_samples, n_pixels) feature matrix.
X_test = X_test.reshape(X_test.shape[0], -1)
assert X_test.shape[0] == y_test.shape[0], "features and labels are misaligned"

# Fixed seed so the sample order is identical on every run.
X_test, y_test = shuffle(X_test, y_test, random_state=42)

315 315


In [None]:
# Validation split: load images and 0/1 labels from the four class folders.
X_valid, y_valid = processing(adeno_valid_dir, large_valid_dir, normal_valid_dir, squamous_valid_dir)

X_valid = np.array(X_valid)
y_valid = np.array(y_valid)

# processing() yields one (1, n_pixels) row per image; collapse the stack to
# a 2-D (n_samples, n_pixels) feature matrix.
X_valid = X_valid.reshape(X_valid.shape[0], -1)
assert X_valid.shape[0] == y_valid.shape[0], "features and labels are misaligned"

# Fixed seed so the sample order is identical on every run.
X_valid, y_valid = shuffle(X_valid, y_valid, random_state=42)

72 72


In [None]:
# Two linear SVMs on the flattened pixels that differ only in the penalty
# norm; every other hyper-parameter is shared.
svm_shared = dict(dual=False, tol=0.0001, C=1.0, random_state=42)

svm_clf1 = LinearSVC(penalty='l1', **svm_shared)  # L1 penalty
svm_clf2 = LinearSVC(penalty='l2', **svm_shared)  # L2 penalty

svm_clf1.fit(X, y)
svm_clf2.fit(X, y)

In [None]:
# Predict the test and validation splits with each linear SVM
# (svm_clf1 = L1 penalty, svm_clf2 = L2 penalty).
y_pred_svm_test, y_pred_svm_valid = (
    svm_clf1.predict(split) for split in (X_test, X_valid)
)
y_pred_svm2_test, y_pred_svm2_valid = (
    svm_clf2.predict(split) for split in (X_test, X_valid)
)

# F1 score per model and split.
f1_svm_test, f1_svm_valid = (
    f1_score(truth, guess)
    for truth, guess in ((y_test, y_pred_svm_test), (y_valid, y_pred_svm_valid))
)
f1_svm2_test, f1_svm2_valid = (
    f1_score(truth, guess)
    for truth, guess in ((y_test, y_pred_svm2_test), (y_valid, y_pred_svm2_valid))
)

# Each line shows: test F1, validation F1.
print('With L1 Regularization : ', f1_svm_test, f1_svm_valid)
print('With L2 Regularization : ', f1_svm2_test, f1_svm2_valid)

With L1 Regularization :  0.9710982658959537 0.9666666666666667
With L2 Regularization :  0.9730769230769232 0.9747899159663865


In [None]:
# Random-forest classifiers
#
# Disabled hyper-parameter search, kept for reference. Note that `rf_clf`
# is not defined anywhere in the visible notebook, so this would need an
# estimator before it can be re-enabled.
#
# param_grid = {
#     'max_depth': [2, 4, 6, 8, 10],
#     'min_samples_split': [2, 4, 6, 8, 10, 12, 14, 16, 18],
#     'n_estimators': [10, 50, 100, 200],
# }
#
# max_depth = 6             # depth of the trees in the forest
# min_samples_split = 2     #
# n_estimators = 100        # number of trees
#
# grid_search = GridSearchCV(rf_clf, param_grid=param_grid, cv=5)
# grid_search.fit(X, y)
# print(grid_search.best_params_)

# Settings shared by both forests.
rf_shared = dict(n_estimators=100, max_depth=6, random_state=42)

# Despite the names, the two forests differ in split criterion and
# min_samples_split, not in any L1/L2 penalty.
rf_l1 = RandomForestClassifier(criterion="gini", min_samples_split=5, **rf_shared)
rf_l2 = RandomForestClassifier(criterion="entropy", min_samples_split=10, **rf_shared)

rf_l1.fit(X, y)
rf_l2.fit(X, y)

In [None]:
# Predict the test and validation splits with each random forest
# (rf_l1 = gini criterion, rf_l2 = entropy criterion).
y_pred_rf_test = rf_l1.predict(X_test)
y_pred_rf_valid = rf_l1.predict(X_valid)

y_pred_rf2_test = rf_l2.predict(X_test)
y_pred_rf2_valid = rf_l2.predict(X_valid)

f1_rf_test = f1_score(y_test, y_pred_rf_test)
# Score the first forest on its OWN validation predictions. This previously
# used y_pred_rf2_valid, so both forests reported the same validation F1.
f1_rf_valid = f1_score(y_valid, y_pred_rf_valid)

f1_rf2_test = f1_score(y_test, y_pred_rf2_test)
f1_rf2_valid = f1_score(y_valid, y_pred_rf2_valid)

# Each line shows: test F1, validation F1. The forests differ by split
# criterion, not by L1/L2 regularization, so label them accordingly.
print('Gini criterion: ', f1_rf_test, f1_rf_valid)
print('Entropy criterion: ', f1_rf2_test, f1_rf2_valid)

With L1 Regularization:  0.9980879541108987 0.9752066115702479
With L2 Regularization:  0.9961832061068702 0.9752066115702479


In [None]:
# Seed TensorFlow so weight initialisation and batch shuffling are
# repeatable from run to run.
tf.random.set_seed(42)

# Single-hidden-layer perceptron for the binary cancer / healthy decision.
mlp = Sequential()
mlp.add(tf.keras.Input(shape=(X.shape[1],)))
# X holds raw grayscale intensities (0-255 for 8-bit images). Scaling to
# [0, 1] inside the model keeps the first layer's activations in a sane
# range and applies the same scaling automatically at predict time.
mlp.add(tf.keras.layers.Rescaling(1.0 / 255))
mlp.add(Dense(400, activation='relu'))
# One sigmoid unit: the output is the predicted probability of class 1.
mlp.add(Dense(1, activation='sigmoid'))

mlp.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
mlp.fit(X, y, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f26e55425e0>

In [None]:
# The sigmoid output is a probability per sample, shape (n_samples, 1).
y_pred_mlp_test = mlp.predict(X_test)
y_pred_mlp_valid = mlp.predict(X_valid)

# Peek at a few probabilities instead of dumping the whole array.
print(y_pred_mlp_test[:10])

# f1_score needs hard 0/1 labels (it rejects continuous scores), so
# threshold the probabilities at 0.5 and flatten to 1-D.
y_label_mlp_test = (y_pred_mlp_test.ravel() >= 0.5).astype(int)
y_label_mlp_valid = (y_pred_mlp_valid.ravel() >= 0.5).astype(int)

f1_mlp_test = f1_score(y_test, y_label_mlp_test)
f1_mlp_valid = f1_score(y_valid, y_label_mlp_valid)

# ROC-AUC is a ranking metric: feed it the raw probabilities, not labels.
roc_auc_mlp_test = roc_auc_score(y_test, y_pred_mlp_test.ravel())
roc_auc_mlp_valid = roc_auc_score(y_valid, y_pred_mlp_valid.ravel())

# Each line shows: test score, validation score.
print('F1 Score: ', f1_mlp_test, f1_mlp_valid)
print('ROC-AUC Score: ', roc_auc_mlp_test, roc_auc_mlp_valid)