In [None]:
from tensorflow import keras

In [None]:
import tensorflow as tf
#import the network
from keras.applications import DenseNet169

In [None]:
import sklearn.svm
from sklearn.model_selection import train_test_split, KFold
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [None]:
# ImageNet-pretrained DenseNet169 built without its top (include_top=False),
# expecting 224x224 RGB inputs.
conv_base = DenseNet169(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

In [None]:
# Print the layer listing; extract_features_middlelayers takes its layer_name from these names.
conv_base.summary()

In [None]:
# Mount Google Drive in Colab so the dataset stored under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

In [None]:
# Root folder of the image dataset on the mounted Drive.
main_dir = "/content/drive/MyDrive/BC_dataset/M400"

In [None]:
# Load every image under main_dir without shuffling and without a
# train/validation split, as 224x224 images in batches of 32.
dataset = tf.keras.utils.image_dataset_from_directory(
    main_dir,
    image_size=(224, 224),
    batch_size=32,
    shuffle=False,
    seed=None,
    validation_split=None,
    subset=None,
)

In [None]:
import numpy as np

In [None]:
from tensorflow.keras.models import Model

In [None]:
# Feature extraction (baseline): outputs of the convolutional base.
# To use a different network, pass its model and the matching preprocessing
# function instead of editing this function.
def extract_features(data, model=None, preprocess_fn=None):
    """Run every batch of ``data`` through a network and collect the outputs.

    Args:
        data: Iterable yielding ``(images, labels)`` batches.
        model: Object exposing a Keras-style ``predict(batch, verbose=...)``
            method. Defaults to the notebook-level ``conv_base``.
        preprocess_fn: Callable applied to each image batch before prediction.
            Defaults to ``tf.keras.applications.densenet.preprocess_input``.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays, each concatenated over
        all batches along the first axis.

    Raises:
        ValueError: If ``data`` yields no batches.
    """
    # Defaults are resolved at call time, so the notebook globals are only
    # needed when the caller does not supply replacements.
    if model is None:
        model = conv_base
    if preprocess_fn is None:
        preprocess_fn = tf.keras.applications.densenet.preprocess_input

    all_features = []
    all_labels = []
    for images, labels in data:
        # verbose=0: predict is called once per batch and would otherwise
        # print one progress bar per batch.
        features = model.predict(preprocess_fn(images), verbose=0)
        all_features.append(features)
        all_labels.append(np.asarray(labels))

    if not all_features:
        raise ValueError("extract_features received a dataset with no batches")
    return np.concatenate(all_features), np.concatenate(all_labels)

In [None]:
# Baseline features and labels for the whole dataset.
train_features, train_labels = extract_features(data=dataset)

In [None]:
# Flatten each sample's feature map into one row. The row count is taken from
# the array itself instead of a hard-coded dataset size.
train_features = train_features.reshape(len(train_features), -1)

In [None]:
# Classification with baseline features: K random 80/20 splits, one RBF SVM per split.
K = 10
# Per-split metrics.
accuracies = []
precisions = []
recalls = []
f1_scores = []
roc_aucs = []
scores = []
for i in range(K):
    # A different random_state per iteration gives K different splits.
    x_train, x_test, y_train, y_test = train_test_split(
        train_features, train_labels, test_size=0.20, random_state=42 + i)
    SVM = sklearn.svm.SVC(kernel='rbf', C=5)
    classifier = SVM.fit(x_train, y_train)
    y_pred = SVM.predict(x_test)

    # precision/recall/F1 are called with their default averaging, and ROC AUC
    # is fed a single score per sample.
    # NOTE(review): this presumes a two-class problem — confirm for the dataset.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # ROC AUC needs a continuous score, so the SVM decision values are used
    # rather than the hard predictions.
    roc_auc = roc_auc_score(y_test, SVM.decision_function(x_test))
    # SVC.score reports mean accuracy on the test data, which is the value
    # already computed above; reusing it avoids a second prediction pass.
    s = accuracy

    # Append the metrics to the lists
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    roc_aucs.append(roc_auc)

    print(i,'\n ')
    print('prediction = ',y_pred)
    print('GroundTruth = ', y_test)
    print('length of xtest', len(x_test))
    print("The score for this classification is: ", s)
    print(classification_report(y_test,y_pred))
    scores.append(s)
print("The mean and standard deviation of classification  is :", np.mean(scores), np.std(scores))
print("max  = ",max(scores),"min  = ",min(scores))
avg_accuracy = np.mean(accuracies)
avg_precision = np.mean(precisions)
avg_recall = np.mean(recalls)
avg_f1_score = np.mean(f1_scores)
avg_roc_auc = np.mean(roc_aucs)

print(f'Average Accuracy: {avg_accuracy:.4f}', "std= ", np.std(accuracies))
print(f'Average Precision: {avg_precision:.4f}', "std= ", np.std(precisions) )
print(f'Average Recall: {avg_recall:.4f}',  "std= ", np.std(recalls) )
print(f'Average F1 Score: {avg_f1_score:.4f}',  "std= ",np.std(f1_scores) )
print(f'Average ROC AUC: {avg_roc_auc:.4f}',  "std= ", np.std(roc_aucs) )


In [None]:
# Feature extraction from an intermediate layer.
# To use a different network, pass its model and the matching preprocessing
# function instead of editing this function.
def extract_features_middlelayers(data, layer_name, model=None, preprocess_fn=None):
    """Collect the outputs of one named layer for every batch of ``data``.

    A sub-model is built that maps the base model's inputs to the output of
    ``layer_name``; every batch is preprocessed and passed through it.

    Args:
        data: Iterable yielding ``(images, labels)`` batches.
        layer_name: Name of the layer whose output is collected; it is looked
            up with ``model.get_layer``.
        model: Base network. Defaults to the notebook-level ``conv_base``.
        preprocess_fn: Callable applied to each image batch before prediction.
            Defaults to ``tf.keras.applications.densenet.preprocess_input``.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays, each concatenated over
        all batches along the first axis.

    Raises:
        ValueError: If ``data`` yields no batches.
    """
    # Defaults are resolved at call time, so the notebook globals are only
    # needed when the caller does not supply replacements.
    if model is None:
        model = conv_base
    if preprocess_fn is None:
        preprocess_fn = tf.keras.applications.densenet.preprocess_input

    intermediate_layer = model.get_layer(layer_name)
    intermediate_model = Model(inputs=model.inputs, outputs=intermediate_layer.output)

    all_features = []
    all_labels = []
    for images, labels in data:
        # verbose=0: predict is called once per batch and would otherwise
        # print one progress bar per batch.
        intermediate_output = intermediate_model.predict(preprocess_fn(images), verbose=0)
        all_features.append(intermediate_output)
        all_labels.append(np.asarray(labels))

    if not all_features:
        raise ValueError("extract_features_middlelayers received a dataset with no batches")
    return np.concatenate(all_features), np.concatenate(all_labels)

In [None]:
#extract features from a specific layer based on its name (from network's summary)
# NOTE(review): this layer name follows ConvNeXt-Tiny naming, while conv_base is built from
# DenseNet169 — presumably get_layer will not find it; confirm against conv_base.summary().
train_features_S3B2C, train_labels_S3B2C= extract_features_middlelayers(data=dataset,layer_name="convnext_tiny_stage_3_block_2_depthwise_conv")

In [None]:
# Flatten each sample's feature map into one row. The row count is taken from
# the array itself instead of a hard-coded dataset size.
train_features_S3B2C = train_features_S3B2C.reshape(len(train_features_S3B2C), -1)

In [None]:
from sklearn.feature_selection import mutual_info_classif

In [None]:
import pandas as pd

In [None]:
# Wrap features and labels in DataFrames (make_mi_scores reads X.columns).
train_features_S3B2C, train_labels_S3B2C = (
    pd.DataFrame(train_features_S3B2C),
    pd.DataFrame(train_labels_S3B2C),
)

In [None]:
def make_mi_scores(X, y):
    """Score every column of ``X`` against ``y`` with ``mutual_info_classif``.

    Returns a pandas Series named "MI Scores", indexed by ``X.columns`` and
    sorted from the highest score to the lowest.
    """
    raw_scores = mutual_info_classif(X, y)
    ranked = pd.Series(raw_scores, index=X.columns, name="MI Scores")
    return ranked.sort_values(ascending=False)

In [None]:
# Mutual information between each extracted feature and the labels, sorted descending.
mi_scorce_S3B2C = make_mi_scores(X=train_features_S3B2C, y=train_labels_S3B2C)

In [None]:
# Largest mutual-information score over all features.
max(mi_scorce_S3B2C)

0.07791213456427082

In [None]:
import sklearn.svm
from sklearn.model_selection import train_test_split, KFold
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support

In [None]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [None]:
import sys

In [None]:
# Print NumPy floating-point values with a precision of 20 digits.
np.set_printoptions(precision=20)

PCA on features from selected layers

In [None]:
from sklearn.decomposition import PCA

# Fit a full-SVD PCA on the selected layer's features and plot the cumulative
# explained-variance ratio against the component index.
pca = PCA(svd_solver='full')
pca.fit(train_features_S3B2C)
plt.plot(pca.explained_variance_ratio_.cumsum())
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.grid()

In [None]:
# Standardise the features, then refit the full-SVD PCA on the scaled data.
# NOTE(review): both are fitted on all samples, before any train/test split.
sc = StandardScaler()
train_features_S3B2C = sc.fit_transform(train_features_S3B2C)
pca = PCA(svd_solver='full')
pca.fit(train_features_S3B2C)

In [None]:
# Lift NumPy's print truncation threshold and print the cumulative explained variance.
# NOTE(review): the name `sum` shadows the built-in sum(); it is kept because
# the following cells read it.
np.set_printoptions(threshold=sys.maxsize)
sum = np.cumsum(pca.explained_variance_ratio_)
print(sum)

In [None]:
# Positions where the cumulative explained variance reaches 5 %. Position i of
# the cumulative sum covers components 0..i, so i + 1 components are needed.
indices = np.where(sum >= 0.05)
n_component = indices[0][0] + 1
print(n_component)

In [None]:

K = 10
# CEV thresholds (in percent) to sweep; change CEV to find the optimum.
CV_vec = range(5, 105, 5)
scores_no_pca = []
scores_pca = np.zeros((K, len(CV_vec)), dtype=float)


def _components_for_cev(cev_percent, cumulative_variance, max_components):
    """Count the leading components needed to reach ``cev_percent`` % CEV, capped at ``max_components``."""
    reached = np.flatnonzero(cumulative_variance >= cev_percent / 100.0)
    # Position i of the cumulative sum covers i + 1 components. If the curve
    # never reaches the threshold, fall back to every component.
    needed = reached[0] + 1 if reached.size else len(cumulative_variance)
    return int(min(needed, max_components))


for i in range(K):
    x_train, x_test, y_train, y_test = train_test_split(train_features_S3B2C, train_labels_S3B2C, test_size=0.20, random_state=42 + i)
    # With svd_solver='full', PCA accepts at most min(n_samples, n_features)
    # components of the data it is fitted on, so the request is capped here
    # instead of catching the fit error.
    max_components = min(x_train.shape)
    # Flatten the labels so fit/score receive 1-D targets even when the labels
    # are stored as a single-column table.
    y_train_1d = np.ravel(y_train)
    y_test_1d = np.ravel(y_test)

    for idx, cv in enumerate(CV_vec):
        n_component = _components_for_cev(cv, sum, max_components)

        pca = PCA(n_components=n_component, svd_solver='full')
        x_train_pca = pca.fit_transform(x_train)
        x_test_pca = pca.transform(x_test)

        SVM = sklearn.svm.SVC(kernel='rbf', C=5)
        classifier = SVM.fit(x_train_pca, y_train_1d)
        y_pred1 = SVM.predict(x_test_pca)
        # Same value SVC.score would report (mean accuracy), without running
        # the prediction a second time.
        s1 = accuracy_score(y_test_1d, y_pred1)

        print('cv=', cv,'\n ')
        print(i,'\n ')
        print('prediction = ',y_pred1)
        print('GroundTruth = ', y_test)
        print('length of xtest', len(x_test))
        print("The score for this classification is: ", s1)
        print("#component = ",n_component)
        scores_pca[i][idx] = s1
print("The mean and standard deviation of classification with pca is :", np.mean(scores_pca), np.std(scores_pca))

In [None]:
# One row per split, one column per CEV threshold. Column labels come from
# CV_vec so they cannot drift from the thresholds that were actually swept.
scores_pca_pd = pd.DataFrame(data=scores_pca, columns=list(CV_vec))
scores_pca_pd

In [None]:
# The original call left n_components empty, which is a syntax error.
# NOTE(review): the threshold is taken here as the CEV (percent) with the best
# mean accuracy across the K splits of the sweep — confirm this selection policy.
best_cev_percent = CV_vec[int(np.argmax(scores_pca.mean(axis=0)))]
# A float strictly between 0 and 1 makes PCA (svd_solver='full') keep just
# enough components to exceed that share of explained variance; None keeps
# every component, which is what a 100 % threshold means.
n_components_final = best_cev_percent / 100.0 if best_cev_percent < 100 else None
pca = PCA(n_components=n_components_final, svd_solver='full')
pca.fit(train_features_S3B2C)
train_features_S3B2C = pca.transform(train_features_S3B2C)

In [None]:
# Save the features (for the chosen CEV) as a CSV file without the index column.
features_to_save = pd.DataFrame(train_features_S3B2C)
features_to_save.to_csv('/content/drive/MyDrive/features and labels/S3B2C.csv', index=False)