In [None]:
#Libraries
import os

import numpy as np
import pandas as pd
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input, EfficientNetV2S
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [None]:
#Connecting to Google Drive
# Colab-only step: mounts the user's Drive at /content/drive so that the
# '/content/drive/My Drive/...' paths used later in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Path to images
# Root folder on the mounted Drive. The class sub-folders named in class_names
# are joined onto it, and the same folder is handed to flow_from_directory.
base_dir = '/content/drive/My Drive/Cancer/AugImages/'

def load_and_preprocess_image(img_path, target_size=(224, 224)):
    """Read one image from disk and return it as a model-ready batch of size 1.

    Args:
        img_path: Path of the image file to load.
        target_size: (height, width) the image is resized to while loading.

    Returns:
        The result of the EfficientNetV2 ``preprocess_input`` applied to the
        image array after a leading batch axis has been added.
    """
    pil_img = image.load_img(img_path, target_size=target_size)
    # Prepend the batch axis so the array can be fed to a Keras model directly.
    single_batch = image.img_to_array(pil_img)[np.newaxis, ...]
    return preprocess_input(single_batch)

#Loading the images of every class folder into memory
class_names = ['Neg250', 'AugHSIL', 'AugLSIL', 'AugSCC']
# Mirrors the image formats accepted by Keras' flow_from_directory, so this
# in-memory dataset covers the same files the training generators see and stray
# entries (e.g. '.ipynb_checkpoints', 'Thumbs.db') do not crash load_img.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')
# Maps class name -> list of (image path, preprocessed batch-of-one array).
dataset = {}

for class_name in class_names:
    class_dir = os.path.join(base_dir, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the row order
    # of the exported feature files reproducible between runs.
    img_paths = [
        os.path.join(class_dir, img_name)
        for img_name in sorted(os.listdir(class_dir))
        if img_name.lower().endswith(IMAGE_EXTENSIONS)
    ]
    dataset[class_name] = [(img_path, load_and_preprocess_image(img_path)) for img_path in img_paths]
print(f"Loaded {len(dataset['Neg250'])} images for class 'Neg250'")

Loaded 250 images for class 'Neg250'


In [None]:
#Backbone: EfficientNetV2-S with ImageNet weights and without its classification top
base_model = EfficientNetV2S(weights='imagenet', include_top=False)

#First training stage only fits the new head, so every backbone layer is frozen
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

#Classification head: global average pooling -> 1024-unit ReLU layer -> softmax
x = Dense(1024, activation='relu')(GlobalAveragePooling2D()(base_model.output))
predictions = Dense(len(class_names), activation='softmax')(x)  #One output unit per class

#Backbone and head joined into a single trainable graph
model = Model(inputs=base_model.input, outputs=predictions)

#Adam at 1e-3 with categorical cross-entropy; accuracy is tracked as the metric
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Модель для дообучения создана


In [None]:
#Data splitting
# No `rescale` here: Keras' EfficientNetV2 models include their own input
# rescaling by default and expect raw pixel values in [0, 255]. Dividing by 255
# would train the network on a different input range than the one used when
# features are extracted from the images loaded by load_and_preprocess_image.
train_datagen = ImageDataGenerator(
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2
)

# Validation images must not be augmented, so they get their own generator.
# It uses the same validation_split so both generators partition the files
# identically and the training/validation subsets stay disjoint.
val_datagen = ImageDataGenerator(validation_split=0.2)

# Settings shared by both directory iterators.
flow_kwargs = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(
    base_dir,
    subset='training',
    **flow_kwargs
)

validation_generator = val_datagen.flow_from_directory(
    base_dir,
    subset='validation',
    **flow_kwargs
)

Found 800 images belonging to 4 classes.
Found 200 images belonging to 4 classes.
Данные для обучения и валидации подготовлены


In [None]:
#Training (stage 1: only the new classification head is trainable)
# len(generator) is the number of batches needed to cover every sample once,
# including a final partial batch. The previous `samples // batch_size` rounded
# down, which silently dropped 8 of the 200 validation images on every epoch.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Модель успешно обучена


In [None]:
#Unfreezing the top layers of the model
# BatchNormalization layers are deliberately left frozen: Keras' fine-tuning
# guidance for EfficientNet is to keep them non-trainable, because letting their
# statistics update on a small dataset degrades the pretrained features.
for layer in base_model.layers[-10:]:
    if not isinstance(layer, BatchNormalization):
        layer.trainable = True

#Compiling the model with a reduced learning rate
# Re-compiling is required for the changed `trainable` flags to take effect.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

#Fine-tuning
# len(generator) covers every sample once per epoch, including the final
# partial batch that `samples // batch_size` would drop.
history_fine = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Модель успешно дообучена


In [None]:
#Feature extraction model
#Re-uses the backbone object trained above: its raw feature maps are exposed
#through feature_extractor, then averaged spatially into one vector per image.
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)
spatial_pooling = GlobalAveragePooling2D()
x = spatial_pooling(feature_extractor.output)
feature_model = Model(inputs=feature_extractor.input, outputs=x)

Модель для извлечения признаков создана


In [None]:
#Feature extraction function
def extract_features(img_array):
    """Return the flattened feature vector for one preprocessed image batch.

    Args:
        img_array: Array as produced by load_and_preprocess_image
            (a batch holding a single image).

    Returns:
        1-D numpy array with the output of feature_model for that input.
    """
    return feature_model.predict(img_array).flatten()


def extract_features_batch(img_arrays, batch_size=32):
    """Compute feature vectors for many images with a single predict call.

    Calling Model.predict once per image repeats its per-call setup and prints
    one progress bar per image; stacking the images first avoids both.

    Args:
        img_arrays: Sequence of batch-of-one arrays (see load_and_preprocess_image).
        batch_size: Number of images sent through the model per step.

    Returns:
        2-D numpy array with one row of features per input image, in input
        order. An empty input yields an empty (0, 0) array.
    """
    if len(img_arrays) == 0:
        return np.empty((0, 0), dtype=np.float32)
    # Each element already has a leading batch axis of size 1, so concatenating
    # along axis 0 yields one (N, H, W, C) batch.
    stacked = np.concatenate(img_arrays, axis=0)
    return feature_model.predict(stacked, batch_size=batch_size, verbose=0)


#Feature extraction for all images and saving them to CSV files
for class_name, img_list in dataset.items():
    filenames = [os.path.basename(img_path) for img_path, _ in img_list]
    features = extract_features_batch([img_array for _, img_array in img_list])

    #Creating a DataFrame with filenames and features (one row per image)
    df = pd.DataFrame(features)
    df.insert(0, 'filename', filenames)

    #Saving the DataFrame to a CSV file, one file per class
    df.to_csv(f'/content/drive/My Drive/extracted_features_{class_name}.csv', index=False)

#Checking: read one exported file back and report its shape
df_loaded = pd.read_csv('/content/drive/My Drive/extracted_features_Neg250.csv')
print(f"Loaded features shape from CSV: {df_loaded.shape}")

Loaded features shape from CSV: (250, 1281)
