In [None]:
from keras.applications import VGG16
from keras.models import Sequential
from keras.layers import Dense, Flatten, BatchNormalization, Dropout
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import pandas as pd

from functions.rename_split import copy_and_rename_files, filter_files
from functions.load_data import collect_data, load_data
from functions.normalize import normalize_images
from functions.plot_images import plot_images

In [None]:
# Directory layout: raw images live under source_dir, and every split gets
# its own folder beneath splits_dir.
source_dir = "img/raw/"
splits_dir = "img/splits/"
train_dir = splits_dir + "train/"
test_dir = splits_dir + "test/"
val_dir = splits_dir + "val/"

# Make sure the three split folders exist (no error if they already do)
for split_path in (train_dir, test_dir, val_dir):
    os.makedirs(split_path, exist_ok=True)

## Splitting the data into three parts (train, test, and val)

In [None]:
# Running index handed to copy_and_rename_files and replaced by its return
# value, so it carries over from one split and category to the next.
global_counter = 1

# Split each class folder separately so both classes get the same proportions
for category in ['yes', 'no']:
    category_dir = os.path.join(source_dir, category)
    # os.listdir returns entries in arbitrary order; sort them so the seeded
    # split below picks the same files on every machine and every run.
    # filter_files then removes the unwanted entries (per the original note:
    # files ending with 'Zone.Identifier').
    files = filter_files(sorted(os.listdir(category_dir)))
    # Hold out 30% of the files, leaving 70% for training
    train_files, test_val_files = train_test_split(files, test_size=0.3, random_state=42)
    # Halve the held-out part: 15% test, 15% val
    test_files, val_files = train_test_split(test_val_files, test_size=0.5, random_state=42)
    # Create the per-category folder in each split, then copy and rename into it
    for split_dir, split_files in (
        (train_dir, train_files),
        (test_dir, test_files),
        (val_dir, val_files),
    ):
        destination_dir = os.path.join(split_dir, category)
        os.makedirs(destination_dir, exist_ok=True)
        global_counter = copy_and_rename_files(category_dir, destination_dir, split_files, "img_", global_counter)

print("Images have been copied and renamed in train, test, and val sets successfully.")

## Creating a DataFrame from the data

In [None]:
splits = ['train', 'test', 'val']

# Gather the records returned by collect_data for every (split, class) folder
data = []
for split in splits:
    for tumor_type in ['no', 'yes']:
        folder = os.path.join(splits_dir, split, tumor_type)
        data.extend(collect_data(folder, tumor_type, split))

# One DataFrame built from all collected records
df = pd.DataFrame(data)

In [None]:
# Show the assembled dataframe as the cell's output
df

## Loading the train, test, and val splits

In [None]:
# Load the three split folders in order (train, test, val). Each load_data
# call yields a 3-tuple; the third element is kept only for the training
# split (as `labels`) and discarded for the other two.
(X_train, y_train, labels), (X_test, y_test, _), (X_val, y_val, _) = [
    load_data(split_dir) for split_dir in (train_dir, test_dir, val_dir)
]

## Normalizing data for better model fitting

In [None]:
# One set of preprocessing options shared by all three splits, so train, test
# and val are guaranteed to go through identical settings.
normalize_options = {
    "target_size": (224, 224),  # matches the input_shape given to VGG16 in the model cell
    "apply_sharpening": True,
    "apply_sobel": False,
}
X_train_norm = normalize_images(X_train, **normalize_options)
X_test_norm = normalize_images(X_test, **normalize_options)
X_val_norm = normalize_images(X_val, **normalize_options)

In [None]:
# Visual sanity check of the preprocessed training images and their labels
# (the third argument is presumably the number of images to show - confirm in functions/plot_images)
plot_images(X_train_norm, y_train, 20)

## Model training and plotting results

In [None]:
# Build a VGG16 convolutional base; include_top=False leaves out the final
# dense classifier so a custom head can be attached below.
base_model = VGG16(include_top=False, input_shape=(224, 224, 3))

# Freeze the VGG16 weights so that only the new head is trained
base_model.trainable = False

NUM_CLASSES = 1   # a single sigmoid unit, paired with binary cross-entropy
BATCH_SIZE = 32

model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(NUM_CLASSES, activation='sigmoid'))

# Compile the model.
# `learning_rate` replaces the deprecated `lr` alias, which recent Keras
# releases warn about or reject.
model.compile(
    loss='binary_crossentropy',
    optimizer=RMSprop(learning_rate=1e-4),
    metrics=['accuracy']
)

# Print the model structure
model.summary()

# Image generator used for data augmentation.
# NOTE(review): `rescale` only affects batches produced by this generator,
# while the validation and test arrays are fed to the model as-is - confirm
# that both end up in the same value range given what normalize_images
# returns.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.05,
    height_shift_range=0.05,
    rescale=1./255,
    shear_range=0.05,
    brightness_range=[0.1, 1.5],
    horizontal_flip=True,
    vertical_flip=True
)

# No datagen.fit(...) call: it is only required when featurewise_center,
# featurewise_std_normalization or zca_whitening is enabled, and none is.

# Train the model on augmented batches.
# steps_per_epoch is left unset so Keras takes the number of batches from the
# iterator's length, which includes the final partial batch. The former
# `len(X_train_norm) // 32` dropped that remainder and would be 0 for fewer
# than 32 training images.
history = model.fit(
    datagen.flow(X_train_norm, y_train, batch_size=BATCH_SIZE),
    epochs=10,
    validation_data=(X_val_norm, y_val)
)

In [None]:
# Score the trained model on the held-out test split; with the loss and
# metrics passed to model.compile this reports the loss and the accuracy
model.evaluate(X_test_norm, y_test)

In [None]:
def plot_metrics(history):
    """Plot per-epoch loss and accuracy curves, one figure per metric.

    Args:
        history: Mapping from metric name to a sequence of per-epoch values,
            such as the ``history`` attribute of the object returned by
            ``model.fit``. Must contain ``'loss'`` and ``'accuracy'``;
            ``'val_loss'`` and ``'val_accuracy'`` are drawn when present.

    Raises:
        KeyError: If ``'loss'`` or ``'accuracy'`` is missing from ``history``.
    """
    # Read the required series up front so a missing key fails before any
    # figure is created.
    curves = [
        ('loss', 'Loss', history['loss']),
        ('accuracy', 'Accuracy', history['accuracy']),
    ]

    for metric, title, train_values in curves:
        fig, ax = plt.subplots()
        ax.plot(train_values, label=f'Training {title}')
        # Validation curves exist only when fit() was given validation data
        val_key = f'val_{metric}'
        if val_key in history:
            ax.plot(history[val_key], label=f'Validation {title}')
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(title)
        ax.legend()
        plt.show()

In [None]:
# history.history holds the per-epoch metric values recorded by model.fit
plot_metrics(history.history)