In [1]:
%pip install timm
import copy
import os
import sys

import matplotlib.pyplot as plt
import seaborn as sns
import timm
import torch
import torchvision
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score,ConfusionMatrixDisplay
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
sys.path.append('../input/pytorchimagemodels')


Note: you may need to restart the kernel to use updated packages.


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ---- Run configuration ------------------------------------------------------
# Every tunable value read by the later cells is defined here.

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Dataset root; the "train" and "val" sub-folders are read with ImageFolder.
# NOTE(review): the recorded output of the data-loading cell is a
# FileNotFoundError for "<data_path>/train" -- confirm the dataset mount path.
data_path = "/kaggle/input/ham10001/Data folder"
train_dir = os.path.join(data_path, "train")
val_dir = os.path.join(data_path, "val")

# Image geometry (pixels) and the interpolation mode name used when resizing.
train_crop_size = 299
val_resize_size = 299
val_crop_size = 299
interpolation = "bilinear"

# Model selection. Kept in sync with the architecture the model cell actually
# builds ('inception_v4') and with the checkpoint file name; the previous value
# "inception_v3" did not match either.
model_name = "inception_v4"
pretrained = True

# Data loading.
batch_size = 32
num_workers = 4

# SGD hyper-parameters.
learning_rate = 0.001
momentum = 0.9
weight_decay = 1e-4

# StepLR schedule: the learning rate is multiplied by `lr_gamma` every
# `lr_step_size` scheduler steps.
lr_step_size = 30
lr_gamma = 0.001

# Upper bound on the number of training epochs (early stopping may end sooner).
epochs = 100

In [3]:
# Convert the configured interpolation name into torchvision's enum member.
interpolation = InterpolationMode(interpolation)

# Per-channel mean / std handed to Normalize in both pipelines.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _tensor_and_normalize():
    """Return fresh transforms: PIL image -> tensor -> float tensor -> normalized."""
    return [
        transforms.PILToTensor(),
        transforms.ConvertImageDtype(torch.float),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]


# Training pipeline: random resized crop, then tensor conversion + normalization.
TRAIN_TRANSFORM_IMG = transforms.Compose(
    [transforms.RandomResizedCrop(train_crop_size, interpolation=interpolation)]
    + _tensor_and_normalize()
)
dataset = torchvision.datasets.ImageFolder(root=train_dir, transform=TRAIN_TRANSFORM_IMG)

# Evaluation pipeline: resize + center crop, then the same tensor conversion.
TEST_TRANSFORM_IMG = transforms.Compose(
    [
        transforms.Resize(val_resize_size, interpolation=interpolation),
        transforms.CenterCrop(val_crop_size),
    ]
    + _tensor_and_normalize()
)
dataset_test = torchvision.datasets.ImageFolder(root=val_dir, transform=TEST_TRANSFORM_IMG)

print("Creating data loaders")
# Training batches are drawn in random order; evaluation batches in dataset order.
train_sampler = torch.utils.data.RandomSampler(dataset)
test_sampler = torch.utils.data.SequentialSampler(dataset_test)

# Options shared by both loaders.
_loader_options = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)

data_loader = torch.utils.data.DataLoader(dataset, sampler=train_sampler, **_loader_options)
data_loader_test = torch.utils.data.DataLoader(dataset_test, sampler=test_sampler, **_loader_options)

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/ham10001/Data folder/train'

In [None]:
print("Creating model")
# One output unit per class folder found by ImageFolder.
num_classes = len(dataset.classes)
print("Num classes = ", num_classes)
# Pass `num_classes` so the classifier head is sized for this dataset; without
# it the network keeps its default head size, and predictions can fall outside
# the dataset's label set. `pretrained` comes from the config cell instead of
# being hard-coded here.
model = timm.create_model('inception_v4', pretrained=pretrained, num_classes=num_classes)


In [None]:
# File path of every sample, in the order the datasets index them
# (each entry of `.samples` is a (path, class_index) pair).
image_filenames_train = [path for path, _ in dataset.samples]
image_filenames_test = [path for path, _ in dataset_test.samples]

In [None]:
# Switch off the auxiliary classifier branch.
# NOTE(review): `aux_logits` / `AuxLogits` are the attribute names used by
# torchvision's Inception v3; it is unclear whether the timm 'inception_v4'
# model created above reads them -- confirm, or drop this cell.
model.aux_logits, model.AuxLogits = False, None

In [None]:
# Move the model's parameters and buffers to the configured device.
model.to(device=device)

In [None]:
# Classification loss on raw logits, averaged over the batch ("mean" is the
# library default, spelled out here because later cells rely on it when they
# re-weight the per-batch loss by batch size).
criterion = torch.nn.CrossEntropyLoss(reduction="mean")

In [None]:
# SGD over every model parameter; the hyper-parameters come from the config cell.
_sgd_settings = {
    "lr": learning_rate,
    "momentum": momentum,
    "weight_decay": weight_decay,
}
optimizer = torch.optim.SGD(model.parameters(), **_sgd_settings)

In [None]:
# NOTE(review): `n_epochs` is not read by any cell visible in this notebook;
# the training loop iterates over `epochs` from the config cell.
n_epochs = 10

# Early stopping: stop after this many consecutive epochs without a new best
# training loss, or as soon as the best loss drops to the threshold.
early_stopping_tolerance = 3
early_stopping_threshold = 0.03
early_stopping_counter = 0
best_loss = float('inf')

# `state_dict()` returns references to the live parameter tensors, so take a
# deep copy: otherwise this "snapshot" silently follows every optimizer step.
best_model_wts = copy.deepcopy(model.state_dict())

In [None]:
# Step decay: every `lr_step_size` calls to `lr_scheduler.step()` the learning
# rate is multiplied by `lr_gamma`.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=lr_step_size,
    gamma=lr_gamma,
)

In [None]:
# Per-epoch bookkeeping. Only `epoch_train_losses` is filled by this cell; the
# other lists are created empty here.
train_loss_history = []
epoch_train_losses = []
epoch_accuracies = []
epoch_precisions = []
epoch_recalls = []
epoch_f1_scores = []

print("Start training")
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    len_dataset = 0
    for step, (image, target) in enumerate(data_loader):
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is a batch mean; weight it by the batch size so the epoch
        # value is a true per-sample average even when the last batch is short.
        batch_len = output.shape[0]
        epoch_loss += batch_len * loss.item()
        len_dataset += batch_len
        if step % 10 == 0:
            print('Epoch: ', epoch, '| step : %d' % step, '| train loss : %0.4f' % loss.item() )

    epoch_loss = epoch_loss / len_dataset
    epoch_train_losses.append(epoch_loss)
    print('Epoch: ', epoch, '| train loss :  %0.4f' % epoch_loss )
    print('Lenght Data Set :  %0.4f' % len_dataset )

    # From epoch index 30 onwards, checkpoint the current weights every epoch.
    if epoch > 29:
        torch.save(model.state_dict(), '/kaggle/working/inceptionv4.pt')

    # Early stopping on the training loss.
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        # Deep copy: `state_dict()` hands back references to the live tensors,
        # so without the copy the "best" weights would always equal the latest
        # weights and the restore after the loop would be a no-op.
        best_model_wts = copy.deepcopy(model.state_dict())
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1

    if early_stopping_counter >= early_stopping_tolerance or best_loss <= early_stopping_threshold:
        print("\nTerminating: early stopping")
        # `epoch` is zero-based, so `epoch + 1` epochs have been completed.
        # Later cells use `epochs` as the number of passes; it has to match
        # len(epoch_train_losses) and must not become 0.
        epochs = epoch + 1
        break
    lr_scheduler.step()

# Restore the weights of the epoch with the lowest training loss.
model.load_state_dict(best_model_wts)

In [None]:
# Persist the weights currently loaded in `model` to the Kaggle working directory.
_final_weights = model.state_dict()
torch.save(_final_weights, '/kaggle/working/inceptionv4.pt')

In [None]:
epoch_test_loss_intrain = []
misclassified_images_train = []

# Evaluate through a loader that walks the training set in dataset order.
# `data_loader` draws samples with a RandomSampler, so the position of a
# prediction would not line up with `image_filenames_train` (built from
# `dataset.samples`) and the wrong files would be reported as misclassified.
# NOTE(review): `dataset` still applies the random training crop, so the
# results of successive passes can differ slightly.
eval_loader_train = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

# Set the model to evaluation mode
model.eval()

# One evaluation pass per training epoch. The weights are fixed at this point,
# so every pass scores the same model.
for epoch in range(epochs):
    predicted_labels_train = []
    ground_truth_labels_train = []
    running_loss = 0.0
    samples_seen = 0
    with torch.no_grad():  # No gradient calculation during evaluation
        for image, target in eval_loader_train:
            image, target = image.to(device), target.to(device)
            output = model(image)
            _, predicted = torch.max(output, 1)  # Index of the largest logit per sample
            predicted_labels_train.extend(predicted.cpu().numpy())
            ground_truth_labels_train.extend(target.cpu().numpy())
            # Weight the batch-mean loss by the batch size so this value is
            # comparable with the per-sample training loss.
            batch_len = target.shape[0]
            running_loss += criterion(output, target).item() * batch_len
            samples_seen += batch_len
    running_loss /= max(samples_seen, 1)
    epoch_test_loss_intrain.append(running_loss)
    print(f'Epoch {epoch + 1} test loss: {running_loss:.4f}')

    # Identify misclassified images
    misclassified_indices = [
        i
        for i, (true, pred) in enumerate(zip(ground_truth_labels_train, predicted_labels_train))
        if true != pred
    ]
    misclassified_images_epoch = [image_filenames_train[i] for i in misclassified_indices]
    misclassified_images_train.extend(misclassified_images_epoch)

# Every pass re-reports the same files; keep each path once, in first-seen order.
misclassified_images_train = list(dict.fromkeys(misclassified_images_train))

# Save misclassified images to a .txt file
misclassified_file_train = 'misclassified_images_train.txt'
with open(misclassified_file_train, 'w') as f:
    for img in misclassified_images_train:
        f.write(f"{img}\n")

print(f"Misclassified images during training saved to {misclassified_file_train}")

In [None]:
epoch_test_loss_intest = []
misclassified_images = []

# Set the model to evaluation mode
model.eval()

# One evaluation pass per training epoch. The weights are fixed at this point,
# so every pass scores the same model on the same (sequentially ordered) data.
for epoch in range(epochs):
    predicted_labels_test = []
    ground_truth_labels_test = []
    running_loss = 0.0
    samples_seen = 0
    with torch.no_grad():  # No gradient calculation during evaluation
        for image, target in data_loader_test:
            image, target = image.to(device), target.to(device)
            output = model(image)
            _, predicted = torch.max(output, 1)  # Index of the largest logit per sample
            predicted_labels_test.extend(predicted.cpu().numpy())
            ground_truth_labels_test.extend(target.cpu().numpy())
            # Weight the batch-mean loss by the batch size so this value is
            # comparable with the per-sample training loss.
            batch_len = target.shape[0]
            running_loss += criterion(output, target).item() * batch_len
            samples_seen += batch_len
    running_loss /= max(samples_seen, 1)
    epoch_test_loss_intest.append(running_loss)
    print(f'Epoch {epoch + 1} test loss: {running_loss:.4f}')

    # Identify misclassified images
    misclassified_indices = [
        i
        for i, (true, pred) in enumerate(zip(ground_truth_labels_test, predicted_labels_test))
        if true != pred
    ]
    misclassified_images_epoch = [image_filenames_test[i] for i in misclassified_indices]
    misclassified_images.extend(misclassified_images_epoch)

# Every pass re-reports the same files; keep each path once, in first-seen order,
# instead of writing `epochs` copies of every entry.
misclassified_images = list(dict.fromkeys(misclassified_images))

# Save misclassified images to a .txt file
misclassified_file = 'misclassified_images_test.txt'
with open(misclassified_file, 'w') as f:
    for img in misclassified_images:
        f.write(f"{img}\n")

print(f"Misclassified images saved to {misclassified_file}")

In [None]:
# Confusion matrix of the last evaluation pass over the training set
# (rows: true label, columns: predicted label).
cm_train = confusion_matrix(ground_truth_labels_train, predicted_labels_train)
# NOTE(review): the display labels assume class index 0 is "Benign" and
# index 1 is "Malignant" -- confirm against `dataset.classes`.
disp = ConfusionMatrixDisplay(confusion_matrix=cm_train, display_labels=["Benign", "Malignant"])
disp.plot()
plt.show()

# Binary counts, treating class index 1 as the positive class.
TP = cm_train[1, 1]
TN = cm_train[0, 0]
FP = cm_train[0, 1]
FN = cm_train[1, 0]

# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class) instead of producing NaN.
total = TP + TN + FP + FN
accuracy = (TP + TN) / float(total) if total else 0.0
precision = TP / float(TP + FP) if (TP + FP) else 0.0
recall = TP / float(TP + FN) if (TP + FN) else 0.0
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0
sensitivity = recall  # Sensitivity is the same as recall
specificity = TN / float(TN + FP) if (TN + FP) else 0.0

print(f'Precision train: {precision:.2f}')
print(f'Recall train: {recall:.2f}')
print(f'F1 Score train: {f1:.2f}')
print(f'Accuracy train: {accuracy:.2f}')
# `.2f` (two decimals) -- the previous `2f` spec meant "minimum width 2".
print(f'Sensitivity:{sensitivity:.2f}')
print(f'Specificity:{specificity:.2f}')

In [None]:
# Confusion matrix of the last evaluation pass over the validation set
# (rows: true label, columns: predicted label).
cm_test = confusion_matrix(ground_truth_labels_test, predicted_labels_test)
# NOTE(review): the display labels assume class index 0 is "Benign" and
# index 1 is "Malignant" -- confirm against `dataset_test.classes`.
disp = ConfusionMatrixDisplay(confusion_matrix=cm_test, display_labels=["Benign", "Malignant"])
disp.plot()
plt.show()

# Binary counts, treating class index 1 as the positive class.
TP = cm_test[1, 1]
TN = cm_test[0, 0]
FP = cm_test[0, 1]
FN = cm_test[1, 0]

# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class) instead of producing NaN.
total = TP + TN + FP + FN
accuracy = (TP + TN) / float(total) if total else 0.0
precision = TP / float(TP + FP) if (TP + FP) else 0.0
recall = TP / float(TP + FN) if (TP + FN) else 0.0
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0
sensitivity = recall  # Sensitivity is the same as recall
specificity = TN / float(TN + FP) if (TN + FP) else 0.0

print(f'Precision Test: {precision:.2f}')
print(f'Recall Test: {recall:.2f}')
print(f'F1 Score Test: {f1:.2f}')
print(f'Accuracy Test: {accuracy:.2f}')
# `.2f` (two decimals) -- the previous `2f` spec meant "minimum width 2".
print(f'Sensitivity Test:{sensitivity:.2f}')
print(f'Specificity Test:{specificity:.2f}')

In [None]:
# Overlay the three recorded loss series on one set of axes; the x axis is the
# 1-based position of each value in its list.
fig, ax = plt.subplots(figsize=(12, 5))
for series, label in (
    (epoch_train_losses, 'Training Loss'),
    (epoch_test_loss_intrain, 'Test Loss in Train'),
    (epoch_test_loss_intest, 'Test Loss in Test'),
):
    ax.plot(range(1, len(series) + 1), series, label=label)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()