<a href="https://colab.research.google.com/github/navyarawal/Osteosarcoma_Detection/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so the dataset paths defined below
# resolve. This relies on the google.colab package, so the cell is expected to
# run inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Import necessary libraries
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Set random seeds for reproducibility.
# np.random.seed fixes NumPy's global RNG and tf.random.set_seed fixes
# TensorFlow's global seed.
# NOTE(review): Python's built-in `random` module is not seeded here - confirm
# that nothing downstream depends on it.
np.random.seed(42)
tf.random.set_seed(42)

# Locations of the Osteosarcoma dataset inside the mounted Google Drive.
data_dir = '/content/drive/MyDrive/Osteosarcoma'

# One image folder per split (training / validation / test).
train_dir, val_dir, test_dir = [
    os.path.join(data_dir, folder_name)
    for folder_name in ('TrainingSet', 'ValidationSet', 'TestSet')
]

# One CSV per split, stored next to the image folders.
train_csv, val_csv, test_csv = [
    os.path.join(data_dir, csv_name)
    for csv_name in ('TrainingSetData.csv', 'ValidationSetData.csv', 'TestSetData.csv')
]

# Class name -> integer id. The position of each name in the tuple is its id,
# which is also its index in the one-hot label vector.
label_map = dict(zip(('Non-Viable-Tumor', 'Non-Tumor', 'Viable'), range(3)))

# Data augmentation and transformation: rescales pixel values by 1/255 and
# configures random rotations (up to 30 degrees), 10% width/height shifts and
# horizontal/vertical flips.
# NOTE(review): `data_gen` is not referenced by any other code visible in this
# notebook. CustomDataset loads images directly with load_img/img_to_array, so
# neither the rescaling nor the augmentation configured here is applied to the
# batches fed to the model - confirm whether that is intended.
data_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True
)

# Custom dataset class with one-hot encoding for labels
class CustomDataset(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving batches of images listed in a CSV file.

    The CSV is expected to provide an ``img_name`` column (file name relative
    to ``root_dir``) and a ``class`` column whose values are keys of the
    module-level ``label_map``. Each image is resized to 375x375 on load and
    converted to an array; no rescaling or augmentation is applied here.
    Labels are one-hot encoded over 3 classes.

    Args:
        csv_file: Path of the CSV file describing the split.
        root_dir: Directory that contains the image files.
        batch_size: Maximum number of CSV rows served per batch.
        shuffle: When True, rows are shuffled at construction time and again
            every time ``on_epoch_end`` is called.
    """

    def __init__(self, csv_file, root_dir, batch_size=16, shuffle=True):
        # Let the Keras base class initialise its own state (newer Keras
        # versions warn when a subclass skips this call).
        super().__init__()
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Shuffle once up front so the first epoch is randomised as well.
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be short)."""
        return int(np.ceil(len(self.data) / self.batch_size))

    def __getitem__(self, idx):
        """Load batch ``idx`` and return ``(images, one_hot_labels)`` arrays.

        Rows whose image file is missing are skipped with a warning, so a
        batch can hold fewer samples than ``batch_size``.

        Raises:
            IndexError: If ``idx`` is outside ``range(len(self))``. This also
                lets plain ``for batch in dataset`` iteration terminate.
            KeyError: If a row's ``class`` value is not in ``label_map``.
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"Batch index {idx} out of range for {len(self)} batches")

        start = idx * self.batch_size
        batch_data = self.data.iloc[start:start + self.batch_size]
        images = []
        labels = []
        for _, row in batch_data.iterrows():
            img_name = os.path.join(self.root_dir, row['img_name'])
            try:
                image = tf.keras.preprocessing.image.load_img(img_name, target_size=(375, 375))
            except FileNotFoundError:
                # Best effort: keep going, but say which sample was dropped.
                print(f"Warning: image not found, skipping: {img_name}")
                continue
            images.append(tf.keras.preprocessing.image.img_to_array(image))
            label = label_map[row['class']]
            labels.append(tf.keras.utils.to_categorical(label, num_classes=3))

        return np.array(images), np.array(labels)

    def on_epoch_end(self):
        """Reshuffle the rows between epochs when shuffling is enabled."""
        if self.shuffle:
            self.data = self.data.sample(frac=1).reset_index(drop=True)

# Create one CustomDataset per split. Only the training split is shuffled;
# validation and test keep the CSV row order.
train_dataset = CustomDataset(train_csv, train_dir, batch_size=32, shuffle=True)  # 32 per batch (class default is 16)
val_dataset = CustomDataset(val_csv, val_dir, batch_size=16, shuffle=False)
test_dataset = CustomDataset(test_csv, test_dir, batch_size=8, shuffle=False)  # 8 per batch, smaller than the default

# Simple CNN classifier: three conv/max-pool stages followed by two dense
# layers with heavy dropout and a 3-way softmax output.
model = Sequential()

# Feature extractor on 375x375 RGB inputs.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(375, 375, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Classification head; dropout of 0.7 after each hidden dense layer.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.7))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.7))
model.add(Dense(3, activation='softmax'))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:



# Warm-start from previously saved weights when they are available on Drive.
pretrained_weights_path = '/content/drive/MyDrive/Osteosarcoma/model_weights_simpler_cnn(8090).h5'
if os.path.exists(pretrained_weights_path):
    model.load_weights(pretrained_weights_path)
    print("Pre-trained weights loaded successfully!")
else:
    print("Pre-trained weights not found. Starting training from scratch.")

# Compile the model with categorical cross-entropy loss (labels are one-hot)
# and a small learning rate.
optimizer = Adam(learning_rate=0.00001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Stop when val_loss has not improved for 3 epochs and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Divide the learning rate by 10 after 2 epochs without val_loss improvement.
# min_lr has to sit below the initial learning rate (1e-5): the callback only
# acts while the current rate is above min_lr, so the previous floor of 1e-4
# meant it could never reduce anything.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1, min_lr=1e-7)

# Train with a single fit() call. Both callbacks reset their internal counters
# at the start of every fit(), so calling fit(epochs=1) inside a Python loop
# would keep their patience from ever accumulating and would not let early
# stopping end the run.
num_epochs = 1  # Change this to the desired number of epochs
model.fit(train_dataset, validation_data=val_dataset, epochs=num_epochs, callbacks=[early_stopping, reduce_lr])

# Save model weights (to a different file than the one loaded above, so the
# warm-start checkpoint is not overwritten).
model.save_weights('/content/drive/MyDrive/Osteosarcoma/model_weights_simpler_cnn.h5')

# Evaluate on test set
test_loss, test_accuracy = model.evaluate(test_dataset)
test_accuracy_percentage = test_accuracy * 100
print(f'Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy_percentage:.2f}%')



Pre-trained weights loaded successfully!
Epoch 1/1


 4/31 [==>...........................] - ETA: 13s - loss: 0.3130 - accuracy: 0.8504

Test Loss: 0.5187 | Test Accuracy: 80.90%


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
!pip install scikit-learn

y_true = []
y_pred = []

for batch in test_dataset:
    images, labels = batch
    y_true.extend(np.argmax(labels, axis=1))  # Convert one-hot encoded labels back to integer labels
    predictions = model.predict(images)
    y_pred.extend(np.argmax(predictions, axis=1))

# Calculate the confusion matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(cm)

# Calculate the recall, precision, and F1 score
report = classification_report(y_true, y_pred, target_names=label_map.keys())
print("Classification Report:")
print(report)


Confusion Matrix:
[[64  9  4]
 [16 78  2]
 [ 4  3 19]]
Classification Report:
                  precision    recall  f1-score   support

Non-Viable-Tumor       0.76      0.83      0.80        77
       Non-Tumor       0.87      0.81      0.84        96
          Viable       0.76      0.73      0.75        26

        accuracy                           0.81       199
       macro avg       0.80      0.79      0.79       199
    weighted avg       0.81      0.81      0.81       199

