In [None]:
import os
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Function to load the dataset
def load_dataset(data_dir):
    filepaths = []
    labels = []
    folders = os.listdir(data_dir)
    for folder in folders:
        folder_path = os.path.join(data_dir, folder)
        if os.path.isdir(folder_path):
            filelist = os.listdir(folder_path)
            for file in filelist:
                fpath = os.path.join(folder_path, file)
                filepaths.append(fpath)
                labels.append(folder)
    df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
    return df

# Load the dataset
data_dir = '/content/drive/MyDrive/lung_colon_image_set'  # Update with your dataset path
df = load_dataset(data_dir)
print(df.head())


                                           filepaths           labels
0  /content/drive/MyDrive/lung_colon_image_set/lu...  lung_image_sets
1  /content/drive/MyDrive/lung_colon_image_set/lu...  lung_image_sets
2  /content/drive/MyDrive/lung_colon_image_set/lu...  lung_image_sets


In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# Part 1: Load the dataset
def loading_the_data(data_dir):
    filepaths = []
    labels = []
    classes = os.listdir(data_dir)
    for cls in classes:
        class_path = os.path.join(data_dir, cls)
        if os.path.isdir(class_path):
            for filename in os.listdir(class_path):
                fpath = os.path.join(class_path, filename)
                filepaths.append(fpath)
                labels.append(cls)
    df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
    return df

# Set your dataset path
data_dir = '/content/drive/MyDrive/lung_colon_image_set'  # Update this path as necessary
df = loading_the_data(data_dir)

# Check class distribution
class_counts = df['labels'].value_counts()
print("Class distribution:\n", class_counts)

# Determine a suitable test size
# Calculate the number of instances to use for testing
n_classes = df['labels'].nunique()
max_test_size = min(class_counts.min(), 1)  # We can only take 1 instance from each class
test_size = max_test_size * n_classes / len(df)  # Proportional test size

# Part 2: Split the dataset into training and testing
train_df, test_df = train_test_split(df, test_size=test_size, stratify=df['labels'], random_state=42)

print("Training set size:", train_df.shape)
print("Test set size:", test_df.shape)


Class distribution:
 labels
lung_image_sets    3
Name: count, dtype: int64
Training set size: (2, 2)
Test set size: (1, 2)


activation function changed

In [None]:
import os

# Specify the path to your folder containing the images
folder_path = '/content/drive/MyDrive/lung_colon_image_set/lung_image_sets'

# Get a list of all subdirectories (classes)
classes = [d for d in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, d))]

# Initialize a dictionary to store the count of images for each class
class_counts = {}

# Iterate over each class
for cls in classes:
    # Construct the full path to the class directory
    class_path = os.path.join(folder_path, cls)
    # Count the number of files in the class directory
    num_images = len([f for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))])
    # Store the count in the dictionary
    class_counts[cls] = num_images

# Print the count of images for each class
for cls, count in class_counts.items():
    print(f"Class '{cls}' has {count} images.")

Class 'lung_aca' has 5074 images.
Class 'lung_n' has 5040 images.
Class 'lung_scc' has 5051 images.


In [None]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
import os
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
from collections import Counter

# Function to load data and preprocess
def loading_the_data(data_dir):
    filepaths = []
    labels = []
    folders = os.listdir(data_dir)
    for folder in folders:
        folder_path = os.path.join(data_dir, folder)
        if os.path.isdir(folder_path):  # Check if it's a directory
            filelist = os.listdir(folder_path)
            for file in filelist:
                fpath = os.path.join(folder_path, file)
                filepaths.append(fpath)
                labels.append(folder)
    df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
    return df

# Load the data
data_dir = '/content/drive/MyDrive/lung_colon_image_set/lung_image_sets'
df = loading_the_data(data_dir)

# Split data into train and validation sets
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['labels'], random_state=42)

# Image preprocessing and augmentation
batch_size = 32
img_size = (224, 224)

train_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_generator = validation_datagen.flow_from_dataframe(
    val_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=False,
    batch_size=batch_size
)

# Load InceptionV3 model
inceptionv3 = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
inceptionv3.trainable = False

# Hybrid model architecture
inputs = Input(shape=(224, 224, 3))
inceptionv3_features = inceptionv3(inputs)
x = Conv2D(128, (3, 3), activation='relu', padding='same')(inceptionv3_features)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(3, activation='softmax')(x)  # Assuming 3 classes

# Create the hybrid model
hybrid_model = Model(inputs=inputs, outputs=outputs)

# Compile the model
hybrid_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the hybrid model with early stopping
history = hybrid_model.fit(train_generator, epochs=10, validation_data=validation_generator, callbacks=[early_stopping])

# Save the model
model_save_path = '/content/drive/MyDrive/Model/inception.h5'
hybrid_model.save(model_save_path)
print("Model saved at:", model_save_path)

# Predict classes for validation data
val_preds = hybrid_model.predict(validation_generator)
val_preds = np.argmax(val_preds, axis=1)

# Get true labels for validation data
true_labels = validation_generator.classes


# Display confusion matrix
conf_matrix = confusion_matrix(true_labels, val_preds)
print("Confusion Matrix:")
print(conf_matrix)

# Display classification report
class_names = ['Lung_adenocarcinoma', 'Lung_benign_tissue', 'Lung squamous_cell_carcinoma']
print("\nClassification Report:")
print(classification_report(true_labels, val_preds, target_names=class_names))

# Count the occurrences of each class label in the training data
train_class_counts = Counter(train_generator.classes)

# Count the occurrences of each class label in the validation data
val_class_counts = Counter(validation_generator.classes)

# Print the number of images used for training and testing in each class
print("\nTraining data class counts:")
for class_name, count in train_class_counts.items():
    print(f"{class_name}: {count}")

print("\nValidation data class counts:")
for class_name, count in val_class_counts.items():
    print(f"{class_name}: {count}")

Found 12132 validated image filenames belonging to 3 classes.
Found 3033 validated image filenames belonging to 3 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5184s[0m 13s/step - accuracy: 0.8392 - loss: 0.4564 - val_accuracy: 0.9565 - val_loss: 0.1128
Epoch 2/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 272ms/step - accuracy: 0.9590 - loss: 0.1124 - val_accuracy: 0.9542 - val_loss: 0.1490
Epoch 3/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 272ms/step - accuracy: 0.9686 - loss: 0.0844 - val_accuracy: 0.9397 - val_loss: 0.1862
Epoch 4/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 270ms/step - accuracy: 0.9799 - loss: 0.0515 - val_accuracy: 0.9664 - val_loss: 0.0892
Epoch 5/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 334ms/step - accuracy: 0.9850 - loss: 0.0388 - val_accuracy: 0.9677 - val_loss: 0.1046
Epoch 6/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 267ms/step - accuracy: 0.9888 - loss: 0.0354 - val_accuracy: 0.9690 - val_loss: 0.1186
Epoch 7/10
[1m3



Model saved at: /content/drive/MyDrive/Model/inception.h5
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 253ms/step
Confusion Matrix:
[[ 987    2   26]
 [   3 1005    0]
 [  71    0  939]]

Classification Report:
                              precision    recall  f1-score   support

         Lung_adenocarcinoma       0.93      0.97      0.95      1015
          Lung_benign_tissue       1.00      1.00      1.00      1008
Lung squamous_cell_carcinoma       0.97      0.93      0.95      1010

                    accuracy                           0.97      3033
                   macro avg       0.97      0.97      0.97      3033
                weighted avg       0.97      0.97      0.97      3033


Training data class counts:
1: 4032
0: 4059
2: 4041

Validation data class counts:
0: 1015
2: 1010
1: 1008


In [None]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Function to load data and preprocess
def loading_the_data(data_dir):
    filepaths = []
    labels = []
    folds = os.listdir(data_dir)
    for fold in folds:
        foldpath = os.path.join(data_dir, fold)
        filelist = os.listdir(foldpath)
        for file in filelist:
            fpath = os.path.join(foldpath, file)
            filepaths.append(fpath)
            labels.append(fold)
    df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
    return df

# Change label names to its original names
def change_label_names(df, column_name):
    index = {'lung_aca': 'Lung_adenocarcinoma', 'lung_n': 'Lung_benign_tissue', 'lung_scc': 'Lung squamous_cell_carcinoma'}
    df[column_name] = df[column_name].replace(index)

# Load the data
data_dir = '/content/drive/MyDrive/lung_colon_image_set/lung_image_sets'
df = loading_the_data(data_dir)
change_label_names(df, 'labels')

# Split the dataset
train_df, ts_df = train_test_split(df, train_size=0.8, shuffle=True, random_state=42)
valid_df, test_df = train_test_split(ts_df, train_size=0.5, shuffle=True, random_state=42)

# Image preprocessing and augmentation
batch_size = 32
img_size = (224, 224)

train_datagen = ImageDataGenerator(rescale=1. / 255)
valid_test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

valid_generator = valid_test_datagen.flow_from_dataframe(
    valid_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

test_generator = valid_test_datagen.flow_from_dataframe(
    test_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=False,
    batch_size=batch_size
)

def yolo_v2(input_shape=(224, 224, 3), num_classes=3):
    inputs = Input(input_shape)

    # Feature extraction layers
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

# Define the YOLOv2 model
model = yolo_v2()

# Compile the model
model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=valid_generator
)

# Specify the folder path where you want to save the model
model_save_folder = '/content/drive/MyDrive/Model/yolo.h5'

# Save the trained model
model_save_path = os.path.join(model_save_folder, "lung_cancer_detection_model.h5")
model.save(model_save_path)

# Print the path where the model is saved
print("Model saved at:", model_save_path)

# Whenever you want to test the model, load the saved model from the specific folder
saved_model = load_model(model_save_path)

# Evaluate the model on the test set
test_loss, test_accuracy = saved_model.evaluate(test_generator)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Found 12132 validated image filenames belonging to 3 classes.
Found 1516 validated image filenames belonging to 3 classes.
Found 1517 validated image filenames belonging to 3 classes.


Epoch 1/10


  self._warn_if_super_not_called()


[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 263ms/step - accuracy: 0.7654 - loss: 0.5826 - val_accuracy: 0.8832 - val_loss: 0.2585
Epoch 2/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 246ms/step - accuracy: 0.9032 - loss: 0.2424 - val_accuracy: 0.9274 - val_loss: 0.1835
Epoch 3/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 241ms/step - accuracy: 0.9183 - loss: 0.1968 - val_accuracy: 0.9380 - val_loss: 0.1578
Epoch 4/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 241ms/step - accuracy: 0.9288 - loss: 0.1694 - val_accuracy: 0.9096 - val_loss: 0.2067
Epoch 5/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 254ms/step - accuracy: 0.9471 - loss: 0.1382 - val_accuracy: 0.9551 - val_loss: 0.1260
Epoch 6/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 243ms/step - accuracy: 0.9654 - loss: 0.0844 - val_accuracy: 0.9439 - val_loss: 0.1463
Epoch 7/10
[1m3



Model saved at: /content/drive/MyDrive/Model/yolo.h5/lung_cancer_detection_model.h5




[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 267ms/step - accuracy: 0.9600 - loss: 0.1062
Test Loss: 0.14264294505119324
Test Accuracy: 0.9545155167579651


In [None]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Function to load data and preprocess
def loading_the_data(data_dir):
    filepaths = []
    labels = []
    folds = os.listdir(data_dir)
    for fold in folds:
        foldpath = os.path.join(data_dir, fold)
        filelist = os.listdir(foldpath)
        for file in filelist:
            fpath = os.path.join(foldpath, file)
            filepaths.append(fpath)
            labels.append(fold)
    df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
    return df

# Change label names to their original names
def change_label_names(df, column_name):
    index = {'lung_aca': 'Lung_adenocarcinoma', 'lung_n': 'Lung_benign_tissue', 'lung_scc': 'Lung squamous_cell_carcinoma'}
    df[column_name] = df[column_name].replace(index)

# Load the data
data_dir = '/content/drive/MyDrive/lung_colon_image_set/lung_image_sets'
df = loading_the_data(data_dir)
change_label_names(df, 'labels')

# Split the dataset
train_df, ts_df = train_test_split(df, train_size=0.8, shuffle=True, random_state=42)
valid_df, test_df = train_test_split(ts_df, train_size=0.5, shuffle=True, random_state=42)

# Image preprocessing and augmentation
batch_size = 32
img_size = (224, 224)

train_datagen = ImageDataGenerator(rescale=1. / 255)
valid_test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

valid_generator = valid_test_datagen.flow_from_dataframe(
    valid_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

test_generator = valid_test_datagen.flow_from_dataframe(
    test_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=False,
    batch_size=batch_size
)

# Function to define ResNet-50 model
def resnet_50(input_shape=(224, 224, 3), num_classes=3):
    # Load the ResNet-50 model with pretrained weights from ImageNet, exclude top layers
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the base model layers to not update during training
    base_model.trainable = False

    # Add custom layers on top
    inputs = Input(input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

# Define the ResNet-50 model
model = resnet_50()

# Compile the model
model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=valid_generator
)

# Specify the folder path where you want to save the model
model_save_folder = '/content/drive/MyDrive/Model/resNet.h5'

# Save the trained model
model_save_path = os.path.join(model_save_folder, "lung_cancer_detection_resnet50_model.h5")
model.save(model_save_path)

# Print the path where the model is saved
print("Model saved at:", model_save_path)

# Whenever you want to test the model, load the saved model from the specific folder
saved_model = load_model(model_save_path)

# Evaluate the model on the test set
test_loss, test_accuracy = saved_model.evaluate(test_generator)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Found 12132 validated image filenames belonging to 3 classes.
Found 1516 validated image filenames belonging to 3 classes.
Found 1517 validated image filenames belonging to 3 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Epoch 1/10


  self._warn_if_super_not_called()


[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3311s[0m 8s/step - accuracy: 0.4081 - loss: 1.1297 - val_accuracy: 0.5152 - val_loss: 0.9866
Epoch 2/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 249ms/step - accuracy: 0.4935 - loss: 0.9977 - val_accuracy: 0.5607 - val_loss: 0.9077
Epoch 3/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 246ms/step - accuracy: 0.5403 - loss: 0.9331 - val_accuracy: 0.6319 - val_loss: 0.8494
Epoch 4/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 241ms/step - accuracy: 0.5868 - loss: 0.8627 - val_accuracy: 0.6926 - val_loss: 0.7460
Epoch 5/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 245ms/step - accuracy: 0.6313 - loss: 0.7986 - val_accuracy: 0.6168 - val_loss: 0.8166
Epoch 6/10
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 253ms/step - accuracy: 0.6292 - loss: 0.7941 - val_accuracy: 0.7546 - val_loss: 0.6780
Epoch 7/10
[1m380/



Model saved at: /content/drive/MyDrive/Model/resNet.h5/lung_cancer_detection_resnet50_model.h5




[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m339s[0m 7s/step - accuracy: 0.7825 - loss: 0.5643
Test Loss: 0.550525426864624
Test Accuracy: 0.7910349369049072


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**RESNET  50**

In [None]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from sklearn.metrics import classification_report
import numpy as np

# Function to load data and preprocess
def loading_the_data(data_dir):
    filepaths = []
    labels = []
    folds = os.listdir(data_dir)
    for fold in folds:
        foldpath = os.path.join(data_dir, fold)
        filelist = os.listdir(foldpath)
        for file in filelist:
            fpath = os.path.join(foldpath, file)
            filepaths.append(fpath)
            labels.append(fold)
    df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
    return df

# Change label names to their original names
def change_label_names(df, column_name):
    index = {'lung_aca': 'Lung_adenocarcinoma', 'lung_n': 'Lung_benign_tissue', 'lung_scc': 'Lung squamous_cell_carcinoma'}
    df[column_name] = df[column_name].replace(index)

# Load the data
data_dir = '/content/drive/MyDrive/lung_image_sets_1'
df = loading_the_data(data_dir)
change_label_names(df, 'labels')

# Split the dataset
train_df, ts_df = train_test_split(df, train_size=0.8, shuffle=True, random_state=42)
valid_df, test_df = train_test_split(ts_df, train_size=0.5, shuffle=True, random_state=42)

# Image preprocessing and augmentation
batch_size = 32
img_size = (224, 224)

train_datagen = ImageDataGenerator(rescale=1. / 255)
valid_test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

valid_generator = valid_test_datagen.flow_from_dataframe(
    valid_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

test_generator = valid_test_datagen.flow_from_dataframe(
    test_df,
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=False,
    batch_size=batch_size
)

# Function to define ResNet-50 model
def resnet_50(input_shape=(224, 224, 3), num_classes=3):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False
    inputs = Input(input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model

# Define the ResNet-50 model
model = resnet_50()

# Compile the model
model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=valid_generator
)

# Specify the folder path where you want to save the model
model_save_folder = '/content/drive/MyDrive/Model'
model_save_path = os.path.join(model_save_folder, "lung_cancer_detection_resnet50_model.h5")
model.save(model_save_path)

# Print the path where the model is saved
print("Model saved at:", model_save_path)

# Whenever you want to test the model, load the saved model from the specific folder
saved_model = load_model(model_save_path)

# Evaluate the model on the test set
test_loss, test_accuracy = saved_model.evaluate(test_generator)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

# Get the true labels and predicted labels for the test set
test_generator.reset()  # Reset the generator to ensure we get the right order
predictions = saved_model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=1)

# Get the true classes from the test generator
true_classes = test_generator.classes
class_labels = list(test_generator.class_indices.keys())  # Getting the labels for the classes

# Generate classification report
report = classification_report(true_classes, predicted_classes, target_names=class_labels)
print(report)


Found 4020 validated image filenames belonging to 3 classes.
Found 502 validated image filenames belonging to 3 classes.
Found 503 validated image filenames belonging to 3 classes.


Epoch 1/10


  self._warn_if_super_not_called()


[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m909s[0m 7s/step - accuracy: 0.4030 - loss: 1.1889 - val_accuracy: 0.5120 - val_loss: 1.0005
Epoch 2/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m949s[0m 8s/step - accuracy: 0.4887 - loss: 1.0217 - val_accuracy: 0.5120 - val_loss: 0.9747
Epoch 3/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m928s[0m 7s/step - accuracy: 0.5165 - loss: 0.9827 - val_accuracy: 0.5976 - val_loss: 0.9540
Epoch 4/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m934s[0m 7s/step - accuracy: 0.5355 - loss: 0.9604 - val_accuracy: 0.5737 - val_loss: 0.9262
Epoch 5/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m936s[0m 7s/step - accuracy: 0.5616 - loss: 0.9501 - val_accuracy: 0.5657 - val_loss: 0.8971
Epoch 6/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m963s[0m 7s/step - accuracy: 0.5671 - loss: 0.9080 - val_accuracy: 0.5857 - val_loss: 0.8704
Epoch 7/10
[1m126/126[0m [32m━



Model saved at: /content/drive/MyDrive/Model/lung_cancer_detection_resnet50_model.h5




[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 6s/step - accuracy: 0.7122 - loss: 0.7531
Test Loss: 0.7480602264404297
Test Accuracy: 0.7137176990509033
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 6s/step
                              precision    recall  f1-score   support

Lung squamous_cell_carcinoma       0.77      0.77      0.77       205
         Lung_adenocarcinoma       0.61      0.41      0.49       161
          Lung_benign_tissue       0.71      0.99      0.83       137

                    accuracy                           0.71       503
                   macro avg       0.70      0.72      0.70       503
                weighted avg       0.70      0.71      0.70       503

