In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import shutil
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers 
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [3]:
from PIL import Image
img2 = Image.open("C:\\Users\\pidik\\Downloads\\lung_image_sets\\lung_scc\\lungscc992.jpeg")
img2.show()

In [4]:
# Create ImageDataGenerator with validation split
datagen = ImageDataGenerator(
    rescale=1./255,         # Rescale images
    validation_split=0.2    # Split 20% for validation
)

base_dir = 'C:\\Users\\pidik\\Downloads\\lung_image_sets'

# Load training data (80% of total data)
train_generator = datagen.flow_from_directory(
    base_dir,              # Use the same base directory for both train and validation
    target_size=(150, 150), # Resize images to 150x150 pixels
    batch_size=32,
    class_mode='categorical',
    subset='training'       # Training data (80%)
)

# Load validation data (20% of total data)
validation_generator = datagen.flow_from_directory(
    base_dir,              # Use the same base directory for validation
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical',
    subset='validation'     # Validation data (20%)
)

Found 12000 images belonging to 3 classes.
Found 3000 images belonging to 3 classes.


In [5]:
model = tf.keras.models.Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')  # 3 output classes
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size,
    epochs=2
)

Epoch 1/2


  self._warn_if_super_not_called()


[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m424s[0m 1s/step - accuracy: 0.7861 - loss: 0.5145 - val_accuracy: 0.8673 - val_loss: 0.3276
Epoch 2/2


  self.gen.throw(typ, value, traceback)


[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.8333 - val_loss: 0.4812


In [7]:
train_dir = "C:\\Users\\pidik\\Downloads\\train_lung"
test_dir = "C:\\Users\\pidik\\Downloads\\test_lung"

test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    test_dir,  
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

test_loss, test_acc = model.evaluate(test_generator)
print(f"Test accuracy: {test_acc * 100:.2f}%")

Found 3000 images belonging to 3 classes.
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 449ms/step - accuracy: 0.8821 - loss: 0.3026
Test accuracy: 87.93%


In [9]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D

# Load ResNet50 model without top layers and use pre-trained weights from ImageNet
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

# Freeze the layers of the base model to retain learned features
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers on top of ResNet50
transfer_model = Sequential([
    base_model,
    GlobalAveragePooling2D(),  # Replace flattening with global pooling
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')  # 3 classes for classification
])

# Compile the model
transfer_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
transfer_history = transfer_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=3
)

# Evaluate the model
val_loss, val_acc = transfer_model.evaluate(validation_generator)
print(f"Validation accuracy with ResNet50: {val_acc * 100:.2f}%")

Epoch 1/3
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m547s[0m 1s/step - accuracy: 0.3848 - loss: 1.1069 - val_accuracy: 0.4540 - val_loss: 1.0267
Epoch 2/3
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 1s/step - accuracy: 0.4923 - loss: 0.9987 - val_accuracy: 0.5673 - val_loss: 0.9482
Epoch 3/3
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m458s[0m 1s/step - accuracy: 0.5092 - loss: 0.9610 - val_accuracy: 0.5607 - val_loss: 0.9110
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 1s/step - accuracy: 0.5491 - loss: 0.9120
Validation accuracy with ResNet50: 56.07%


In [12]:
import os

base_dir = 'C:\\Users\\pidik\\Downloads\\lung_image_sets'
classes = os.listdir(base_dir)
print("Available directories in the dataset:", classes)

Available directories in the dataset: ['lung_aca', 'lung_n', 'lung_scc']


In [20]:
# Define ANN model
model_ann = Sequential([
    Flatten(input_shape=(150, 150, 3)), # Flatten the 2D image to 1D
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(3, activation='softmax')      # 3 classes: benign, adenocarcinoma, squamous
])

  super().__init__(**kwargs)


In [21]:
# Compile the model
model_ann.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [23]:
# Train the model
model_ann.fit(train_generator, epochs=2, validation_data=validation_generator)

Epoch 1/2
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m352s[0m 926ms/step - accuracy: 0.6065 - loss: 6.0871 - val_accuracy: 0.6560 - val_loss: 1.6024
Epoch 2/2
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m340s[0m 892ms/step - accuracy: 0.7256 - loss: 0.9941 - val_accuracy: 0.7153 - val_loss: 0.8032


<keras.src.callbacks.history.History at 0x1c22a030730>

In [24]:
# Evaluate the model
loss, accuracy = model_ann.evaluate(validation_generator)
print(f"ANN Validation Accuracy: {accuracy * 100:.2f}%")

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 289ms/step - accuracy: 0.7161 - loss: 0.7893
ANN Validation Accuracy: 71.53%


In [27]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Reshape
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [28]:
# Define data generators with validation split
datagen = ImageDataGenerator(
    rescale=1./255,         # Rescale images
    validation_split=0.2    # Split 20% for validation
)

In [29]:
# Define RNN model
model_rnn = Sequential([
    Reshape((150, 150 * 3), input_shape=(150, 150, 3)),  # Reshape to 3D for SimpleRNN (timesteps=150, input_dim=450)
    SimpleRNN(128, activation='relu', return_sequences=False),  # Simple RNN layer
    Dense(64, activation='relu'),
    Dense(3, activation='softmax')  # 3 classes: benign, adenocarcinoma, squamous
])

  super().__init__(**kwargs)


In [30]:
# Compile the model
model_rnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [31]:
# Train the model
model_rnn.fit(train_generator, epochs=3, validation_data=validation_generator)

Epoch 1/3
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m349s[0m 905ms/step - accuracy: 0.6481 - loss: 0.6682 - val_accuracy: 0.7340 - val_loss: 0.5050
Epoch 2/3
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m340s[0m 894ms/step - accuracy: 0.7169 - loss: 0.5269 - val_accuracy: 0.7263 - val_loss: 0.5219
Epoch 3/3
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 889ms/step - accuracy: 0.7462 - loss: 0.4942 - val_accuracy: 0.6963 - val_loss: 0.5387


<keras.src.callbacks.history.History at 0x1c29f9c6e30>

In [32]:
# Evaluate the model
loss, accuracy = model_rnn.evaluate(validation_generator)
print(f"RNN Validation Accuracy: {accuracy * 100:.2f}%")

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 666ms/step - accuracy: 0.6961 - loss: 0.5394
RNN Validation Accuracy: 69.63%
