<a href="https://colab.research.google.com/github/roshisaxena/lungcancerprediction/blob/main/LungCancerPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from zipfile import ZipFile

# Archive holding the image folders; the relative path is resolved against
# the current working directory.
data_path = 'lung-cancer-dataset.zip'

# Unpack everything into the current working directory. The handle is named
# `archive` instead of `zip` so the built-in zip() is not shadowed for the
# rest of the kernel session.
with ZipFile(data_path, 'r') as archive:
    archive.extractall()
    print('The data set has been extracted.')

# Image geometry, batch size and dataset location
IMG_SIZE = 256
BATCH_SIZE = 32
DATASET_PATH = 'lung-cancer-dataset/lung_images'  # Replace with the correct path

# Plain loader (no augmentation): multiplies pixel values by 1/255 and
# reserves 20% of the images for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Settings common to the training and validation iterators, so the two
# subsets are guaranteed to be read with the same size, batch and label mode.
flow_options = dict(
    directory=DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Training subset: shuffled each epoch
train_generator = datagen.flow_from_directory(
    subset='training', shuffle=True, **flow_options
)

# Validation subset: fixed order so evaluation is repeatable
val_generator = datagen.flow_from_directory(
    subset='validation', shuffle=False, **flow_options
)

# CNN Model
# The output width follows the number of class folders the training generator
# discovered (3 for this dataset) instead of being hard-coded.
# flow_from_directory assigns label indices in alphanumeric folder order, so
# for folders lung_aca / lung_n / lung_scc the softmax positions are
# 0 = lung_aca, 1 = lung_n, 2 = lung_scc.
NUM_CLASSES = train_generator.num_classes

model = Sequential([
    # Declare the input with an explicit Input object; passing `input_shape`
    # to the first Conv2D makes recent Keras versions emit a UserWarning.
    tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),

    # Three conv/pool stages with 32 -> 64 -> 128 filters
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: dense layer with dropout, then one softmax unit per class
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax'),
])

# Compile the model (categorical cross-entropy matches class_mode='categorical')
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Show model summary
model.summary()

# Stop once val_loss has gone 5 epochs without improving, and roll the model
# back to the weights from its best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train for at most 20 epochs, evaluating on the validation generator
training_options = {
    'epochs': 20,
    'validation_data': val_generator,
    'callbacks': [early_stop],
}
history = model.fit(train_generator, **training_options)

# Persist the trained model to disk (HDF5 file)
model.save("Lungcancerdetection_model.h5")


The data set has been extracted.
Found 2402 images belonging to 3 classes.
Found 600 images belonging to 3 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  self._warn_if_super_not_called()


Epoch 1/20
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 257ms/step - accuracy: 0.5827 - loss: 0.9808 - val_accuracy: 0.8383 - val_loss: 0.3661
Epoch 2/20
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 165ms/step - accuracy: 0.8289 - loss: 0.3960 - val_accuracy: 0.8667 - val_loss: 0.3764
Epoch 3/20
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 162ms/step - accuracy: 0.8615 - loss: 0.3531 - val_accuracy: 0.8333 - val_loss: 0.3674
Epoch 4/20
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 167ms/step - accuracy: 0.8722 - loss: 0.3080 - val_accuracy: 0.8667 - val_loss: 0.2976
Epoch 5/20
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 164ms/step - accuracy: 0.8800 - loss: 0.2922 - val_accuracy: 0.8900 - val_loss: 0.3266
Epoch 6/20
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 178ms/step - accuracy: 0.8838 - loss: 0.3067 - val_accuracy: 0.7500 - val_loss: 0.5168
Epoch 7/20
[1m76/76[



In [4]:
# Write the in-memory model to an HDF5 file under a short name
download_copy_path = "a1_model.h5"
model.save(download_copy_path)



In [5]:
from google.colab import files

# Hand the saved model file to the browser for download (Colab only)
saved_model_name = 'a1_model.h5'
files.download(saved_model_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
import numpy as np
import cv2
import tensorflow as tf

# Load model
model = tf.keras.models.load_model("Lungcancerdetection_model.h5")

# Class names indexed by softmax output position.
# Replace with your actual class_indices order if your training folders differ.
classes = ['lung_aca', 'lung_n', 'lung_scc']

# Test image path
image_path = "lungaca4999.jpeg"  # CHANGE THIS for each test

# Preprocess the image
IMG_SIZE = 256
img = cv2.imread(image_path)
# cv2.imread does not raise on a missing or undecodable file; it returns None.
# Fail here with a clear message instead of an opaque OpenCV error below.
if img is None:
    raise FileNotFoundError(f"Could not read image file: {image_path!r}")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB channel order
# NOTE(review): cv2.resize defaults to bilinear interpolation while Keras'
# flow_from_directory defaults to 'nearest' — confirm whether this small
# train/inference difference matters for this model.
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
img = img / 255.0  # Only if you used rescale=1./255 during training
img = np.expand_dims(img, axis=0)  # add a leading batch axis of size 1

# Predict
predictions = model.predict(img)
predicted_index = np.argmax(predictions)  # single image, so the flat argmax is the class index
print("Raw prediction scores:", predictions)
print("Predicted class:", classes[predicted_index])




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 388ms/step
Raw prediction scores: [[9.8682863e-01 4.2132574e-06 1.3167129e-02]]
Predicted class: lung_aca
