In [2]:
import os
import zipfile

# Archive uploaded to the Colab session and the directory it gets unpacked into.
zip_file_path = '/content/Lung Cancer Dataset.zip'
extract_dir = '/content/lung_cancer_dataset'

# Unpack every member of the archive; the context manager closes the handle afterwards.
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_dir)

# Trailing expression so the notebook displays the top-level extracted entries.
os.listdir(extract_dir)


['__MACOSX', 'Lung Cancer Dataset']

In [3]:
import os


# Folder inside the extracted archive whose entries are listed below.
extracted_dir = '/content/lung_cancer_dataset/Lung Cancer Dataset'

# Trailing expression: the notebook renders the directory listing as the cell output.
os.listdir(path=extracted_dir)


['.DS_Store', 'Normal cases', 'Bengin cases']

In [5]:
# Root of the extracted dataset; the later cells join the class folder names
# ('Normal cases', 'Bengin cases') onto this path.
input_folder = '/content/lung_cancer_dataset/Lung Cancer Dataset'


In [8]:
# The benign class folder is spelled 'Bengin cases' on disk, so that exact
# spelling is used when building the path.
normal_dir = os.path.join(input_folder, 'Normal cases')
print("Normal folder contents:", os.listdir(normal_dir))

benign_dir = os.path.join(input_folder, 'Bengin cases')
print("Benign folder contents:", os.listdir(benign_dir))


Normal folder contents: ['Normal case (75).jpg', 'Normal case (212).jpg', 'Normal case (218).jpg', 'Normal case (309).jpg', 'Normal case (314).jpg', 'Normal case (240).jpg', 'Normal case (195).jpg', 'Normal case (313).jpg', 'Normal case (331).jpg', 'Normal case (332).jpg', 'Normal case (84).jpg', 'Normal case (277).jpg', 'Normal case (36).jpg', 'Normal case (320).jpg', 'Normal case (299).jpg', 'Normal case (5).jpg', 'Normal case (376).jpg', 'Normal case (338).jpg', 'Normal case (256).jpg', 'Normal case (90).jpg', 'Normal case (114).jpg', 'Normal case (230).jpg', 'Normal case (361).jpg', 'Normal case (105).jpg', 'Normal case (298).jpg', 'Normal case (377).jpg', 'Normal case (346).jpg', 'Normal case (390).jpg', 'Normal case (227).jpg', 'Normal case (196).jpg', 'Normal case (311).jpg', 'Normal case (66).jpg', 'Normal case (149).jpg', 'Normal case (172).jpg', 'Normal case (1).jpg', 'Normal case (199).jpg', 'Normal case (88).jpg', 'Normal case (3).jpg', 'Normal case (175).jpg', 'Normal case

In [10]:
import os
import shutil
from sklearn.model_selection import train_test_split


# Source dataset root and the destination root for the train/val/test copy.
input_folder = '/content/lung_cancer_dataset/Lung Cancer Dataset'
output_folder = '/content/lung_cancer_split'
os.makedirs(output_folder, exist_ok=True)

# Pre-create one directory per (split, category) pair so the copy step can
# write straight into it; exist_ok makes re-running this cell harmless.
split_names = ['train', 'val', 'test']
category_names = ['Normal cases', 'Bengin cases']
for split in split_names:
    for category in category_names:
        split_category_dir = os.path.join(output_folder, split, category)
        os.makedirs(split_category_dir, exist_ok=True)


def split_and_copy_images(category, random_state=42):
    """Split one category's images 70/15/15 and copy them into train/val/test.

    Reads the module-level ``input_folder`` (source dataset root) and
    ``output_folder`` (destination root); the destination sub-directories
    ``<output_folder>/<split>/<category>`` must already exist.

    Args:
        category: Name of the class sub-folder under ``input_folder``
            (e.g. ``'Normal cases'``).
        random_state: Seed forwarded to both ``train_test_split`` calls.
            With a fixed seed and a sorted file list, re-running the cell
            reproduces the same partition, so a file is never copied into a
            second split by a later run (which would leak test images into
            the training set).
    """
    category_folder = os.path.join(input_folder, category)

    # os.listdir returns entries in arbitrary order; sort so the seeded split
    # sees the same sequence on every run. The extension check is
    # case-insensitive so files such as 'scan.JPG' are not silently dropped.
    image_paths = sorted(
        os.path.join(category_folder, img)
        for img in os.listdir(category_folder)
        if img.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

    # First cut: 70% train, 30% held out. Second cut: halve the held-out part
    # into validation and test (15% each of the original set).
    train_paths, holdout_paths = train_test_split(
        image_paths, test_size=0.3, random_state=random_state
    )
    val_paths, test_paths = train_test_split(
        holdout_paths, test_size=0.5, random_state=random_state
    )

    for split, paths in zip(['train', 'val', 'test'], [train_paths, val_paths, test_paths]):
        destination = os.path.join(output_folder, split, category)
        for img_path in paths:
            shutil.copy(img_path, destination)


# Split each class folder independently so every split keeps both classes.
categories_to_split = ['Normal cases', 'Bengin cases']
for category in categories_to_split:
    split_and_copy_images(category)

print("Dataset split and copied successfully.")


Dataset split and copied successfully.


In [12]:
# Sanity check: report how many files ended up in each split/category folder.
for split in ['train', 'val', 'test']:
    for category in ['Normal cases', 'Bengin cases']:
        path = os.path.join(output_folder, split, category)
        image_count = len(os.listdir(path))
        print(f"{split} - {category}: {image_count} images")


train - Normal cases: 291 images
train - Bengin cases: 84 images
val - Normal cases: 62 images
val - Bengin cases: 18 images
test - Normal cases: 63 images
test - Bengin cases: 18 images


In [14]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


def load_images_from_folder(folder, image_size=(64, 64)):
    """Load every image under ``folder/<label>/`` as a flattened feature vector.

    Each immediate sub-directory of ``folder`` is treated as one class and its
    name is used as the label. Every readable file inside is loaded with
    ``cv2.IMREAD_COLOR``, resized to ``image_size`` and flattened to 1-D.

    Args:
        folder: Directory containing one sub-directory per class.
        image_size: Target size passed to ``cv2.resize``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays: one flattened pixel vector
        per loaded image, and the matching sub-directory name for each row.
        Both arrays are empty when no image could be loaded.
    """
    import warnings  # local import: keeps this cell runnable on its own

    images = []
    labels = []
    # Sorted listings make the sample order deterministic across runs and
    # file systems (os.listdir order is arbitrary).
    for label in sorted(os.listdir(folder)):
        category_folder = os.path.join(folder, label)
        if not os.path.isdir(category_folder):
            continue  # stray files next to the class folders (e.g. .DS_Store)
        for img_name in sorted(os.listdir(category_folder)):
            img_path = os.path.join(category_folder, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img is None:
                # cv2.imread signals an unreadable/non-image file by returning
                # None instead of raising; passing that to cv2.resize would
                # crash the whole load, so skip the file and report it.
                warnings.warn(f"Skipping unreadable image file: {img_path}")
                continue
            img = cv2.resize(img, image_size)
            images.append(img.flatten())
            labels.append(label)
    return np.array(images), np.array(labels)


# Directories produced by the split step; the 'val' split is not loaded here.
train_folder = '/content/lung_cancer_split/train'
test_folder = '/content/lung_cancer_split/test'

# Load each split, then divide the pixel values by 255.0.
X_train, y_train = load_images_from_folder(train_folder)
X_train = X_train / 255.0

X_test, y_test = load_images_from_folder(test_folder)
X_test = X_test / 255.0


In [15]:

# Learn the label -> integer mapping from the training labels only, then apply
# that same mapping to both splits so the class ids agree.
label_encoder = LabelEncoder().fit(y_train)
y_train = label_encoder.transform(y_train)
y_test = label_encoder.transform(y_test)


In [30]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


# Fit an RBF-kernel support vector classifier on the flattened training images.
svm_model = SVC(kernel='rbf')
svm_model.fit(X_train, y_train)

# Score the held-out test split.
y_pred = svm_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("Classification Report:\n", report_text)


Accuracy: 0.9629629629629629
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.83      0.91        18
           1       0.95      1.00      0.98        63

    accuracy                           0.96        81
   macro avg       0.98      0.92      0.94        81
weighted avg       0.96      0.96      0.96        81

