In [1]:
import cv2
import numpy as np
from PIL import Image, ImageFilter
from pathlib import Path
import os

In [2]:
# Root folder (relative to the working directory) that holds every pipeline stage.
base_data_path = 'data'
# Names used for the three directory levels: <stage>/<split>/<class>.
stages = ['raw', 'enhanced', 'filtrated', 'segmented']
dataset_splits = ['train', 'test', 'valid']
disease_types = ['normal', 'adenocarcinoma', 'large.cell.carcinoma', 'squamous.cell.carcinoma']

# Create the complete stage/split/class tree. Existing folders are left
# untouched (exist_ok=True), so the cell can be re-run safely.
for stage in stages:
    stage_dir = Path(base_data_path) / stage
    for split in dataset_splits:
        split_dir = stage_dir / split
        for disease_type in disease_types:
            path = split_dir / disease_type
            path.mkdir(parents=True, exist_ok=True)
            print(f'successfully created directory: {path.absolute()}')

successfully created directory: /home/rahul/rahul/be-project/lung-cancer-detection/notebooks/data/raw/train/normal
successfully created directory: /home/rahul/rahul/be-project/lung-cancer-detection/notebooks/data/raw/train/adenocarcinoma
successfully created directory: /home/rahul/rahul/be-project/lung-cancer-detection/notebooks/data/raw/train/large.cell.carcinoma
successfully created directory: /home/rahul/rahul/be-project/lung-cancer-detection/notebooks/data/raw/train/squamous.cell.carcinoma
successfully created directory: /home/rahul/rahul/be-project/lung-cancer-detection/notebooks/data/raw/test/normal
successfully created directory: /home/rahul/rahul/be-project/lung-cancer-detection/notebooks/data/raw/test/adenocarcinoma
successfully created directory: /home/rahul/rahul/be-project/lung-cancer-detection/notebooks/data/raw/test/large.cell.carcinoma
successfully created directory: /home/rahul/rahul/be-project/lung-cancer-detection/notebooks/data/raw/test/squamous.cell.carcinoma
succes

In [3]:
# enhancement
def do_enhancement(ip_img, op_img):
    """Apply a fixed contrast curve to an image and save the result.

    The image is read with OpenCV, every 8-bit value is remapped through a
    256-entry lookup table interpolated from the control points below
    (values around 64 are pulled down to 16, values around 192 are pushed up
    to 240), and the remapped image is written to ``op_img``.

    Args:
        ip_img: Path of the image to read.
        op_img: Path the enhanced image is written to.

    Returns:
        True if the enhanced image was written, False if the input could not
        be read or the output could not be saved.
    """
    print(f'starting enhancing image: {ip_img}')
    img = cv2.imread(ip_img)
    if img is None:
        # cv2.imread reports an unreadable or non-image file by returning
        # None instead of raising. Stop here so a single stray file does not
        # abort the whole batch with an error inside cv2.LUT.
        print(f'failed to enhanced image: {ip_img}')
        return False

    # Control points of the tone curve: input level -> output level.
    xp = [0, 64, 128, 192, 255]
    fp = [0, 16, 128, 240, 255]
    x = np.arange(256)
    # Piecewise-linear interpolation gives one output value per input level.
    table = np.interp(x, xp, fp).astype('uint8')
    img = cv2.LUT(img, table)

    saved = cv2.imwrite(op_img, img)
    if saved:
        print(f'successfully enhanced image, saved at: {op_img}')
    else:
        print(f'failed to enhanced image: {ip_img}')
    return bool(saved)

# performing enhancement
# For every class folder of the raw training split, write an enhanced copy of
# each image to the same relative location under the 'enhanced' stage.
for disease_type in disease_types:
    imgs_dir=f'{base_data_path}/raw/train/{disease_type}'
    # Build the output folder explicitly instead of str.replace()-ing the
    # input path: a replace on the whole path rewrites the wrong segment as
    # soon as base_data_path itself contains '/raw' (e.g. '/raw_scans/data').
    out_dir=f'{base_data_path}/enhanced/train/{disease_type}'
    imgs_names=os.listdir(imgs_dir)
    for img_name in imgs_names:
        ip_img=f'{imgs_dir}/{img_name}'
        if not os.path.isfile(ip_img):
            # os.listdir also returns sub-directories; only files are images.
            continue
        op_img=f'{out_dir}/{img_name}'
        do_enhancement(ip_img,op_img)


starting enhancing image: data/raw/train/normal/n9 - Copy.jpg
successfully enhanced image, saved at: data/enhanced/train/normal/n9 - Copy.jpg
starting enhancing image: data/raw/train/normal/17 - Copy - Copy.png
successfully enhanced image, saved at: data/enhanced/train/normal/17 - Copy - Copy.png
starting enhancing image: data/raw/train/normal/10 - Copy (2).png
successfully enhanced image, saved at: data/enhanced/train/normal/10 - Copy (2).png
starting enhancing image: data/raw/train/normal/17 - Copy (2) - Copy.png
successfully enhanced image, saved at: data/enhanced/train/normal/17 - Copy (2) - Copy.png
starting enhancing image: data/raw/train/normal/4 (2) - Copy.png
successfully enhanced image, saved at: data/enhanced/train/normal/4 (2) - Copy.png
starting enhancing image: data/raw/train/normal/4.png
successfully enhanced image, saved at: data/enhanced/train/normal/4.png
starting enhancing image: data/raw/train/normal/20 - Copy.png
successfully enhanced image, saved at: data/enhanced

In [4]:
# filtration
def do_filtration(ip_img, op_img):
    """Denoise an image with a 3x3 median filter and save the result.

    The image is opened with PIL, median-filtered, converted to a NumPy
    array and written with OpenCV.

    Args:
        ip_img: Path of the image to read.
        op_img: Path the filtered image is written to.

    Returns:
        True if the filtered image was written, False if the input could not
        be read or the output could not be saved.
    """
    print(f'starting filtrating image: {ip_img}')
    try:
        # The context manager closes the file handle that Image.open keeps
        # open; the pixel data is copied into `pixels` before leaving it.
        with Image.open(ip_img) as enhanced_img:
            source = enhanced_img
            if source.mode not in ('L', 'RGB', 'RGBA'):
                # Palette and other exotic modes cannot be median-filtered or
                # written by OpenCV as-is; normalise them to plain RGB first.
                source = source.convert('RGB')
            filtered_img = source.filter(ImageFilter.MedianFilter(size=3))
            pixels = np.array(filtered_img)
    except OSError:
        # Missing, truncated or non-image file: report it and let the caller
        # continue with the next image.
        print(f'failed to filtrated image: {ip_img}')
        return False

    if pixels.ndim == 3:
        # PIL yields channels as R, G, B(, A) while cv2.imwrite expects
        # B, G, R(, A). Swap the colour channels (keeping any alpha channel
        # in place) so colours are not inverted in the saved file.
        channel_order = [2, 1, 0] + list(range(3, pixels.shape[2]))
        pixels = np.ascontiguousarray(pixels[:, :, channel_order])

    saved = cv2.imwrite(op_img, pixels)
    if saved:
        print(f'successfully filtrated image, saved at: {op_img}')
    else:
        print(f'failed to filtrated image: {ip_img}')
    return bool(saved)

# performing filtration
# For every class folder of the enhanced training split, write a filtered copy
# of each image to the same relative location under the 'filtrated' stage.
for disease_type in disease_types:
    imgs_dir=f'{base_data_path}/enhanced/train/{disease_type}'
    # Build the output folder explicitly instead of str.replace()-ing the
    # input path: a replace on the whole path rewrites the wrong segment as
    # soon as base_data_path itself contains '/enhanced'.
    out_dir=f'{base_data_path}/filtrated/train/{disease_type}'
    imgs_names=os.listdir(imgs_dir)
    for img_name in imgs_names:
        ip_img=f'{imgs_dir}/{img_name}'
        if not os.path.isfile(ip_img):
            # os.listdir also returns sub-directories; only files are images.
            continue
        op_img=f'{out_dir}/{img_name}'
        do_filtration(ip_img,op_img)


starting filtrating image: data/enhanced/train/normal/n9 - Copy.jpg
successfully filtrated image, saved at: data/filtrated/train/normal/n9 - Copy.jpg
starting filtrating image: data/enhanced/train/normal/17 - Copy - Copy.png
successfully filtrated image, saved at: data/filtrated/train/normal/17 - Copy - Copy.png
starting filtrating image: data/enhanced/train/normal/10 - Copy (2).png
successfully filtrated image, saved at: data/filtrated/train/normal/10 - Copy (2).png
starting filtrating image: data/enhanced/train/normal/17 - Copy (2) - Copy.png
successfully filtrated image, saved at: data/filtrated/train/normal/17 - Copy (2) - Copy.png
starting filtrating image: data/enhanced/train/normal/4 (2) - Copy.png
successfully filtrated image, saved at: data/filtrated/train/normal/4 (2) - Copy.png
starting filtrating image: data/enhanced/train/normal/4.png
successfully filtrated image, saved at: data/filtrated/train/normal/4.png
starting filtrating image: data/enhanced/train/normal/20 - Copy.pn

In [5]:
# segmentation
def do_segmentation(ip_img, op_img):
    """Binarise an image with Otsu's threshold and save the mask.

    The image is read as a single-channel greyscale array, thresholded with
    OpenCV's Otsu method (pixels above the automatically chosen threshold
    become 255, all others 0) and the binary result is written to ``op_img``.

    Args:
        ip_img: Path of the image to read.
        op_img: Path the segmented (binary) image is written to.

    Returns:
        True if the segmented image was written, False if the input could not
        be read or the output could not be saved.
    """
    print(f'starting segmenting image: {ip_img}')
    # Flag 0 makes OpenCV load the file as a single-channel greyscale image,
    # which is what Otsu thresholding operates on.
    filtrated_img = cv2.imread(ip_img, 0)
    if filtrated_img is None:
        # cv2.imread returns None for unreadable or non-image files; report
        # the failure instead of crashing the whole batch.
        print(f'failed to segmented image: {ip_img}')
        return False

    # With THRESH_OTSU OpenCV determines the threshold itself (the 0 passed
    # as the threshold argument is only a placeholder), so no hand-written
    # histogram analysis is required before this call.
    _otsu_threshold, segmented_img = cv2.threshold(
        filtrated_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU,
    )

    saved = cv2.imwrite(op_img, segmented_img)
    if saved:
        print(f'successfully segmented image, saved at: {op_img}')
    else:
        print(f'failed to segmented image: {ip_img}')
    return bool(saved)

# performing segmentation
# For every class folder of the filtrated training split, write a binary mask
# of each image to the same relative location under the 'segmented' stage.
for disease_type in disease_types:
    imgs_dir=f'{base_data_path}/filtrated/train/{disease_type}'
    # Build the output folder explicitly instead of str.replace()-ing the
    # input path: a replace on the whole path rewrites the wrong segment as
    # soon as base_data_path itself contains '/filtrated'.
    out_dir=f'{base_data_path}/segmented/train/{disease_type}'
    imgs_names=os.listdir(imgs_dir)
    for img_name in imgs_names:
        ip_img=f'{imgs_dir}/{img_name}'
        if not os.path.isfile(ip_img):
            # os.listdir also returns sub-directories; only files are images.
            continue
        op_img=f'{out_dir}/{img_name}'
        do_segmentation(ip_img,op_img)

starting segmenting image: data/filtrated/train/normal/n9 - Copy.jpg
successfully segmented image, saved at: data/segmented/train/normal/n9 - Copy.jpg
starting segmenting image: data/filtrated/train/normal/17 - Copy - Copy.png
successfully segmented image, saved at: data/segmented/train/normal/17 - Copy - Copy.png
starting segmenting image: data/filtrated/train/normal/10 - Copy (2).png
successfully segmented image, saved at: data/segmented/train/normal/10 - Copy (2).png
starting segmenting image: data/filtrated/train/normal/17 - Copy (2) - Copy.png
successfully segmented image, saved at: data/segmented/train/normal/17 - Copy (2) - Copy.png
starting segmenting image: data/filtrated/train/normal/4 (2) - Copy.png
successfully segmented image, saved at: data/segmented/train/normal/4 (2) - Copy.png
starting segmenting image: data/filtrated/train/normal/4.png
successfully segmented image, saved at: data/segmented/train/normal/4.png
starting segmenting image: data/filtrated/train/normal/20 - 

In [9]:
# Input preparation

import pandas as pd
import os
from skimage.transform import resize
from skimage.io import imread
import numpy as np
import matplotlib.pyplot as plt

# Every image is resized to this (height, width, channels) shape before being
# flattened into one feature row.
RESIZE_DIMENS=(150,150,3)

flat_data_arr=[] # img data array
target_arr=[] # image label array

# Build one flattened feature vector per filtrated training image, labelled
# with the name of the class folder it came from.
for disease_type in disease_types:
    imgs_dir=f'{base_data_path}/filtrated/train/{disease_type}'
    # os.listdir returns entries in arbitrary order; sorting fixes the row
    # order so the seeded train/test split further down is reproducible.
    imgs_names=sorted(os.listdir(imgs_dir))
    for img_name in imgs_names:
        ip_img=f'{imgs_dir}/{img_name}'
        img_array=cv2.imread(ip_img)
        if img_array is None:
            # cv2.imread returns None for unreadable or non-image files;
            # skip them rather than letting resize() fail on None.
            print(f'skipping unreadable image: {ip_img}')
            continue
        img_resized=resize(img_array,RESIZE_DIMENS)
        flat_data_arr.append(img_resized.flatten())
        target_arr.append(disease_type)

flat_data=np.array(flat_data_arr)
target=np.array(target_arr)
df=pd.DataFrame(flat_data) #dataframe
# The label is appended as the last column, so the two slices below separate
# the pixel features from the target.
df['Target']=target
x=df.iloc[:,:-1] #input data
y=df.iloc[:,-1] #output data

In [11]:
# Model creation
from sklearn import svm
from sklearn.model_selection import GridSearchCV
# Hyper-parameter combinations explored by the cross-validated grid search.
param_grid={'C':[0.1,1,10,100],'gamma':[0.0001,0.001,0.1,1],'kernel':['rbf','poly']}
# probability=True enables predict_proba, which the single-image prediction
# cell relies on. The probability calibration shuffles data internally, so
# random_state is fixed (same seed as the train/test split) to make the
# reported probabilities reproducible between runs.
svc=svm.SVC(probability=True,random_state=77)
model=GridSearchCV(svc,param_grid)

In [None]:
# Model training
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing; stratify=y keeps the class
# proportions the same in both parts and random_state fixes the shuffle.
x_train,x_test,y_train,y_test=train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=77,
    stratify=y,
)
print('Splitted Successfully')
# Fits the estimator for every parameter combination in the grid.
model.fit(x_train,y_train)
print('The Model is trained well with the given images')
# model.best_params_ contains the best parameters obtained from GridSearchCV

Splitted Successfully


In [None]:
from sklearn.metrics import accuracy_score

# Model testing
# Predict the held-out samples and compare against their true labels.
y_pred=model.predict(x_test)
print("The predicted Data is :")
print(y_pred)
print("The actual data is:")
print(np.array(y_test))
# accuracy_score is documented as (y_true, y_pred). Accuracy itself is
# symmetric, but keeping the documented order avoids wrong results if this
# line is ever switched to an asymmetric metric such as precision or recall.
print(f"The model is {accuracy_score(y_test,y_pred)*100}% accurate")

In [None]:
# Single-image prediction
# NOTE(review): the training rows come from the 'filtrated' stage (enhanced +
# median-filtered), while the image here is used as loaded. Confirm whether
# it should go through the same preprocessing before prediction.
url=input('Enter URL of Image :')
img=imread(url)
plt.imshow(img)
plt.show()
# Training images were loaded with cv2.imread, i.e. as 3-channel BGR arrays,
# whereas skimage's imread yields RGB (or 2-D greyscale / RGBA). Bring the
# image into the same layout so the features are comparable.
if img.ndim==2:
    img_bgr=np.stack([img]*3,axis=-1)
else:
    img_bgr=img[...,:3][...,::-1]
img_resize=resize(img_bgr,RESIZE_DIMENS)
features=[img_resize.flatten()]
probability=model.predict_proba(features)
# model.classes_ lists the labels in the column order of predict_proba.
for ind,val in enumerate(model.classes_):
    print(f'{val} = {probability[0][ind]*100}%')
# predict() already returns the class name, so it is printed directly.
print("The predicted image is : "+str(model.predict(features)[0]))