# DSC 180B CNN Prototype

### Getting the Labels

In [None]:
import pandas as pd
import pydicom
import matplotlib.pylab as plt
import numpy as np
import torch 
import cv2 
import tensorflow as tf
from sklearn.model_selection import train_test_split
from torchvision import models
import torch.nn as nn

In [None]:
# Load the label table, shuffle the rows (no seed is given, so the order
# changes from run to run) and rebuild a clean 0..n-1 index.
df = (
    pd.read_csv('../data/toy_v3.csv')
    .sample(frac=1)
    .reset_index(drop=True)
)

# Binarise the target column: a raw value of -1 maps to 0, every other
# value maps to 1.
df['Abnormal'] = df['Abnormal'].ne(-1).astype('int64')
df.head()


In [None]:
# Read one DICOM dataset per row of the label table, in table order.
# NOTE(review): the image directory is an absolute, machine-specific path.
X = [
    pydicom.dcmread('/Users/rohan/DSC180BXRayImageAuto/res/xray_imgs/' + dicom_id + '.dcm')
    for dicom_id in df['dicom_id']
]

# Sanity check: print the first dataset and display its pixel data.
first_dicom = X[0]
print(first_dicom)
plt.imshow(first_dicom.pixel_array, cmap='gray')

### Preprocessing

In [None]:
def crop_img(img, threshold=100):
    """Trim the dark border of a 2-D image, probing its centre row and column.

    Starting from each of the four edges, the function walks inwards along
    the centre column (for top/bottom) and the centre row (for left/right),
    skipping pixels darker than ``threshold`` until it reaches the first
    pixel that is at least ``threshold``. Those four positions define the
    crop box, and the box is inclusive of the bright pixels found.

    Args:
        img: 2-D array of pixel intensities.
        threshold: Intensities strictly below this value count as border.

    Returns:
        The cropped view of ``img``, or ``img`` unchanged when it is empty
        or when no usable box is found (e.g. the probed lines are entirely
        dark).
    """
    if img.size == 0:
        return img

    height, width = img.shape[0], img.shape[1]
    center_row, center_col = height // 2, width // 2
    top, left, bottom, right = 0, 0, height - 1, width - 1

    # Walk inwards from each edge while the probed pixel is still dark.
    while top < height and img[top, center_col] < threshold:
        top += 1
    while left < width and img[center_row, left] < threshold:
        left += 1
    while bottom > 0 and img[bottom, center_col] < threshold:
        bottom -= 1
    while right > 0 and img[center_row, right] < threshold:
        right -= 1

    # Degenerate box: fall back to the untouched image.
    if top >= bottom or left >= right:
        return img

    # bottom/right index the last bright row/column, so the slice end must be
    # +1 to keep them in the result.
    return img[top:bottom + 1, left:right + 1]

def normalize_image(img):
    """Min-max scale an image to the full 8-bit range.

    The minimum intensity maps to 0 and the maximum to 255; values in
    between are scaled linearly and truncated to integers.

    Args:
        img: Array of pixel intensities (any numeric dtype).

    Returns:
        A ``uint8`` array with the same shape as ``img``. A constant image
        (max == min) has no contrast to stretch and yields all zeros.
    """
    # Work in float64 so signed integer inputs cannot overflow in max - min.
    values = np.asarray(img, dtype=np.float64)
    lowest = np.min(values)
    span = np.max(values) - lowest

    # Guard against division by zero on flat images.
    if span == 0:
        return np.zeros(values.shape, dtype=np.uint8)

    scaled = (values - lowest) / span  # Scale to [0, 1]
    return (scaled * 255).astype(np.uint8)

def apply_clahe(img, clip_limit=2.0, tile_grid_size=(8,8)):
    """Run OpenCV's CLAHE (contrast-limited adaptive histogram equalisation).

    Args:
        img: Image passed straight to ``CLAHE.apply``.
        clip_limit: Forwarded as ``clipLimit`` to ``cv2.createCLAHE``.
        tile_grid_size: Forwarded as ``tileGridSize`` to ``cv2.createCLAHE``.

    Returns:
        The equalised image produced by ``CLAHE.apply``.
    """
    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    enhanced = equalizer.apply(img)
    return enhanced

def resize_image(img, target_size=(224, 224)):
    """Resize an image with area interpolation.

    Args:
        img: Image to resize.
        target_size: Output size handed to ``cv2.resize`` as its ``dsize``
            argument (OpenCV reads this as ``(width, height)``).

    Returns:
        The resized image.
    """
    resized = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
    return resized

def add_random_rotation(img, max_angle=5):
    """Rotate an image about its centre by a random angle.

    The angle is drawn uniformly from ``[-max_angle, max_angle)`` using
    NumPy's global random state, so results differ between calls unless the
    caller seeds ``np.random``.

    Args:
        img: Image to rotate.
        max_angle: Bound on the rotation angle (degrees, per OpenCV's
            ``getRotationMatrix2D``).

    Returns:
        The rotated image, with the same width and height as the input.
    """
    theta = np.random.uniform(-max_angle, max_angle)
    rows, cols = img.shape[0], img.shape[1]
    pivot = (cols // 2, rows // 2)  # OpenCV points are (x, y)
    rotation = cv2.getRotationMatrix2D(pivot, theta, 1)
    return cv2.warpAffine(img, rotation, (cols, rows))

def convert_to_rgb(img):
    """Convert a grayscale image to RGB via ``cv2.COLOR_GRAY2RGB``.

    Args:
        img: Grayscale image.

    Returns:
        The image returned by ``cv2.cvtColor`` for the gray-to-RGB conversion.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    return rgb

In [None]:
def preprocess_image(img, augment=True):
    """Run the full preprocessing pipeline on one raw image.

    Steps, in order: crop the dark border, apply CLAHE, resize, min-max
    normalise to ``uint8``, optionally apply a small random rotation, and
    convert grayscale to RGB.

    Args:
        img: Raw 2-D pixel array (e.g. a DICOM ``pixel_array``).
            NOTE(review): CLAHE runs before normalisation, so the raw dtype
            must be one OpenCV's CLAHE accepts - confirm for your data.
        augment: When True (the default, matching the previous behaviour) a
            random rotation is applied. Pass False for validation/test
            images so evaluation data is deterministic and un-augmented.

    Returns:
        The preprocessed RGB image.
    """
    img = crop_img(img)
    img = apply_clahe(img)
    img = resize_image(img)
    img = normalize_image(img)
    if augment:
        img = add_random_rotation(img)
    img = convert_to_rgb(img)

    return img

In [None]:
# Visual check of preprocess_image: show raw vs. preprocessed for the first
# few images. Slicing (rather than range(20)) avoids an IndexError when fewer
# than 20 images were loaded.
for dicom in X[:20]:
    test_img = dicom.pixel_array
    # create side-by-side plots
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    ax[0].imshow(test_img, cmap='gray')
    ax[0].set_title('Original Image')
    processed = preprocess_image(test_img)
    if processed is not None:
        ax[1].imshow(processed, cmap='gray')
    ax[1].set_title('Preprocessed Image')

    plt.show()
    

In [None]:
# Run the preprocessing pipeline over every loaded DICOM and stack the
# per-image results into a single array (one leading-axis entry per image).
preprocessed_X = np.array([preprocess_image(dcm.pixel_array) for dcm in X])

# Wrap the stacked array in a torch tensor.
preprocessed_X = torch.tensor(preprocessed_X)

# Hold out 20% of the samples; the fixed seed makes the split itself
# repeatable for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    preprocessed_X,
    df['Abnormal'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Display the shapes of the train/test features and labels produced by the split.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

### Healthy vs Unhealthy Lung Comparison

In [None]:
# Gather up to `n_examples` training images per class. Iterating the data
# directly (instead of an unbounded index loop) stops cleanly at the end of
# the training set when a class has fewer than `n_examples` samples.
n_examples = 10
healthy_x_rays = []
abnormal_x_rays = []
for image, label in zip(X_train, y_train):
    if len(healthy_x_rays) >= n_examples and len(abnormal_x_rays) >= n_examples:
        break
    # Label 0 is treated as healthy; any other label as abnormal.
    bucket = healthy_x_rays if label == 0 else abnormal_x_rays
    if len(bucket) < n_examples:
        bucket.append(image)

# Plot the examples in pairs; zip stops at the shorter list, so a scarce
# class simply yields fewer figures rather than an IndexError.
for healthy, abnormal in zip(healthy_x_rays, abnormal_x_rays):
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    ax[0].imshow(healthy, cmap='gray')
    ax[0].set_title('Healthy X-Ray')
    ax[1].imshow(abnormal, cmap='gray')
    ax[1].set_title('Abnormal X-Ray')
    plt.show()


In [None]:
# Class distribution of the held-out labels.
y_test.value_counts()

In [None]:
# Class distribution of the training labels.
y_train.value_counts()

In [None]:
# Shape of a single preprocessed training image.
X_train[0].shape

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, Rescaling
# Import the backbone from the same Keras namespace as everything else so the
# layers and the model container come from one implementation.
from tensorflow.keras.applications import ResNet50V2
from sklearn.utils import class_weight

# Keras expects array inputs; np.asarray is a no-op for arrays and converts
# CPU torch tensors / pandas Series.
X_train_np = np.asarray(X_train)
X_test_np = np.asarray(X_test)
y_train_np = np.asarray(y_train)
y_test_np = np.asarray(y_test)

# Compute class weights, keyed by the actual class labels (not by position).
classes = np.unique(y_train_np)
class_weights = class_weight.compute_class_weight('balanced', classes=classes, y=y_train_np)
class_weights_dict = {int(c): float(w) for c, w in zip(classes, class_weights)}


# The preprocessed images are channels-last: (height, width, channels).
input_shape = (224, 224, 3)

model = Sequential()
model.add(Input(shape=input_shape))
# ResNetV2 ImageNet weights expect inputs scaled to [-1, 1]; the images
# arrive as uint8 in [0, 255].
model.add(Rescaling(1.0 / 127.5, offset=-1.0))

# Pretrained backbone, frozen so only the new classification head is trained.
resnet50 = ResNet50V2(weights = "imagenet", input_shape = input_shape, include_top = False)
resnet50.trainable = False

model.add(resnet50)

model.add(Flatten())

model.add(Dense(units = 128, activation = "relu"))
model.add(Dropout(0.5))

# Single sigmoid unit: the output is P(abnormal).
model.add(Dense(units = 1, activation = "sigmoid"))

# Optimizer & Compilation
initial_lr = 0.0005  # Lowered learning rate for stability
model.compile(optimizer=Adam(learning_rate=initial_lr), 
              loss='binary_crossentropy', 
              metrics=['accuracy'])

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True, verbose=1)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)
model_checkpoint = ModelCheckpoint('best_model.keras', monitor='val_loss', save_best_only=True, verbose=1)

# Training the model
# NOTE(review): the test split doubles as the validation set here, so early
# stopping / checkpointing are tuned on it and later test metrics will be
# optimistic. Consider carving a separate validation split from X_train.
history = model.fit(X_train_np, y_train_np, 
                    validation_data=(X_test_np, y_test_np), 
                    epochs=20, 
                    batch_size=32, 
                    class_weight=class_weights_dict,
                    callbacks=[early_stopping, lr_scheduler, model_checkpoint])

# Save final trained model
model.save('cnn_chest_xray_model_final.keras')


In [None]:
# create a confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# The model ends in a single sigmoid unit, so predict() returns one
# probability per sample with shape (N, 1). argmax over that axis would
# always be 0; threshold the probability at 0.5 instead.
y_prob = model.predict(np.asarray(X_test))
y_pred = (np.asarray(y_prob).ravel() >= 0.5).astype(int)
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')

# output precision, recall, f1-score
print(classification_report(y_test, y_pred))

