## Quantitative Analysis of Histopathological Images for Autoimmune Disease Diagnosis

### Brief Overview of the Autoimmune Diseases

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Read the three illustrative disease images from disk
img_lupus = mpimg.imread('Medical/Disease Images/Lupus.png')
img_arthritis = mpimg.imread('Medical/Disease Images/Arthritis.png')
img_sclerosis = mpimg.imread('Medical/Disease Images/Sclerosis.png')

# One row, three columns: one panel per disease
fig, axis = plt.subplots(1, 3, figsize=(12, 4))

# Pair every panel with its title and picture, then draw them in one pass
panels = (
    ('Lupus', img_lupus),
    ('Arthritis', img_arthritis),
    ('Sclerosis', img_sclerosis),
)
for panel, (title, picture) in zip(axis, panels):
    panel.imshow(picture)
    panel.set_title(title)
    panel.axis('off')

# Tighten the spacing, then render the figure
plt.tight_layout()
plt.show()

### Importing Libraries

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import  GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input
import seaborn as sns
import warnings
# Silence all warnings for the rest of the notebook.
# A preceding filterwarnings('always') call was removed as dead code: every
# filterwarnings() call inserts its rule at the front of the filter list, so
# the 'ignore' rule registered last always took precedence.
warnings.filterwarnings('ignore')

### Data Collection

In [None]:
# Root folder of the dataset, and the class sub-folders inside it whose
# names double as the class labels
data_dir = 'Medical'
subdirectories = [
    'Lupus',
    'Arthritis',
    'Sclerosis',
]

In [None]:
# Show up to three sample images from every class folder, one row per class.
samples_per_class = 3
fig, axis = plt.subplots(len(subdirectories), samples_per_class, figsize=(8, 8), squeeze=False)

for i, subdir in enumerate(subdirectories):
    subdir_path = os.path.join(data_dir, subdir)
    # List the folder once per class (not once per column) and sort the
    # entries so the same images are shown on every run.
    filenames = sorted(os.listdir(subdir_path))
    for j in range(samples_per_class):
        # Hide the ticks on every cell, including cells that stay empty.
        axis[i, j].axis('off')
        if j >= len(filenames):
            # Fewer files than columns: leave the cell blank instead of failing.
            continue
        imagepath = os.path.join(subdir_path, filenames[j])
        image = cv2.imread(imagepath)
        if image is None:
            # cv2.imread signals an unreadable file with None rather than raising.
            print(f"Could not read image: {imagepath}")
            continue
        # OpenCV loads pixels in BGR order; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        axis[i, j].imshow(image)
        axis[i, j].set_title(subdir)

plt.tight_layout()
plt.show()

### Data Preprocessing

In [None]:
# Load every readable image from each class folder, smooth it with a 5x5
# Gaussian blur, and keep it together with its class label.
cleaned_images = []

for subdir in subdirectories:
    subdir_path = os.path.join(data_dir, subdir)
    # os.listdir returns entries in arbitrary order; sorting keeps the dataset
    # order (and therefore the seeded train/test split) reproducible.
    for filename in sorted(os.listdir(subdir_path)):
        imagepath = os.path.join(subdir_path, filename)
        image = cv2.imread(imagepath)
        if image is None:
            # cv2.imread returns None for anything it cannot decode.
            print(f"Could not read image: {imagepath}")
            continue
        cleaned_img = cv2.GaussianBlur(image, (5, 5), 0)
        cleaned_images.append((cleaned_img, subdir))


In [None]:
# Preprocessing Function
def preprocess_image(img, target_size=(224, 224)):
    """Resize an image to a fixed spatial size.

    Args:
        img: Image array accepted by ``cv2.resize``.
        target_size: ``(width, height)`` of the output. The default matches
            the ``(224, 224, 3)`` input shape the models in this notebook
            are built with.

    Returns:
        The resized image.
    """
    return cv2.resize(img, target_size)

In [None]:
# Accumulators for the resized images and their class labels
preprocessed_images, labels = [], []

In [None]:
# Resize every blurred image and record its class label alongside it
for blurred, class_name in cleaned_images:
    preprocessed_images.append(preprocess_image(blurred))
    labels.append(class_name)

### Data Visualization

In [None]:
# Show the first three Gaussian-blurred images of every class, one row per class
fig, axis = plt.subplots(len(subdirectories), 3, figsize=(8, 8))

for row, subdir in enumerate(subdirectories):
    blurred_for_class = [img for img, name in cleaned_images if name == subdir]
    for col in range(3):
        # The images were loaded by OpenCV (BGR); matplotlib expects RGB
        rgb = cv2.cvtColor(blurred_for_class[col], cv2.COLOR_BGR2RGB)
        panel = axis[row, col]
        panel.imshow(rgb)
        panel.set_title(subdir)
        panel.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Turn the image and label lists into numpy arrays
preprocessed_images, labels = np.array(preprocessed_images), np.array(labels)

In [None]:
# Map every class-name string to an integer id
label_encoder = LabelEncoder()
label_encoder.fit(labels)
encoded_labels = label_encoder.transform(labels)

In [None]:
# Convert encoded labels to categorical (one-hot encoding).
# num_classes is pinned to the number of class folders so the one-hot width
# always matches the len(subdirectories)-unit softmax layers of the models,
# instead of being inferred from the largest label present in the data.
categorical_labels = to_categorical(encoded_labels, num_classes=len(subdirectories))

In [None]:
# Split the data into training and testing sets (80% / 20%).
# Stratifying on the integer labels keeps the class proportions the same in
# both splits, so no class ends up under-represented in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    preprocessed_images,
    categorical_labels,
    test_size=0.2,
    random_state=42,
    stratify=encoded_labels,
)

In [None]:
# Augmentation settings: random rotations, shifts, shear, zoom and horizontal
# flips; every generated image is also passed through preprocess_input
augmentation_options = {
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
    'preprocessing_function': preprocess_input,
}
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Augmented batches are drawn from the training split only
train_generator = datagen.flow(
    X_train,
    y_train,
    batch_size=32,
)

In [None]:
# VGG16 convolutional base with ImageNet weights; the classifier top is omitted
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
base_model.summary()

## Feature Extraction

In [None]:
# Run a batch of inputs through a model and hand back what it predicts
def extract_features(data, model):
    """Return the output of ``model.predict`` for ``data``."""
    return model.predict(data)

In [None]:
# Extract features for training and testing data
def _extract_flat_features(images, model, batch_size=32):
    """Return one flattened feature vector per image in ``images``.

    The images are pushed through ``model`` in chunks of ``batch_size``
    instead of one ``predict`` call per image, which is much faster while
    keeping memory use bounded.

    Args:
        images: Array of images as loaded by OpenCV, i.e. channels in BGR
            order, indexed as (image, row, column, channel).
        model: Model passed on to ``extract_features``.
        batch_size: Number of images preprocessed and predicted at once.

    Returns:
        A list with one 1-D feature array per input image.
    """
    features = []
    for start in range(0, len(images), batch_size):
        batch = images[start:start + batch_size]
        # The images come from cv2.imread and are therefore BGR, while
        # preprocess_input expects RGB input (it performs the RGB->BGR swap
        # itself). Reverse the channel axis first so the network receives the
        # channel order its ImageNet weights were trained with.
        batch = preprocess_input(batch[..., ::-1])
        batch_features = extract_features(batch, model)
        # Flatten each image's feature map into a single vector.
        features.extend(batch_features.reshape(len(batch_features), -1))
    return features


# Both splits go through the same helper so they are always preprocessed alike.
train_features = _extract_flat_features(X_train, base_model)
test_features = _extract_flat_features(X_test, base_model)

In [None]:
# Stack the per-image feature vectors of each split into one numpy array
train_features, test_features = (
    np.array(split_features) for split_features in (train_features, test_features)
)

In [None]:
# Classifier trained on the flattened feature vectors: two Dense+Dropout
# stages followed by a softmax with one unit per class
feature_dim = train_features.shape[1]

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(feature_dim,)))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(subdirectories), activation='softmax'))

In [None]:
# Configure training: Adam optimiser, categorical cross-entropy loss,
# accuracy as the tracked metric
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the current `model`
model.summary()

In [None]:
# Train on the extracted training features; the test-split features are used
# for the per-epoch validation metrics. The returned History is kept in
# `history` for the plots that follow.
history = model.fit(
    x=train_features,
    y=y_train,
    batch_size=30,
    epochs=20,
    validation_data=(test_features, y_test),
)

In [None]:
# Training vs. validation accuracy per epoch
plt.figure(figsize=(6, 6))
accuracy_curves = (
    ('accuracy', 'blue', 'Training Accuracy'),
    ('val_accuracy', 'red', 'Validation Accuracy'),
)
for metric, colour, legend_label in accuracy_curves:
    plt.plot(history.history[metric], color=colour, label=legend_label)
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch
plt.figure(figsize=(6, 6))
loss_curves = (
    ('loss', 'blue', 'Training Loss'),
    ('val_loss', 'red', 'Validation Loss'),
)
for metric, colour, legend_label in loss_curves:
    plt.plot(history.history[metric], color=colour, label=legend_label)
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

## Analysis Using Patterns

In [None]:
# Function to identify patterns
def identify_patterns(image, threshold=200, color=(0, 255, 0), thickness=3):
    """Outline the bright regions of a BGR image.

    The image is converted to grayscale, binarised at ``threshold`` and the
    outer contours of the resulting regions are drawn on a copy of the input.

    Args:
        image: 3-channel image in BGR order (it is converted with
            ``cv2.COLOR_BGR2GRAY``).
        threshold: Gray level above which a pixel counts as foreground.
        color: Colour of the drawn contours, in the image's channel order.
        thickness: Line thickness of the drawn contours, in pixels.

    Returns:
        A copy of ``image`` with the contours drawn on it; the input itself
        is left untouched.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Threshold the image (the returned threshold value is not needed)
    _, thresh = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    # Find contours. findContours returns (contours, hierarchy) in OpenCV 4.x
    # but (image, contours, hierarchy) in 3.x; the contours are always the
    # second-to-last element, so index from the end.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    return cv2.drawContours(image.copy(), contours, -1, color, thickness)

In [None]:
# Display the identified patterns for a few images of every class
num_images_per_directory = 5

num_rows = len(subdirectories)
num_cols = num_images_per_directory

# squeeze=False keeps `axis` two-dimensional even for a single row or column
fig, axis = plt.subplots(num_rows, num_cols, figsize=(3*num_cols, 3*num_rows), squeeze=False)

for i, subdir in enumerate(subdirectories):
    subdir_images = preprocessed_images[labels == subdir][:num_images_per_directory]
    for j in range(num_cols):
        # Hide the ticks on every cell, including the ones that stay empty
        # when a class has fewer images than columns.
        axis[i, j].axis('off')
        if j >= len(subdir_images):
            continue
        # The stored images are 8-bit BGR arrays (cv2.imread output, blurred
        # and resized), so they are passed on unchanged. Scaling them by 255
        # would wrap around in uint8 and corrupt the pixel values.
        pattern_img = identify_patterns(subdir_images[j])
        # matplotlib expects RGB channel order, so convert before showing.
        axis[i, j].imshow(cv2.cvtColor(pattern_img, cv2.COLOR_BGR2RGB))
        axis[i, j].set_title(f'{subdir}')

plt.tight_layout()
plt.show()

In [None]:
# ResNet50 convolutional base with ImageNet weights; the classifier top is
# omitted. This rebinds `base_model`.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

In [None]:
# Classification head on top of the base model: global average pooling, two
# Dense+Dropout stages, then a softmax with one unit per class
x = GlobalAveragePooling2D()(base_model.output)
for units in (512, 256):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.5)(x)
predictions = Dense(len(subdirectories), activation='softmax')(x)

### Model Evaluation

In [None]:
# Tie the base model's input to the new softmax output to form the full network
model = Model(
    inputs=base_model.input,
    outputs=predictions,
)

# Same training configuration as before: Adam, categorical cross-entropy,
# accuracy metric
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model.
# The returned History is stored in `history` so that the final accuracy/loss
# reports that read `history` describe this model; without the assignment
# `history` would still hold the results of the earlier feature-based
# classifier.
# NOTE(review): X_train / X_test are passed here as stored (no
# preprocess_input call) - confirm that is intended; any change must be
# mirrored wherever this model is used for prediction.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=20,
    validation_data=(X_test, y_test),
)

In [None]:
# Accuracy values recorded for the last epoch in `history`
final_train_accuracy, final_val_accuracy = (
    history.history[key][-1] for key in ('accuracy', 'val_accuracy')
)
print(f'Final Training Accuracy: {final_train_accuracy:.4f}')
print(f'Final Validation Accuracy: {final_val_accuracy:.4f}')

In [None]:
# Loss values recorded for the last epoch in `history`
final_train_loss, final_val_loss = (
    history.history[key][-1] for key in ('loss', 'val_loss')
)

print(f"Final Training Loss: {final_train_loss:.4f}")
print(f"Final Validation Loss: {final_val_loss:.4f}")

In [None]:
# One initially empty list of correctly classified images per class
correct_images_per_dir = {}
for class_name in subdirectories:
    correct_images_per_dir[class_name] = []

In [None]:
# For every class, classify up to 50 of its images and keep the first five
# that the model labels correctly
for subdir in subdirectories:
    subdir_images = np.array(
        [img for img, name in zip(preprocessed_images, labels) if name == subdir][:50]
    )

    # Nothing to classify for this class
    if len(subdir_images) == 0:
        continue

    predictions = model.predict(subdir_images)
    # Decode the most probable class index of every image in one call
    predicted_labels = label_encoder.inverse_transform(np.argmax(predictions, axis=1))

    correct = correct_images_per_dir[subdir]
    for image, predicted_label in zip(subdir_images, predicted_labels):
        if predicted_label == subdir:
            correct.append(image)
        # Stop as soon as five correct examples are available
        if len(correct) >= 5:
            break