In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Class names to plot, in the order their bars appear on the x axis
labels = ["squamous.cell.carcinoma", "normal", "adenocarcinoma", "large.cell.carcinoma"]

# Per-class counts looked up in test_set; a class with no entry counts as 0
test_list = np.array([test_set.get(name, 0) for name in labels])

# One bar per class at consecutive integer x positions
x = np.arange(len(labels))
width = 0.25

fig, ax = plt.subplots()
test_bar = ax.bar(x, test_list, width, label="Test")

ax.set_ylabel('# of Images')
ax.set_title('Chest CT Scan Dataset')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=15)  # tilt the long class names
ax.legend()

# Write each bar's height just above the middle of its top edge
for rect in test_bar:
    bar_height = rect.get_height()
    top_center = (rect.get_x() + rect.get_width() / 2, bar_height)
    ax.annotate(
        '{}'.format(bar_height),
        xy=top_center,
        xytext=(0, 3),  # shift the text 3 points upward
        textcoords="offset points",
        ha='center',
        va='bottom',
    )
# Create an ImageDataGenerator with default settings; the same instance is
# reused to build the test, train, and validation directory iterators below.
test_datagen = ImageDataGenerator()

# Directory iterator over the "test" split
test_generator = test_datagen.flow_from_directory(
    directory=os.path.join(dataset_dir, "test"),
    target_size=(224, 224),
    batch_size=1,  # one image per batch
    class_mode='categorical',
    shuffle=False
)
# Directory iterators over the "train" and "valid" splits, same settings
train_generator = test_datagen.flow_from_directory(
    directory=os.path.join(dataset_dir, "train"),
    target_size=(224, 224),
    batch_size=1,
    class_mode='categorical',
    shuffle=False
)

val_generator = test_datagen.flow_from_directory(
    directory=os.path.join(dataset_dir, "valid"),
    target_size=(224, 224),
    batch_size=1,
    class_mode='categorical',
    shuffle=False
)


def _class_counts(generator):
    """Return the number of samples per class index for a directory iterator."""
    # `generator.classes` holds one integer class index per sample, so summing
    # it would add up indices instead of counting samples. bincount tallies how
    # often each index occurs; minlength keeps a slot for classes with no samples.
    return np.bincount(generator.classes, minlength=len(generator.class_indices))


# Per-class sample counts, ordered by class index (see `class_indices`)
test_class_counts = _class_counts(test_generator)
train_class_counts = _class_counts(train_generator)
val_class_counts = _class_counts(val_generator)

print("Test dataset class counts:", test_class_counts)
print("Train dataset class counts:", train_class_counts)
print("Validation dataset class counts:", val_class_counts)
# Count the directory entries under each class sub-folder of a dataset split
def get_file_count(directory):
    """Return per-class entry counts for ``directory``.

    For every name in the module-level ``class_labels`` (in that order), the
    result holds the number of entries ``os.listdir`` reports for
    ``directory/<name>``, or 0 when that path does not exist.
    """
    class_dirs = (os.path.join(directory, label) for label in class_labels)
    return [
        len(os.listdir(path)) if os.path.exists(path) else 0
        for path in class_dirs
    ]

# Per-class counts for the test, train, and validation splits, computed in
# that order from the "test", "train", and "valid" sub-directories.
test_counts, train_counts, val_counts = (
    get_file_count(os.path.join(dataset_dir, split))
    for split in ("test", "train", "valid")
)
# Hand-written class-name list; kept unchanged because other code in this
# file indexes into it.
test_class_names = ["squamous.cell.carcinoma", "normal", "adenocarcinoma", "large.cell.carcinoma"]

# Ground-truth class index of every test sample, as recorded by the generator
y_true = np.asarray(test_generator.classes)
# No model predictions are available here, so random class indices stand in
# for them. The "Pred" titles below are placeholders for demonstration only.
y_pred = np.random.randint(0, 4, size=(72,))

# The generator assigns class indices itself (`class_indices` maps
# name -> index), so invert that mapping rather than trusting the
# hand-written order of test_class_names.
index_to_name = {index: name for name, index in test_generator.class_indices.items()}

# Show the next 12 test images in a 3x4 grid with their actual labels
plt.figure(figsize=(12, 12))
for i in range(12):
    image, label = next(test_generator)  # batch size is 1: one image, one label row
    # class_mode='categorical' yields a per-class label vector; argmax
    # recovers the class index of the image being displayed.
    true_name = index_to_name[int(np.argmax(label[0]))]
    ax = plt.subplot(3, 4, i + 1)
    plt.imshow(image[0].astype("uint8"))
    plt.title(f"True: {true_name}, Pred: {test_class_names[y_pred[i]]}")
    plt.axis("off")
plt.show()
# Accumulators handed to obtain_data (presumably filled in place; confirm
# against its definition)
X, Y = [], []

# Collect samples from the train, test, and validation directories, in that order
for source_dir in (train_dir, test_dir, val_dir):
    obtain_data(X, Y, source_dir)

# Turn the accumulated lists into numpy arrays
X = np.array(X)
Y = np.array(Y)

# Encode each label as the position of its first match in unique_classes
# (argmax over an all-False comparison is 0, so an unmatched label maps to 0)
class_lookup = np.array(unique_classes)
Y_encoded = np.array([np.argmax(class_lookup == label) for label in Y])

# Hold out 20% of the data, then halve the hold-out into test and validation
X_train, X_holdout, Y_train, Y_holdout = train_test_split(
    X, Y_encoded, test_size=0.2, random_state=42
)
X_test, X_val, Y_test, Y_val = train_test_split(
    X_holdout, Y_holdout, test_size=0.5, random_state=42
)
# Report the array shapes of each split, one titled section per split
shape_report = (
    ("Train Data:", "X_train shape:", X_train, "Y_train shape:", Y_train),
    ("\nTest Data:", "X_test shape:", X_test, "Y_test shape:", Y_test),
    ("\nValidation Data:", "X_val shape:", X_val, "Y_val shape:", Y_val),
)
for title, x_caption, x_data, y_caption, y_data in shape_report:
    print(title)
    print(x_caption, x_data.shape)
    print(y_caption, y_data.shape)

# Per-sample input shape taken from the training array, so the first layer
# always matches what fit() is fed instead of a hard-coded (256, 256, 3).
input_shape = X_train.shape[1:]
# The integer labels are positions in unique_classes, so the softmax layer
# needs one unit per entry there; counting only the labels present in Y could
# leave an encoded index without an output unit.
num_classes = len(unique_classes)

# Build the model: three Conv2D + MaxPooling2D stages, then a dense classifier
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# Compile the model. Labels are integer class indices and the last layer
# already applies softmax, hence the sparse loss with from_logits=False.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

# Train the model, monitoring the validation split after each epoch
history = model.fit(
    X_train, Y_train,
    epochs=10,
    batch_size=32,
    verbose=1,
    validation_data=(X_val, Y_val)
)

# Evaluate the model on the held-out test data
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')
def predict(model, img):
    """Classify a single image and report the winning class with its score.

    Args:
        model: Object whose ``predict(batch)`` returns one row of per-class
            scores per input image.
        img: The image to classify. Either an object exposing ``.numpy()``
            (such as a tensor) or anything ``img_to_array`` accepts.

    Returns:
        A ``(predicted_class, confidence)`` tuple: the entry of the
        module-level ``test_class_names`` at the highest-scoring index, and
        that score as a percentage rounded to two decimals.
    """
    # Use the image passed in by the caller rather than a notebook global,
    # so the result actually depends on the `img` argument.
    pixels = img.numpy() if hasattr(img, "numpy") else img
    img_array = tf.keras.preprocessing.image.img_to_array(pixels)
    img_array = tf.expand_dims(img_array, 0)  # add a leading batch dimension

    predictions = model.predict(img_array)
    scores = predictions[0]  # the batch holds exactly one image

    # NOTE(review): assumes the model's output order matches test_class_names;
    # confirm against the label encoding used for training.
    predicted_class = test_class_names[np.argmax(scores)]
    confidence = round(100 * (np.max(scores)), 2)
    return predicted_class, confidence
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Predict labels for the test set; argmax picks the highest-scoring class per row
Y_pred = model.predict(X_test)
Y_pred_classes = np.argmax(Y_pred, axis=1)

# The integer labels in Y_test are positions in unique_classes, so the axis
# names must follow that order. np.unique(Y) is sorted and need not match it.
class_names = list(unique_classes)

# Compute the confusion matrix over an explicit label set, so it keeps one
# row and column per class even when a class is absent from this split and
# therefore always lines up with the tick labels.
conf_matrix = confusion_matrix(
    Y_test, Y_pred_classes, labels=np.arange(len(class_names))
)

# Plot the confusion matrix (rows: actual class, columns: predicted class)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
