In [19]:
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import pandas as pd 
# Set up paths
images_dir = r"B:\sap\Capstone project\thyroid\data"
data = pd.read_excel(r"B:\sap\Capstone project\Thyroid Cancer Risk Prediction\thyroid_data_img.xlsx")
# Split the data into training and validation sets
train_data, validate_data = train_test_split(data, test_size=0.2, random_state=42)

# Image dimensions and batch size
img_height, img_width = 224, 224
batch_size = 32

# Data generators
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

train_generator = datagen.flow_from_dataframe(
    train_data,
    directory=images_dir,
    x_col="ImageID",
    y_col="CancerRisk",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="raw"  # Raw values for regression
)

validate_generator = datagen.flow_from_dataframe(
    validate_data,
    directory=images_dir,
    x_col="ImageID",
    y_col="CancerRisk",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="raw"
)

# Build the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation="relu", input_shape=(img_height, img_width, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="linear")  # Output cancer risk percentage
])

model.compile(optimizer="adam", loss="mean_squared_error", metrics=["mae"])

# Train the model
history = model.fit(
    train_generator,
    validation_data=validate_generator,
    epochs=20
)

# Save the trained model
model.save("thyroid_cancer_risk_model.h5")

# Predict cancer risk for new images
from tensorflow.keras.models import load_model

model = load_model("thyroid_cancer_risk_model.h5")

# Directory with new images for prediction
predict_images_dir = r"B:\sap\Capstone project\Thyroid Cancer Risk Prediction\test\first_stage"
predict_data = pd.DataFrame({
    'ImageID': os.listdir(predict_images_dir)  # Assumes all images in directory need predictions
})

predict_generator = datagen.flow_from_dataframe(
    predict_data,
    directory=predict_images_dir,
    x_col="ImageID",
    target_size=(img_height, img_width),
    batch_size=1,
    class_mode=None,  # No labels for prediction
    shuffle=False
)

# Predict and save results
predictions = model.predict(predict_generator)
predict_data["PredictedCancerRisk"] = predictions
predict_data.to_excel("predictions.xlsx", index=False)


Found 83 validated image filenames.
Found 34 validated image filenames.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - loss: 15.7828 - mae: 2.3200 - val_loss: 0.7729 - val_mae: 0.8460
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 782ms/step - loss: 0.6501 - mae: 0.7239 - val_loss: 0.6994 - val_mae: 0.8016
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 753ms/step - loss: 0.6074 - mae: 0.6935 - val_loss: 0.6135 - val_mae: 0.7459
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - loss: 0.5064 - mae: 0.6290 - val_loss: 0.5530 - val_mae: 0.7038
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - loss: 0.4578 - mae: 0.6056 - val_loss: 0.4720 - val_mae: 0.6435
Epoch 6/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 978ms/step - loss: 0.3732 - mae: 0.5284 - val_loss: 0.3471 - val_mae: 0.5359
Epoch 7/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 988ms/step - loss: 0.3155 - mae: 0.



Found 14 validated image filenames.
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


In [20]:
# Save the trained model
model.save("cnn_img_model.h5")

