In [None]:
!unzip /content/PCOS.zip

Archive:  /content/PCOS.zip
   creating: PCOS/infected/
  inflating: PCOS/infected/img_0_1023.jpg  
  inflating: PCOS/infected/img_0_1033.jpg  
  inflating: PCOS/infected/img_0_1071.jpg  
  inflating: PCOS/infected/img_0_1075.jpg  
  inflating: PCOS/infected/img_0_1079.jpg  
  inflating: PCOS/infected/img_0_1083.jpg  
  inflating: PCOS/infected/img_0_1087.jpg  
  inflating: PCOS/infected/img_0_1088.jpg  
  inflating: PCOS/infected/img_0_1099.jpg  
  inflating: PCOS/infected/img_0_113.jpg  
  inflating: PCOS/infected/img_0_1130.jpg  
  inflating: PCOS/infected/img_0_1141.jpg  
  inflating: PCOS/infected/img_0_1142.jpg  
  inflating: PCOS/infected/img_0_1182.jpg  
  inflating: PCOS/infected/img_0_1195.jpg  
  inflating: PCOS/infected/img_0_1223.jpg  
  inflating: PCOS/infected/img_0_1226.jpg  
  inflating: PCOS/infected/img_0_1234.jpg  
  inflating: PCOS/infected/img_0_1257.jpg  
  inflating: PCOS/infected/img_0_126.jpg  
  inflating: PCOS/infected/img_0_1260.jpg  
  inflating: PCOS/infe

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, Concatenate
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Count number of images in each directory
def count_images(directory, classes=('infected', 'non_infected'),
                 extensions=('.jpg', '.jpeg', '.png', '.bmp', '.ppm', '.tif', '.tiff')):
    """Count the image files stored directly inside each class sub-directory.

    Only regular files whose name ends with one of ``extensions``
    (case-insensitive) are counted. Sub-directories such as
    ``.ipynb_checkpoints`` and stray non-image files are ignored, so they no
    longer inflate the total the way a bare ``os.listdir`` length does.

    Args:
        directory (str): Dataset root that contains one folder per class.
        classes (iterable of str): Class folder names to scan. Defaults to the
            two folders used by this notebook.
        extensions (iterable of str): File suffixes (with leading dot) that
            count as images.

    Returns:
        int: Total number of matching files across all class folders.

    Raises:
        FileNotFoundError: If a class folder does not exist.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    total = 0
    for class_name in classes:
        class_dir = os.path.join(directory, class_name)
        # scandir is used as a context manager so the directory handle is closed promptly
        with os.scandir(class_dir) as entries:
            total += sum(
                1 for entry in entries
                if entry.is_file() and entry.name.lower().endswith(suffixes)
            )
    return total

# Generate synthetic clinical data
def generate_clinical_data(n_samples, seed=42):
    """Build a table of synthetic clinical measurements with a rule-based label.

    Each feature is drawn independently from a fixed normal or Bernoulli
    distribution. The ``label`` column is 1 when at least four of eight
    indicator rules hold for a row, otherwise 0.

    Args:
        n_samples (int): Number of rows to generate. ``0`` yields an empty
            frame that still has all 18 columns.
        seed (int | None): Value passed to ``np.random.seed``. The default 42
            reproduces the original hard-coded behaviour.

    Returns:
        pandas.DataFrame: 17 feature columns followed by an int64 ``label``
        column, ``n_samples`` rows.
    """
    # NOTE: this reseeds NumPy's *global* RNG, so every later np.random call in
    # the kernel is affected as well (kept for backward compatibility).
    np.random.seed(seed)

    # The draw order below determines the generated values; do not reorder.
    data = {
        'Age': np.random.normal(28, 5, n_samples),
        'BMI': np.random.normal(27, 5, n_samples),
        'Cycle_length': np.random.normal(35, 10, n_samples),
        'Weight_gain': np.random.choice([0, 1], n_samples, p=[0.3, 0.7]),
        'Hair_growth': np.random.choice([0, 1], n_samples, p=[0.4, 0.6]),
        'Skin_darkening': np.random.choice([0, 1], n_samples, p=[0.6, 0.4]),
        'Hair_loss': np.random.choice([0, 1], n_samples, p=[0.5, 0.5]),
        'Acne': np.random.choice([0, 1], n_samples, p=[0.4, 0.6]),
        'FSH': np.random.normal(6.5, 2, n_samples),
        'LH': np.random.normal(12, 5, n_samples),
        'FSH_LH_ratio': np.random.normal(0.5, 0.2, n_samples),
        'TSH': np.random.normal(2.5, 1, n_samples),
        'AMH': np.random.normal(8, 3, n_samples),
        'PRL': np.random.normal(15, 5, n_samples),
        'Testosterone': np.random.normal(60, 20, n_samples),
        'Glucose': np.random.normal(100, 20, n_samples),
        'Insulin': np.random.normal(15, 8, n_samples)
    }

    df = pd.DataFrame(data)

    # One boolean column per indicator rule; evaluated for all rows at once
    # instead of a Python-level df.apply over every row.
    indicators = [
        df['BMI'] > 25,
        df['Cycle_length'] > 35,
        df['Weight_gain'] == 1,
        df['Hair_growth'] == 1,
        df['FSH_LH_ratio'] < 1,
        df['Testosterone'] > 70,
        df['AMH'] > 4,
        df['Insulin'] > 20,
    ]
    # Summing the stacked boolean arrays gives the per-row number of rules met.
    score = np.sum([flag.to_numpy() for flag in indicators], axis=0)

    df['label'] = (score >= 4).astype('int64')
    return df

# Dataset location and image-pipeline settings shared by the cells below
train_dir = '/content/PCOS'
BATCH_SIZE = 32
IMG_SIZE = (224, 224)

# Create one synthetic clinical record per image found on disk
total_images = count_images(train_dir)
clinical_data = generate_clinical_data(total_images)
print("Generated clinical data shape:", clinical_data.shape)

# Separate the feature matrix from the rule-based label column
X_clinical = clinical_data.drop(columns=['label']).values
y_clinical = clinical_data['label'].values

# 80/20 hold-out split of the clinical table (fixed seed for repeatability)
(X_train_clinical,
 X_val_clinical,
 y_train_clinical,
 y_val_clinical) = train_test_split(X_clinical, y_clinical, test_size=0.2, random_state=42)

# Learn mean/variance on the training rows only, then apply to both splits
scaler = StandardScaler().fit(X_train_clinical)
X_train_clinical = scaler.transform(X_train_clinical)
X_val_clinical = scaler.transform(X_val_clinical)

# Image data preparation
# Training images are rescaled to [0, 1] and randomly augmented.
# NOTE(review): Keras' MobileNetV2 preprocess_input scales pixels to [-1, 1];
# this notebook uses [0, 1] both here and at inference time, so changing one
# side requires changing the other.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation images must NOT be augmented, otherwise val_loss/val_accuracy are
# measured on randomly distorted inputs. The split is decided per class from
# the file order and the validation_split fraction, so a second generator with
# the same validation_split selects the complementary subset of files.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Create generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    classes=['non_infected', 'infected'],  # explicit order: non_infected -> 0, infected -> 1
    shuffle=True
)

validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    classes=['non_infected', 'infected'],
    shuffle=False  # fixed order keeps validation batches stable across epochs
)

# Hybrid data generator
class HybridDataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that yields ``((images, clinical_rows), labels)`` batches.

    Images and labels come from the wrapped ``image_generator``. For every
    batch, clinical rows are drawn uniformly at random (with replacement) from
    ``clinical_data``, so a clinical row is not tied to a particular image or
    to that image's label.

    Args:
        image_generator: Indexable batch iterator returning ``(x_image, y)``
            for an integer batch index.
        clinical_data: 2-D array-like of clinical feature rows.
        batch_size (int): Stored for reference; the actual batch size is taken
            from the image batch that is returned.
        **kwargs: Forwarded to the Keras base-class constructor.

    Raises:
        ValueError: If ``clinical_data`` has no rows.
    """

    def __init__(self, image_generator, clinical_data, batch_size, **kwargs):
        # Keras warns ("_warn_if_super_not_called") when the base constructor is skipped.
        super().__init__(**kwargs)
        self.image_generator = image_generator
        self.clinical_data = np.asarray(clinical_data)
        if len(self.clinical_data) == 0:
            raise ValueError("clinical_data must contain at least one row")
        self.batch_size = batch_size

    def __len__(self):
        """Number of batches per epoch (same as the wrapped image generator)."""
        return len(self.image_generator)

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``((x_image, x_clinical), y)``."""
        x_image, y = self.image_generator[idx]
        # Sample as many clinical rows as there are images; the last batch may be smaller.
        indices = np.random.randint(0, len(self.clinical_data), size=x_image.shape[0])
        x_clinical = self.clinical_data[indices]
        return (x_image, x_clinical), y  # Return tuple instead of list

    def on_epoch_end(self):
        """Forward the end-of-epoch hook to the wrapped generator.

        Keras calls this hook on the object passed to ``fit``. Without
        forwarding it, the wrapped iterator never refreshes its (shuffled)
        index order between epochs.
        """
        hook = getattr(self.image_generator, 'on_epoch_end', None)
        if callable(hook):
            hook()

# Build the model
def create_hybrid_model(clinical_features):
    """Assemble the two-input (image + clinical) binary classifier.

    The image branch runs a frozen ImageNet-pretrained MobileNetV2 followed by
    Flatten -> Dense(256) -> Dropout -> Dense(128). The clinical branch is
    Dense(64) -> Dropout. Both branches are concatenated and passed through
    Dense(128) -> Dropout -> a single sigmoid unit.

    Args:
        clinical_features (int): Number of clinical feature columns.

    Returns:
        tensorflow.keras.Model: Uncompiled model taking
        ``[image_input, clinical_input]`` and producing one probability.
    """
    image_shape = (*IMG_SIZE, 3)

    # Pretrained convolutional backbone; every layer is frozen
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False

    # Image branch
    image_input = Input(shape=image_shape)
    image_features = backbone(image_input)
    image_features = Flatten()(image_features)
    image_features = Dense(256, activation='relu')(image_features)
    image_features = Dropout(0.5)(image_features)
    image_output = Dense(128, activation='relu')(image_features)

    # Clinical branch
    clinical_input = Input(shape=(clinical_features,))
    clinical_hidden = Dense(64, activation='relu')(clinical_input)
    clinical_output = Dropout(0.5)(clinical_hidden)

    # Fusion head
    fused = Concatenate()([image_output, clinical_output])
    fused = Dense(128, activation='relu')(fused)
    fused = Dropout(0.5)(fused)
    probability = Dense(1, activation='sigmoid')(fused)

    return Model(inputs=[image_input, clinical_input], outputs=probability)

# Build the hybrid network and configure it for binary classification
model = create_hybrid_model(X_clinical.shape[1])
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Wrap the image iterators so each batch also carries clinical feature rows
train_hybrid_gen = HybridDataGenerator(train_generator, X_train_clinical, BATCH_SIZE)
val_hybrid_gen = HybridDataGenerator(validation_generator, X_val_clinical, BATCH_SIZE)

# Stop after 5 epochs without val_loss improvement (restoring the best weights)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Keep only the checkpoint with the lowest val_loss on disk
best_checkpoint = ModelCheckpoint('best_pcos_model.keras',
                                  save_best_only=True,
                                  monitor='val_loss',
                                  mode='min')
callbacks = [early_stopping, best_checkpoint]

# Train for up to 50 epochs
history = model.fit(train_hybrid_gen,
                    validation_data=val_hybrid_gen,
                    epochs=50,
                    callbacks=callbacks)

# Score the trained model on the validation batches
val_loss, val_accuracy = model.evaluate(val_hybrid_gen)
print(f'Validation Loss: {val_loss:.4f}')
print(f'Validation Accuracy: {val_accuracy:.4f}')

# Persist the final model
model.save('final_pcos_model.keras')

# Draw accuracy (left) and loss (right) curves side by side
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))
panels = (
    (accuracy_ax, 'accuracy', 'Model accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'Model loss', 'Loss'),
)
for ax, metric, title, ylabel in panels:
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')

# Write the figure to disk and release it (it is not displayed inline)
fig.tight_layout()
fig.savefig('training_history.png')
plt.close(fig)

print("Training completed. Model and training history have been saved.")


Generated clinical data shape: (1924, 18)
Found 1540 images belonging to 2 classes.
Found 384 images belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/50


  self._warn_if_super_not_called()


[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 660ms/step - accuracy: 0.7457 - loss: 0.5845 - val_accuracy: 1.0000 - val_loss: 0.0015
Epoch 2/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 456ms/step - accuracy: 0.9815 - loss: 0.0558 - val_accuracy: 1.0000 - val_loss: 0.0017
Epoch 3/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 488ms/step - accuracy: 0.9858 - loss: 0.0334 - val_accuracy: 1.0000 - val_loss: 4.8742e-04
Epoch 4/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 469ms/step - accuracy: 0.9959 - loss: 0.0169 - val_accuracy: 1.0000 - val_loss: 1.9566e-04
Epoch 5/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 451ms/step - accuracy: 0.9931 - loss: 0.0195 - val_accuracy: 1.0000 - val_loss: 4.6090e-04
Epoch 6/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 469ms/step - accuracy: 0.9969 - loss: 0.0092 - val_accuracy: 1.0000 - val_loss: 5.4399e-05
Epoch 7/50
[1m49

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def _prompt_float(prompt):
    """Ask `prompt` until the reply parses as a finite number; return it as float."""
    while True:
        raw = input(prompt).strip()
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a valid number. Please try again.")
            continue
        if np.isfinite(value):
            return value
        print(f"'{raw}' is not a finite number. Please try again.")


def _prompt_binary(prompt):
    """Ask `prompt` until the reply is exactly 0 or 1; return it as int."""
    while True:
        raw = input(prompt).strip()
        if raw in ("0", "1"):
            return int(raw)
        print("Please enter 1 for Yes or 0 for No.")


def _assess_clinical_risk(values, thresholds):
    """Return (warning messages, PCOS indicator count) for the entered values."""
    # Checks that each add one point to the PCOS likelihood counter
    scoring_checks = [
        (values["bmi"] > thresholds["bmi_max"],
         "BMI is above 25, indicating potential overweight or obesity."),
        (values["cycle_length"] > thresholds["cycle_length_max"],
         "Cycle length exceeds 35 days, which may indicate irregular periods."),
        (values["fsh"] < thresholds["fsh_min"] or values["fsh"] > thresholds["fsh_max"],
         "FSH levels are abnormal. Consult a doctor."),
        (values["lh"] > thresholds["lh_max"],
         "LH levels are high, which may indicate hormonal imbalance."),
        (values["fsh_lh_ratio"] < thresholds["fsh_lh_ratio_min"],
         "FSH_LH ratio is below 0.5, which may indicate PCOS."),
        (values["amh"] > thresholds["amh_max"],
         "AMH is above normal, indicating potential ovarian issues."),
        (values["testosterone"] > thresholds["testosterone_max"],
         "Testosterone is above normal, indicating hyperandrogenism."),
        (values["insulin"] > thresholds["insulin_max"],
         "Insulin levels are high, indicating insulin resistance."),
    ]
    # Thresholds that were defined but previously never checked. They produce
    # a warning only and deliberately do NOT change the PCOS counter.
    informational_checks = [
        (values["age"] < thresholds["age_min"] or values["age"] > thresholds["age_max"],
         "Age is outside the expected 18-45 range."),
        (values["tsh"] > thresholds["tsh_max"],
         "TSH is above 4.5, indicating potential thyroid issues."),
        (values["prl"] > thresholds["prl_max"],
         "PRL is above 20, indicating high prolactin levels."),
        (values["glucose"] > thresholds["glucose_max"],
         "Glucose is above 125, indicating hyperglycemia."),
    ]

    messages = [message for triggered, message in scoring_checks if triggered]
    pcos_likelihood = len(messages)
    messages.extend(message for triggered, message in informational_checks if triggered)
    return messages, pcos_likelihood


def predict_pcos_with_image_and_medical_logic(model, scaler, img_size=(224, 224)):
    """
    Predict PCOS outcome using both image and clinical data, with cross-verification
    and warnings based on medical value thresholds.

    The function is interactive: it prompts for 17 clinical values (re-asking
    when a reply is not a valid number / not 0 or 1), prints threshold-based
    warnings, prompts for an image path, runs the model on the image plus the
    scaled clinical values, and prints the prediction, any disagreement between
    the rule-based count and the model, and a final recommendation.

    Note that the line printed as "Image-Based Prediction" is the output of the
    hybrid model, which receives both the image and the clinical values.

    Args:
        model (tf.keras.Model): Trained hybrid model.
        scaler (StandardScaler): Scaler for clinical data normalization.
        img_size (tuple): Target size for resizing images.

    Returns:
        None
    """
    # Clinical thresholds for warnings (based on medical references)
    thresholds = {
        "age_min": 18,
        "age_max": 45,
        "bmi_max": 25,  # Above 25 indicates overweight
        "cycle_length_max": 35,  # Longer cycles may indicate irregularity
        "fsh_min": 3.5,  # Below 3.5 can be low
        "fsh_max": 10.0,  # Above 10 can be high
        "lh_max": 15.0,  # Above 15 can be high
        "fsh_lh_ratio_min": 0.5,  # Below 0.5 can indicate imbalance
        "amh_max": 6.8,  # Above 6.8 indicates ovarian reserve issues
        "tsh_max": 4.5,  # Above 4.5 indicates thyroid issues
        "prl_max": 20.0,  # Above 20 indicates high prolactin levels
        "testosterone_max": 70.0,  # Above 70 indicates hyperandrogenism
        "glucose_max": 125.0,  # Above 125 indicates hyperglycemia
        "insulin_max": 25.0,  # Above 25 indicates insulin resistance
    }

    # (key, prompt, reader) in the exact column order the scaler/model were fitted on
    fields = [
        ("age", "Age (e.g., 25-30): ", _prompt_float),
        ("bmi", "BMI (e.g., 20-30): ", _prompt_float),
        ("cycle_length", "Cycle Length (e.g., 28-40): ", _prompt_float),
        ("weight_gain", "Weight Gain (1 for Yes, 0 for No): ", _prompt_binary),
        ("hair_growth", "Hair Growth (1 for Yes, 0 for No): ", _prompt_binary),
        ("skin_darkening", "Skin Darkening (1 for Yes, 0 for No): ", _prompt_binary),
        ("hair_loss", "Hair Loss (1 for Yes, 0 for No): ", _prompt_binary),
        ("acne", "Acne (1 for Yes, 0 for No): ", _prompt_binary),
        ("fsh", "FSH (e.g., 5-10): ", _prompt_float),
        ("lh", "LH (e.g., 10-15): ", _prompt_float),
        ("fsh_lh_ratio", "FSH_LH Ratio (e.g., 0.5-1.0): ", _prompt_float),
        ("tsh", "TSH (e.g., 2.0-3.0): ", _prompt_float),
        ("amh", "AMH (e.g., 4-10): ", _prompt_float),
        ("prl", "PRL (e.g., 10-20): ", _prompt_float),
        ("testosterone", "Testosterone (e.g., 50-80): ", _prompt_float),
        ("glucose", "Glucose (e.g., 90-120): ", _prompt_float),
        ("insulin", "Insulin (e.g., 10-25): ", _prompt_float),
    ]

    # Collect clinical data inputs from the user
    print(".....Initializing Polycystic Ovary Syndrome Analysis.....")
    print("Enter the following clinical data:")
    values = {key: reader(prompt) for key, prompt, reader in fields}

    # Clinical data as a single test sample
    clinical_data_point = [values[key] for key, _, _ in fields]

    # Medical logic for warnings
    warning_messages, pcos_likelihood = _assess_clinical_risk(values, thresholds)

    # Display warnings if any
    if warning_messages:
        print("\n*** Medical Warnings ***")
        for message in warning_messages:
            print(f"- {message}")
        print("Please consult a healthcare provider for further evaluation.\n")

    # Get image location from the user (surrounding whitespace is dropped)
    image_path = input("Enter the path to the test image (e.g., /content/image.jpg): ").strip()

    # Process the image
    img = load_img(image_path, target_size=img_size)
    img_array = img_to_array(img) / 255.0  # Normalize the image
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

    # Scale clinical data
    clinical_data_point = np.array([clinical_data_point])  # Add batch dimension
    clinical_data_point = scaler.transform(clinical_data_point)

    # Perform prediction using the model
    prediction = model.predict([img_array, clinical_data_point])
    predicted_class = int(prediction[0, 0] > 0.5)  # Extract scalar value explicitly
    prediction_probability = float(prediction[0, 0])  # Extract scalar value explicitly

    # Cross-verify predictions
    print("\n*** Prediction Results ***")
    print(f"Image-Based Prediction: {'PCOS-Positive' if predicted_class == 1 else 'PCOS-Negative'}")
    print(f"Prediction Probability: {prediction_probability:.4f}")

    if pcos_likelihood >= 4 and predicted_class == 0:
        print("\n*** Inconsistent Results ***")
        print("Clinical data suggests a high likelihood of PCOS, but the image-based model predicts otherwise.")
        print("This may indicate that the condition is not visually evident, or further testing is needed.")
    elif pcos_likelihood < 4 and predicted_class == 1:
        print("\n*** Inconsistent Results ***")
        print("The image-based model predicts PCOS, but clinical data indicates a low likelihood.")
        print("Consider consulting a healthcare provider for further evaluation.")

    print("\n*** Final Recommendation ***")
    if pcos_likelihood >= 4 or predicted_class == 1:
        print("There are indications of PCOS. Please consult a healthcare provider for confirmation.")
    else:
        print("No significant indications of PCOS detected. Maintain regular health check-ups.")

# Example usage
# Running this cell starts the interactive prompt sequence immediately.
# `model` and `scaler` must already exist in the kernel; in this notebook they are
# the hybrid model trained above and the StandardScaler fitted on the training
# clinical features. Replace them if you load a saved model/scaler instead.
predict_pcos_with_image_and_medical_logic(model=model, scaler=scaler)


.....Initializing Polycystic Ovary Syndrome Analysis.....
Enter the following clinical data:
Age (e.g., 25-30): 26
BMI (e.g., 20-30): 24
Cycle Length (e.g., 28-40): 32
Weight Gain (1 for Yes, 0 for No): 0
Hair Growth (1 for Yes, 0 for No): 1
Skin Darkening (1 for Yes, 0 for No): 1
Hair Loss (1 for Yes, 0 for No): 0
Acne (1 for Yes, 0 for No): 1
FSH (e.g., 5-10): 9
LH (e.g., 10-15): 12
FSH_LH Ratio (e.g., 0.5-1.0): 08
TSH (e.g., 2.0-3.0): 2.5
AMH (e.g., 4-10): 6
PRL (e.g., 10-20): 15
Testosterone (e.g., 50-80): 75
Glucose (e.g., 90-120): 110
Insulin (e.g., 10-25): 13

- Testosterone is above normal, indicating hyperandrogenism.
Please consult a healthcare provider for further evaluation.

Enter the path to the test image (e.g., /content/image.jpg): /content/PCOS/non_infected/img_0_1022.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step

*** Prediction Results ***
Image-Based Prediction: PCOS-Negative
Prediction Probability: 0.0000

*** Final Recommendation ***
N