In [None]:
# Mount Google Drive at /content/drive so the paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Root folder of the AneRBC-I dataset on the mounted Drive; later cells build their paths from it.
DATASET_PATH = "/content/drive/MyDrive/AneRBC-I"


In [33]:
import os

# Print a two-level listing of the dataset root: every top-level entry,
# followed (for directories only) by that directory's immediate children.
print("Dataset Structure:")
for folder in os.listdir(DATASET_PATH):
    print(f"- {folder}")
    sub_folder_path = os.path.join(DATASET_PATH, folder)
    if not os.path.isdir(sub_folder_path):
        continue  # plain files have nothing to expand
    for sub_folder in os.listdir(sub_folder_path):
        print(f"  - {sub_folder}")


Dataset Structure:
- .DS_Store
- Healthy_individuals
  - .DS_Store
  - Binary_segmented
  - Morphology_reports
  - CBC_reports
  - Original_images
  - RGB_segmented
- Anemic_individuals
  - .DS_Store
  - CBC_reports
  - Original_images
  - Binary_segmented
  - Morphology_reports
  - RGB_segmented
- combined_cbc_reports_output.csv
- train
  - No_Anemia
  - Mild_Anemia
  - Moderate_Anemia
  - Severe_Anemia
- anemic_cbc_reports.csv


In [None]:
import cv2
import numpy as np
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [42]:
import os
import pandas as pd
import re

# Define folder path for anemic individuals' CBC reports
folder_path = "/content/drive/MyDrive/AneRBC-I/Anemic_individuals/CBC_reports"

# List all TXT report files in the folder. The "_a" test is case-sensitive,
# so only names containing a lower-case "_a" (e.g. "058_a.txt") are kept.
txt_files = [f for f in os.listdir(folder_path) if f.endswith(".txt") and "_a" in f]
print(f"Found {len(txt_files)} anemic CBC report files.")

# One dict per report; converted to a DataFrame in a single step below
cbc_data_list = []

# First run of digits/dots in a result field (e.g. "6.44" in "6.44 x10.e 3/μl")
value_pattern = re.compile(r"([\d.]+)")

# Columns to extract
required_columns = ["Image_Path", "WBC", "RBC", "HGB", "HCT", "MCV",
                    "MCH", "MCHC", "RDW", "PLT", "MPV"]

# Test names looked up in the reports: every output column except the
# filename column, so a report line can never overwrite "Image_Path".
test_names = set(required_columns) - {"Image_Path"}

# NOTE(review): in the saved output RDW is empty for every displayed row.
# Presumably the reports label that test differently from "RDW" — verify
# against a raw report file before relying on this column.

# Read each file
for txt_file in txt_files:
    file_path = os.path.join(folder_path, txt_file)

    # Decode explicitly instead of relying on the platform default codec: the
    # result fields can carry non-ASCII unit text (the example above has "μ").
    # Undecodable bytes are replaced rather than raised, because only the
    # leading numeric part of each value is kept.
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        lines = file.readlines()

    # Swap only the extension (not every ".txt" occurrence in the name) to get
    # the matching image filename.
    patient_data = {"Image_Path": os.path.splitext(txt_file)[0] + ".png"}

    for line in lines:
        parts = line.strip().split(",")  # CSV-like structure
        if len(parts) < 2:  # Not a "test,result" line
            continue

        test_name = parts[0].strip()  # Test name (e.g., "WBC", "RBC")
        if test_name not in test_names:
            continue

        result = parts[1].strip()  # Result value (e.g., "6.44 x10.e 3/μl")

        # Keep only the numerical part; tests without one stay missing (NaN)
        match = value_pattern.search(result)
        if match:
            patient_data[test_name] = match.group(1)

    # Add patient data to list
    cbc_data_list.append(patient_data)

# Convert to DataFrame with only required columns (absent tests become NaN)
cbc_df = pd.DataFrame(cbc_data_list, columns=required_columns)

# Save to CSV
csv_path = "/content/drive/MyDrive/AneRBC-I/anemic_cbc_reports.csv"
cbc_df.to_csv(csv_path, index=False)

print(f" Structured Anemic CBC data saved to {csv_path}")
cbc_df.head()  # Display first few rows


Found 527 anemic CBC report files.
✅ Structured Anemic CBC data saved to /content/drive/MyDrive/AneRBC-I/anemic_cbc_reports.csv


Unnamed: 0,Image_Path,WBC,RBC,HGB,HCT,MCV,MCH,MCHC,RDW,PLT,MPV
0,416_a.png,7.89,5.0,14.6,42.6,85.2,29.2,34.3,,246,12.0
1,058_a.png,7.27,4.84,8.2,28.9,59.7,16.9,28.4,,425,
2,149_a.png,10.18,4.85,11.6,38.5,79.4,23.9,30.1,,388,9.4
3,116_a.png,5.91,4.81,9.4,29.9,62.2,19.5,31.4,,248,
4,425_a.png,7.61,5.92,16.3,54.4,91.9,27.5,30.0,,219,11.7


Step 1: Load & Preprocess Images

In [10]:
import os

# Sub-folders (relative to DATASET_PATH) that hold the RBC images
classes = [
    "Healthy_individuals/RGB_segmented",
    "Anemic_individuals/RGB_segmented",
]

# Report how many .png/.jpg/.jpeg files each class folder contains
for cls in classes:
    folder_path = os.path.join(DATASET_PATH, cls)
    num_images = sum(
        1
        for f in os.listdir(folder_path)
        if f.endswith(('.png', '.jpg', '.jpeg'))
    )
    print(f"{cls}: {num_images} images")


Healthy_individuals/RGB_segmented: 500 images
Anemic_individuals/RGB_segmented: 510 images


In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

# Base dataset location plus the two class folders the loader reads from.
DATASET_PATH = "/content/drive/MyDrive/AneRBC-I"  # Assuming this is your base path
healthy_path, anemic_path = (
    os.path.join(DATASET_PATH, f"{group}/RGB_segmented/")
    for group in ("Healthy_individuals", "Anemic_individuals")
)

# Load images and labels
def load_images_from_folder(folder, label):
    """Read every decodable image in ``folder`` and pair it with ``label``.

    Each image is resized to 224x224 and divided by 255.0. Directory entries
    for which ``cv2.imread`` returns ``None`` are skipped silently.

    Args:
        folder: Directory whose entries are passed to ``cv2.imread``.
        label: Value appended to the label list once per loaded image.

    Returns:
        Tuple ``(images, labels)`` of two equal-length lists.
    """
    images, labels = [], []
    for entry in os.listdir(folder):
        pixels = cv2.imread(os.path.join(folder, entry))
        if pixels is None:
            continue  # not an image OpenCV could read
        images.append(cv2.resize(pixels, (224, 224)) / 255.0)
        labels.append(label)
    return images, labels

# Class encoding: 0 = healthy, 1 = anemic
healthy_images, healthy_labels = load_images_from_folder(healthy_path, label=0)
anemic_images, anemic_labels = load_images_from_folder(anemic_path, label=1)

# Stack both classes (healthy first) into one image array and one label array
X = np.array([*healthy_images, *anemic_images])
y = np.array([*healthy_labels, *anemic_labels])

# Hold out 20% for validation. Stratifying on y keeps the class ratio in both
# parts; the fixed random_state makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print(f" Loaded {len(X_train)} train images and {len(X_val)} validation images.")

✅ Loaded 808 train images and 202 validation images.


In [16]:
from tensorflow import keras
import cv2
import numpy as np

# Load the saved model
# NOTE: this rebinds the notebook-level `model` that the prediction cells call.
model = keras.models.load_model("/content/drive/MyDrive/anemia_classifier.h5")

print(" Model loaded successfully!")




 Model loaded successfully!


In [17]:
def preprocess_image(image_path):
    """Load an image file and turn it into a one-item batch for the model.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The image resized to 224x224, divided by 255.0, with a leading batch
        axis of length 1 added.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)  # Read the image
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; without this check cv2.resize fails with an
        # error that does not mention the path.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.resize(img, (224, 224))  # Resize to match model input size
    img = img / 255.0  # Normalize pixel values
    img = np.expand_dims(img, axis=0)  # Expand dimensions for model input
    return img


In [21]:
def classify_image(image_path):
    """Print whether the loaded model considers the image anemic or healthy.

    The image goes through ``preprocess_image`` and the notebook-level
    ``model``; a score of 0.5 or more is reported as anemic, anything lower
    as healthy. Nothing is returned.

    Args:
        image_path: Path of the image to classify.
    """
    score = model.predict(preprocess_image(image_path))[0][0]
    verdict = (
        " The image is classified as Anemic."
        if score >= 0.5
        else "The image is classified as Healthy."
    )
    print(verdict)

# Example usage:
# NOTE: `image_path` is a notebook-level variable, so it stays set for later cells.
image_path = "/content/008_a.png"  # Replace with your image path
classify_image(image_path)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
 The image is classified as Anemic.


In [23]:
# Score the whole validation split with whichever `model` is currently loaded.
# NOTE(review): the ten scores printed below are all ~0.5015, i.e. practically
# the same for every image — presumably the loaded checkpoint does not separate
# the classes; confirm before relying on it.
predictions = model.predict(X_val)  # Get probability scores
print(predictions[:10])  # Print first 10 predicted probabilities


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 836ms/step
[[0.501469  ]
 [0.5014688 ]
 [0.5014776 ]
 [0.5014703 ]
 [0.5014688 ]
 [0.5014688 ]
 [0.5014688 ]
 [0.5014719 ]
 [0.5014688 ]
 [0.50146925]]


In [25]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# ImageNet-pretrained MobileNetV2 backbone with its classification head removed
base_model = MobileNetV2(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Freeze every backbone layer so training only updates the new head
for layer in base_model.layers:
    layer.trainable = False

# NOTE(review): Keras documents MobileNetV2 as expecting inputs scaled to
# [-1, 1] (mobilenet_v2.preprocess_input), while the images fed here were
# divided by 255. Left as-is because the inference helper uses the same /255
# scaling — change both together if this is revisited.

# New head: flatten -> 128-unit ReLU -> 30% dropout -> single sigmoid unit
# (binary output: healthy vs anemic)
x = base_model.output
for head_layer in (
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
):
    x = head_layer(x)

# Full network from backbone input to sigmoid output
model = Model(inputs=base_model.input, outputs=x)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# 20 epochs in batches of 16, evaluated on the validation split after each epoch
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=16,
)

# Persist the trained network to Drive
model.save("/content/drive/MyDrive/anemia_classifier_mobilenet.h5")
print(" Pre-Trained Model Saved Successfully!")


Epoch 1/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 992ms/step - accuracy: 0.5610 - loss: 5.5145 - val_accuracy: 0.6931 - val_loss: 0.5784
Epoch 2/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 924ms/step - accuracy: 0.7292 - loss: 0.5369 - val_accuracy: 0.7723 - val_loss: 0.5627
Epoch 3/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 945ms/step - accuracy: 0.7532 - loss: 0.5579 - val_accuracy: 0.7574 - val_loss: 0.4608
Epoch 4/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 1s/step - accuracy: 0.7814 - loss: 0.4319 - val_accuracy: 0.7574 - val_loss: 0.4512
Epoch 5/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 1s/step - accuracy: 0.7919 - loss: 0.4215 - val_accuracy: 0.7970 - val_loss: 0.4733
Epoch 6/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 950ms/step - accuracy: 0.8450 - loss: 0.3459 - val_accuracy: 0.8119 - val_loss: 0.4084
Epoch 7/20
[1m51/51[0m [3



 Pre-Trained Model Saved Successfully!


In [27]:
from tensorflow.keras.models import load_model

# Load the saved model
# Reads back the file written by the MobileNetV2 training cell and rebinds the
# notebook-level `model` used by the prediction helpers.
model = load_model("/content/drive/MyDrive/anemia_classifier_mobilenet.h5")

print(" Model loaded successfully!")




 Model loaded successfully!


In [28]:
import cv2
import numpy as np

def preprocess_image(image_path):
    """Load an image file and turn it into a one-item batch for the model.

    This redefines the helper of the same name from an earlier cell.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The image resized to 224x224, divided by 255.0, with a leading batch
        axis of length 1 added.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)  # Read image
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; without this check cv2.resize fails with an
        # error that does not mention the path.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.resize(img, (224, 224))  # Resize to match model input
    img = img / 255.0  # Normalize pixel values
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    return img


In [31]:
def predict_anemia(image_path):
    """Classify one image with the notebook-level ``model``.

    Args:
        image_path: Path of the image; it is passed to ``preprocess_image``.

    Returns:
        Tuple ``(result, prediction)``. ``result`` is "Anemic " when the
        model's score is strictly above 0.5 and "Healthy " otherwise.
        ``prediction`` is that raw score, so for a "Healthy " result it is
        at most 0.5.
    """
    prediction = model.predict(preprocess_image(image_path))[0][0]
    result = "Anemic " if prediction > 0.5 else "Healthy "
    return result, prediction

# Example usage:
image_path = "/content/008_a.png"  # Change to your test image path
result, confidence = predict_anemia(image_path)
# NOTE: `confidence` is the model's raw output score, so for a "Healthy "
# result the printed number is at most 0.5 rather than a confidence in that label.
print(f"Prediction: {result} (Confidence: {confidence:.4f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
Prediction: Anemic  (Confidence: 0.9952)


In [43]:
import pandas as pd
import numpy as np

# Read the structured CBC table produced earlier in the notebook
csv_path = "/content/drive/MyDrive/AneRBC-I/anemic_cbc_reports.csv"
cbc_data = pd.read_csv(csv_path)

# Only numeric columns take part in the imputation
numeric_columns = cbc_data.select_dtypes(include=np.number).columns

# Replace each missing numeric value with its column mean. A column with no
# values at all has a NaN mean and therefore stays NaN.
column_means = cbc_data[numeric_columns].mean()
cbc_data[numeric_columns] = cbc_data[numeric_columns].fillna(column_means)

In [48]:
# Show the current contents of `cbc_data` as plain text.
print(cbc_data)

    Image_Path    WBC   RBC        HGB   HCT   MCV   MCH  MCHC  RDW     PLT  \
0    416_a.png   7.89  5.00  14.600000  42.6  85.2  29.2  34.3  NaN   246.0   
1    058_a.png   7.27  4.84   8.200000  28.9  59.7  16.9  28.4  NaN   425.0   
2    149_a.png  10.18  4.85  11.600000  38.5  79.4  23.9  30.1  NaN   388.0   
3    116_a.png   5.91  4.81   9.400000  29.9  62.2  19.5  31.4  NaN   248.0   
4    425_a.png   7.61  5.92  16.300000  54.4  91.9  27.5  30.0  NaN   219.0   
..         ...    ...   ...        ...   ...   ...   ...   ...  ...     ...   
522  248_a.png  10.74  4.66  10.600000  33.9  72.7  22.7  31.3  NaN   429.0   
523  302_a.png   9.89  3.42  10.285117  21.5  62.9  16.4  26.0  NaN  1503.0   
524  283_a.png   1.74  2.80   6.100000  20.3  72.5  21.8  30.0  NaN    32.0   
525  326_a.png  14.56  4.17  10.285117  30.6  73.4  20.1  27.5  NaN   344.0   
526  343_a.png  11.67  4.33  10.285117  23.7  54.7  15.9  29.1  NaN   489.0   

           MPV  
0    12.000000  
1    12.358368  


In [56]:
import pandas as pd
import os

# Read the structured CBC table
csv_path = "/content/drive/MyDrive/AneRBC-I/anemic_cbc_reports.csv"
cbc_data = pd.read_csv(csv_path)

# Normalise the filename column to stripped strings so the equality match below is exact
cbc_data["Image_Path"] = cbc_data["Image_Path"].astype(str).str.strip()

# Path of the image passed to the pretrained model
full_image_path = "/content/test_images/058_a.png"  # Example, replace dynamically

# The CSV stores bare filenames, so drop the directory part
uploaded_image_name = os.path.basename(full_image_path)  # Extracts '058_a.png'

# Rows whose filename equals the uploaded image's name
cbc_row = cbc_data.loc[cbc_data["Image_Path"] == uploaded_image_name]

# Check if CBC report is found
if cbc_row.empty:
    print(f" CBC report not found for {uploaded_image_name}.")
else:
    print(f" CBC report found for {uploaded_image_name}:\n", cbc_row)

    # Take the first matching row's value for each CBC field the classifier uses.
    # NOTE: `plt` here is the platelet count; it would shadow a
    # `matplotlib.pyplot as plt` alias if one were imported in this kernel.
    hgb, hct, mcv, mch, mchc, rdw, rbc, plt = (
        cbc_row[field].values[0]
        for field in ("HGB", "HCT", "MCV", "MCH", "MCHC", "RDW", "RBC", "PLT")
    )

    # Function to classify anemia severity
    def classify_anemia(hgb, hct, mcv, mch, mchc, rdw, rbc, plt):
        """Grade anemia severity from CBC values using fixed thresholds.

        The rules are tried in the order below and the first match wins.
        Severe is tested before Moderate because the Moderate rule's
        secondary clause (low HCT, MCV and RBC) also matches rows with
        HGB below 8, which would otherwise be reported as only moderate.

        A NaN input makes every comparison involving it false, so clauses
        that depend on a missing value simply do not match.

        Args:
            hgb: Hemoglobin value.
            hct: Hematocrit value.
            mcv: Mean corpuscular volume.
            mch: Accepted for call compatibility; not used by any rule.
            mchc: Accepted for call compatibility; not used by any rule.
            rdw: Red cell distribution width.
            rbc: Red blood cell count.
            plt: Platelet count.

        Returns:
            One of "No Anemia", "Mild Anemia", "Severe Anemia",
            "Moderate Anemia", or "Unknown" when no rule matches.
        """
        # 1. No Anemia
        if hgb >= 12 and hct >= 36 and 80 <= mcv <= 100:
            return "No Anemia"

        # 2. Mild Anemia
        if 10 <= hgb < 12 and hct < 36:
            return "Mild Anemia"

        # 3. Severe Anemia (checked before Moderate, see docstring)
        if hgb < 8 or (hct < 25 and rdw > 15 and plt < 100):
            return "Severe Anemia"

        # 4. Moderate Anemia
        if 8 <= hgb < 10 or (hct < 30 and mcv < 80 and rbc < 4.0):
            return "Moderate Anemia"

        # No rule matched (e.g. missing HGB, or values outside every range)
        return "Unknown"

    # Grade the CBC values extracted above and report the result
    anemia_severity = classify_anemia(hgb, hct, mcv, mch, mchc, rdw, rbc, plt)
    print()

    print(f" Predicted Anemia Severity for {uploaded_image_name}: {anemia_severity}")


 CBC report found for 058_a.png:
   Image_Path   WBC   RBC  HGB   HCT   MCV   MCH  MCHC  RDW    PLT  MPV
1  058_a.png  7.27  4.84  8.2  28.9  59.7  16.9  28.4  NaN  425.0  NaN

 Predicted Anemia Severity for 058_a.png: Moderate Anemia
