In [1]:
from torch import cuda
assert cuda.is_available()
assert cuda.device_count() > 0
print(cuda.get_device_name(cuda.current_device()))

NVIDIA GeForce RTX 3060


In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
import cv2  # OpenCV for image processing
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.saving import load_model

import warnings
warnings.filterwarnings("ignore")





In [3]:
import os
import pandas as pd

# Path to the main folder
main_folder = "./haemoglobin color level"

# Initialize lists to store image paths and labels
image_paths = []
labels = []

# Iterate through each step folder
for step_folder in range(7, 18): 
    step_path = os.path.join(main_folder, f"step {step_folder}")

    # Extract label from the step folder
    label = float(step_folder)  # Convert step folder number to float for the label
    labels.extend([label] * len(os.listdir(step_path)))  # Repeat the label for each image in the step folder

    # Iterate through images in each step folder
    for image_file in os.listdir(step_path):
        if image_file.endswith(".jpg") or image_file.endswith(".png"):
            # Construct the full path to the image
            image_path = os.path.join(step_path, image_file)
            image_paths.append(image_path)

# Create a DataFrame
df = pd.DataFrame({"Image_Path": image_paths, "Label": labels})

# Display the DataFrame
print(df)


                                            Image_Path  Label
0    ./haemoglobin color level\step 7\1705584266560...    7.0
1    ./haemoglobin color level\step 7\1705584266570...    7.0
2    ./haemoglobin color level\step 7\1705584266579...    7.0
3    ./haemoglobin color level\step 7\1705584266586...    7.0
4    ./haemoglobin color level\step 7\1705584266595...    7.0
..                                                 ...    ...
105    ./haemoglobin color level\step 17\Picture57.png   17.0
106    ./haemoglobin color level\step 17\Picture58.png   17.0
107    ./haemoglobin color level\step 17\Picture59.png   17.0
108    ./haemoglobin color level\step 17\Picture60.png   17.0
109    ./haemoglobin color level\step 17\Picture61.png   17.0

[110 rows x 2 columns]


In [4]:
# Initialize empty lists to store images and labels
images = []
labels = []

In [5]:
# # Loop through the image files in the folder
# for folder in os.listdir(data_folder):
#     for file in folder:
#         if file.endswith('.jpg'):
#             # Load and preprocess the image
#             img = cv2.imread(os.path.join(data_folder/{folder}/{file}))
#             img = cv2.resize(img, (150, 150))  # Resize to a consistent size
#             img = img / 255.0  # Normalize pixel values to [0, 1]
            
#             # Extract the label from the filename (assuming the filename is in a specific format)
#             label = float(file.split('_')[0])  # Modify this line based on your filename format
            
#             # Append the image and label to the lists
#             images.append(img)
#             labels.append(label)

In [8]:
from PIL import Image
import numpy as np
from tqdm import tqdm  # Optional: for progress bar
from sklearn.model_selection import train_test_split

# Read the DataFrame
# df = ...  # Use the DataFrame you generated in the previous steps

# Define the target size for resizing the images
target_size = (224, 224)

# Define a function for image preprocessing
def preprocess_image(image_path, target_size=target_size):
    # Open the image
    img = Image.open(image_path)

    # Resize or crop the image to the target size
    img = img.resize(target_size)

    # Convert the image to a NumPy array
    img_array = np.array(img)

    # Normalize pixel values to be between 0 and 1
    img_array = img_array / 255.0

    return img_array

# Apply the preprocessing function to each image in the DataFrame
image_data = []
max_shape = (0, 0, 0)  # Initialize with a dummy shape

for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing Images"):
    image_path = row["Image_Path"]
    label = row["Label"]

    # Preprocess the image
    processed_image = preprocess_image(image_path)

    # Update max_shape if the current image has a larger shape
    max_shape = max(max_shape, processed_image.shape)

    # Append the image data and label to the list
    image_data.append((processed_image, label))

print(max_shape)
# Ensure all images have the same shape by resizing them to max_shape
X_resized = np.array([np.resize(item[0], max_shape) for item in image_data])
y = np.array([item[1] for item in image_data])

# Optionally, split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resized, y, test_size=0.2, random_state=42)

# Display the shapes of the arrays
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

Processing Images: 100%|██████████| 110/110 [00:00<00:00, 490.52it/s]


(224, 224, 4)
X_train shape: (88, 224, 224, 4)
y_train shape: (88,)
X_test shape: (22, 224, 224, 4)
y_test shape: (22,)


In [7]:
# X_resized

In [8]:
y

array([ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  8.,  8.,  8.,
        8.,  8.,  8.,  8.,  8.,  8.,  8.,  9.,  9.,  9.,  9.,  9.,  9.,
        9.,  9.,  9.,  9., 10., 10., 10., 10., 10., 10., 10., 10., 10.,
       10., 11., 11., 11., 11., 11., 11., 11., 11., 11., 11., 12., 12.,
       12., 12., 12., 12., 12., 12., 12., 12., 13., 13., 13., 13., 13.,
       13., 13., 13., 13., 13., 14., 14., 14., 14., 14., 14., 14., 14.,
       14., 14., 15., 15., 15., 15., 15., 15., 15., 15., 15., 15., 16.,
       16., 16., 16., 16., 16., 16., 16., 16., 16., 17., 17., 17., 17.,
       17., 17., 17., 17., 17., 17.])

In [13]:
# Define the CNN model
model = keras.Sequential([
    # Convolutional layers
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 4)),
    layers.MaxPooling2D((2, 2)),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    
    layers.Flatten(),
    
    # Dense layers
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    
    # Output layer (1 neuron for regression)
    layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Display model summary
model.summary()





Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      1184      
                                                                 
 max_pooling2d (MaxPooling2  (None, 111, 111, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 54, 54, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 26, 26, 128)     

In [14]:
# 1. ModelCheckpoint: Save the best model during training based on validation loss
model_checkpoint = ModelCheckpoint(
    'best_model2.h5', save_best_only=True, monitor='val_loss', mode='min')

# 2. EarlyStopping: Stop training if validation loss doesn't improve for a certain number of epochs
early_stopping = EarlyStopping(
    patience=10, monitor='val_loss', mode='min')

In [15]:
# # Convert lists to NumPy arrays
# images = np.array(images)
# labels = np.array(labels)

In [16]:
#sns.heatmap(images[0])

In [17]:
# Split the data into training, validation, and test sets
# train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.2, random_state=42)

In [18]:
# Train the model
history = model.fit(
    X_train, y_train,
    epochs=100,  # Adjust the number of epochs as needed
    callbacks=[model_checkpoint, early_stopping],  # Pass the defined callbacks here
)

# Evaluate the model
# test_loss, test_mae = model.evaluate(test_images, test_labels)
# print(f"Test MSE: {test_loss}, Test MAE: {test_mae}")

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 

In [20]:
# model.save("model2.h5")

In [21]:
# Evaluate the model
test_loss, test_mae = model.evaluate(X_test , y_test)
print(f"Test MSE: {test_loss}, Test MAE: {test_mae}")

Test MSE: 0.5178684592247009, Test MAE: 0.565223217010498


In [9]:
model = load_model('model2.h5')





In [10]:
from sklearn.metrics import r2_score

# Assuming you have made predictions using your model, e.g., using model.predict() on val_images
predictions = model.predict(X_test)

# Calculate accuracy
accuracy = r2_score(y_test, predictions)

print(accuracy)


0.9455585739929776


In [11]:
y_test

array([14.,  8.,  7., 15., 13., 13., 10., 11., 16.,  8., 14., 15.,  7.,
       15.,  8., 14., 12., 14., 17., 11.,  8., 10.])

In [12]:
predictions

array([[13.958896 ],
       [ 8.358957 ],
       [ 8.229252 ],
       [15.889011 ],
       [13.773613 ],
       [13.805802 ],
       [10.3189335],
       [10.685987 ],
       [16.261082 ],
       [ 8.19717  ],
       [14.118088 ],
       [15.047202 ],
       [ 8.625523 ],
       [15.561057 ],
       [ 8.236357 ],
       [13.504295 ],
       [12.60601  ],
       [13.89983  ],
       [17.687166 ],
       [ 9.766571 ],
       [ 8.228286 ],
       [11.30698  ]], dtype=float32)

In [13]:
# Ensure both arrays are 1-dimensional
y_test = np.array(y_test).flatten()
predictions = np.array(predictions).flatten()

# Create a dictionary for DataFrame
data = {"Actual": y_test, "Predictions": predictions}

# Create the DataFrame
prediction_df = pd.DataFrame(data)

# Display the DataFrame
prediction_df

Unnamed: 0,Actual,Predictions
0,14.0,13.958896
1,8.0,8.358957
2,7.0,8.229252
3,15.0,15.889011
4,13.0,13.773613
5,13.0,13.805802
6,10.0,10.318933
7,11.0,10.685987
8,16.0,16.261082
9,8.0,8.19717
