DRIVE INTEGRATION

This algorithm was developed on Google Colaboratory so that it runs on an online GPU system, showing that it can be used by anyone around the world who wants to apply it in their own laboratory. Therefore, Google Drive is mounted as the first step. In the following steps, some parts of the code may need to be changed if you want to run it on a local machine. To run this algorithm locally, please change the file paths and the Colab-specific imports.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

COMPUTER VISION BASED ROI SEGMENTATION

In [None]:
!pip install ultralytics
import os
import cv2
import torch
from ultralytics import YOLO
import matplotlib.pyplot as plt

# Load YOLOv8 model
model = YOLO('/content/drive/MyDrive/visual titration/v2/yolo/data/runs3/weights/best.pt')

# Folder containing images
image_folder = '/content/drive/MyDrive/visual titration/v8/12122024_newdataset'
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]

# Folder to save cropped images after segmentation
output_folder = '/content/drive/MyDrive/visual titration/v8/data/segmented_12122024_newdataset'
os.makedirs(output_folder, exist_ok=True)  # Create the folder if it doesn't exist

# Run the model on every image and save one crop per detected bounding box
for image_file in image_files:
    image_path = os.path.join(image_folder, image_file)

    # cv2.imread signals an unreadable file by returning None instead of raising,
    # so check explicitly before handing the image to the model.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_file}")
        continue

    # Perform segmentation using YOLOv8 model
    results = model.predict(image)
    print(f"Segmentation Results - {image_file}:")

    base_name = os.path.splitext(image_file)[0]

    for result in results:
        if result.boxes is None:
            continue

        saved_any = False
        for i, box in enumerate(result.boxes.xyxy):
            x1, y1, x2, y2 = map(int, box)  # Bounding box corners as integer pixel indices
            cropped_img = image[y1:y2, x1:x2]

            # A box that collapses to zero width/height after int truncation yields
            # an empty array, which cv2.imwrite cannot encode.
            if cropped_img.size == 0:
                print(f"Skipped empty crop {i} for {image_file}")
                continue

            cropped_filename = os.path.join(output_folder, f"{base_name}_crop_{i}.jpg")
            cv2.imwrite(cropped_filename, cropped_img)
            saved_any = True

            print(f"Segmented area saved: {cropped_filename}")

        # The annotated preview does not depend on the individual box, so it is
        # rendered and displayed once per result rather than once per box.
        if saved_any:
            masked_image = result.plot()
            plt.imshow(cv2.cvtColor(masked_image, cv2.COLOR_BGR2RGB))
            plt.axis('off')  # Hide axes
            plt.title(f'Segmented Area - {image_file}')
            plt.show()

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

AVERAGE HSV VALUATION

In [None]:
import os
import cv2
import numpy as np

def get_min_max_hsv(image, mask=None):
    """
    Calculates the minimum and maximum HSV values of a specific region in the image (defined by a mask).

    :param image: Image in BGR format (it is converted with cv2.COLOR_BGR2HSV)
    :param mask: Optional 2-D array. Only pixels where ``mask > 0`` are analyzed;
                 when omitted, every pixel of the image is used.
    :return: HSV min and max values (h_min, s_min, v_min, h_max, s_max, v_max) as Python ints
    :raises ValueError: if no pixel is selected (e.g. the mask is all zeros)
    """
    # Convert the image to HSV format
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    if mask is not None:
        # Boolean indexing flattens the selected pixels to shape (n_selected, 3)
        hsv_values = hsv_image[mask > 0]
    else:
        # Use the entire image
        hsv_values = hsv_image.reshape((-1, 3))

    # Reducing an empty array fails with an opaque numpy error; fail early with
    # a message that points at the actual cause instead.
    if hsv_values.size == 0:
        raise ValueError("No pixels selected: the mask is empty or the image has no pixels.")

    # Column-wise reduction gives the (H, S, V) extremes in one pass each
    h_min, s_min, v_min = hsv_values.min(axis=0)
    h_max, s_max, v_max = hsv_values.max(axis=0)

    return int(h_min), int(s_min), int(v_min), int(h_max), int(s_max), int(v_max)

# Folder containing images (the crops written by the ROI segmentation cell)
image_folder = '/content/drive/MyDrive/visual titration/v8/data/segmented_12122024_newdataset'
# Only file names ending in lowercase '.jpg' are picked up
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]

# Perform HSV analysis for all images: print the min/max H, S and V of a masked region
for image_file in image_files:
    # Get the full path of the image file
    image_path = os.path.join(image_folder, image_file)

    # Load the image; cv2.imread returns None when the file cannot be read
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_file}")
        continue

    # Create a mask - Define the region of interest (example dimensions, adjust as needed)
    # The mask has the image's height and width and starts fully zero (nothing selected).
    mask = np.zeros(image.shape[:2], dtype=np.uint8)
    # NOTE: `***` is a redacted placeholder, not valid Python. Replace it with the
    # index/slice of the region to analyze before running this cell.
    mask[***] = 255  # Define the area of interest

    # Find Min and Max HSV values for the masked region
    h_min, s_min, v_min, h_max, s_max, v_max = get_min_max_hsv(image, mask)

    # Print Min and Max HSV values
    print(f"HSV Min Values - {image_file}: H = {h_min}, S = {s_min}, V = {v_min}")
    print(f"HSV Max Values - {image_file}: H = {h_max}, S = {s_max}, V = {v_max}")

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

HSV BASED SEGMENTATION OF ROI-REGION OF INTEREST

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from google.colab.patches import cv2_imshow

# Function: Calculate global HSV values from the entire image
def get_global_hsv_range(image):
    """Return the per-channel HSV extremes of a BGR image.

    The image is converted with cv2.COLOR_BGR2HSV and every pixel is considered.

    :param image: Image in BGR format
    :return: (h_min, s_min, v_min, h_max, s_max, v_max) as Python ints
    """
    # Flatten to one row per pixel so each column is one HSV channel
    pixels = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).reshape((-1, 3))
    lows = pixels.min(axis=0)
    highs = pixels.max(axis=0)
    return (
        int(lows[0]), int(lows[1]), int(lows[2]),
        int(highs[0]), int(highs[1]), int(highs[2]),
    )

# Function: Derive a narrower per-channel HSV range from the image's cumulative histograms
def adjust_hsv_range(image, h_min, s_min, v_min, h_max, s_max, v_max):
    """Compute new lower/upper HSV bounds from per-channel cumulative histograms.

    For each channel the lower bound is the first histogram bin at which the
    cumulative pixel count reaches ``total - int(total * 0.8)`` and the upper
    bound is the first bin at which it reaches ``int(total * 0.8)``.
    H uses 180 bins over [0, 180); S and V use 256 bins over [0, 256).

    :param image: Image in BGR format
    :param h_min, s_min, v_min, h_max, s_max, v_max: accepted for interface
        compatibility; the body does not read them
    :return: (h_min_new, s_min_new, v_min_new, h_max_new, s_max_new, v_max_new),
        each the integer bin index returned by np.searchsorted
    """
    pixels = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).reshape((-1, 3))

    pixel_count = pixels.shape[0]
    upper_count = int(pixel_count * 0.8)
    lower_count = pixel_count - upper_count

    def _channel_bounds(channel, n_bins):
        # Histogram with unit-wide bins, then locate both cumulative-count thresholds
        counts, _ = np.histogram(pixels[:, channel], bins=n_bins, range=(0, n_bins))
        running_total = np.cumsum(counts)
        low = np.searchsorted(running_total, lower_count)
        high = np.searchsorted(running_total, upper_count)
        return low, high

    h_low, h_high = _channel_bounds(0, 180)
    s_low, s_high = _channel_bounds(1, 256)
    v_low, v_high = _channel_bounds(2, 256)

    return h_low, s_low, v_low, h_high, s_high, v_high

# Folder containing images
image_folder = '/content/drive/MyDrive/visual titration/v8/data/segmented_12122024_newdataset'
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]

# Column layout of the Excel file. Rows are collected in a plain list and the
# DataFrame is built once after the loop: concatenating onto a DataFrame inside
# the loop copies all previous rows on every iteration.
result_columns = ['Image', 'H_min', 'S_min', 'V_min', 'H_max', 'S_max', 'V_max']
result_rows = []

# Folder to save masked images
output_folder = '/content/drive/MyDrive/visual titration/v8/data/masked_segmented_12122024_newdataset'
os.makedirs(output_folder, exist_ok=True)

# Perform HSV analysis for all images
for image_file in image_files:
    # Get the full path of the image file
    image_path = os.path.join(image_folder, image_file)

    # Load the image; cv2.imread returns None when the file cannot be read
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_file}")
        continue

    # Step 1: Find global HSV values
    h_min, s_min, v_min, h_max, s_max, v_max = get_global_hsv_range(image)

    # Step 2: Compute the narrowed HSV range from the cumulative histograms
    h_min_new, s_min_new, v_min_new, h_max_new, s_max_new, v_max_new = adjust_hsv_range(
        image, h_min, s_min, v_min, h_max, s_max, v_max
    )

    # Record the range that is used for masking this image
    result_rows.append({
        'Image': image_file,
        'H_min': h_min_new,
        'S_min': s_min_new,
        'V_min': v_min_new,
        'H_max': h_max_new,
        'S_max': s_max_new,
        'V_max': v_max_new,
    })

    # Mask the image based on the obtained HSV range
    lower = np.array([h_min_new, s_min_new, v_min_new])
    upper = np.array([h_max_new, s_max_new, v_max_new])
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mask_hsv = cv2.inRange(hsv_image, lower, upper)
    result = cv2.bitwise_and(image, image, mask=mask_hsv)

    # Save the masked image
    output_image_path = os.path.join(output_folder, f"masked_{image_file}")
    cv2.imwrite(output_image_path, result)

    # Display the masked image (optional)
    cv2_imshow(result)  # Using cv2_imshow instead of cv2.imshow

# Build the results table once; `columns` fixes the column order even when no
# image was processed.
df_results = pd.DataFrame(result_rows, columns=result_columns)

# Save results to an Excel file
output_excel_path = '/content/drive/MyDrive/visual titration/v8/data/hsvcodes_masked_segmented_12122024_newdataset.xlsx'
df_results.to_excel(output_excel_path, index=False)

print(f"HSV analysis results have been saved to the Excel file: {output_excel_path}")
print(f"Masked images have been saved in the folder: {output_folder}")

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

IMAGE PROCESSING AND SUMMARY

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Path to the Excel file containing HSV ranges
hsv_excel_path = '/content/drive/MyDrive/visual titration/v8/data/hsvcodes_masked_segmented_12122024_newdataset.xlsx'

# Load the HSV ranges from the Excel file
hsv_df = pd.read_excel(hsv_excel_path)

# Create a DataFrame to store the results (one row per successfully processed image)
df_results = pd.DataFrame(columns=['Image', 'Total_Pixels', 'Pixel_Percentage',
                                   'Mean_R', 'Std_R', 'Mean_G', 'Std_G', 'Mean_B', 'Std_B',
                                   'Mean_H', 'Std_H', 'Mean_S', 'Std_S', 'Mean_V', 'Std_V',
                                   'Mean_L', 'Std_L', 'Mean_A', 'Std_A', 'Mean_B_LAB', 'Std_B_LAB'])

# Folder containing the images
image_folder = '/content/drive/MyDrive/visual titration/v8/data/masked_segmented_12122024_newdataset'
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg') or f.endswith('.png')]

# Process each image: preprocess it, then record pixel coverage and per-channel
# mean/std in the BGR, HSV and LAB colour spaces.
# NOTE: the `***` tokens below are redacted placeholders, not valid Python; they
# must be replaced with concrete values before this cell can run.
for image_file in image_files:
    # Any exception raised for one image is printed and the loop moves on to the next image
    try:
        # Construct the full image path
        image_path = os.path.join(image_folder, image_file)

        # Load the image
        image = cv2.imread(image_path)

        # Skip if the image cannot be loaded
        if image is None:
            print(f"Error loading image: {image_file}")
            continue

        # Remove the 'masked_' prefix to match the Excel file
        # (str.replace removes every occurrence of 'masked_', not only a leading one)
        corresponding_image_name = image_file.replace('masked_', '')

        # Get the HSV ranges for the current image from the Excel file
        # NOTE(review): str.contains performs a substring match, so one name may match
        # several rows; only the first match is used below — confirm whether an exact
        # comparison is intended.
        hsv_row = hsv_df[hsv_df['Image'].str.contains(corresponding_image_name, na=False, case=False)]

        # Skip if no HSV data is found for the image
        if hsv_row.empty:
            print(f"No HSV data found for image: {image_file}")
            continue

        # Extract HSV ranges
        # NOTE(review): these six values are not read again anywhere in this cell.
        h_min, s_min, v_min, h_max, s_max, v_max = hsv_row[['H_min', 'S_min', 'V_min', 'H_max', 'S_max', 'V_max']].values[0]

        # Resize the image (scale_percent is a percentage of the original width/height)
        scale_percent = ***
        width = int(image.shape[1] * scale_percent / 100)
        height = int(image.shape[0] * scale_percent / 100)
        scaled_image = cv2.resize(image, (width, height))

        # *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

        # Step 1: Apply Sobel Filter for Edge Detection
        # NOTE(review): `edges` is computed but not used later in this cell.
        sobel_x = cv2.Sobel(scaled_image, cv2.CV_64F, 1, 0, ksize=***)
        sobel_y = cv2.Sobel(scaled_image, cv2.CV_64F, 0, 1, ksize=***)
        edges = cv2.magnitude(sobel_x, sobel_y)
        edges = np.uint8(edges)

        # Step 2: Remove Reflections
        # Pixels with V in [200, 255] (any H and S) are treated as reflections and inpainted.
        hsv_image = cv2.cvtColor(scaled_image, cv2.COLOR_BGR2HSV)
        reflection_mask = cv2.inRange(hsv_image, np.array([0, 0, 200]), np.array([180, 255, 255]))  # Detect bright regions
        cleaned_image = cv2.inpaint(scaled_image, reflection_mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)

        # Step 3: Enhance Contrast with CLAHE
        # CLAHE is applied to the L channel only; A and B are merged back unchanged.
        lab_image = cv2.cvtColor(cleaned_image, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab_image)
        clahe = cv2.createCLAHE(clipLimit=***, tileGridSize=(8, 8))
        l = clahe.apply(l)
        lab_image = cv2.merge((l, a, b))
        enhanced_image = cv2.cvtColor(lab_image, cv2.COLOR_LAB2BGR)

        # Step 4: Adaptive Thresholding
        # NOTE(review): `adaptive_mask` is computed but not used later in this cell.
        gray_image = cv2.cvtColor(enhanced_image, cv2.COLOR_BGR2GRAY)
        adaptive_mask = cv2.adaptiveThreshold(gray_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

        # Step 5: Bilateral Filtering
        bilateral_filtered = cv2.bilateralFilter(enhanced_image, 9, 75, 75)

        # Create a mask to exclude black pixels (assuming black is [0, 0, 0] in BGR)
        # The lower bound [1, 1, 1] means a pixel is kept only if all three channels are >= 1.
        black_mask = cv2.inRange(bilateral_filtered, np.array([1, 1, 1]), np.array([255, 255, 255]))

        # Apply the mask to the image (excluded pixels become zero)
        masked_image = cv2.bitwise_and(bilateral_filtered, bilateral_filtered, mask=black_mask)

        # Calculate the number of non-black pixels
        non_black_pixels = cv2.countNonZero(cv2.cvtColor(masked_image, cv2.COLOR_BGR2GRAY))
        total_pixels = scaled_image.shape[0] * scaled_image.shape[1]
        pixel_percentage = (non_black_pixels / total_pixels) * 100

        # Compute color statistics
        # Each statistic is taken over every pixel of the array, including pixels zeroed
        # by the mask. hsv_image comes from the resized image before reflection removal;
        # lab_image is the LAB image after CLAHE.
        # NOTE(review): confirm that masked-out (black) pixels should contribute to the
        # means and standard deviations.
        mean_rgb, std_rgb = np.mean(masked_image, axis=(0, 1)), np.std(masked_image, axis=(0, 1))
        mean_hsv, std_hsv = np.mean(hsv_image, axis=(0, 1)), np.std(hsv_image, axis=(0, 1))
        mean_lab, std_lab = np.mean(lab_image, axis=(0, 1)), np.std(lab_image, axis=(0, 1))

        # Add results to the DataFrame
        # masked_image is in BGR channel order, hence index 2 -> R, 1 -> G, 0 -> B
        df_results = pd.concat([df_results, pd.DataFrame({
            'Image': [image_file],
            'Total_Pixels': [total_pixels],
            'Pixel_Percentage': [pixel_percentage],
            'Mean_R': [mean_rgb[2]], 'Std_R': [std_rgb[2]],
            'Mean_G': [mean_rgb[1]], 'Std_G': [std_rgb[1]],
            'Mean_B': [mean_rgb[0]], 'Std_B': [std_rgb[0]],
            'Mean_H': [mean_hsv[0]], 'Std_H': [std_hsv[0]],
            'Mean_S': [mean_hsv[1]], 'Std_S': [std_hsv[1]],
            'Mean_V': [mean_hsv[2]], 'Std_V': [std_hsv[2]],
            'Mean_L': [mean_lab[0]], 'Std_L': [std_lab[0]],
            'Mean_A': [mean_lab[1]], 'Std_A': [std_lab[1]],
            'Mean_B_LAB': [mean_lab[2]], 'Std_B_LAB': [std_lab[2]]
        })], ignore_index=True)

    except Exception as e:
        print(f"Error processing image {image_file}: {e}")

# Save the results to an Excel file
output_excel_path = '/content/drive/MyDrive/visual titration/v8/data/colorspaces_masked_segmented_12122024_newdataset.xlsx'
df_results.to_excel(output_excel_path, index=False)

print(f"Analysis results saved to Excel file: {output_excel_path}")

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the colour-space summary table from the Excel file
data_path = '/content/drive/MyDrive/visual titration/v8/data/cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx'
df = pd.read_excel(data_path)

# Restrict the analysis to the nine mean colour-channel columns
columns_to_analyze = ['Mean_R', 'Mean_G', 'Mean_B', 'Mean_H', 'Mean_S', 'Mean_V', 'Mean_L', 'Mean_A', 'Mean_B_LAB']
numeric_df = df[columns_to_analyze]

# 1. RGB, HSV, LAB Histograms and Correlation Matrix
plt.figure(figsize=(20, 15), dpi=300)  # High-quality output with increased DPI

# One (column, bar colour, panel title) entry per histogram, in RGB / HSV / LAB order.
# Panels fill positions 1..9 of a 5x3 subplot grid.
histogram_specs = [
    ('Mean_R', 'red', 'Mean R Distribution'),
    ('Mean_G', 'green', 'Mean G Distribution'),
    ('Mean_B', 'blue', 'Mean B Distribution'),
    ('Mean_H', 'orange', 'Mean H Distribution'),
    ('Mean_S', 'green', 'Mean S Distribution'),
    ('Mean_V', 'yellow', 'Mean V Distribution'),
    ('Mean_L', 'gray', 'Mean L Distribution'),
    ('Mean_A', 'purple', 'Mean A Distribution'),
    ('Mean_B_LAB', 'blue', 'Mean B_LAB Distribution'),
]
for panel_number, (column_name, bar_colour, panel_title) in enumerate(histogram_specs, start=1):
    plt.subplot(5, 3, panel_number)
    plt.hist(df[column_name], bins=30, color=bar_colour, alpha=0.7)
    plt.title(panel_title, fontsize=12)
    plt.xlabel('[O$_2$]tot', fontsize=12)
    plt.ylabel('Frequency', fontsize=12)

# Correlation Matrix (drawn in its own figure)
plt.figure(figsize=(7, 7), dpi=300)  # High-quality output with increased DPI
corr = numeric_df.corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', cbar=True, annot_kws={"size": 12})
plt.title('Correlation Matrix', fontsize=12)
plt.tight_layout()  # Optimize layout for high-quality output
plt.show()

# 2. Scatter Matrix (Pairplot) - All Parameter Combinations
sns.pairplot(numeric_df, diag_kind='hist', corner=True, plot_kws={'alpha': 0.6})
plt.suptitle('Scatter Matrix: All Parameter Combinations', fontsize=12, y=1.02)
plt.tight_layout()  # Optimize layout for high-quality output
plt.savefig('scatter_matrix.png', dpi=300, bbox_inches='tight')  # Save with high DPI
plt.show()

# 3. Boxplot - Distribution of Each Parameter Separately
plt.figure(figsize=(10, 5), dpi=300)  # High-quality output with increased DPI
sns.boxplot(data=numeric_df, orient='h', palette='Set2')
plt.xlabel('Values of Color Spaces', fontsize=12)
plt.ylabel('Color Spaces', fontsize=12)
plt.tight_layout()  # Optimize layout for high-quality output
plt.savefig('boxplot.png', dpi=300, bbox_inches='tight')  # Save with high DPI
plt.show()

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Read the colour-space summary table
file_path = '/content/drive/MyDrive/visual titration/v8/data/cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx'
df = pd.read_excel(file_path)

# Independent variable: Change this to the appropriate concentration column name
X_column = '***'  # Replace with the correct column name representing concentration

# Dependent variables (color space values) and their matching standard deviations;
# the two lists are paired element by element.
mean_columns = ['Mean_R', 'Mean_G', 'Mean_B', 'Mean_H', 'Mean_S', 'Mean_V', 'Mean_L', 'Mean_A', 'Mean_B_LAB']
std_columns = ['Std_R', 'Std_G', 'Std_B', 'Std_H', 'Std_S', 'Std_V', 'Std_L', 'Std_A', 'Std_B_LAB']

# Fit and plot one straight line per colour channel
for mean_col, std_col in zip(mean_columns, std_columns):
    # Keep only rows where concentration, mean and std are all present
    valid_rows = df[[X_column, mean_col, std_col]].dropna()

    if len(valid_rows) == 0:
        print(f"Skipped {mean_col} due to insufficient data.")
        continue

    x_series = valid_rows[X_column]
    y_series = valid_rows[mean_col]

    # scikit-learn expects 2-D arrays: one row per sample
    valid_X = x_series.values.reshape(-1, 1)
    valid_y = y_series.values.reshape(-1, 1)

    # Ordinary least-squares fit, evaluated on the training points
    linear_model = LinearRegression()
    linear_model.fit(valid_X, valid_y)
    linear_y_pred = linear_model.predict(valid_X)
    linear_r2 = r2_score(valid_y, linear_y_pred)

    slope = linear_model.coef_[0][0]
    intercept = linear_model.intercept_[0]
    equation = f"y = {slope:.2f}x + {intercept:.2f}"

    # Data points with std error bars plus the fitted line
    plt.figure(figsize=(10, 6))
    plt.errorbar(x_series, y_series, yerr=valid_rows[std_col], fmt='o', label='Data Points', alpha=0.7)
    plt.plot(x_series, linear_y_pred, color='red', label=f'Linear Fit (R²={linear_r2:.2f})', linestyle='--')
    plt.xlabel(X_column)
    plt.ylabel(f'{mean_col} (with {std_col} as Error Bars)')
    plt.title(f'{X_column} vs {mean_col} with Linear Fit Line\n{equation}')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

print("Analysis completed.")

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Read the colour-space summary table
file_path = '/content/drive/MyDrive/visual titration/v8/data/cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx'
df = pd.read_excel(file_path)

# Define the independent variable (concentration column)
X_column = '***'  # Replace with the correct column name representing concentration

# Dependent variable groups (color spaces); std_channels lists the matching
# standard-deviation columns in RGB, HSV, LAB order.
rgb_channels = ['Mean_R', 'Mean_G', 'Mean_B']
hsv_channels = ['Mean_H', 'Mean_S', 'Mean_V']
lab_channels = ['Mean_L', 'Mean_A', 'Mean_B_LAB']
std_channels = ['Std_R', 'Std_G', 'Std_B', 'Std_H', 'Std_S', 'Std_V', 'Std_L', 'Std_A', 'Std_B_LAB']

# Pairwise correlations between the concentration column and all nine channel means
correlated_columns = [X_column, *rgb_channels, *hsv_channels, *lab_channels]
correlation_matrix = df[correlated_columns].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title("Correlation Matrix: Colorimetric Features & Ozone Concentration")
plt.show()

# Function to process color space groups
def plot_color_space_regression(color_channels, std_channels, title):
    """Plot each channel of one colour space against the concentration column.

    For every channel the data points are drawn with their standard deviation
    as error bars, together with a least-squares straight line whose equation
    is shown in the legend. All channels share one figure.

    Reads the module-level ``df`` (data table) and ``X_column`` (name of the
    concentration column).

    :param color_channels: names of the mean-value columns to plot
    :param std_channels: names of the standard-deviation columns, paired with
        ``color_channels`` by position
    :param title: figure title
    """
    plt.figure(figsize=(12, 8))
    plotted_any = False

    # Pair each mean column with its std column by position. Looking the position
    # up again with list.index() would pick the wrong std column if a name repeats.
    for position, mean_col in enumerate(color_channels):
        std_col = std_channels[position]

        # Filter valid (non-NaN) rows
        valid_rows = df[[X_column, mean_col, std_col]].dropna()
        if valid_rows.empty:
            print(f"Skipped {mean_col} due to insufficient data.")
            continue

        # scikit-learn expects 2-D arrays: one row per sample
        valid_X = valid_rows[X_column].values.reshape(-1, 1)
        valid_y = valid_rows[mean_col].values.reshape(-1, 1)

        # Create and train the linear regression model
        linear_model = LinearRegression()
        linear_model.fit(valid_X, valid_y)
        linear_y_pred = linear_model.predict(valid_X)
        equation = f"{mean_col}: y = {linear_model.coef_[0][0]:.3f}x + {linear_model.intercept_[0]:.3f}"

        # Plot results
        plt.errorbar(valid_rows[X_column], valid_rows[mean_col], yerr=valid_rows[std_col], fmt='o', label=f'{mean_col}', alpha=0.7)
        plt.plot(valid_rows[X_column], linear_y_pred, linestyle='--', label=equation)
        plotted_any = True

    plt.xlabel(X_column)
    plt.ylabel("Color Intensity")
    plt.title(title)
    # Only build a legend when at least one channel produced labelled artists
    if plotted_any:
        plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# One figure per colour space: (mean columns, matching slice of std columns, title)
color_space_plots = (
    (rgb_channels, std_channels[:3], "RGB Color Space vs Ozone Concentration"),
    (hsv_channels, std_channels[3:6], "HSV Color Space vs Ozone Concentration"),
    (lab_channels, std_channels[6:], "LAB Color Space vs Ozone Concentration"),
)
for space_means, space_stds, space_title in color_space_plots:
    plot_color_space_regression(space_means, space_stds, space_title)

print("Analysis completed.")

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Read the colour-space summary table
file_path = '/content/drive/MyDrive/visual titration/v8/data/cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx'
df = pd.read_excel(file_path)

# Define the independent variable (concentration column)
X_column = '***'  # Replace with the correct column name representing concentration

# Dependent variable groups (color spaces); std_channels lists the matching
# standard-deviation columns in RGB, HSV, LAB order.
rgb_channels = ['Mean_R', 'Mean_G', 'Mean_B']
hsv_channels = ['Mean_H', 'Mean_S', 'Mean_V']
lab_channels = ['Mean_L', 'Mean_A', 'Mean_B_LAB']
std_channels = ['Std_R', 'Std_G', 'Std_B', 'Std_H', 'Std_S', 'Std_V', 'Std_L', 'Std_A', 'Std_B_LAB']

# Correlation matrix without a title. The concentration column is relabelled
# (on a separate frame, so df itself is untouched) to get a formatted axis label.
correlated_columns = [X_column, *rgb_channels, *hsv_channels, *lab_channels]
correlation_temp_df = df[correlated_columns].rename(columns={X_column: r'[O$_{x}$]$_{tot}$'})

correlation_matrix = correlation_temp_df.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.show()

# Function to process color space groups (titles removed)
def plot_color_space_regression(color_channels, std_channels):
    """Plot each channel of one colour space against the concentration column.

    For every channel the data points are drawn with their standard deviation
    as error bars, together with a least-squares straight line whose equation
    is shown in the legend. All channels share one untitled figure.

    Reads the module-level ``df`` (data table) and ``X_column`` (name of the
    concentration column).

    :param color_channels: names of the mean-value columns to plot
    :param std_channels: names of the standard-deviation columns, paired with
        ``color_channels`` by position
    """
    plt.figure(figsize=(12, 8))
    plotted_any = False

    # Pair each mean column with its std column by position. Looking the position
    # up again with list.index() would pick the wrong std column if a name repeats.
    for position, mean_col in enumerate(color_channels):
        std_col = std_channels[position]

        # Filter valid (non-NaN) rows
        valid_rows = df[[X_column, mean_col, std_col]].dropna()
        if valid_rows.empty:
            print(f"Skipped {mean_col} due to insufficient data.")
            continue

        # scikit-learn expects 2-D arrays: one row per sample
        valid_X = valid_rows[X_column].values.reshape(-1, 1)
        valid_y = valid_rows[mean_col].values.reshape(-1, 1)

        # Create and train the linear regression model
        linear_model = LinearRegression()
        linear_model.fit(valid_X, valid_y)
        linear_y_pred = linear_model.predict(valid_X)
        equation = f"{mean_col}: y = {linear_model.coef_[0][0]:.3f}x + {linear_model.intercept_[0]:.3f}"

        # Plot results (no title; the legend carries the fitted equation)
        plt.errorbar(valid_rows[X_column], valid_rows[mean_col], yerr=valid_rows[std_col], fmt='o', label=f'{mean_col}', alpha=0.7)
        plt.plot(valid_rows[X_column], linear_y_pred, linestyle='--', label=equation)
        plotted_any = True

    plt.xlabel(r'[O$_{x}$]$_{tot}$')
    plt.ylabel("Color Density")
    # Only build a legend when at least one channel produced labelled artists
    if plotted_any:
        plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# One untitled figure per colour space: (mean columns, matching slice of std columns)
color_space_plots = (
    (rgb_channels, std_channels[:3]),
    (hsv_channels, std_channels[3:6]),
    (lab_channels, std_channels[6:]),
)
for space_means, space_stds in color_space_plots:
    plot_color_space_regression(space_means, space_stds)

print("Analysis completed.")

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

DATASET EVALUATION

Interpolation according to concentration (linearization only)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

# File path (Google Drive)
file_path = "/content/drive/MyDrive/visual titration/v8/data/cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx"

# Read the colour-space summary table
df = pd.read_excel(file_path)

# Independent variable (Image column) - kept unchanged
# NOTE(review): the regression below needs numeric X values — presumably the
# 'Image' column of this file holds concentrations; confirm.
X_column = "Image"

# Dependent variables (Color values)
mean_columns = ["Mean_R", "Mean_G", "Mean_B", "Mean_H", "Mean_S", "Mean_V", "Mean_L", "Mean_A", "Mean_B_LAB"]

# Work on a copy so the loaded table keeps its original values
interpolated_data = df.copy()

# Records, per column, which model produced the written values
interpolation_models = {}

for mean_col in mean_columns:
    # Fit only on rows where both X and the channel value are present
    valid_rows = df[[X_column, mean_col]].dropna()

    if valid_rows.empty:
        print(f"Skipping {mean_col} due to insufficient data.")
        continue

    valid_X = valid_rows[X_column].values.reshape(-1, 1)
    valid_y = valid_rows[mean_col].values.reshape(-1, 1)

    # Try a straight line first
    linear_model = LinearRegression()
    linear_model.fit(valid_X, valid_y)
    linear_y_pred = linear_model.predict(valid_X)
    linear_r2 = r2_score(valid_y, linear_y_pred)

    if linear_r2 >= 0.80:
        # The line explains the data well enough: keep it
        print(f"{mean_col}: Linear Regression Used (R² = {linear_r2:.2f})")
        chosen_model = linear_model
        interpolation_models[mean_col] = "Linear"
    else:
        # Otherwise fall back to a degree-2 polynomial fit
        poly_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
        poly_model.fit(valid_X, valid_y)
        poly_y_pred = poly_model.predict(valid_X)
        poly_r2 = r2_score(valid_y, poly_y_pred)

        print(f"{mean_col}: Polynomial Regression Used (R² = {poly_r2:.2f})")
        chosen_model = poly_model
        interpolation_models[mean_col] = "Polynomial"

    # Evaluate the chosen model for every row of the table and overwrite the
    # whole column with the fitted values
    every_X = df[X_column].values.reshape(-1, 1)
    interpolated_values = chosen_model.predict(every_X).flatten()
    interpolated_data[mean_col] = interpolated_values

# Save interpolated results
output_path = "/content/drive/MyDrive/visual titration/v8/data/interpolated_cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx"
interpolated_data.to_excel(output_path, index=False)
print(f"Interpolation completed and saved: {output_path}")

# Bar chart of the model chosen per feature (bar height 1 = linear, 2 = polynomial)
method_names = list(interpolation_models.keys())
method_codes = [1 if v=="Linear" else 2 for v in interpolation_models.values()]

plt.figure(figsize=(12, 6))
sns.barplot(x=method_names, y=method_codes)
plt.xticks(rotation=45)
plt.yticks([1, 2], ["Linear Regression", "Polynomial Regression"])
plt.ylabel("Selected Interpolation Method")
plt.title("Interpolation Method Chosen for Each Feature")
plt.grid(axis="y")
plt.show()

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the interpolated colour-space table
data_path = '/content/drive/MyDrive/visual titration/v8/data/interpolated_cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx'
df = pd.read_excel(data_path)

# Restrict the analysis to the nine mean colour-channel columns
columns_to_analyze = ['Mean_R', 'Mean_G', 'Mean_B', 'Mean_H', 'Mean_S', 'Mean_V', 'Mean_L', 'Mean_A', 'Mean_B_LAB']
numeric_df = df[columns_to_analyze]

# 1. RGB, HSV, LAB Histograms and Correlation Matrix
plt.figure(figsize=(20, 15))

# One (column, bar colour, panel title) entry per histogram, in RGB / HSV / LAB order.
# Panels fill positions 1..9 of a 5x3 subplot grid.
histogram_specs = [
    ('Mean_R', 'red', 'Mean R Distribution'),
    ('Mean_G', 'green', 'Mean G Distribution'),
    ('Mean_B', 'blue', 'Mean B Distribution'),
    ('Mean_H', 'orange', 'Mean H Distribution'),
    ('Mean_S', 'green', 'Mean S Distribution'),
    ('Mean_V', 'yellow', 'Mean V Distribution'),
    ('Mean_L', 'gray', 'Mean L Distribution'),
    ('Mean_A', 'purple', 'Mean A Distribution'),
    ('Mean_B_LAB', 'blue', 'Mean B_LAB Distribution'),
]
for panel_number, (column_name, bar_colour, panel_title) in enumerate(histogram_specs, start=1):
    plt.subplot(5, 3, panel_number)
    plt.hist(df[column_name], bins=30, color=bar_colour, alpha=0.7)
    plt.title(panel_title)

# Correlation Matrix (drawn in its own, larger figure)
plt.figure(figsize=(12, 10))  # Increased size for better visualization
corr = numeric_df.corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', cbar=True)
plt.title('Correlation Matrix')
plt.show()

# 2. Scatter Matrix (Pairplot) - All Combinations
sns.pairplot(numeric_df, diag_kind='hist', corner=True, plot_kws={'alpha': 0.6})
plt.suptitle('Scatter Matrix: All Parameter Combinations', y=1.02)
plt.show()

# 3. Boxplot - Display the Distribution of Each Parameter Separately
plt.figure(figsize=(16, 8))
sns.boxplot(data=numeric_df, orient='h', palette='Set2')
plt.title('Boxplot: Distribution of All Parameters')
plt.xlabel('Values')
plt.show()

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Read the colour-feature table from Drive
file_path = '/content/drive/MyDrive/visual titration/v8/data/interpolated_cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx'
df = pd.read_excel(file_path)

# Independent variable: the concentration column.
X_column = '***'  # Replace with the correct column name representing concentration

# Dependent variables (one simple regression is fitted per colour channel)
mean_columns = ['Mean_R', 'Mean_G', 'Mean_B', 'Mean_H', 'Mean_S', 'Mean_V', 'Mean_L', 'Mean_A', 'Mean_B_LAB']

# Fit and plot "channel = slope * concentration + intercept" for every channel
for mean_col in mean_columns:
    # Keep only the rows where both the concentration and this channel are present
    valid_rows = df[[X_column, mean_col]].dropna()

    # scikit-learn expects 2-D arrays, hence the column-vector reshape
    valid_X = valid_rows[X_column].values.reshape(-1, 1)
    valid_y = valid_rows[mean_col].values.reshape(-1, 1)

    nothing_to_fit = valid_X.size == 0 or valid_y.size == 0
    if nothing_to_fit:
        print(f"Skipped {mean_col} due to insufficient data.")
        continue

    # Ordinary least-squares fit, scored on the same points it was fitted to
    linear_model = LinearRegression()
    linear_model.fit(valid_X, valid_y)
    linear_y_pred = linear_model.predict(valid_X)
    linear_r2 = r2_score(valid_y, linear_y_pred)

    slope = linear_model.coef_[0][0]
    intercept = linear_model.intercept_[0]
    equation = f"y = {slope:.2f}x + {intercept:.2f}"

    # Scatter of the raw points with the fitted line on top
    plt.figure(figsize=(10, 6))
    plt.scatter(valid_rows[X_column], valid_rows[mean_col], label='Data Points', alpha=0.7)
    plt.plot(
        valid_rows[X_column],
        linear_y_pred,
        color='red',
        label=f'Linear Fit (R²={linear_r2:.2f})',
        linestyle='--',
    )
    plt.xlabel(X_column)
    plt.ylabel(mean_col)
    plt.title(f'{X_column} vs {mean_col} with Linear Fit Line\n{equation}')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

print("Analysis completed.")

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Read the colour-feature table from Drive
file_path = '/content/drive/MyDrive/visual titration/v8/data/interpolated_cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx'
df = pd.read_excel(file_path)

# Independent variable (concentration column)
X_column = '***'  # Replace with the correct column name representing concentration

# Dependent variables, grouped by colour space
rgb_channels = ['Mean_R', 'Mean_G', 'Mean_B']
hsv_channels = ['Mean_H', 'Mean_S', 'Mean_V']
lab_channels = ['Mean_L', 'Mean_A', 'Mean_B_LAB']

# Pairwise correlations between the concentration column and all nine channels
heatmap_columns = [X_column, *rgb_channels, *hsv_channels, *lab_channels]
correlation_matrix = df[heatmap_columns].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    linewidths=0.5,
)
plt.title("Correlation Matrix: Colorimetric Features & Ozone Concentration")
plt.show()

# Function to process color space groups
def plot_color_space_regression(color_channels, title):
    """Plot every channel of one colour space against concentration on a shared figure.

    For each column in ``color_channels`` a simple linear regression of the
    channel on the module-level ``X_column`` of ``df`` is fitted. The raw points
    and the fitted line are drawn on the same axes, and the legend entry of each
    line shows the fitted equation together with its R² score.

    Args:
        color_channels: Column names of ``df`` to plot (e.g. the three RGB means).
        title: Figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(12, 8))

    for mean_col in color_channels:
        # Keep only the rows where both concentration and this channel are present
        valid_rows = df[[X_column, mean_col]].dropna()
        if valid_rows.empty:
            print(f"Skipped {mean_col} due to insufficient data.")
            continue

        # scikit-learn expects 2-D arrays, hence the column-vector reshape
        valid_X = valid_rows[X_column].values.reshape(-1, 1)
        valid_y = valid_rows[mean_col].values.reshape(-1, 1)

        # Ordinary least-squares fit, scored on the points it was fitted to
        linear_model = LinearRegression()
        linear_model.fit(valid_X, valid_y)
        linear_y_pred = linear_model.predict(valid_X)
        linear_r2 = r2_score(valid_y, linear_y_pred)
        equation = f"{mean_col}: y = {linear_model.coef_[0][0]:.3f}x + {linear_model.intercept_[0]:.3f}"

        # The R² was previously computed and then discarded; show it next to
        # the equation so the quality of each fit can be read off the legend.
        fit_label = f"{equation} (R²={linear_r2:.2f})"

        plt.scatter(valid_rows[X_column], valid_rows[mean_col], label=f'{mean_col}', alpha=0.7)
        plt.plot(valid_rows[X_column], linear_y_pred, linestyle='--', label=fit_label)

    plt.xlabel(X_column)
    plt.ylabel("Color Intensity")
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# One figure per colour space: RGB, then HSV, then LAB
color_space_figures = (
    (rgb_channels, "RGB Color Space vs Ozone Concentration"),
    (hsv_channels, "HSV Color Space vs Ozone Concentration"),
    (lab_channels, "LAB Color Space vs Ozone Concentration"),
)
for channel_group, figure_title in color_space_figures:
    plot_color_space_regression(channel_group, figure_title)

print("Analysis completed.")

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

MACHINE LEARNING

In [None]:
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression

# Read the colour-feature table used for the machine-learning experiments
df = pd.read_excel(
    '/content/drive/MyDrive/visual titration/v8/data/interpolated_cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx'
)

# Function to extract concentration information
def extract_concentration(image_name):
    """Return the first run of digits in ``image_name`` as an int.

    The argument is converted with ``str()`` first, so non-string values are
    accepted. Returns ``None`` when the text contains no digit at all.
    """
    first_number = re.search(r'(\d+)', str(image_name))
    if first_number is None:
        return None
    return int(first_number.group(1))

# Replace the image name in 'Image' with the number parsed from it; from here
# on the 'Image' column is used as the regression target.
df['Image'] = df['Image'].apply(extract_concentration)
df.sort_values('Image', inplace=True)

# Feature columns: every 'Mean_*' column whose name does not mention 'Std'
mean_columns = [
    column_name
    for column_name in df.columns
    if 'Mean_' in column_name and 'Std' not in column_name
]
X = df[mean_columns].values  # Independent variables
y = df['Image'].values  # Target variable

# Dataset overview
print("\n### Dataset Statistics ###")
print(f"Total Data Points: {len(df)}")
print(f"Number of Features: {len(mean_columns)}")
print("Feature Names:", mean_columns)

# Summary of the target variable
print("\nTarget Variable (Concentration) Statistics:")
print(f"Mean: {np.mean(y):.2f}")
print(f"Standard Deviation: {np.std(y):.2f}")
print(f"Minimum Value: {np.min(y)}")
print(f"Maximum Value: {np.max(y)}")
print(f"Number of NaN Values in Target: {np.isnan(y).sum()}")

# Per-feature count of missing values
feature_frame = pd.DataFrame(X, columns=mean_columns)
nan_counts = feature_frame.isna().sum()
print("\nNumber of Missing Values in Features:")
print(nan_counts)

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features (necessary for Neural Networks); the scaler is fitted
# on the training rows only and then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regressors. Only the Neural Network is trained on standardized
# features; the other four are trained on the raw feature values.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Neural Network': MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
}

# Metric rows are collected in a list and turned into a DataFrame once, after
# the loop. Repeatedly concatenating onto an empty DataFrame emits a
# FutureWarning in recent pandas and can leave the metric columns with
# object dtype.
result_rows = []

# Train and evaluate models
for model_name, model in models.items():
    if model_name == 'Neural Network':
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    # Held-out test metrics
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    result_rows.append({'Model': model_name, 'R2': r2, 'MSE': mse, 'MAE': mae})

    # Scatter plot of predictions vs true values
    plt.figure(figsize=(8, 6))
    plt.scatter(y_test, y_pred, alpha=0.7, label='Predicted')
    # NOTE(review): this dashed line is the identity y = x (a perfect
    # prediction), not a fitted line, although it is labelled 'Best Fit'.
    plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linestyle='--', label='Best Fit')
    plt.xlabel('True Concentration')
    plt.ylabel('Predicted Concentration')
    plt.title(f'{model_name} - Predicted vs True')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# One row per model, with numeric metric columns
results = pd.DataFrame(result_rows, columns=['Model', 'R2', 'MSE', 'MAE'])

# Display model performance comparison
print("\nModel Performance Comparisons:")
print(results)

# Visualize model performance
plt.figure(figsize=(10, 6))
sns.barplot(x='Model', y='R2', data=results)
plt.title('Comparison of Model Performance (R²)')
plt.ylabel('R² Score')
plt.xlabel('Model')
plt.grid(True, axis='y')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Histogram (with KDE overlay) of the target variable
plt.figure(figsize=(8, 6))
sns.histplot(y, kde=True, bins=20)
plt.title('Target Variable Distribution (Image)')
plt.xlabel('Image')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

# One scatter plot per feature: feature value on x, target on y
target_values = df['Image']
for col in mean_columns:
    plt.figure(figsize=(6, 4))
    plt.scatter(df[col], target_values, alpha=0.5)
    plt.xlabel(col)
    plt.ylabel('Image')
    plt.title(f'{col} vs Image')
    plt.grid(True)
    plt.show()

# Correlation matrix of all features together with the target
columns_with_target = mean_columns + ['Image']
correlation_matrix = df[columns_with_target].corr()
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Report R² on the training rows next to R² on the test rows for each
# already-fitted model.
for model_name, model in models.items():
    # The Neural Network was fitted on standardized inputs, the rest on raw ones
    wants_scaled_inputs = model_name == 'Neural Network'
    train_inputs = X_train_scaled if wants_scaled_inputs else X_train
    test_inputs = X_test_scaled if wants_scaled_inputs else X_test

    y_train_pred = model.predict(train_inputs)
    y_test_pred = model.predict(test_inputs)

    # Training and test R² scores
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)

    print(f"{model_name} -> Training R²: {train_r2:.3f}, Test R²: {test_r2:.3f}")

# Univariate feature ranking: keep the 5 features with the highest F-score.
# The selector is fitted on the full X / y here; the models above were not
# trained on its output.
selector = SelectKBest(score_func=f_regression, k=5)
X_new = selector.fit_transform(X, y)
kept_mask = selector.get_support()
selected_features = np.array(mean_columns)[kept_mask]
print("\nSelected Features:", selected_features)

# F-score and p-value of every feature, best first
f_scores = selector.scores_
p_values = selector.pvalues_
feature_scores = pd.DataFrame({
    'Feature': mean_columns,
    'F-Score': f_scores,
    'P-Value': p_values
})
ranked_scores = feature_scores.sort_values(by='F-Score', ascending=False)
print("\nF-Scores and P-Values for All Features:")
print(ranked_scores)

# Correlation matrix restricted to the selected features plus the target
selected_df = df[list(selected_features) + ['Image']]
correlation_matrix_selected = selected_df.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix_selected, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Matrix of Selected Features and Target Variable')
plt.show()

# Final performance summary: one dict of metrics per fitted model
final_results = []

for model_name, model in models.items():
    # The Neural Network was fitted on standardized inputs, the rest on raw ones
    wants_scaled_inputs = model_name == 'Neural Network'
    y_train_pred = model.predict(X_train_scaled if wants_scaled_inputs else X_train)
    y_test_pred = model.predict(X_test_scaled if wants_scaled_inputs else X_test)

    # R² on both partitions, error metrics on the test partition only
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)
    test_mse = mean_squared_error(y_test, y_test_pred)
    test_mae = mean_absolute_error(y_test, y_test_pred)

    summary_row = {
        'Model': model_name,
        'Training R²': train_r2,
        'Test R²': test_r2,
        'Test MSE': test_mse,
        'Test MAE': test_mae
    }
    final_results.append(summary_row)

# Tabulate the collected rows
final_results_df = pd.DataFrame(final_results)

# Print final performance summary
print("\nFinal Model Performance Summary:")
print(final_results_df)

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import shap
import matplotlib.pyplot as plt

# Panel letters drawn above each SHAP plot, one per model
labels = ["a)", "b)", "c)", "d)", "e)"]  # Extend this list if needed based on the number of models

# Models explained with the model-agnostic KernelExplainer; every other model
# goes through shap.Explainer.
kernel_explained_models = ('Neural Network', 'Random Forest')

# Perform SHAP analysis for all models
for idx, (model_name, model) in enumerate(models.items()):
    print(f"\n--- SHAP Analysis: {model_name} ---")

    # Pick the input space the model is trained in: standardized features for
    # the Neural Network, raw features otherwise.
    if model_name == 'Neural Network':
        X_ref, X_plot = X_train_scaled, X_test_scaled
    else:
        X_ref, X_plot = X_train, X_test

    # (Re)train the model on the training rows of that space
    model.fit(X_ref, y_train)

    # Build the explainer with the training rows as background data
    if model_name in kernel_explained_models:
        explainer = shap.KernelExplainer(model.predict, X_ref)
    else:
        explainer = shap.Explainer(model, X_ref)

    # SHAP values for the test rows
    shap_values = explainer.shap_values(X_plot)

    # Summary plot on a fresh figure; show=False so the panel letter can be
    # added before the figure is displayed.
    plt.figure(figsize=(10, 6))
    shap.summary_plot(shap_values, X_plot, feature_names=mean_columns, show=False)

    # Panel letter, positioned in axes coordinates just above the plot
    current_axes = plt.gca()
    plt.text(0.4, 1.05, labels[idx], fontsize=16, fontweight='bold', transform=current_axes.transAxes)

    # Display the plot
    plt.show()

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
!pip install lime
import lime
import lime.lime_tabular
import numpy as np
import matplotlib.pyplot as plt

# LIME perturbs samples around the explained instance using the statistics of
# its training data, so the explainer must live in the same feature space the
# model was trained in. The Neural Network is trained on standardized
# features, every other model on the raw features, hence two explainers.
# (Previously a single standardized-space explainer fed standardized
# perturbations to models that had been fitted on raw values.)
lime_explainers = {
    space: lime.lime_tabular.LimeTabularExplainer(
        training_data=space_training_data,
        feature_names=mean_columns,
        class_names=['Concentration'],
        mode='regression',
        discretize_continuous=True,
        random_state=42
    )
    for space, space_training_data in (('scaled', X_train_scaled), ('raw', X_train))
}
explainer = lime_explainers['scaled']  # kept under its original name

# Select a test sample to explain (using the first test data point as an example)
test_sample_index = 0
test_samples = {
    'scaled': X_test_scaled[test_sample_index].reshape(1, -1),
    'raw': X_test[test_sample_index].reshape(1, -1),
}
test_sample = test_samples['scaled']  # kept under its original name

# Label sequence for the plots
labels = ["a)", "b)", "c)", "d)", "e)"]

# Run LIME analysis for all models
for i, (model_name, model) in enumerate(models.items()):
    print(f"\n--- LIME Analysis: {model_name} ---")

    # Feature space this model is trained and explained in
    space = 'scaled' if model_name == 'Neural Network' else 'raw'

    # Train the model (if already trained, this step can be commented out)
    if space == 'scaled':
        model.fit(X_train_scaled, y_train)
    else:
        model.fit(X_train, y_train)

    # Prediction function handed to LIME; the model is bound as a default
    # argument so the function keeps referring to this iteration's model.
    def model_predict(data, model=model):
        return model.predict(data).reshape(-1, 1)

    # Generate LIME explanation for the same test row, in the matching space
    exp = lime_explainers[space].explain_instance(
        test_samples[space].flatten(),
        model_predict,
        num_features=5  # Show top 5 most important features
    )

    # Visualize the LIME result (adjusting colors for grayscale)
    fig = exp.as_pyplot_figure()

    # Set bars to grayscale with black edges
    for bar in fig.axes[0].patches:
        bar.set_facecolor('gray')  # Set all bars to gray
        bar.set_edgecolor('black')  # Set edges to black

    fig.tight_layout()
    plt.title(f"{labels[i]}")  # Using letters instead of model names
    plt.show()

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

MODEL VALIDATION

In [None]:
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression

# Read the colour-feature table used for the model-validation experiments
df = pd.read_excel(
    '/content/drive/MyDrive/visual titration/v8/data/interpolated_cleared_colorspaces_masked_segmented_12122024_newdataset.xlsx'
)

# Function to extract concentration information from image names
def extract_concentration(image_name):
    """Parse the leading number out of an image name.

    ``image_name`` is stringified, the first run of consecutive digits is
    located and returned as an ``int``; ``None`` is returned when the name
    holds no digits.
    """
    number_match = re.search(r'(\d+)', str(image_name))
    if not number_match:
        return None
    return int(number_match.group(1))

# Replace the image name in 'Image' with the number parsed from it; from here
# on the 'Image' column is used as the regression target.
df['Image'] = df['Image'].apply(extract_concentration)
df.sort_values('Image', inplace=True)

# Identify feature columns (excluding standard deviation data)
mean_columns = [col for col in df.columns if 'Mean_' in col and 'Std' not in col]

X_all = df[mean_columns].values  # All features
y = df['Image'].values  # Target variable

# Split FIRST, then select features using the training rows only. Fitting
# SelectKBest on the whole dataset lets the test targets influence which
# features are kept (data leakage) and makes the test scores optimistic.
# The row partition depends only on the number of rows and random_state, so
# it is the same partition as splitting the already-reduced matrix.
X_all_train, X_all_test, y_train, y_test = train_test_split(
    X_all, y, test_size=0.2, random_state=42
)

# Feature selection using f_regression; k is capped so the selector also works
# when fewer than 5 feature columns are available.
selector = SelectKBest(score_func=f_regression, k=min(5, len(mean_columns)))
X_train = selector.fit_transform(X_all_train, y_train)
X_test = selector.transform(X_all_test)
X_new = selector.transform(X_all)  # full dataset reduced to the selected columns
selected_features = np.array(mean_columns)[selector.get_support()]
print("Selected Features:", selected_features)

# Full-dataset matrix of the selected features (used for the statistics below)
X = df[selected_features].values  # Independent variables

# Print dataset statistics
print("\n### Data Statistics ###")
print(f"Total Number of Data Points: {len(df)}")
print(f"Number of Independent Variables (Features): {len(selected_features)}")
print("Selected Feature Names:", selected_features)

# Target variable (y) statistics
print("\nTarget Variable (Concentration) Statistics:")
print(f"Mean: {np.mean(y):.2f}")
print(f"Standard Deviation: {np.std(y):.2f}")
print(f"Minimum Value: {np.min(y)}")
print(f"Maximum Value: {np.max(y)}")
print(f"NaN Values in Target: {np.isnan(y).sum()}")

# Check for NaN values in independent variables
nan_counts = pd.DataFrame(X, columns=selected_features).isna().sum()
print("\nNaN Values in Independent Variables:")
print(nan_counts)

# Feature standardization (necessary for Neural Network); the scaler is fitted
# on the training rows only and then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regressors. Only the Neural Network is trained on standardized
# features; the other four are trained on the raw feature values.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Neural Network': MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
}

# Metric rows are collected in a list and turned into a DataFrame once, after
# the loop. Repeatedly concatenating onto an empty DataFrame emits a
# FutureWarning in recent pandas and can leave the metric columns with
# object dtype.
result_rows = []

# Train and evaluate models
for model_name, model in models.items():
    if model_name == 'Neural Network':
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    # Held-out test metrics
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    result_rows.append({'Model': model_name, 'R2': r2, 'MSE': mse, 'MAE': mae})

    # Scatter plot of predicted vs. true values
    plt.figure(figsize=(8, 6))
    plt.scatter(y_test, y_pred, alpha=0.7, label='Predicted')
    # NOTE(review): this dashed line is the identity y = x (a perfect
    # prediction), not a fitted line, although it is labelled 'Best Fit'.
    plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linestyle='--', label='Best Fit')
    plt.xlabel('True Concentration')
    plt.ylabel('Predicted Concentration')
    plt.title(f'{model_name} - Predicted vs True')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# One row per model, with numeric metric columns
results = pd.DataFrame(result_rows, columns=['Model', 'R2', 'MSE', 'MAE'])

print("\nModel Performance Comparison:")
print(results)

# Bar plot of R² scores
plt.figure(figsize=(10, 6))
sns.barplot(x='Model', y='R2', data=results)
plt.title('Comparison of Model Performance (R^2)')
plt.ylabel('R^2 Score')
plt.xlabel('Model')
plt.grid(True, axis='y')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Histogram (with KDE overlay) of the target variable
plt.figure(figsize=(8, 6))
sns.histplot(y, kde=True, bins=20)
plt.title('Distribution of Target Variable (Image)')
plt.xlabel('Image')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

# One scatter plot per selected feature: feature value on x, target on y
target_values = df['Image']
for col in selected_features:
    plt.figure(figsize=(6, 4))
    plt.scatter(df[col], target_values, alpha=0.5)
    plt.xlabel(col)
    plt.ylabel('Image')
    plt.title(f'{col} vs Image')
    plt.grid(True)
    plt.show()

# Correlation matrix of the selected features together with the target
columns_with_target = list(selected_features) + ['Image']
correlation_matrix = df[columns_with_target].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Matrix (Selected Features)')
plt.show()

# Report R² on the training rows next to R² on the test rows for each
# already-fitted model.
for model_name, model in models.items():
    # The Neural Network was fitted on standardized inputs, the rest on raw ones
    wants_scaled_inputs = model_name == 'Neural Network'
    train_inputs = X_train_scaled if wants_scaled_inputs else X_train
    test_inputs = X_test_scaled if wants_scaled_inputs else X_test

    y_train_pred = model.predict(train_inputs)
    y_test_pred = model.predict(test_inputs)

    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)

    print(f"{model_name} -> Training R2: {train_r2:.3f}, Test R2: {test_r2:.3f}")

# Function to plot learning curves
def plot_learning_curve(estimator, X, y, title):
    """Draw training and validation R² against training-set size.

    ``learning_curve`` is run with 5-fold cross-validation at ten training
    fractions evenly spaced from 10% to 100%. The per-size mean over the folds
    is drawn as a line for each of the two score sets, with a shaded band of
    one standard deviation on either side.

    Args:
        estimator: Regressor passed to ``learning_curve``.
        X: Feature matrix.
        y: Target values.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    size_fractions = np.linspace(0.1, 1.0, 10)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=5, scoring='r2', train_sizes=size_fractions
    )

    # (legend label, mean over folds, std over folds) for each curve
    curves = [
        (curve_label, np.mean(fold_scores, axis=1), np.std(fold_scores, axis=1))
        for curve_label, fold_scores in (('Training', train_scores), ('Validation', test_scores))
    ]

    plt.figure(figsize=(8, 6))
    # Lines first, then the shaded bands, in the same order as the curves list
    for curve_label, mean_score, _ in curves:
        plt.plot(train_sizes, mean_score, 'o-', label=curve_label)
    for _, mean_score, score_std in curves:
        plt.fill_between(train_sizes, mean_score - score_std, mean_score + score_std, alpha=0.2)

    plt.title(title)
    plt.xlabel('Size of Training Dataset')
    plt.ylabel('R^2 Score')
    plt.legend()
    plt.grid()
    plt.tight_layout()
    plt.show()

# One learning-curve figure per model; the Neural Network is fed the
# standardized training features, every other model the raw ones.
for model_name, model in models.items():
    curve_inputs = X_train_scaled if model_name == 'Neural Network' else X_train
    plot_learning_curve(model, curve_inputs, y_train, f'{model_name} - Learning Curve')

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
# Performance summary: one dict of metrics per fitted model
final_results = []

for model_name, model in models.items():
    # The Neural Network was fitted on standardized inputs, the rest on raw ones
    wants_scaled_inputs = model_name == 'Neural Network'
    y_train_pred = model.predict(X_train_scaled if wants_scaled_inputs else X_train)
    y_test_pred = model.predict(X_test_scaled if wants_scaled_inputs else X_test)

    # R² on both partitions, error metrics on the test partition only
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)
    test_mse = mean_squared_error(y_test, y_test_pred)
    test_mae = mean_absolute_error(y_test, y_test_pred)

    summary_row = {
        'Model': model_name,
        'Train R2': train_r2,
        'Test R2': test_r2,
        'Test MSE': test_mse,
        'Test MAE': test_mae
    }
    final_results.append(summary_row)

# Tabulate the collected rows
final_results_df = pd.DataFrame(final_results)

# Print performance summary
print("\nModel Performance Summary:")
print(final_results_df)

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec

# Load the example crop
image_path = "/content/drive/MyDrive/visual titration/v8/data/segmented_12122024_newdataset/12-1_crop_0.jpg"
image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file is missing or
# unreadable; fail here with a clear message rather than inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB for visualization

# Step 1: HSV Thresholding (Isolating Target Region)
hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
lower_bound = np.array([0, 112, 26])  # Adjust based on target color range
upper_bound = np.array([179, 255, 146])
mask = cv2.inRange(hsv, lower_bound, upper_bound)

# Step 2: Inpainting only on the background (non-ROI areas)
# NOTE(review): the inpainted background is removed again by the bitwise_and
# below, so panel c) looks like it should match panel b) - confirm intent.
mask_inv = cv2.bitwise_not(mask)
background = cv2.inpaint(image, mask_inv, inpaintRadius=3, flags=cv2.INPAINT_TELEA)
inpainted = cv2.bitwise_and(background, background, mask=mask)  # Keep only ROI

# Step 3: CLAHE (Contrast Enhancement), applied to the L channel only
lab = cv2.cvtColor(inpainted, cv2.COLOR_RGB2LAB)
l, a, b = cv2.split(lab)

clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
l_clahe = clahe.apply(l)

lab_clahe = cv2.merge((l_clahe, a, b))
enhanced = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2RGB)

# Step 4: Final Processed Image (Optimized Output)
final_output = enhanced  # Same array as `enhanced`, so panels d) and e) are identical

# Visualization with custom layout: 3 images in first row, 2 centered in second
fig = plt.figure(figsize=(15, 10))  # Adjusted size: wider for 3 images, shorter height

# Define titles with only alphabetical labels
labels = ["a)", "b)", "c)", "d)", "e)"]
images = [image, cv2.bitwise_and(image, image, mask=mask), inpainted, enhanced, final_output]

# Create a GridSpec layout: 2 rows, 6 columns (to allow precise centering)
gs = gridspec.GridSpec(2, 6, height_ratios=[1, 1])

# Row 1: three panels, two grid columns each.
# Row 2: two panels centered by starting one grid column in from the edge.
panel_slots = [gs[0, 0:2], gs[0, 2:4], gs[0, 4:6], gs[1, 1:3], gs[1, 3:5]]

panel_axes = []
for slot, panel_image, panel_label in zip(panel_slots, images, labels):
    ax = plt.subplot(slot)
    ax.imshow(panel_image)
    ax.set_title(panel_label, fontsize=18, pad=3)
    ax.axis("off")
    panel_axes.append(ax)

# Keep the individual axes available under their original names
ax1, ax2, ax3, ax4, ax5 = panel_axes

# Adjust layout to minimize spacing
plt.tight_layout(pad=1, h_pad=1, w_pad=1)  # Tighter padding to bring images closer
plt.show()

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)

In [None]:
import cv2
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import gridspec

# Define the path to a single image
image_path = "/content/drive/MyDrive/visual titration/v8/data/masked_segmented_12122024_newdataset/masked_12-1_crop_0.jpg"

# Load the image
image = cv2.imread(image_path)

# Stop this cell if the image cannot be loaded. An exception is raised instead
# of calling exit(): in a notebook exit() shuts the kernel down and discards
# every variable computed so far.
if image is None:
    raise FileNotFoundError(f"Error loading image: {image_path}")

# Path to the Excel file containing HSV ranges
hsv_excel_path = '/content/drive/MyDrive/visual titration/v8/data/hsvcodes_masked_segmented_12122024_newdataset.xlsx'

# Load the HSV ranges from the Excel file
hsv_df = pd.read_excel(hsv_excel_path)

# Remove the 'masked_' prefix to match the Excel file
corresponding_image_name = image_path.split('/')[-1].replace('masked_', '')

# Get the HSV ranges for the current image from the Excel file.
# regex=False: the file name contains '.', which would otherwise be read as
# "any character" and could match the wrong rows.
hsv_row = hsv_df[
    hsv_df['Image'].str.contains(corresponding_image_name, na=False, case=False, regex=False)
]

# Stop this cell if no HSV data is found for the image
if hsv_row.empty:
    raise ValueError(f"No HSV data found for image: {image_path}")

# Extract HSV ranges from the first matching row
h_min, s_min, v_min = hsv_row[['H_min', 'S_min', 'V_min']].values[0]
h_max, s_max, v_max = hsv_row[['H_max', 'S_max', 'V_max']].values[0]

# Resize the image to 60% of its original width and height
scale_percent = 60
width = int(image.shape[1] * scale_percent / 100)
height = int(image.shape[0] * scale_percent / 100)
scaled_image = cv2.resize(image, (width, height))

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com) ***

# Step 1: Apply Sobel Filter for Edge Detection
# The gradients are computed on a single-channel grayscale copy so the result
# is a 2-D edge map that can be displayed with a gray colormap.
gray_image = cv2.cvtColor(scaled_image, cv2.COLOR_BGR2GRAY)
sobel_x = cv2.Sobel(gray_image, cv2.CV_64F, 1, 0, ksize=5)
sobel_y = cv2.Sobel(gray_image, cv2.CV_64F, 0, 1, ksize=5)
edges = cv2.magnitude(sobel_x, sobel_y)
# Rescale the float magnitudes into 0-255 before the uint8 conversion. A bare
# np.uint8() cast wraps values above 255 around (modulo 256), which turns
# strong edges into arbitrary gray levels.
edges = cv2.normalize(edges, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

# Step 2: Remove Reflections
# Pixels with HSV value >= 200 are treated as reflections and inpainted.
hsv_image = cv2.cvtColor(scaled_image, cv2.COLOR_BGR2HSV)
reflection_mask = cv2.inRange(hsv_image, np.array([0, 0, 200]), np.array([180, 255, 255]))  # Detect bright regions
cleaned_image = cv2.inpaint(scaled_image, reflection_mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)

# Step 3: Enhance Contrast with CLAHE (applied to the L channel only)
lab_image = cv2.cvtColor(cleaned_image, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab_image)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
l = clahe.apply(l)
lab_image = cv2.merge((l, a, b))
enhanced_image = cv2.cvtColor(lab_image, cv2.COLOR_LAB2BGR)

# Step 4: Bilateral Filtering
bilateral_filtered = cv2.bilateralFilter(enhanced_image, 9, 75, 75)

# Create a mask to exclude black pixels (assuming black is [0, 0, 0] in BGR)
# NOTE(review): inRange with a lower bound of [1, 1, 1] drops every pixel
# that has ANY channel equal to 0, not only pure black - confirm this is the
# intended behaviour.
black_mask = cv2.inRange(bilateral_filtered, np.array([1, 1, 1]), np.array([255, 255, 255]))

# Apply the mask to the image
masked_image = cv2.bitwise_and(bilateral_filtered, bilateral_filtered, mask=black_mask)

# Print image processing details
print(f"\nProcessed Image: {image_path}")
print("Processing Steps Completed Successfully.")

# Five-panel figure showing the intermediate results of the pipeline
fig = plt.figure(figsize=(15, 10))

# Panel letters
labels = ["a)", "b)", "c)", "d)", "e)"]
processing_images = [
    cv2.cvtColor(scaled_image, cv2.COLOR_BGR2RGB),  # Resized input image
    edges,  # Sobel Filter Output
    cv2.cvtColor(cleaned_image, cv2.COLOR_BGR2RGB),  # Reflection Removal
    cv2.cvtColor(enhanced_image, cv2.COLOR_BGR2RGB),  # CLAHE Enhanced Image
    cv2.cvtColor(masked_image, cv2.COLOR_BGR2RGB)  # Bilateral-filtered image with black_mask applied
]

# 2 x 6 grid so that two-column-wide panels can be centered in the second row
gs = gridspec.GridSpec(2, 6, height_ratios=[1, 1])

# Row 1: three panels side by side. Row 2: two panels, offset by one grid
# column so they sit in the middle.
panel_slots = [gs[0, 0:2], gs[0, 2:4], gs[0, 4:6], gs[1, 1:3], gs[1, 3:5]]

# Extra imshow arguments per panel: only the Sobel panel asks for a gray colormap
panel_imshow_options = [{}, {'cmap': 'gray'}, {}, {}, {}]

panel_axes = []
for slot, panel_image, panel_label, imshow_options in zip(
    panel_slots, processing_images, labels, panel_imshow_options
):
    ax = plt.subplot(slot)
    ax.imshow(panel_image, **imshow_options)
    ax.set_title(panel_label, fontsize=18, pad=3)
    ax.axis("off")
    panel_axes.append(ax)

# Keep the individual axes available under their original names
ax1, ax2, ax3, ax4, ax5 = panel_axes

# Adjust layout
plt.tight_layout(pad=1, h_pad=1, w_pad=1)
plt.show()

# *** For details, contact the repository owner (Mirkan Emir Sancak, mrkn.sancak@gmail.com)