In [None]:
import os
import libs.data as data
import cv2
import matplotlib.pyplot as plt
import json
# Show the current working directory: the data and image paths used in the
# cells below ("exports/...", "static/...") are relative to it.
os.getcwd()


In [None]:
from importlib import reload
import libs.plot_context as pc
import localizable_resources as lr

def reload_libs_env():
    """Re-read the ``.env`` file and reload the project helper modules.

    ``load_dotenv`` is called with ``override=True``, so values from ``.env``
    replace variables that are already set. ``pc`` and ``lr`` are then
    re-imported so edits to them are picked up without restarting the kernel.
    """
    # Imported lazily, at the moment the environment is refreshed.
    from dotenv import load_dotenv

    load_dotenv(".env", override=True)

    for module in (pc, lr):
        reload(module)


reload_libs_env()

In [None]:
# Shared plot styling. NOTE(review): the positional arguments of pc.rc_sizes
# look like three font sizes followed by a [width, height] figure size;
# confirm against libs.plot_context.
global_sizes = pc.rc_sizes(16, 21, 24, [8, 8])
# Plot context built from those sizes; reload_libs_env is handed over as a
# callback (presumably so the context can refresh the libs; verify in pc).
MyPlot = pc.create_plot_context(global_sizes, reload_libs_env)

In [None]:
df = data.load_data_from_files(debug=True)

# Order the answers by the respondent's experience.
filtered_df = df.sort_values(by=["experience"])

# Keep only the answers that are usable as thresholds:
#   * not cancelled,
#   * not entered under a name containing "Test" (case-insensitive; rows
#     without a name are kept),
#   * clicked coordinates inside [0, 256), because they are later cast to
#     uint8 bounds for cv2.inRange.
not_canceled = ~filtered_df["canceled"]
not_test_user = ~filtered_df["name"].str.contains("Test", case=False, na=False)
x_in_range = (0 <= filtered_df["clicked_x"]) & (filtered_df["clicked_x"] < 256)
y_in_range = (0 <= filtered_df["clicked_y"]) & (filtered_df["clicked_y"] < 256)
filtered_df = filtered_df[not_canceled & not_test_user & x_in_range & y_in_range]

# DataFrame.to_csv does not create missing directories, so make sure the
# export folder exists before writing.
os.makedirs("exports", exist_ok=True)
filtered_df.to_csv("exports/c_min_k_max_params.csv", index=False)

filtered_df

## Processing one single image for testing

In this section we develop the code that averages the binarized versions of a single image, where each version is binarized with the $C_{min}$ and $K_{max}$ parameters chosen by a different user.

In [None]:
percentage = 50

# Index of the sample image to process (0, 1 or 2).
img = 0

sample_filenames = {
    0: "2-SMC-1-AL_72,99m_n=_2,5x_cesc",
    1: "2-SMC-1-AL_73,97m_n=_2,5x_cesc",
    2: "72.53_jpeg_escal",
}
if img not in sample_filenames:
    raise ValueError("img must be 0, 1, or 2")
img_filename = sample_filenames[img]

# Build the path once so the existence check, the error message and the
# actual read can never disagree.
image_path = f"static/imgs_sections/cuts/no-border/{percentage}/{img_filename}.jpg"
if not os.path.exists(image_path):
    raise ValueError(f"File {image_path} does not exist. Please run 'make-images.ipynb' to create it.")

# cv2.imread does not raise on failure; it returns None, which would only
# surface later as an obscure TypeError when the array is sliced.
image = cv2.imread(image_path)
if image is None:
    raise ValueError(f"File {image_path} exists but could not be decoded as an image.")

# OpenCV loads images as BGR; reverse the channel axis so matplotlib shows RGB.
plt.imshow(image[:, :, ::-1])

In [None]:
import numpy as np
import libs.colorspace as colorspace

def compute_porosity(binary_image):
    """Return the fraction of the image area covered by the mask.

    The sum of all pixel values is divided by ``rows * cols * 255``, so for a
    mask holding only 0 and 255 the result is the share of pixels set to 255.
    """
    rows, cols = binary_image.shape[0], binary_image.shape[1]
    full_scale = rows * cols * 255
    return np.sum(binary_image) / full_scale

def compute_components(binary_image):
    """Count the connected components of a binary image, background excluded."""
    label_count, _ = cv2.connectedComponents(binary_image)
    # The background receives a label of its own, so drop it from the count.
    return label_count - 1

def compute_mean_image(image, max_ks, min_cs):
    """Average the binary masks produced by every (k, c) threshold pair.

    The image is converted to CMYK once. For each pair, a pixel is selected
    when its first channel is at least ``c``, its third channel is at most 64
    and its fourth channel is at most ``k`` (assuming ``bgr2cmyk`` returns the
    channels in C, M, Y, K order). The resulting masks are averaged.

    Parameters
    ----------
    image : array
        BGR image; its first two dimensions define the output size.
    max_ks : sequence
        Upper bound of the fourth (K) channel, one value per pair.
    min_cs : sequence
        Lower bound of the first (C) channel, one value per pair.

    Returns
    -------
    tuple
        ``(mean_image, porosities, component_counts)``: the float32 per-pixel
        mean of the masks, and the porosity and connected-component count of
        each individual mask, in input order.

    Raises
    ------
    ValueError
        If ``max_ks`` and ``min_cs`` differ in length.
    """
    if len(max_ks) != len(min_cs):
        raise ValueError("max_ks and min_cs must have the same length")

    total = len(max_ks)

    mean_image = np.zeros(image.shape[0:2], dtype=np.float32)

    porosities = []
    component_counts = []

    # The colour-space conversion does not depend on the thresholds, so it is
    # done once instead of once per (k, c) pair.
    img_cmyk = colorspace.bgr2cmyk(image.astype(np.uint8))

    for it, (k, c) in enumerate(zip(max_ks, min_cs)):
        print(it, k, c)

        lower_range = np.array([  c,   0,   0,   0], dtype=np.uint8)
        upper_range = np.array([255, 255,  64,   k], dtype=np.uint8)
        binary_mask = cv2.inRange(img_cmyk, lower_range, upper_range)

        porosities.append(compute_porosity(binary_mask))
        component_counts.append(compute_components(binary_mask))

        # Accumulate each mask's share directly, so the running sum is
        # already the mean when the loop ends.
        mean_image += binary_mask / total

    return mean_image, porosities, component_counts


In [None]:
# Every remaining answer contributes one threshold pair:
# clicked_x is used as the K upper bound, clicked_y as the C lower bound.
mean_image_main, porosities, components = compute_mean_image(
    image,
    max_ks=filtered_df["clicked_x"],
    min_cs=filtered_df["clicked_y"],
)

In [None]:
porosity_values = np.array(porosities)

# Unweighted mean and spread of the per-user porosities.
mu = np.average(porosity_values)
squared_dev = (porosity_values - mu) ** 2
sigma = np.sqrt(np.average(squared_dev))

print(f"mu = {mu}")
print(f"sigma = {sigma}")

# Same statistics, with every user weighted by their experience.
experience = np.array(filtered_df["experience"])
mu_weighted = np.average(porosity_values, weights=experience)
squared_dev_weighted = (porosity_values - mu_weighted) ** 2
sigma_weighted = np.sqrt(np.average(squared_dev_weighted, weights=experience))

print(f"mu_weighted = {mu_weighted}")
print(f"sigma_weighted = {sigma_weighted}")

# Distribution of the raw porosities, then of their deviation from the
# weighted mean.
plt.hist(porosity_values, bins=20)
plt.show()
plt.hist(porosity_values - mu_weighted, bins=20)
plt.show()

In [None]:
# Per-mask statistics stored next to the rendered mean image.
superposition_stats = {
    "porosities": porosities,
    "components": components,
}
mean_jpg_path = f"static/imgs_sections/cuts/no-border/{percentage}/{img_filename}_superposition_mean.jpg"
data_json_path = f"static/imgs_sections/cuts/no-border/{percentage}/{img_filename}_superposition_data.json"

# Preview and file use the same colormap and the same fixed 0..255 range.
plt.imshow(mean_image_main, cmap='CMRmap', vmin=0, vmax=255)
plt.imsave(mean_jpg_path, mean_image_main, cmap='CMRmap', vmin=0, vmax=255)
with open(data_json_path, 'w') as json_file:
    json.dump(superposition_stats, json_file)

In [None]:
import numpy as np
import libs.colorspace as colorspace

def compute_std_image(image, max_ks, min_cs, mean_image):
    """Per-pixel sample standard deviation of the masks around ``mean_image``.

    The masks are rebuilt exactly as in the mean computation: the image is
    converted to CMYK and, for each (k, c) pair, thresholded with
    ``cv2.inRange`` between ``[c, 0, 0, 0]`` and ``[255, 255, 64, k]``.
    The squared deviations from ``mean_image`` are summed and divided by
    ``n - 1``.

    Parameters
    ----------
    image : array
        BGR image; its first two dimensions define the output size.
    max_ks : sequence
        Upper bound of the fourth channel, one value per pair.
    min_cs : sequence
        Lower bound of the first channel, one value per pair.
    mean_image : array
        Per-pixel mean of the same masks, with the same height and width.

    Returns
    -------
    array
        Per-pixel standard deviation with the image's height and width.

    Raises
    ------
    ValueError
        If ``max_ks`` and ``min_cs`` differ in length, or if fewer than two
        pairs are given (the ``n - 1`` denominator would be zero or negative).
    """
    if len(max_ks) != len(min_cs):
        raise ValueError("max_ks and min_cs must have the same length")

    total = len(max_ks)
    if total < 2:
        # Dividing by (total - 1) would yield inf/NaN pixels for one pair and
        # is meaningless for none; fail loudly instead of saving garbage.
        raise ValueError("at least two (k, c) pairs are required to compute a sample standard deviation")

    variance_image = np.zeros(image.shape[0:2], dtype=np.float32)

    # The colour-space conversion does not depend on the thresholds, so it is
    # done once instead of once per (k, c) pair.
    img_cmyk = colorspace.bgr2cmyk(image.astype(np.uint8))

    for it, (k, c) in enumerate(zip(max_ks, min_cs)):
        print(it, k, c)

        lower_range = np.array([  c,   0,   0,   0], dtype=np.uint8)
        upper_range = np.array([255, 255,  64,   k], dtype=np.uint8)
        binary_mask = cv2.inRange(img_cmyk, lower_range, upper_range)

        variance_image += (binary_mask - mean_image)**2 / (total - 1)

    std_image = np.sqrt(variance_image)
    return std_image


In [None]:
# Same threshold pairs as for the mean image, measured around that mean.
std_image_main = compute_std_image(
    image,
    max_ks=filtered_df["clicked_x"],
    min_cs=filtered_df["clicked_y"],
    mean_image=mean_image_main,
)

In [None]:
# Preview the per-pixel standard deviation (no colormap is passed, so
# matplotlib's default is used).
plt.imshow(std_image_main)
filename = f"static/imgs_sections/cuts/no-border/{percentage}/{img_filename}_superposition_stdev.jpg"
print(f"Saving {filename}")
# Write the same array next to the source image, rendered with 'viridis'.
plt.imsave(filename, std_image_main, cmap='viridis')

### Measuring porosity

## Processing all image cuts

In [None]:
import glob
import os

# Set to True to regenerate outputs even when they already exist on disk.
FORCE_RECOMPUTE = False

# Path where your JPG files are located
path = "static/imgs_sections/cuts"

# Get all .jpg files recursively
jpg_files = glob.glob(os.path.join(path, "**", "*.jpg"), recursive=True)

for file in jpg_files:
    if "superposition" in file:
        continue  # Skip the mean/stdev images written by this very loop

    print(f"Processing {file}...")
    name = os.path.splitext(os.path.basename(file))[0]  # filename without extension
    dirname = os.path.dirname(file)  # directory name
    mean_path = os.path.join(dirname, f"{name}_superposition_mean.jpg")
    stdev_path = os.path.join(dirname, f"{name}_superposition_stdev.jpg")

    if os.path.exists(mean_path) and os.path.exists(stdev_path) and not FORCE_RECOMPUTE:
        print("    Mean and stdev images already exist, skipping.")
        continue

    # Decode the image once and reuse it for both statistics. cv2.imread
    # returns None instead of raising when a file cannot be decoded, so skip
    # such files rather than aborting the whole batch.
    source_image = cv2.imread(file)
    if source_image is None:
        print(f"    Could not read {file}, skipping.")
        continue

    mean_img, porosities, components = compute_mean_image(
        source_image,
        filtered_df["clicked_x"],
        filtered_df["clicked_y"]
    )
    plt.imsave(mean_path, mean_img, cmap='CMRmap', vmin=0, vmax=255)
    with open(os.path.join(dirname, f"{name}_superposition_data.json"), 'w') as f:
        json.dump({
            "porosities": porosities,
            "components": components
        }, f, indent=4)
    print(f"    Saved {mean_path}")

    stdev_img = compute_std_image(
        source_image,
        filtered_df["clicked_x"],
        filtered_df["clicked_y"],
        mean_img
    )
    plt.imsave(stdev_path, stdev_img, cmap='viridis')
    print(f"    Saved {stdev_path}")

In [None]:
import glob
import os
import matplotlib.pyplot as plt

FORCE_RECOMPUTE = False

# Path where your JPG files are located
path = "static/imgs_sections/cuts"

# Get all _superposition_data.json files
json_files = glob.glob(os.path.join(path, "**", "*_superposition_data.json"), recursive=True)

for file in json_files:
    # Do not call this variable `data`: that name is the alias of the
    # `libs.data` module imported at the top of the notebook, and rebinding it
    # breaks the data-loading cell when it is re-run afterwards.
    with open(file, 'r') as f:
        superposition_data = json.load(f)
    print(f"Processing {file}...")
    porosities = superposition_data.get("porosities", [])
    components = superposition_data.get("components", [])

    # Empirical CDF: the sorted values on the x axis against evenly spaced
    # cumulative fractions in [0, 1) on the y axis.
    cdf_porosity = np.sort(porosities)
    cdf_components = np.sort(components)
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.plot(cdf_porosity, np.linspace(0, 1, len(cdf_porosity), endpoint=False), marker='.', linestyle='none')
    plt.xlabel('Porosity')
    plt.ylabel('CDF')
    plt.title('CDF of Porosity')
    plt.grid(True)

    plt.subplot(1, 2, 2)
    plt.plot(cdf_components, np.linspace(0, 1, len(cdf_components), endpoint=False), marker='.', linestyle='none', color='orange')
    plt.xlabel('Component Count')
    plt.ylabel('CDF')
    plt.title('CDF of Components')
    plt.grid(True)

    plt.tight_layout()
    plt.show()