In [1]:
import tifffile
import numpy as np
import matplotlib.pyplot as plt
import os
import numpy as np

from histomicstk.preprocessing.color_normalization import deconvolution_based_normalization
from histomicstk.saliency.tissue_detection import (
    get_slide_thumbnail, get_tissue_mask)
from skimage.transform import resize

from utils.helpers import compare_two_images, cut_out_image

In [15]:
# Every non-hidden entry of the segmentation folder, as a sorted list of paths.
segmentation_paths = sorted(
    "data/segmentations/" + entry
    for entry in os.listdir("data/segmentations/")
    if not entry.startswith(".")
)

In [16]:
# Load the label masks, one array per entry of `segmentation_paths` (same order).
# Reading from `segmentation_paths` rather than `image_paths` is what makes these
# arrays the masks and not the raw images.
segmentations = [tifffile.imread(path) for path in segmentation_paths]

In [99]:
def match_image_to_segmentation(image_path):
    """
    Convert image path to corresponding segmentation path.

    The 'data/images/' directory part is swapped for
    'data/segmentations/normalized_' and '_segmented' is inserted in front of
    the file extension when that extension starts with '.tif'.

    Parameters
    ----------
    image_path : str
        Path of an image, e.g. 'data/images/CTRL_1_M1_3R__0004.tif'.

    Returns
    -------
    str
        Expected path of the matching segmentation, e.g.
        'data/segmentations/normalized_CTRL_1_M1_3R__0004_segmented.tif'.
        A path whose extension does not start with '.tif' keeps its file
        name unchanged.
    """
    seg_path = image_path.replace('data/images/', 'data/segmentations/normalized_')
    # Touch only the real extension: a plain str.replace('.tif', ...) would also
    # rewrite a '.tif' that happens to occur earlier in the file name.
    stem, ext = os.path.splitext(seg_path)
    if ext.startswith('.tif'):
        seg_path = stem + '_segmented' + ext
    return seg_path

In [98]:
# Show the discovered segmentation files for a visual sanity check.
segmentation_paths

['data/segmentations/normalized_CTRL_1_M1_3R__0004_segmented.tif',
 'data/segmentations/normalized_CTRL_3_M2_4R__0006_segmented.tif',
 'data/segmentations/normalized_CTRL_4_M6_3L__0008_segmented.tif',
 'data/segmentations/normalized_E2+DHT_1_M13_3L_0010_segmented.tif',
 'data/segmentations/normalized_E2+DHT_2_M10_3R_0002_segmented.tif',
 'data/segmentations/normalized_E2+P4_3_M12_3R_0001_segmented.tif',
 'data/segmentations/normalized_E2_1_M8_3L_0011_segmented.tif',
 'data/segmentations/normalized_E2_2_M11_4R_0001_segmented.tif']

In [2]:
# Every non-hidden entry of the image folder, as a sorted list of paths.
image_paths = sorted(
    "data/images/" + entry
    for entry in os.listdir("data/images/")
    if not entry.startswith(".")
)
image_paths

['data/images/CTRL_1_M1_3R__0004.tif',
 'data/images/CTRL_2_M6_3R__0002.tif',
 'data/images/CTRL_3_M2_4R__0006.tif',
 'data/images/CTRL_4_M6_3L__0008.tif',
 'data/images/E2+DHT_1_M13_3L_0001.tif',
 'data/images/E2+DHT_1_M13_3L_0010.tif',
 'data/images/E2+DHT_2_M10_3R_0002.tif',
 'data/images/E2+DHT_2_M13_4L_0004.tif',
 'data/images/E2+P4+DHT_1_M7_3L_0002.tif',
 'data/images/E2+P4+DHT_1_M7_3L_0013.tif',
 'data/images/E2+P4+DHT_1_M7_3L_0019.tif',
 'data/images/E2+P4+DHT_4_M14_3L_0003.tif',
 'data/images/E2+P4_2_M9_3R_0003.tif',
 'data/images/E2+P4_3_M12_3R_0001.tif',
 'data/images/E2+P4_3_M12_3R_0002.tif',
 'data/images/E2+P4_3_M12_3R_0005.tif',
 'data/images/E2_1_M8_3L_0005.tif',
 'data/images/E2_1_M8_3L_0011.tif',
 'data/images/E2_2_M11_4R_0001.tif',
 'data/images/E2_3_M5_4L_0001.tif']

In [101]:
# Keep only the images for which a segmentation file was found on disk.
available_segmentations = set(segmentation_paths)
filtered_image_paths = sorted(
    candidate
    for candidate in image_paths
    if match_image_to_segmentation(candidate) in available_segmentations
)
filtered_image_paths

['data/images/CTRL_1_M1_3R__0004.tif',
 'data/images/CTRL_3_M2_4R__0006.tif',
 'data/images/CTRL_4_M6_3L__0008.tif',
 'data/images/E2+DHT_1_M13_3L_0010.tif',
 'data/images/E2+DHT_2_M10_3R_0002.tif',
 'data/images/E2+P4_3_M12_3R_0001.tif',
 'data/images/E2_1_M8_3L_0011.tif',
 'data/images/E2_2_M11_4R_0001.tif']

In [102]:
# Read the images that have a segmentation, in the order of `filtered_image_paths`.
images = [tifffile.imread(path) for path in filtered_image_paths]

In [157]:
# compare_two_images(segmentations[0], images[0])

In [13]:
from skimage import color, filters, feature, img_as_ubyte

def extract_features(image):
    """Build a per-pixel feature stack (colour, smoothing, edges, texture).

    The ten feature planes are stacked along the last axis in this order:
    the first three channels of ``image``, the three channels of its
    ``rgb2lab`` conversion, the grayscale image Gaussian-blurred with sigma 1
    and sigma 3, the Sobel response of the grayscale image, and the uniform
    local binary pattern (P=8, R=1) of the 8-bit grayscale image.

    Parameters
    ----------
    image : array
        Image accepted by ``color.rgb2lab`` / ``color.rgb2gray``
        (presumably an H x W x 3 RGB array; confirm for images with alpha).

    Returns
    -------
    numpy.ndarray
        Feature planes stacked on a new last axis (10 planes).
    """
    lab = color.rgb2lab(image)
    grayscale = color.rgb2gray(image)
    grayscale_u8 = img_as_ubyte(grayscale)

    # Colour: raw channels followed by the Lab channels.
    raw_planes = [image[..., channel] for channel in range(3)]
    lab_planes = [lab[..., channel] for channel in range(3)]

    # Smoothed intensity: the blur is applied to the grayscale image.
    blurred_planes = [filters.gaussian(grayscale, sigma) for sigma in (1, 3)]

    # Edge strength.
    edge_plane = filters.sobel(grayscale)

    # Texture: local binary pattern on the 8-bit grayscale image.
    texture_plane = feature.local_binary_pattern(grayscale_u8, P=8, R=1, method='uniform')

    planes = [*raw_planes, *lab_planes, *blurred_planes, edge_plane, texture_plane]
    return np.stack(planes, axis=-1)

# Feature stack of the first loaded image, kept around for quick inspection.
feat_stack = extract_features(images[0])

In [158]:
# import matplotlib.pyplot as plt

# num_features_to_show = 6
# fig, axes = plt.subplots(1, num_features_to_show, figsize=(15, 5))

# for i in range(num_features_to_show):
#     ax = axes[i]
#     ax.imshow(feat_stack[..., i], cmap='gray')
#     ax.set_title(f'Feature {i}')
#     ax.axis('off')

# plt.tight_layout()
# plt.show()

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Build a class-balanced pixel training set from every (image, mask) pair.

# Upper bound on the number of pixels drawn per class and per image.
MAX_SAMPLES_PER_CLASS = 25000
# Seeded generator so that re-running the cell draws the same pixels.
rng = np.random.default_rng(0)

X_all, y_all = [], []

for img, mask in zip(images, segmentations):
    feat_stack = extract_features(img)
    H, W, F = feat_stack.shape

    # One row of F features per pixel.
    X = feat_stack.reshape(-1, F)

    # Fold label 2 into label 0 on a private copy: the arrays stored in
    # `segmentations` keep their original labels, so the cell can be re-run.
    y = mask.reshape(-1).copy()
    y[y == 2] = 0

    # Labels must line up one-to-one with the feature rows.
    if y.shape[0] != X.shape[0]:
        raise ValueError(
            f"mask of shape {mask.shape} does not match image of shape {(H, W)}"
        )

    class0_idx = np.flatnonzero(y == 0)
    class1_idx = np.flatnonzero(y == 1)

    # Draw the same number of pixels from each class, limited by the rarer
    # class and by the per-image cap.
    n_samples_per_class = min(len(class0_idx), len(class1_idx), MAX_SAMPLES_PER_CLASS)
    sampled_idx = np.concatenate([
        rng.choice(class0_idx, n_samples_per_class, replace=False),
        rng.choice(class1_idx, n_samples_per_class, replace=False)
    ])

    X_all.append(X[sampled_idx])
    y_all.append(y[sampled_idx])

X_all = np.concatenate(X_all)
y_all = np.concatenate(y_all)

print("Training data shape:", X_all.shape, y_all.shape)


Training data shape: (400000, 10) (400000,)


In [121]:
# Hold out 20% of the sampled pixels, stratified by label.
# NOTE: the held-out pixels are a random subset of the pooled per-image samples,
# i.e. they come from the same images as the training pixels, so the report
# below does not measure generalisation to unseen images.
X_train, X_val, y_train, y_val = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    stratify=y_all,
    random_state=0,
)

# Random forest with unbounded depth, balanced class weights, all cores.
forest_params = dict(
    n_estimators=200,
    max_depth=None,
    class_weight="balanced",
    n_jobs=-1,
    random_state=0,
)
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out pixels.
y_pred = clf.predict(X_val)
print(classification_report(y_val, y_pred))

              precision    recall  f1-score   support

           0       0.95      0.87      0.91     40000
           1       0.88      0.95      0.92     40000

    accuracy                           0.91     80000
   macro avg       0.92      0.91      0.91     80000
weighted avg       0.92      0.91      0.91     80000



In [9]:
def predict_mask(image, model):
    """Classify every pixel of an image and return the labels as a 2-D mask.

    Parameters
    ----------
    image : array
        Image handed to ``extract_features``.
    model : object
        Fitted estimator exposing ``predict(X)`` for an
        ``(n_pixels, n_features)`` array and returning one label per row.

    Returns
    -------
    array
        Predicted labels reshaped to the image's (height, width). The label
        values are whatever ``model.predict`` emits.
    """
    pixel_features = extract_features(image)
    height, width, n_features = pixel_features.shape

    # Flatten to one feature row per pixel, classify, then restore the layout.
    flat_features = pixel_features.reshape(-1, n_features)
    return model.predict(flat_features).reshape(height, width)

In [123]:
# Quick smoke test: predict a mask for the second loaded image and list the labels it contains.
# NOTE(review): if `images` still holds the images used for training at this point,
# this is not an independent test image - confirm before drawing conclusions.
test_img = images[1]
test_pred = predict_mask(test_img, clf)
np.unique(test_pred)

array([0, 1], dtype=uint8)

In [159]:
# compare_two_images(test_pred, test_img)

In [10]:
from skimage.morphology import remove_small_objects, remove_small_holes, binary_closing, disk
from skimage.transform import resize

def clean_mask(mask, min_object_size=10000, max_hole_area=1000, closing_radius=2):
    """Post-process a predicted mask into a cleaned boolean foreground mask.

    The mask is cast to bool (any non-zero label counts as foreground), then
    passed through ``remove_small_objects``, ``remove_small_holes`` and a
    binary closing with a disk-shaped footprint, in that order.

    Parameters
    ----------
    mask : numpy.ndarray
        Predicted label mask.
    min_object_size : int, optional
        Forwarded as ``min_size`` to ``remove_small_objects``. Default 10000.
    max_hole_area : int, optional
        Forwarded as ``area_threshold`` to ``remove_small_holes``. Default 1000.
    closing_radius : int, optional
        Radius of the disk footprint used for the closing. Default 2.

    Returns
    -------
    numpy.ndarray
        The cleaned mask as returned by ``binary_closing``.

    Notes
    -----
    The thresholds are expressed in pixels, so they depend on image
    resolution; the defaults are the values originally hard-coded here.
    """
    foreground = mask.astype(bool)
    foreground = remove_small_objects(foreground, min_size=min_object_size)
    foreground = remove_small_holes(foreground, area_threshold=max_hole_area)
    # Closing smooths the outline and bridges narrow gaps.
    return binary_closing(foreground, disk(radius=closing_radius))

In [160]:
# clean_test_pred = clean_mask(test_pred)
# compare_two_images(clean_test_pred, test_img)

In [161]:
# for img in images:
#     plt.figure(figsize=(12, 6))

#     pred_mask = predict_mask(img, clf)
#     pred_mask = clean_mask(pred_mask)

#     compare_two_images(pred_mask, img, "Predicted mask", "Original image")

In [6]:
from joblib import dump, load
from pathlib import Path

# Where the fitted classifier is persisted: models/decision_tree_model.joblib.
# NOTE(review): despite the "decision_tree" file name, the estimator this notebook
# trains and dumps is a RandomForestClassifier; renaming would change the on-disk path.
model_dir = Path('models')
model_name = 'decision_tree_model.joblib'
model_path = model_dir / model_name

In [None]:
# Make sure the target folder exists, then persist the fitted classifier.
# `model_path` is reused instead of re-spelling the file name, so the file
# written here is exactly the one read back by `load(model_path)`.
model_dir.mkdir(parents=True, exist_ok=True)

dump(clf, model_path)

['models/decision_tree_model.joblib']

In [3]:
# Read every image listed in `image_paths` (this rebinds `images` to the full set).
images = [tifffile.imread(path) for path in image_paths]

In [7]:
# Restore the persisted classifier from `model_path` and display it.
# NOTE(review): joblib files are pickle-based - only load model files from a trusted source.
clf = load(model_path)
clf

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [19]:
import os

# Output folder for the predicted masks; created (with parents) if it does not exist yet.
mask_dir = Path('data/masks')
mask_dir.mkdir(parents=True, exist_ok=True)

In [22]:
# Show the image paths again for a visual check before writing masks.
image_paths

['data/images/CTRL_1_M1_3R__0004.tif',
 'data/images/CTRL_2_M6_3R__0002.tif',
 'data/images/CTRL_3_M2_4R__0006.tif',
 'data/images/CTRL_4_M6_3L__0008.tif',
 'data/images/E2+DHT_1_M13_3L_0001.tif',
 'data/images/E2+DHT_1_M13_3L_0010.tif',
 'data/images/E2+DHT_2_M10_3R_0002.tif',
 'data/images/E2+DHT_2_M13_4L_0004.tif',
 'data/images/E2+P4+DHT_1_M7_3L_0002.tif',
 'data/images/E2+P4+DHT_1_M7_3L_0013.tif',
 'data/images/E2+P4+DHT_1_M7_3L_0019.tif',
 'data/images/E2+P4+DHT_4_M14_3L_0003.tif',
 'data/images/E2+P4_2_M9_3R_0003.tif',
 'data/images/E2+P4_3_M12_3R_0001.tif',
 'data/images/E2+P4_3_M12_3R_0002.tif',
 'data/images/E2+P4_3_M12_3R_0005.tif',
 'data/images/E2_1_M8_3L_0005.tif',
 'data/images/E2_1_M8_3L_0011.tif',
 'data/images/E2_2_M11_4R_0001.tif',
 'data/images/E2_3_M5_4L_0001.tif']

In [25]:
# Bare file names (no directory) of the non-hidden entries in data/images/, sorted.
paths = sorted(
    entry for entry in os.listdir("data/images/") if not entry.startswith(".")
)
paths

['CTRL_1_M1_3R__0004.tif',
 'CTRL_2_M6_3R__0002.tif',
 'CTRL_3_M2_4R__0006.tif',
 'CTRL_4_M6_3L__0008.tif',
 'E2+DHT_1_M13_3L_0001.tif',
 'E2+DHT_1_M13_3L_0010.tif',
 'E2+DHT_2_M10_3R_0002.tif',
 'E2+DHT_2_M13_4L_0004.tif',
 'E2+P4+DHT_1_M7_3L_0002.tif',
 'E2+P4+DHT_1_M7_3L_0013.tif',
 'E2+P4+DHT_1_M7_3L_0019.tif',
 'E2+P4+DHT_4_M14_3L_0003.tif',
 'E2+P4_2_M9_3R_0003.tif',
 'E2+P4_3_M12_3R_0001.tif',
 'E2+P4_3_M12_3R_0002.tif',
 'E2+P4_3_M12_3R_0005.tif',
 'E2_1_M8_3L_0005.tif',
 'E2_1_M8_3L_0011.tif',
 'E2_2_M11_4R_0001.tif',
 'E2_3_M5_4L_0001.tif']

In [26]:
# Predict, clean and save one mask per image as data/masks/mask_<image file name>.
# File names are derived from `image_paths` instead of a separately re-listed
# directory, and the lengths are checked first: zip() would otherwise silently
# truncate or pair an image with the wrong file name.
if len(images) != len(image_paths):
    raise ValueError(
        f"{len(images)} images loaded but {len(image_paths)} image paths listed; "
        "reload `images` from `image_paths` before saving masks"
    )

for image, image_path in zip(images, image_paths):

    pred_mask = predict_mask(image, clf)
    pred_mask = clean_mask(pred_mask)

    save_path = os.path.join(mask_dir, f"mask_{os.path.basename(image_path)}")

    tifffile.imwrite(save_path, pred_mask)