In [4]:
import os
import numpy as np
from spatialdata import read_zarr

ImportError: Dask dataframe requirements are not installed.

Please either conda or pip install as follows:

  conda install dask                     # either conda install
  python -m pip install "dask[dataframe]" --upgrade  # or python -m pip install

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin

class Feature_extractor(BaseEstimator, TransformerMixin):
    """Turn single-channel images into stacks of per-pixel feature maps.

    For each image the transformer produces, in this order:

    1. the min-max normalised image,
    2. the x coordinate of every pixel divided by the widest fitted image,
    3. the y coordinate of every pixel divided by the tallest fitted image,
    4. one map per entry of ``functions``, computed on the normalised image.

    Parameters
    ----------
    functions : dict
        Maps a callable to the keyword arguments it is invoked with. Each
        entry is evaluated as ``f(img, **params)`` and must return an array
        with the same shape as ``img`` so the maps can be stacked.
    to_flatten : bool, default=False
        If False, each image yields an array of shape
        ``(n_features, H, W)``. If True, every map is flattened and each
        image yields ``(H * W, n_features)``.

    Notes
    -----
    Singleton dimensions are squeezed away, and the squeezed image must be
    2-D. The final ``np.array`` call only yields a regular array when all
    images share the same shape.
    """

    def __init__(self, functions, to_flatten=False):
        super().__init__()
        self.to_flatten = to_flatten
        self.functions = functions

    @staticmethod
    def _squeezed_shape(img):
        """Return the shape ``img`` has once singleton axes are dropped."""
        return tuple(dim for dim in img.shape if dim != 1)

    def fit(self, X, y=None):
        """Record the largest image height and width found in ``X``.

        The sizes are measured on the squeezed shape, i.e. the same shape
        ``transform`` works with; otherwise a leading singleton axis would
        be mistaken for the image height.
        """
        shapes = [self._squeezed_shape(img) for img in X]
        self.max_height = max(shape[0] for shape in shapes)
        self.max_width = max(shape[1] for shape in shapes)
        return self

    def transform(self, X):
        """Compute the feature maps for every image in ``X``.

        Returns
        -------
        numpy.ndarray
            One entry per image, laid out as described for ``to_flatten``.
        """
        all_features = []

        for img in X:
            image_features = []

            img = img.astype(np.float32).squeeze()

            lowest = img.min()
            shifted = img - lowest
            span = img.max() - lowest
            # A constant image has span == 0; dividing would give 0/0 = NaN
            # everywhere. `shifted` is already all zeros in that case.
            img = shifted / span if span != 0 else shifted
            image_features.append(img)

            h, w = img.shape

            # meshgrid returns (h, w) grids holding each pixel's column / row.
            x_coords, y_coords = np.meshgrid(np.arange(w), np.arange(h))
            x_coords = x_coords / self.max_width
            y_coords = y_coords / self.max_height
            image_features.append(x_coords)
            image_features.append(y_coords)

            for f, params in self.functions.items():
                image_features.append(f(img, **params))

            if self.to_flatten:
                image_features = [i.flatten() for i in image_features]
                all_features.append(np.stack(image_features, axis=1))
            else:
                all_features.append(np.stack(image_features))

        return np.array(all_features)

In [None]:
from sklearn.ensemble import RandomForestClassifier

class Custom_Random_Forest(RandomForestClassifier):
    """Random forest that classifies every pixel of a batch of feature maps.

    Inputs are 4-D arrays laid out as ``(images, features, H, W)``. Each
    pixel becomes one sample whose feature vector is read across the
    ``features`` axis; predictions are returned as ``(images, H, W)``.
    """

    @staticmethod
    def _to_pixel_rows(X):
        """Reshape ``(images, features, H, W)`` into one row per pixel.

        Returns the ``(images * H * W, features)`` matrix together with the
        ``(images, H, W)`` grid needed to fold predictions back.
        """
        X = np.asarray(X)
        images, features, H, W = X.shape
        # Move the feature axis last so each pixel's features are contiguous.
        rows = np.transpose(X, (0, 2, 3, 1)).reshape(images * H * W, features)
        return rows, (images, H, W)

    def fit(self, X, y, sample_weight = None):
        """Fit the forest on every pixel of ``X``.

        Parameters
        ----------
        X : array-like of shape (images, features, H, W)
        y : array-like holding ``images * H * W`` labels
            May be an array or a sequence of per-image label arrays; it is
            converted with ``np.asarray`` before being flattened, so a plain
            list no longer fails on the missing ``reshape`` attribute.
        sample_weight : array-like or None
            Optional per-pixel weights; flattened in the same pixel order
            as ``y``.
        """
        rows, (images, H, W) = self._to_pixel_rows(X)
        y = np.asarray(y).reshape(images * H * W)
        if sample_weight is not None:
            sample_weight = np.asarray(sample_weight).reshape(-1)

        return super().fit(rows, y, sample_weight)

    def predict(self, X):
        """Predict a label for every pixel; returns shape ``(images, H, W)``."""
        rows, grid = self._to_pixel_rows(X)
        pred = super().predict(rows)
        return pred.reshape(grid)

In [None]:
from scipy.ndimage import gaussian_filter, gaussian_gradient_magnitude, gaussian_laplace

# Every Gaussian-based filter is applied with the same sigma; each one gets
# its own keyword-argument dict.
trans = Feature_extractor(
    functions={
        image_filter: {'sigma': 1}
        for image_filter in (
            gaussian_filter,
            gaussian_gradient_magnitude,
            gaussian_laplace,
        )
    }
)

In [None]:
from sklearn.pipeline import Pipeline

# Feature extraction followed by per-pixel classification.
# random_state pins the forest's random sampling so that re-running the
# notebook trains the same model instead of a different one each time.
pipe = Pipeline([
    ("Feature extractor", trans),
    ("classifier", Custom_Random_Forest(n_estimators=50, n_jobs=-1, max_depth=10, random_state=42))
])

In [None]:
from skimage.transform import resize

def resize_image_channelwise(image, new_shape=(64, 64), max_channels=30):
    """Resize a channels-last image one channel at a time.

    Parameters
    ----------
    image : array indexed as ``image[:, :, c]``
        Only the first ``max_channels`` channels are processed.
    new_shape : tuple
        Target shape handed to ``resize`` for every channel.
    max_channels : int
        Upper bound on the number of channels kept.

    Returns
    -------
    numpy.ndarray
        The resized channels stacked along the last axis.
    """
    n_channels = min(image.shape[-1], max_channels)
    planes = [
        resize(image[:, :, idx], new_shape, preserve_range=True, anti_aliasing=True)
        for idx in range(n_channels)
    ]
    return np.stack(planes, axis=-1)

In [None]:
# Collect one image array and one annotation array per ".zarr" store.
data = []
labels = []

new_chunks = (512, 512)
data_dir = r"E:\data\train"


# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and the `sdata` left bound after the loop) stable between runs.
for arg in sorted(os.listdir(data_dir)):
    path = os.path.join(data_dir, arg)
    if os.path.isdir(path) and arg.endswith(".zarr"):
        sdata = read_zarr(path)

        # Only the first image of each store is used.
        # The leading chunk of 1 presumably addresses a channel axis — TODO confirm.
        data.append(list(sdata.images.values())[0].data.rechunk((1, *new_chunks)))

        ann = sdata["annotations"].data.rechunk(new_chunks)
        labels.append(ann)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from dask.distributed import Client
import joblib

# Create a dask.distributed client using its default configuration.
client = Client()

# Fit the pipeline while joblib dispatches its parallel work to dask.
with joblib.parallel_backend(backend='dask'):
    pipe.fit(X=data, y=labels)

# Persist the fitted pipeline next to the notebook.
joblib.dump(value=pipe, filename="random_forest.pkl")

In [None]:
from PIL import Image

# Preview an annotation mask as an image.
# NOTE(review): `sdata` is not created in this cell; it is whatever the
# data-loading loop bound last, so this only runs after that cell and shows
# the annotations of the final store that was read — confirm that is intended.
data_test = sdata['annotations'].values

# NOTE(review): presumably `data_test` is a 2-D array with a dtype PIL
# accepts — verify before relying on this preview.
img = Image.fromarray(data_test)
img.show()