# Medical Image - TFM
<h4>subtitle: Generación de una tubería distribuida para la extracción de características en imágenes médicas patológicas</h4>
license: European Union Public Licence (EUPL) v1.2

<table>
  <tr> <td> author name: </td> <td> Israel Llorens </td> </tr>
  <tr> <td> email: </td> <td> sanchezis@hotmail.com </td> </tr>
</table>

<h6>date: 2024/03/22</h6>

---

In [1]:
%load_ext autoreload
%autoreload

In [3]:
# Copyright (c) 2024 Israel Llorens
# Licensed under the EUPL-1.2  

# Module-level metadata: author contact, copyright notice and licence identifier.
__author__ = "Israel Llorens <sanchezis@hotmail.com>"
__copyright__ = "Copyright 2024, Israel Llorens"
__license__ = "EUPL-1.2"

import logging
import os

import pandas as pd
import glob
import pyspark.sql.functions as F
import pyspark.sql.types as T

from pyspark.sql.functions import udf
from pyspark.sql.types import BooleanType

from tiatoolbox import logger
from tiatoolbox.models.architecture.unet import UNetModel
from tiatoolbox.models.engine.semantic_segmentor import (
    IOSegmentorConfig,
    SemanticSegmentor,
)
from tiatoolbox.utils.misc import download_data, imread
from tiatoolbox.utils.visualization import overlay_prediction_mask
from tiatoolbox.wsicore.wsireader import WSIReader

# Convenience imports of frequently used PySpark names.
# Only a missing or broken PySpark installation is tolerated here: the failure
# is logged and execution continues. Any other error propagates instead of
# being silently swallowed by a bare `except`.
try:
    import pyspark
    import pyspark.sql.functions as F
    from pyspark.sql.functions import (
        array_remove,
        col,
        collect_list,
        count,
        dayofweek,
        expr,
        hour,
        isnan,
        lit,
        lower,
        month,
        pandas_udf,
        split,
        to_date,
        when,
        year,
    )
    from pyspark.sql.types import (
        ArrayType,
        DoubleType,
        FloatType,
        IntegerType,
        LongType,
        StringType,
        StructField,
        StructType,
    )
except ImportError as import_error:
    logging.warning("Optional PySpark imports are unavailable: %s", import_error)


# Clear logger to use tiatoolbox.logger
import logging
import warnings

# Detach every handler currently registered on the root logger (if any).
if logging.root.hasHandlers():
    logging.root.handlers.clear()

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import torch
from matplotlib import cm

from tiatoolbox import logger
from tiatoolbox.models.architecture.unet import UNetModel
from tiatoolbox.models.engine.semantic_segmentor import (
    IOSegmentorConfig,
    SemanticSegmentor,
)
from tiatoolbox.utils.misc import download_data, imread
from tiatoolbox.utils.visualization import overlay_prediction_mask
from tiatoolbox.wsicore.wsireader import WSIReader

from urllib import request
import certifi
import ssl
import os

import numpy as np
import histomicstk as htk
import skimage
import scipy as sp


# Local file name under which the downloaded weights are stored.
model_file_name = "model.pth"

# Relative location of the `patient_extracts` data directory.
images_path = os.path.join("..", "data", "patient_extracts")

# Report the weights file name and whether it is already present on disk.
logging.info(f"******** MODEL: {model_file_name}   -----  {os.path.exists(model_file_name)}")

# Fetch the weights only when no local copy exists yet.
# NOTE(review): the downloaded file is not handed to the segmentor built
# below, which selects its weights through `pretrained_model` -- confirm the
# download is still required.
if not os.path.exists(model_file_name):
    download_data(
        "https://tiatoolbox.dcs.warwick.ac.uk//models/seg/fcn-tissue_mask.pth",
        model_file_name,
        overwrite=True,
    )

# Segmentor created from a named pretrained model. Loader workers are set to 0
# to avoid multiprocessing when the segmentor is used inside a UDF.
bcc_segmentor = SemanticSegmentor(
    batch_size=4,
    num_loader_workers=0,
    pretrained_model="fcn_resnet50_unet-bcss",
)



INFO:root:******** MODEL: model.pth   -----  True
  saved_state_dict = torch.load(pretrained_weights, map_location="cpu")



In [5]:
import random

# Absolute path of `data/patient_extracts/` located under the parent of the
# current working directory.
images_path = os.path.join(
    os.path.dirname(os.getcwd()),
    'data',
    'patient_extracts/',
)

# Names of the files directly inside `images_path` (top level only).
# `next` consumes just the first entry yielded by `os.walk` instead of
# traversing the whole tree; the default covers a missing directory, for
# which `os.walk` yields nothing.
images_list = next(os.walk(images_path), (images_path, [], []))[2]

if not images_list:
    raise FileNotFoundError(f"No image files found in {images_path!r}")

# Pick one file at random. `random.choice` always returns a valid element,
# whereas `random.randint(0, len(images_list))` includes the upper bound and
# can therefore produce an index one past the end of the list.
img_name = os.path.join(images_path, random.choice(images_list))

'/var/folders/4l/tlk6snvn7h74kq99qnbyg3ch0000gn/T'

In [26]:
import shutil
import tempfile
import uuid

# Scratch directory that receives the segmentor's output files. The random
# UUID suffix makes the name unique, so no pre-existing directory has to be
# removed before `predict` runs.
tmp_dirname = f'tiatoolbox_{uuid.uuid4()}'
try:
    output = bcc_segmentor.predict(
        # `img_name` already includes the directory part, so it is passed as is.
        [img_name],
        save_dir=tmp_dirname,
        mode="tile",
        resolution=1.0,
        units="baseline",
        patch_input_shape=[1024, 1024],
        patch_output_shape=[512, 512],
        stride_shape=[512, 512],
        # on_gpu=False,
        crash_on_exception=False,
    )

    # NOTE(review): with `crash_on_exception=False` a failing image presumably
    # yields no entry in `output` -- fail with a clear message rather than an
    # IndexError on `output[0]`.
    if not output:
        raise RuntimeError(f"Segmentation produced no output for {img_name!r}")

    # The second item of the first result is the output path prefix; the raw
    # prediction array is stored next to it with a `.raw.0.npy` suffix.
    tile_prediction_raw = np.load(f"{output[0][1]}.raw.0.npy")
finally:
    # Always remove the scratch directory, even when prediction or loading fails.
    shutil.rmtree(tmp_dirname, ignore_errors=True)




Process Batch: 100%|##############################| 1/1 [00:03<00:00,  3.79s/it]
INFO:root:Finish: 0
INFO:root:--Input: /Users/illorens/Projects/source/Universidad/viu-MU-BD-and-DS-Image-Pathology/data/patient_extracts/patient_010_node_3.tif_tile_33_x18432_y37888_score21285.1.png
INFO:root:--Output: /Users/illorens/Projects/source/Universidad/viu-MU-BD-and-DS-Image-Pathology/notebooks/tiatoolbox_700cfb7d-db82-4583-9037-02de858db6cc/0


In [25]:
# Echo the raw prediction array as this cell's output.
# NOTE(review): this renders the full array repr; showing
# `tile_prediction_raw.shape` or a small slice would keep the saved output compact.
tile_prediction_raw

array([[[1.8700549e-01, 4.3950234e-02, 6.1295276e-09, 9.0488058e-04,
         7.6813936e-01],
        [1.8701835e-01, 4.3949969e-02, 6.1284848e-09, 9.0475695e-04,
         7.6812690e-01],
        [1.8703301e-01, 4.3950342e-02, 6.1276673e-09, 9.0467604e-04,
         7.6811194e-01],
        ...,
        [1.9005522e-01, 4.3719124e-02, 5.2353863e-09, 8.3474827e-04,
         7.6539087e-01],
        [1.9007134e-01, 4.3725081e-02, 5.2358820e-09, 8.3431881e-04,
         7.6536924e-01],
        [1.9006853e-01, 4.3727256e-02, 5.2365330e-09, 8.3369948e-04,
         7.6537049e-01]],

       [[1.8701373e-01, 4.3950535e-02, 6.1288437e-09, 9.0472645e-04,
         7.6813102e-01],
        [1.8702656e-01, 4.3950282e-02, 6.1278067e-09, 9.0459629e-04,
         7.6811856e-01],
        [1.8704139e-01, 4.3950681e-02, 6.1269976e-09, 9.0451108e-04,
         7.6810342e-01],
        ...,
        [1.8999320e-01, 4.3727040e-02, 5.2245097e-09, 8.3158846e-04,
         7.6544815e-01],
        [1.9000924e-01, 4.373329