# 链接Google drive

In [1]:
# Mount Google Drive at /content/drive; every project and dataset path used in
# the later cells lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install numpy
!pip install opencv-python
!pip install SimpleITK
# !pip install concurrent



In [3]:
# Change the working directory to the folder that contains the Python scripts.
# NOTE(review): presumably this is what lets the later cell import
# preprocessing_utils by its bare module name — confirm the module lives here.
import os
os.chdir('/content/drive/Othercomputers/Mac/Google_Drive/pionono_segmentation-main/src/preprocessing_tools')

In [3]:
# Make the preprocessing_tools folder importable from this kernel.
# A `!VAR=value` shell line only assigns the variable inside a throw-away
# subshell, so it never reaches the running Python process; extending sys.path
# is what actually affects imports in this session.
import sys

PREPROCESSING_TOOLS_DIR = '/content/drive/Othercomputers/Mac/Google_Drive/pionono_segmentation-main/src/preprocessing_tools'
# Guard against duplicates so re-running the cell stays idempotent.
if PREPROCESSING_TOOLS_DIR not in sys.path:
    sys.path.append(PREPROCESSING_TOOLS_DIR)

In [4]:
# Preprocess the prostate TMA dataset Gleason 2019

import argparse
import numpy as np
# from preprocessing_tools.preprocessing_utils import resize_all_images, create_voting_masks, \
#     create_crossvalidation_splits, convert_to_rgb, calculate_dataset_statistics, create_gold_label_proportion_folders
from preprocessing_utils import resize_all_images, create_voting_masks, \
    create_crossvalidation_splits, convert_to_rgb, calculate_dataset_statistics, create_gold_label_proportion_folders
import sys

In [5]:
# Color mapping: five three-component entries, named as BGR triples.
# NOTE(review): presumably indexed by the remapped class ids produced by
# convert_masks (0 normal tissue, 1 GG3, 2 GG4, 3 GG5, 4 background) — confirm.
# The constant is not referenced in any of the visible cells.
CLASS_COLORS_BGR = [[128, 255, 96], [32, 224, 255], [0, 104, 255], [0, 0, 255], [255, 255, 255]]

# Detect whether the code runs inside a Jupyter Notebook or Google Colab.
def in_notebook():
    """Report whether a notebook-style IPython shell is hosting this code.

    Returns:
        bool: True when ``get_ipython()`` is available and the class of the
        shell it returns is named ``ZMQInteractiveShell`` (Jupyter notebook or
        qtconsole) or ``Shell`` (Google Colab). False for any other shell
        class, or when ``get_ipython`` is not defined at all.
    """
    notebook_shell_names = ('ZMQInteractiveShell', 'Shell')
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # get_ipython is not defined: probably the standard Python interpreter.
        return False
    return shell_name in notebook_shell_names

# Choose the input/output directories according to the runtime environment.
# The notebook defaults are stored in `config` first; when the code is not
# running in a notebook, the same values serve as command-line defaults and
# whatever the parser returns replaces them.
config = {
    'input_dir': "/content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/Source/",
    'output_dir': "/content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/resized_dataset_1024/"
}

if not in_notebook():
    parser = argparse.ArgumentParser(description="Preprocess prostate TMA dataset Gleason 2019")
    parser.add_argument("--input_dir", "-i", type=str,
                        default=config['input_dir'],  # input path
                        help="Input directory of dataset.")
    parser.add_argument("--output_dir", "-o", type=str,
                        default=config['output_dir'],  # output path
                        help="Output directory of resized images.")
    args = parser.parse_args()
    config['input_dir'] = args.input_dir
    config['output_dir'] = args.output_dir

# Keep the plain module-level names available alongside the config entries.
input_dir = config['input_dir']
output_dir = config['output_dir']

# Dataset-specific folder names plus the resize resolution.
# NOTE(review): resize_resolution is presumably the target edge length in
# pixels read by the preprocessing helpers — confirm in preprocessing_utils.
dataset_specific_names = dict(
    train_img_dir='Train_imgs/',
    test_img_dir='Test_imgs/',
    map_dir='Maps/',
    # One map folder per annotator: 'Maps1_T/' through 'Maps6_T/'.
    map_annotator_dirs=['Maps%d_T/' % annotator_no for annotator_no in range(1, 7)],
    resize_resolution=1024,  # resize_resolution added here (XU's change)
)

# Merge the dataset-specific folder names and the resize resolution into the
# shared config dict that is passed to the preprocessing helpers below.
config.update(dataset_specific_names)

# Image file names handed to create_crossvalidation_splits further down.
# NOTE(review): judging by the name, these are presumably the cores that
# contain Gleason grade 5 — confirm how the split routine uses them.
list_gg5 = [
    'slide001_core145.png',
    'slide007_core005.png',
    'slide007_core044.png',
    'slide003_core068.png',
    'slide007_core016.png',
    'slide002_core073.png',
    'slide002_core144.png',
    'slide001_core010.png',
    'slide002_core009.png',
    'slide005_core092.png',
    'slide002_core074.png',
    'slide002_core140.png',
    'slide002_core143.png',
    'slide002_core010.png',
    'slide003_core096.png',
    'slide007_core043.png',
]

def convert_masks(mask):  # mask conversion function
    """Remap the raw annotation labels of a mask to the training class ids.

    The initial classes are 0 (background), 1 (normal tissue), 3 (GG3),
    4 (GG4), 5 (GG5) and 6 (normal tissue). They are moved to:
    0 (normal tissue), 1 (GG3), 2 (GG4), 3 (GG5), 4 (background).

    The decision is taken on the *original* label values, so the result no
    longer depends on unsigned 8-bit wrap-around (0 - 2 == 254, 1 - 2 == 255);
    signed or wider integer masks are remapped the same way as uint8 ones.
    For uint8 input the output is identical to the previous implementation.

    Args:
        mask: array-like of integer labels.

    Returns:
        numpy.ndarray with the same shape as ``mask``, holding the remapped
        labels in the dtype of ``mask - 2``. Any label other than 0, 1 and 6
        is simply shifted down by 2 (so an unexpected raw label 2 also ends
        up as 0).
    """
    mask = np.asarray(mask)

    # Classify on the untouched labels before any arithmetic is applied.
    is_background = mask == 0
    is_normal = (mask == 1) | (mask == 6)

    # Gleason classes 3, 4, 5 are moved to 1, 2, 3. Entries for labels 0 and 1
    # may wrap (unsigned) or go negative (signed); both are overwritten below.
    shifted = mask - 2

    # Typed scalars keep np.where from promoting the result dtype.
    label = shifted.dtype.type
    shifted = np.where(is_normal, label(0), shifted)  # normal tissue to 0
    return np.where(is_background, label(4), shifted)  # background to 4

# Alias under which the mask conversion function is passed to resize_all_images.
mask_fct = convert_masks

# config.update({'resize_resolution': 1024})  # no longer needed: config was already updated with resize_resolution above

In [8]:

# Run the resize step over the source dataset. The recorded output shows it
# processing Train_imgs/, Test_imgs/ and the Maps/MapsN_T/ folders and writing
# to the matching folders under output_dir.
# NOTE(review): mask_fct is presumably applied to the annotation maps only, and
# max_workers=256 is a hand-picked pool size — confirm both against
# preprocessing_utils.resize_all_images.
resize_all_images(config, config['input_dir'], mask_fct, max_workers=256)


### Resize ###
Processing input: /content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/Source/Train_imgs/ Output: /content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/resized_dataset_1024/Train_imgs/
Images found:244
### Resize ###
Processing input: /content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/Source/Test_imgs/ Output: /content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/resized_dataset_1024/Test_imgs/
Images found:87
### Resize ###
Processing input: /content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/Source/Maps/Maps1_T/ Output: /content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/resized_dataset_1024/Maps/Maps1_T/
Images found:244
### Resize ###
Processing input: /content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/Source/Maps/Maps2_T/ Output: /content/drive/Othercomputers/Mac/Google_Drive/dataset/Gleason19/resized_dataset_1024/Maps/Maps2_T/
Images found:141
### Resize ###
Processing input: /c

In [8]:
# Create the voting maps in 'majority' mode with dir_name 'MV/'.
# Earlier call without an explicit worker count, kept for reference:
# create_voting_masks(config, 'majority', dir_name='MV/')
# NOTE(review): the recorded output of this cell reports only ~750k pixels in
# total and zero GG5 pixels (hence the inf class weight), against ~233M pixels
# for the STAPLE run over the same 245 images — verify the majority path in
# preprocessing_utils before trusting the MV/ masks or these statistics.
create_voting_masks(config, 'majority', dir_name='MV/', max_workers=1024)

### Create Voting Maps ###
Mode: majority
Total images with voting: 245
--------- Dataset Statistic of majority
GG5 image list: 
Overall classes per pixel: [1311, 1599, 3747, 0, 745983]
Overall classes per image: [4, 6, 10, 0, 245]
Class_weights: [114.81922197  94.13883677  40.17293835          inf   0.20178476]
---------


  class_weights = n_all_pixels / (len(count_pixels) * np.array(count_pixels))


In [10]:

# Create the voting maps in 'staple' mode with dir_name 'STAPLE/'.
# Earlier call without an explicit worker count, kept for reference:
# create_voting_masks(config, 'staple', dir_name='STAPLE/')
# NOTE(review): max_workers=1024 is a hand-picked pool size — confirm it suits
# the executor used inside preprocessing_utils.
create_voting_masks(config, 'staple', dir_name='STAPLE/', max_workers=1024)

### Create Voting Maps ###
Mode: staple
Total images with voting: 245
--------- Dataset Statistic of staple
GG5 image list: 
- 'slide002_core009.png'
- 'slide002_core140.png'
- 'slide007_core044.png'
- 'slide007_core043.png'
- 'slide005_core092.png'
- 'slide002_core073.png'
- 'slide007_core016.png'
- 'slide003_core068.png'
- 'slide002_core143.png'
- 'slide002_core144.png'
Overall classes per pixel: [18192657, 40266622, 59161440, 982849, 114250960]
Overall classes per image: [99, 124, 158, 10, 245]
Class_weights: [ 2.55987378  1.15656351  0.78718344 47.3835814   0.40761938]
---------


In [15]:

# Create the cross-validation splits from the resized training images
# (output_dir + train_img_dir), passing list_gg5 along.
# Earlier call without an explicit worker count, kept for reference:
# create_crossvalidation_splits(config, config['output_dir'] + config['train_img_dir'], list_gg5)
# NOTE(review): how list_gg5 influences the splits is not visible here —
# check preprocessing_utils.create_crossvalidation_splits.
create_crossvalidation_splits(config, config['output_dir'] + config['train_img_dir'],
                              list_gg5, max_workers=512)

### Create Cross Validation ###
No of initial images: 244
---------
---------
---------
---------
---------
---------
---------
---------


In [12]:

# Convert the maps to RGB images for the six annotator folders plus the
# 'STAPLE/' and 'MV/' voting folders.
# Earlier call without an explicit worker count, kept for reference:
# convert_to_rgb(config, ['Maps1_T/', 'Maps2_T/', 'Maps3_T/', 'Maps4_T/',
#                         'Maps5_T/', 'Maps6_T/', 'STAPLE/', 'MV/'])
convert_to_rgb(config, ['Maps1_T/', 'Maps2_T/', 'Maps3_T/', 'Maps4_T/',
                        'Maps5_T/', 'Maps6_T/', 'STAPLE/', 'MV/'], max_workers=512)

### Convert Maps to RGB images ###


In [13]:

# Print the dataset statistics (GG5 image list, per-pixel and per-image class
# counts, class weights) for the STAPLE maps under output_dir + map_dir,
# labelled 'total' in the report header.
calculate_dataset_statistics(config['output_dir'] + config['map_dir'] + 'STAPLE/', 'total')
# End of the statistics step.


--------- Dataset Statistic of total
GG5 image list: 
- 'slide002_core009.png'
- 'slide002_core140.png'
- 'slide007_core044.png'
- 'slide007_core043.png'
- 'slide005_core092.png'
- 'slide002_core073.png'
- 'slide007_core016.png'
- 'slide003_core068.png'
- 'slide002_core143.png'
- 'slide002_core144.png'
Overall classes per pixel: [18192657, 40266622, 59161440, 982849, 114250960]
Overall classes per image: [99, 124, 158, 10, 245]
Class_weights: [ 2.55987378  1.15656351  0.78718344 47.3835814   0.40761938]
---------


In [14]:
# Create the gold-label proportion folders from the 'STAPLE/' maps under
# output_dir + map_dir, for the values 20, 40, 60, 80 and 100.
# NOTE(review): the values are presumably percentages of gold labels to keep —
# confirm in preprocessing_utils.create_gold_label_proportion_folders.
# Earlier call without an explicit worker count, kept for reference:
# create_gold_label_proportion_folders(config['output_dir'] + config['map_dir'],
#                                      'STAPLE/', [20, 40, 60, 80, 100])
create_gold_label_proportion_folders(config['output_dir'] + config['map_dir'],
                                     'STAPLE/', [20, 40, 60, 80, 100], max_workers=512)