In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
from pathlib import Path
import shutil
import os
from PIL import Image

In [None]:
# Manifest table loaded once from metadata/calc_case_description_train_set.csv.
# process_one_patch_path reads its "image file path", "ROI mask file path"
# and "abnormality id" columns to resolve patch names.
MANIFEST_DF = pd.read_csv('metadata/calc_case_description_train_set.csv')

def process_one_patch_path(patch_name, manifest=None):
    """Resolve a small-patch file name to the matching large-patch file name.

    The stem of ``patch_name`` is expected to have the form
    ``<fold1>-<fold2>-abnorm_<N>``. A manifest row matches when both folder
    tokens occur as substrings of the same path cell and its
    "abnormality id" equals ``N``. The "image file path" column is searched
    first; the "ROI mask file path" column is only searched when the first
    one yields no match.

    Parameters
    ----------
    patch_name : pathlib.Path
        Patch file name; only its ``stem`` and ``suffix`` are used.
    manifest : pandas.DataFrame, optional
        Table with the columns "image file path", "ROI mask file path" and
        "abnormality id". Defaults to the module-level ``MANIFEST_DF``.

    Returns
    -------
    str or float
        ``'<c1>-<c2>-abnorm_<N><suffix>'`` where ``c1`` and ``c2`` are the
        second and third '/'-separated components of the first matching
        row's "ROI mask file path". ``numpy.nan`` when no row matches or
        when ``patch_name`` does not follow the expected naming scheme.
    """
    # A name that cannot be parsed cannot identify a manifest row; report it
    # as "no match" instead of raising, so a single stray file in the patch
    # folder does not abort the whole mapping.
    stem_tokens = patch_name.stem.split('-')
    if len(stem_tokens) != 3:
        return np.nan
    fold1, fold2, abnorm_id = stem_tokens
    try:
        abnorm_number = int(abnorm_id.replace("abnorm_", ""))
    except ValueError:
        return np.nan

    if manifest is None:
        manifest = MANIFEST_DF

    # The abnormality filter is identical for both lookups; compute it once.
    abnorm_bool = manifest["abnormality id"] == abnorm_number

    # Same search against each path column, in priority order. Building the
    # masks lazily means the second column is only scanned when needed.
    for column in ("image file path", "ROI mask file path"):
        paths = manifest[column].str
        # Literal (non-regex) substring test; missing paths never match.
        find_bool = (
            paths.contains(fold1, regex=False, na=False)
            & paths.contains(fold2, regex=False, na=False)
            & abnorm_bool
        )
        if find_bool.any():
            # Whichever column matched, the output name is always built from
            # the ROI mask path of the first matching row.
            roi_path = manifest.loc[find_bool, "ROI mask file path"].iloc[0]
            name_components = roi_path.split('/')
            return '-'.join([name_components[1], name_components[2], abnorm_id]) + patch_name.suffix

    return np.nan

def create_mapping(valid_mm):
    """Build a table pairing small-patch names with large-patch names.

    Parameters
    ----------
    valid_mm : iterable of pathlib.Path
        Paths of the small context patches.

    Returns
    -------
    pandas.DataFrame
        Two columns: "small_patches_name" (each input name with the last
        '-'-separated token of its stem removed, de-duplicated) and
        "large_patches_name" (the result of ``process_one_patch_path`` for
        that name).
    """
    def _drop_last_token(patch_path):
        # Only the file name matters here: cut the trailing '-' token off the
        # stem and put the original extension back.
        kept_tokens = patch_path.stem.split('-')[:-1]
        return Path('-'.join(kept_tokens) + patch_path.suffix)

    small_names = (
        pd.Series(valid_mm)
        .apply(_drop_last_token)
        .rename("small_patches_name")
        .drop_duplicates()
    )
    mapping = small_names.to_frame()
    mapping["large_patches_name"] = mapping["small_patches_name"].apply(process_one_patch_path)
    return mapping

In [None]:
# Relocate large patches from the train folder to the valid folder for every
# small patch found in the validation folder of the chosen class.
destination_dir = Path('../cbis-ddsm-large-patch/valid')
src_dir = Path('../cbis-ddsm-large-patch/train')

label_class = "MALIGNANT_CALCIFICATION"

valid_mm = list(Path(f'../cbis-ddsm-patches/valid/{label_class}').glob('*'))
valid_mm_s = create_mapping(valid_mm)

for fname_to_move_to_valid in valid_mm_s["large_patches_name"]:
    # A null name means the small patch had no manifest match.
    if pd.isnull(fname_to_move_to_valid):
        continue

    src = src_dir / label_class / fname_to_move_to_valid
    destination = destination_dir / label_class / fname_to_move_to_valid

    # Nothing to do when the file is not in the train folder.
    if not src.exists():
        continue

    target_folder = destination.parent
    if not target_folder.exists():
        target_folder.mkdir(parents=True)

    print(destination)
    src.replace(destination)