# Inpaint the Dataset

This notebook inpaints the entire `train_balanced_224x224` dataset.

Any images containing DCAs will be modified and saved to the following paths:

<code>./Data/train_balanced_224x224_inpainted_ns/</code>
<code>./Data/train_balanced_224x224_inpainted_telea/</code>

Images without DCAs are left unmodified but are still re-saved (as PNG files) to the new destinations, so each output folder contains the complete dataset.

Both removal methods will run without any modifications being required in this notebook.

--------------

**WARNING**

This notebook takes a long time to run: there are 9810 images, and each one is processed multiple times. It is recommended to run individual sections as required rather than the entire notebook.




-----------

## Standard Imports

In [2]:
# append custom system path for custom modules folder in directory if not already
import sys
if  '../../Modules' not in sys.path:
    sys.path.insert(0, '../../Modules')

import pandas as pd
import numpy as np
from PIL import Image
import os, os.path
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.utils import Bunch
import cv2
import image_modifications as im # custom image modification module
import isic_data as isic
import dca_removal

In [3]:
# Source (read-only) and destination (written) folders for every split/class.
# All three lists share the same ordering, which the processing cells rely on
# when they index into them: 0 = train/mel, 1 = train/oth, 2 = val/mel, 3 = val/oth.
_split_subdirs = ['train/mel/', 'train/oth/', 'val/mel/', 'val/oth/']

# Original images to load from
base_filepaths = [
    '../../Data/train_balanced_224x224/' + subdir for subdir in _split_subdirs
]

# Destination for images produced with the default removal method
ns_target_filepaths = [
    '../../Data/train_balanced_224x224_inpainted_ns/' + subdir for subdir in _split_subdirs
]

# Destination for images produced with the 'inpaint_telea' removal method
telea_target_filepaths = [
    '../../Data/train_balanced_224x224_inpainted_telea/' + subdir for subdir in _split_subdirs
]

In [4]:
# Load the DCA annotation table of every split/class. Each table gains an
# 'Original_Image_Name' column that the processing cells use to match an
# annotation row to the image file it belongs to.
def _load_dca_annotations(csv_path):
    """Read one DCA annotation CSV and attach the original image file name.

    The original name is the 'Image_Name' entry with its last nine characters
    replaced by '.jpg' (presumably this strips a mask suffix plus extension
    from the mask file name - TODO confirm against the annotation files).
    """
    annotations = pd.read_csv(csv_path)
    annotations['Original_Image_Name'] = [
        mask_name[:-9] + '.jpg' for mask_name in annotations['Image_Name']
    ]
    return annotations


dca_t_mel_csv = _load_dca_annotations(r"../../Data/Annotations/dca_intensities_train_mel.csv")
dca_t_oth_csv = _load_dca_annotations(r"../../Data/Annotations/dca_intensities_train_oth.csv")
dca_v_mel_csv = _load_dca_annotations(r"../../Data/Annotations/dca_intensities_val_mel.csv")
dca_v_oth_csv = _load_dca_annotations(r"../../Data/Annotations/dca_intensities_val_oth.csv")


In [8]:
# Train - Mel
# Regenerates the inpainted copies of the train/mel split. Images listed in the
# DCA annotation table are processed with both removal methods
# (dca_removal.remove_DCA with its default method and with 'inpaint_telea');
# every other image is re-saved unchanged. All outputs are written as PNG.

# Create the output folders on a first run, then clear files left over from a
# previous run so stale results never mix with fresh ones.
for target_dir in (ns_target_filepaths[0], telea_target_filepaths[0]):
    os.makedirs(target_dir, exist_ok=True)
    for file in os.listdir(target_dir):
        os.remove(os.path.join(target_dir, file))

dca_mel_t_list = dca_t_mel_csv['Original_Image_Name'].tolist()
dca_masks_t_list = dca_t_mel_csv['Image_Name'].tolist()

# Original image name -> mask file name. setdefault keeps the first annotation
# when a name appears more than once (the same row list.index() would pick),
# while giving a constant-time lookup per image instead of a list scan.
mask_name_by_image = {}
for original_name, dca_mask_name in zip(dca_mel_t_list, dca_masks_t_list):
    mask_name_by_image.setdefault(original_name, dca_mask_name)

for img in os.listdir(base_filepaths[0]):
    # Read inside a context manager so the file handle is released immediately.
    with Image.open(os.path.join(base_filepaths[0], img)) as source_image:
        image = np.asarray(source_image)

    # splitext handles any extension length; a fixed [:-4] slice assumes three characters.
    output_name = os.path.splitext(img)[0] + '.png'
    ns_output_path = os.path.join(ns_target_filepaths[0], output_name)
    telea_output_path = os.path.join(telea_target_filepaths[0], output_name)

    if img in mask_name_by_image:
        mask_path = os.path.join(r'../../Data/DCA_Masks/train/mel/', mask_name_by_image[img])
        with Image.open(mask_path) as mask_image:
            mask = np.asarray(mask_image)

        inpainted_ns = dca_removal.remove_DCA(image, mask)
        inpainted_telea = dca_removal.remove_DCA(image, mask, 'inpaint_telea')

        Image.fromarray(inpainted_ns).save(ns_output_path)
        Image.fromarray(inpainted_telea).save(telea_output_path)

    else:
        # No DCA annotation: copy the image through unchanged to both destinations.
        Image.fromarray(image).save(ns_output_path)
        Image.fromarray(image).save(telea_output_path)

In [9]:
# Train - Oth
# Regenerates the inpainted copies of the train/oth split. Images listed in the
# DCA annotation table are processed with both removal methods
# (dca_removal.remove_DCA with its default method and with 'inpaint_telea');
# every other image is re-saved unchanged. All outputs are written as PNG.

# Create the output folders on a first run, then clear files left over from a
# previous run so stale results never mix with fresh ones.
for target_dir in (ns_target_filepaths[1], telea_target_filepaths[1]):
    os.makedirs(target_dir, exist_ok=True)
    for file in os.listdir(target_dir):
        os.remove(os.path.join(target_dir, file))

dca_oth_t_list = dca_t_oth_csv['Original_Image_Name'].tolist()
dca_masks_oth_t_list = dca_t_oth_csv['Image_Name'].tolist()

# Original image name -> mask file name. setdefault keeps the first annotation
# when a name appears more than once (the same row list.index() would pick),
# while giving a constant-time lookup per image instead of a list scan.
mask_name_by_image = {}
for original_name, dca_mask_name in zip(dca_oth_t_list, dca_masks_oth_t_list):
    mask_name_by_image.setdefault(original_name, dca_mask_name)

for img in os.listdir(base_filepaths[1]):
    # Read inside a context manager so the file handle is released immediately.
    with Image.open(os.path.join(base_filepaths[1], img)) as source_image:
        image = np.asarray(source_image)

    # splitext handles any extension length; a fixed [:-4] slice assumes three characters.
    output_name = os.path.splitext(img)[0] + '.png'
    ns_output_path = os.path.join(ns_target_filepaths[1], output_name)
    telea_output_path = os.path.join(telea_target_filepaths[1], output_name)

    if img in mask_name_by_image:
        mask_path = os.path.join(r'../../Data/DCA_Masks/train/oth/', mask_name_by_image[img])
        with Image.open(mask_path) as mask_image:
            mask = np.asarray(mask_image)

        inpainted_ns = dca_removal.remove_DCA(image, mask)
        inpainted_telea = dca_removal.remove_DCA(image, mask, 'inpaint_telea')

        Image.fromarray(inpainted_ns).save(ns_output_path)
        Image.fromarray(inpainted_telea).save(telea_output_path)

    else:
        # No DCA annotation: copy the image through unchanged to both destinations.
        Image.fromarray(image).save(ns_output_path)
        Image.fromarray(image).save(telea_output_path)

In [10]:
# Val - Mel
# Regenerates the inpainted copies of the val/mel split. Images listed in the
# DCA annotation table are processed with both removal methods
# (dca_removal.remove_DCA with its default method and with 'inpaint_telea');
# every other image is re-saved unchanged. All outputs are written as PNG.

# Create the output folders on a first run, then clear files left over from a
# previous run so stale results never mix with fresh ones.
for target_dir in (ns_target_filepaths[2], telea_target_filepaths[2]):
    os.makedirs(target_dir, exist_ok=True)
    for file in os.listdir(target_dir):
        os.remove(os.path.join(target_dir, file))

dca_mel_v_list = dca_v_mel_csv['Original_Image_Name'].tolist()
dca_masks_mel_v_list = dca_v_mel_csv['Image_Name'].tolist()

# Original image name -> mask file name. setdefault keeps the first annotation
# when a name appears more than once (the same row list.index() would pick),
# while giving a constant-time lookup per image instead of a list scan.
mask_name_by_image = {}
for original_name, dca_mask_name in zip(dca_mel_v_list, dca_masks_mel_v_list):
    mask_name_by_image.setdefault(original_name, dca_mask_name)

for img in os.listdir(base_filepaths[2]):
    # Read inside a context manager so the file handle is released immediately.
    with Image.open(os.path.join(base_filepaths[2], img)) as source_image:
        image = np.asarray(source_image)

    # splitext handles any extension length; a fixed [:-4] slice assumes three characters.
    output_name = os.path.splitext(img)[0] + '.png'
    ns_output_path = os.path.join(ns_target_filepaths[2], output_name)
    telea_output_path = os.path.join(telea_target_filepaths[2], output_name)

    if img in mask_name_by_image:
        mask_path = os.path.join(r'../../Data/DCA_Masks/val/mel/', mask_name_by_image[img])
        with Image.open(mask_path) as mask_image:
            mask = np.asarray(mask_image)

        inpainted_ns = dca_removal.remove_DCA(image, mask)
        inpainted_telea = dca_removal.remove_DCA(image, mask, 'inpaint_telea')

        Image.fromarray(inpainted_ns).save(ns_output_path)
        Image.fromarray(inpainted_telea).save(telea_output_path)

    else:
        # No DCA annotation: copy the image through unchanged to both destinations.
        Image.fromarray(image).save(ns_output_path)
        Image.fromarray(image).save(telea_output_path)

In [11]:
# Val - Oth
# Regenerates the inpainted copies of the val/oth split. Images listed in the
# DCA annotation table are processed with both removal methods
# (dca_removal.remove_DCA with its default method and with 'inpaint_telea');
# every other image is re-saved unchanged. All outputs are written as PNG.

# Create the output folders on a first run, then clear files left over from a
# previous run so stale results never mix with fresh ones.
for target_dir in (ns_target_filepaths[3], telea_target_filepaths[3]):
    os.makedirs(target_dir, exist_ok=True)
    for file in os.listdir(target_dir):
        os.remove(os.path.join(target_dir, file))

dca_oth_v_list = dca_v_oth_csv['Original_Image_Name'].tolist()
dca_masks_oth_v_list = dca_v_oth_csv['Image_Name'].tolist()

# Original image name -> mask file name. setdefault keeps the first annotation
# when a name appears more than once (the same row list.index() would pick),
# while giving a constant-time lookup per image instead of a list scan.
mask_name_by_image = {}
for original_name, dca_mask_name in zip(dca_oth_v_list, dca_masks_oth_v_list):
    mask_name_by_image.setdefault(original_name, dca_mask_name)

for img in os.listdir(base_filepaths[3]):
    # Read inside a context manager so the file handle is released immediately.
    with Image.open(os.path.join(base_filepaths[3], img)) as source_image:
        image = np.asarray(source_image)

    # splitext handles any extension length; a fixed [:-4] slice assumes three characters.
    output_name = os.path.splitext(img)[0] + '.png'
    ns_output_path = os.path.join(ns_target_filepaths[3], output_name)
    telea_output_path = os.path.join(telea_target_filepaths[3], output_name)

    if img in mask_name_by_image:
        mask_path = os.path.join(r'../../Data/DCA_Masks/val/oth/', mask_name_by_image[img])
        with Image.open(mask_path) as mask_image:
            mask = np.asarray(mask_image)

        inpainted_ns = dca_removal.remove_DCA(image, mask)
        inpainted_telea = dca_removal.remove_DCA(image, mask, 'inpaint_telea')

        Image.fromarray(inpainted_ns).save(ns_output_path)
        Image.fromarray(inpainted_telea).save(telea_output_path)

    else:
        # No DCA annotation: copy the image through unchanged to both destinations.
        Image.fromarray(image).save(ns_output_path)
        Image.fromarray(image).save(telea_output_path)