In [14]:
import csv
import glob
import os
import pickle
import sys

import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

In [15]:
# ---- Input locations -------------------------------------------------------
# Folder globbed for the original images and the file suffix they carry.
image_folder_path = "/home/openroot/Tanmoy/Working Stuffs/myStuffs/ISIC-2017/ISIC-2017/"
image_extension = ".jpg"
# Folder globbed for the augmented images.
image_augmented_path = "/home/openroot/Tanmoy/Working Stuffs/myStuffs/havss-tf/ISIC-2017/augmented/"

# ---- Output location -------------------------------------------------------
# Resized images, the label CSV and the pickle files are all read from or
# written to this folder.
resized_folder_path = "/home/openroot/Tanmoy/Working Stuffs/myStuffs/havss-tf/ISIC-2017/resized/"

# ---- Image geometry --------------------------------------------------------
rows = 70
cols = 100
img_channels = 3
# Target size handed to cv2.resize, which takes (width, height).
resized_image_size = (cols, rows)

# ---- Dataset parameters ----------------------------------------------------
num_image_files = 0
# Only referenced by the commented-out normalisation in main().
pixel_depth = 255.0
# Width of the one-hot label vectors built by formatCSV().
num_labels = 2

In [16]:
def resizeImage(folder_path):
    """Resize every image in ``folder_path`` and save it to ``resized_folder_path``.

    Files matching ``folder_path + "*" + image_extension`` are read with
    OpenCV, resized to ``resized_image_size`` and written to
    ``resized_folder_path`` under their original file name.

    Processing is best-effort: a file that cannot be read, resized or written
    is reported on stdout together with the reason, and the loop carries on
    with the next file.

    Args:
        folder_path: Folder prefix to glob; it is concatenated directly with
            the pattern, so it must end with a path separator.
    """
    for image_path in glob.glob(folder_path + "*" + image_extension):
        image_name = os.path.basename(image_path)
        try:
            pixels = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
            if pixels is None:
                # cv2.imread reports failure by returning None instead of raising.
                raise IOError("cv2.imread could not read the file")
            resized = cv2.resize(pixels, resized_image_size)
            # cv2.imwrite returns False when the file was not written.
            if not cv2.imwrite(resized_folder_path + image_name, resized):
                raise IOError("cv2.imwrite could not write the resized file")
        except Exception as e:
            print("Unable To Resize {0}: {1}".format(image_name, e))

In [17]:
def imagePickle(images, pickle_file=None):
    """Serialise ``images`` to disk with pickle.

    Args:
        images: Object to store (main() passes the stacked image array).
        pickle_file: Destination path. Defaults to "imageDataset.pickle"
            inside ``resized_folder_path``.

    Any failure is reported on stdout rather than raised, so the caller keeps
    running even when the file could not be written.
    """
    try:
        if pickle_file is None:
            pickle_file = resized_folder_path + "imageDataset.pickle"
        with open(pickle_file, "wb") as f:
            pickle.dump(images, f, pickle.HIGHEST_PROTOCOL)
    except Exception as e:
        print("Error While Creating Pickle File {0}".format(e))

In [18]:
def loadPickle(pickle_file=None):
    """Load a previously pickled object from disk.

    Args:
        pickle_file: Path to read. Defaults to "imageDataset.pickle" inside
            ``resized_folder_path``.

    Returns:
        The unpickled object, or None when the file could not be opened or
        decoded (the error is printed). Previously a failed load ended in an
        UnboundLocalError because the result variable was never assigned.
    """
    dataset = None
    try:
        if pickle_file is None:
            pickle_file = resized_folder_path + "imageDataset.pickle"
        with open(pickle_file, "rb") as f:
            # NOTE: unpickling can execute arbitrary code; only load files
            # produced by this notebook or another trusted source.
            dataset = pickle.load(f)
    except Exception as e:
        print("Error While Loading Pickle File {0}".format(e))
    return dataset

In [19]:
def readCSV(label_file=None, benign_augmentations=15, melanoma_augmentations=65):
    """Read the label CSV and expand each row to cover its augmented copies.

    Every row contributes its own ``image_id`` followed by
    ``image_id + "_aug" + str(i)`` for each augmented copy; all of those
    entries carry the row's ``melanoma`` value as a float.

    Args:
        label_file: CSV path with ``image_id`` and ``melanoma`` columns.
            Defaults to "ISIC-2017-label.csv" inside ``resized_folder_path``.
        benign_augmentations: Augmented copies listed per row whose
            ``melanoma`` value is 0.
        melanoma_augmentations: Augmented copies listed per row whose
            ``melanoma`` value is non-zero.

    Returns:
        ``(label_dataset, label_names)`` as two 1-D numpy arrays of equal
        length (float64 labels, string names). If the file cannot be read the
        error is printed and whatever was collected so far is returned, which
        may be two empty arrays.
    """
    names = []
    labels = []
    try:
        if label_file is None:
            label_file = resized_folder_path + "ISIC-2017-label.csv"
        # The csv module needs a binary handle on Python 2 and a text handle
        # opened with newline="" on Python 3.
        if sys.version_info[0] >= 3:
            handle = open(label_file, "r", newline="")
        else:
            handle = open(label_file, "rb")
        with handle as f:
            for row in csv.DictReader(f):
                # Parse the whole row before storing anything so a malformed
                # row cannot leave names and labels with different lengths.
                image_id = row["image_id"]
                label = float(row["melanoma"])
                copies = melanoma_augmentations if int(label) else benign_augmentations
                names.append(image_id)
                names.extend(image_id + "_aug" + str(i) for i in range(copies))
                labels.extend([label] * (copies + 1))
    except Exception as e:
        print("Error While Reading CSV Label File: {0}".format(e))
    # Build the arrays once; growing them with np.append per entry is quadratic.
    label_dataset = np.array(labels, dtype=np.float64)
    label_names = np.array(names, dtype=str)
    return label_dataset, label_names

In [20]:
def formatCSV(data):
    """One-hot encode a 1-D array of class indices.

    Each entry of ``data`` is compared against ``0 .. num_labels - 1``; the
    result has one row per entry and ``num_labels`` float32 columns, holding
    1.0 where the entry equals the column index and 0.0 elsewhere.
    """
    class_ids = np.arange(num_labels)
    # Turn the labels into a column so the comparison broadcasts row-wise.
    as_column = data[:, None]
    matches = class_ids == as_column
    return matches.astype(np.float32)

In [21]:
def randomize(dataset, labels):
    """Shuffle images and labels with one shared random permutation.

    A permutation of the first-axis indices of ``labels`` is drawn from
    numpy's global random state and applied to the first axis of both inputs,
    so each image keeps its label. ``dataset`` is indexed as a 4-D array and
    ``labels`` as a 2-D array.

    Returns:
        ``(shuffled_dataset, shuffled_labels)``.
    """
    order = np.random.permutation(labels.shape[0])
    return dataset[order, :, :, :], labels[order, :]

In [22]:
def createDataset(image_dataset, labels_dataset, no_valid=6152, no_test=6152):
    """Slice the images and labels into train, validation and test splits.

    The splits are contiguous slices taken in the order the data is given
    (no shuffling happens here): the first ``no_test`` entries form the test
    split, the next ``no_valid`` the validation split and the remainder the
    training split.

    Args:
        image_dataset: 4-D array of images, indexed along the first axis.
        labels_dataset: 2-D array of labels, aligned with ``image_dataset``.
        no_valid: Number of validation entries (default 6152).
        no_test: Number of test entries (default 6152).

    Returns:
        ``(train_images, train_labels, validation_images, validation_labels,
        test_images, test_labels)``.

    A warning is printed (nothing is raised) when the inputs differ in length
    or when there are too few entries to leave any training data.
    """
    total_images = len(image_dataset)
    print("Total No. Of Images {0}".format(total_images))
    print("Division Of Dataset {0}".format(no_test))

    if len(labels_dataset) != total_images:
        print("Warning: {0} Images But {1} Labels".format(total_images, len(labels_dataset)))
    if total_images <= no_test + no_valid:
        print("Warning: {0} Images Leave No Training Data After {1} Test And {2} Validation".format(
            total_images, no_test, no_valid))

    valid_end = no_test + no_valid
    test_images = image_dataset[0:no_test, :, :, :]
    test_labels = labels_dataset[0:no_test, :]
    validation_images = image_dataset[no_test:valid_end, :, :, :]
    validation_labels = labels_dataset[no_test:valid_end, :]
    train_images = image_dataset[valid_end:, :, :, :]
    train_labels = labels_dataset[valid_end:, :]
    return train_images, train_labels, validation_images, validation_labels, test_images, test_labels

In [23]:
def _readResizedImages(image_names):
    """Load every named image from ``resized_folder_path`` into one array.

    Raises:
        IOError: if any file cannot be read. cv2.imread returns None for such
            files, which would otherwise produce a malformed array that only
            fails later, when it is sliced as a 4-D array.
    """
    images = []
    unreadable = []
    for name in image_names:
        path = resized_folder_path + name + image_extension
        pixels = cv2.imread(path)
        if pixels is None:
            unreadable.append(path)
        else:
            images.append(pixels)
    if unreadable:
        raise IOError("Unable To Read {0} Resized Image(s), First One: {1}".format(
            len(unreadable), unreadable[0]))
    return np.array(images)


def main():
    """Run the whole preprocessing pipeline.

    Steps, in order:
      1. Resize the original and the augmented images into ``resized_folder_path``.
      2. Read the label CSV and one-hot encode the labels.
      3. Load every listed resized image into one array and pickle it
         (imageDataset.pickle).
      4. Slice into test / validation / train splits, then shuffle each split
         independently.
      5. Pickle the six resulting arrays as one dict (ISIC.pickle).

    Images are stored exactly as cv2.imread returns them; the
    ``(x - pixel_depth / 2) / pixel_depth`` normalisation is not applied here.

    Raises:
        IOError: if any image named in the CSV cannot be read from
            ``resized_folder_path``.
    """
    print("Resizing Original Images")
    resizeImage(image_folder_path)
    print("Resizing Augmented Images")
    resizeImage(image_augmented_path)

    print("Reading CSV Label File")
    csv_data, csv_data_names = readCSV()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Check Values")
    print(csv_data[0:30])
    print(csv_data_names[0:30])

    print("Formating Labels")
    labels_x = formatCSV(csv_data)
    print(csv_data.shape)
    print(labels_x.shape)
    print(labels_x[0:30])

    print("Creating Image Array")
    image_x = _readResizedImages(csv_data_names)
    print(image_x.shape)

    print("Saving Image Array In Pickle Form - imageDataset.pickle")
    imagePickle(image_x)

    print("Creating Training And Testing Dataset")
    train_images, train_labels, validation_images, validation_labels, test_images, test_labels = createDataset(image_x, labels_x)

    # Shuffling happens after the split, so it only reorders entries inside
    # each split and never moves them between splits.
    print("Randomizing Dataset")
    train_images, train_labels = randomize(train_images, train_labels)
    validation_images, validation_labels = randomize(validation_images, validation_labels)
    test_images, test_labels = randomize(test_images, test_labels)

    print("Length Of Training Images Data {0} Shape {1}".format(len(train_images), train_images.shape))
    print("Length Of Training Labels Data {0} Shape {1}".format(len(train_labels), train_labels.shape))
    print("Length Of Validation Images Data {0} Shape {1}".format(len(validation_images), validation_images.shape))
    print("Length Of Validation Labels Data {0} Shape {1}".format(len(validation_labels), validation_labels.shape))
    print("Length Of Test Images Data {0} Shape {1}".format(len(test_images), test_images.shape))
    print("Length Of Test Labels Data {0} Shape {1}".format(len(test_labels), test_labels.shape))

    print("Saving All Dataset In Pickle Form - ISIC.pickle")
    pickle_file = resized_folder_path + "ISIC.pickle"
    try:
        with open(pickle_file, "wb") as f:
            save = {
                'train_dataset': train_images,
                'train_labels': train_labels,
                'validation_dataset': validation_images,
                'validation_labels': validation_labels,
                'test_dataset': test_images,
                'test_labels': test_labels,
            }
            pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
    except Exception as e:
        print("Error While Saving ISIC Pickle {0}".format(e))

In [24]:
# Run the whole pipeline: resize the images, read and encode the labels,
# split and shuffle the data, and write the pickle files.
main()

Resizing Original Images
Unable To Reize ISIC_0015182.jpg
Unable To Reize ISIC_0015200.jpg
Unable To Reize ISIC_0015220.jpg
Unable To Reize ISIC_0015233.jpg
Unable To Reize ISIC_0015260.jpg
Unable To Reize ISIC_0015295.jpg
Unable To Reize ISIC_0015189.jpg
Unable To Reize ISIC_0015181.jpg
Unable To Reize ISIC_0015219.jpg
Unable To Reize ISIC_0015190.jpg
Unable To Reize ISIC_0015204.jpg
Unable To Reize ISIC_0015284.jpg
Resizing Augmented Images
Reading CSV Label File
Check Values
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
['ISIC_0000000' 'ISIC_0000000_aug0' 'ISIC_0000000_aug1' 'ISIC_0000000_aug2'
 'ISIC_0000000_aug3' 'ISIC_0000000_aug4' 'ISIC_0000000_aug5'
 'ISIC_0000000_aug6' 'ISIC_0000000_aug7' 'ISIC_0000000_aug8'
 'ISIC_0000000_aug9' 'ISIC_0000000_aug10' 'ISIC_0000000_aug11'
 'ISIC_0000000_aug12' 'ISIC_0000000_aug13' 'ISIC_0000000_aug14'
 'ISIC_0000001' 'ISIC_0000001_aug0' 'ISIC_0000001_aug1' 'ISIC_0000001