In [61]:
import numpy as np
import matplotlib.pyplot as plt
import math
import matplotlib.image as mpimg
import glob
import json
import os
import torch
import torch.nn as nn
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, sampler
import pandas as pd
import copy
from tqdm import tqdm

In [62]:
# Root directory that the dataset locations below are resolved against.
base_path = "/Users/nduginets/PycharmProjects/"

# Both CSV locations are built with the same join; only `dataset_path` is read here.
dataset_path, shifted_path = (
    os.path.join(base_path, relative_csv)
    for relative_csv in (
        "master-diploma/GAN_to_box/test_data/isic_2018_boxes.csv",
        "master-diploma/GAN_to_box/test_data/isic_2018_boxes_shifted.csv",
    )
)

# Bounding-box table; every column after the first is consumed by the cells below.
frame = pd.read_csv(dataset_path)

In [63]:
# One attribute group = 15 boxes x 4 values (x, y, x_size, y_size) = 60 columns.
sz = 4 * 15
# Skip the first column of the frame; the rest are the per-box values.
columns = frame.columns[1:]

# Cut the remaining column names into 6 consecutive groups of `sz` names each.
attributes = [columns[start: start + sz] for start in range(0, 6 * sz, sz)]
attributes

[Index(['segm_x_0', 'segm_y_0', 'segm_x_size_0', 'segm_y_size_0', 'segm_x_1',
        'segm_y_1', 'segm_x_size_1', 'segm_y_size_1', 'segm_x_2', 'segm_y_2',
        'segm_x_size_2', 'segm_y_size_2', 'segm_x_3', 'segm_y_3',
        'segm_x_size_3', 'segm_y_size_3', 'segm_x_4', 'segm_y_4',
        'segm_x_size_4', 'segm_y_size_4', 'segm_x_5', 'segm_y_5',
        'segm_x_size_5', 'segm_y_size_5', 'segm_x_6', 'segm_y_6',
        'segm_x_size_6', 'segm_y_size_6', 'segm_x_7', 'segm_y_7',
        'segm_x_size_7', 'segm_y_size_7', 'segm_x_8', 'segm_y_8',
        'segm_x_size_8', 'segm_y_size_8', 'segm_x_9', 'segm_y_9',
        'segm_x_size_9', 'segm_y_size_9', 'segm_x_10', 'segm_y_10',
        'segm_x_size_10', 'segm_y_size_10', 'segm_x_11', 'segm_y_11',
        'segm_x_size_11', 'segm_y_size_11', 'segm_x_12', 'segm_y_12',
        'segm_x_size_12', 'segm_y_size_12', 'segm_x_13', 'segm_y_13',
        'segm_x_size_13', 'segm_y_size_13', 'segm_x_14', 'segm_y_14',
        'segm_x_size_14', 'segm_y_

In [64]:

def noise_row(row, x_offset, y_offset, x_wide, y_wide, attribute_groups=None, group_size=None):
    """Return a copy of ``row`` with every box coordinate multiplied by a scale factor.

    The factor applied to a value is chosen from its column name:
    names containing ``"x_size"`` / ``"y_size"`` are scaled by ``x_wide`` /
    ``y_wide``; any other name containing ``"x"`` / ``"y"`` is scaled by
    ``x_offset`` / ``y_offset``. Values whose name matches none of these are
    left untouched.

    Parameters
    ----------
    row : pandas.Series or list-like supporting ``.copy()``
        Flat row of box values, addressed by position.
    x_offset, y_offset : float
        Multipliers for the box x / y positions.
    x_wide, y_wide : float
        Multipliers for the box x / y sizes.
    attribute_groups : sequence of sequences of str, optional
        Column names per attribute group. Defaults to the notebook-level
        ``attributes``.
    group_size : int, optional
        Number of values per attribute group. Defaults to the notebook-level
        ``sz``.

    Returns
    -------
    Same type as ``row``
        A new, scaled row. The input is NOT modified (the previous version
        scaled the caller's object in place).
    """
    if attribute_groups is None:
        attribute_groups = attributes
    if group_size is None:
        group_size = sz

    # Work on a copy so the caller's row (and anything it may alias) stays intact.
    scaled = row.copy()
    # Integer keys on a label-indexed Series are deprecated as positional access;
    # go through .iloc when it exists, plain indexing otherwise (list / ndarray).
    cells = scaled.iloc if hasattr(scaled, "iloc") else scaled

    for group_idx, column_names in enumerate(attribute_groups):
        for field_idx, column_name in enumerate(column_names):
            # Order matters: the "*_size" checks must run before the bare
            # "x" / "y" checks, because size names also contain "x" / "y".
            if "x_size" in column_name:
                factor = x_wide
            elif "y_size" in column_name:
                factor = y_wide
            elif "x" in column_name:
                factor = x_offset
            elif "y" in column_name:
                factor = y_offset
            else:
                continue
            position = group_idx * group_size + field_idx
            cells[position] = cells[position] * factor
    return scaled

def draw_image(row, offset, name):
    """Rasterise the boxes of one attribute group into a 1024x1024 mask and save it.

    Parameters
    ----------
    row : pandas.Series or sequence
        Flat row of box values; group ``offset`` occupies positions
        ``offset * sz`` to ``(offset + 1) * sz`` (``sz`` is the notebook-level
        group size).
    offset : int
        Index of the attribute group to draw.
    name : str
        Destination file path handed to ``plt.imsave``.

    Notes
    -----
    Each box is four consecutive values (x, y, x_size, y_size) that are
    multiplied by the image size, truncated to int and clamped to
    ``[0, image_size]``.
    NOTE(review): ``x`` indexes the first array axis (rows) and ``y`` the
    second; confirm this matches the coordinate convention of the CSV.
    NOTE(review): no ``cmap`` is passed to ``imsave``, so the file is written
    with matplotlib's default colormap rather than as a grayscale mask;
    confirm that is what the consumer expects.
    """
    image_size = 1024
    image = np.zeros((image_size, image_size))

    # Positional slice of the group, converted to a plain float array so the
    # per-box reads below do not rely on deprecated integer-key Series lookups.
    parts = np.asarray(row[offset * sz: (offset + 1) * sz], dtype=float)

    for start in range(0, len(parts), 4):
        x, y, x_sz, y_sz = (
            max(0, min(int(value * image_size), image_size))
            for value in parts[start: start + 4]
        )
        # Slices running past the border are clipped by numpy.
        image[x: x + x_sz, y: y + y_sz] = 1

    # imsave writes the array directly and does not use the current pyplot
    # figure, so no plt.clf() is needed (it only produced empty figures).
    plt.imsave(name, image)

# Smoke test: scale the values of row 2 and render its first attribute group.
a = frame.iloc[2][1:]
modified_a = noise_row(a, x_offset=0.4, y_offset=0.1, x_wide=1.5, y_wide=1)
test_image_path = os.path.join("./", "test1.png")
draw_image(modified_a, offset=0, name=test_image_path)

<Figure size 432x288 with 0 Axes>

In [65]:
# Output folders: one for the segmentation images, one shared by all attribute images.
SEGMENTATION_PATH = "/Users/nduginets/PycharmProjects/master-diploma/bboxes/segmentation"
ATTRIBUTES_PATH = "/Users/nduginets/PycharmProjects/master-diploma/bboxes/attributes"

for output_dir in (SEGMENTATION_PATH, ATTRIBUTES_PATH):
    os.makedirs(output_dir, exist_ok=True)

# Entries are (file-name suffix, attribute-group offset inside a row, output folder).
type_path_offset_list = [("segmentation", 0, SEGMENTATION_PATH)] + [
    (attribute_name, group_offset, ATTRIBUTES_PATH)
    for group_offset, attribute_name in enumerate(
        [
            "attribute_globules",
            "attribute_milia_like_cyst",
            "attribute_negative_network",
            "attribute_pigment_network",
            "attribute_streaks",
        ],
        start=1,
    )
]

In [66]:
def fill_zeros(index, number_to_fill = '9'):
    """Build an ``ISIC_#######`` image id from ``index``, tagged with one character.

    Ids shorter than 7 characters are left-padded to 7: the first padding
    character is ``number_to_fill`` (a one-character tag, e.g. a job id) and
    the remaining ones are zeros. Ids that are already 7 or more characters
    long are returned unpadded.

    The previous version repeated ``number_to_fill`` for the whole padding,
    which made ids ambiguous whenever the index started with the same digit
    (``fill_zeros(2, '1')`` and ``fill_zeros(12, '1')`` both produced
    ``ISIC_1111112``), so generated files silently overwrote each other.

    Parameters
    ----------
    index : int or str
        Running image number.
    number_to_fill : str
        Single-character tag placed directly after ``ISIC_``.

    Returns
    -------
    str
        The image id, e.g. ``fill_zeros(5)`` -> ``"ISIC_9000005"``.
    """
    number = str(index)
    missing = 7 - len(number)
    if missing <= 0:
        return "ISIC_" + number
    # Tag first, zeros after: unique for every (tag, index) pair below 10**6.
    return "ISIC_" + number_to_fill + "0" * (missing - 1) + number

# A 7-digit index needs no padding, so it comes back as-is after the prefix.
print(fill_zeros(1_000_000))

ISIC_1000000


In [67]:
def process_single_item(modified_row, index, letter='9'):
    """Write one image file per entry of ``type_path_offset_list`` for a row.

    Every file is named ``<image id>_<entry name>.png`` inside the entry's
    folder, where the image id comes from ``fill_zeros(index, letter)`` and
    the picture is drawn from the entry's attribute group of ``modified_row``.
    """
    image_id = fill_zeros(index, letter)
    for name, offset, path in type_path_offset_list:
        file_name = "{}_{}.png".format(image_id, name)
        draw_image(modified_row, offset, os.path.join(path, file_name))

# Smoke test: write all six images for the scaled sample row under id "1234".
process_single_item(modified_a, index="1234")

<Figure size 432x288 with 0 Axes>

In [68]:
# Six evenly spaced scale factors: 1/3, 2/3, 1, 4/3, 5/3, 2.
args = [numerator / 3 for numerator in range(1, 7)]
print(args)

def create_dataset(indexes_to_use, letter='9'):
    """Render every scale combination of the given frame rows to image files.

    For each row index, all ``len(args) ** 4`` combinations of
    (x offset, y offset, x size, y size) factors taken from ``args`` are
    applied with ``noise_row`` and written with ``process_single_item``.
    Images are numbered from 1 in generation order; ``letter`` is forwarded
    to the file naming.
    """
    # Same iteration order as four nested loops: x, y, x size, y size.
    scale_grid = [
        (x_a, y_a, x_s_a, y_s_a)
        for x_a in args
        for y_a in args
        for x_s_a in args
        for y_s_a in args
    ]

    next_image_id = 1
    for idx in tqdm(indexes_to_use):
        for x_a, y_a, x_s_a, y_s_a in scale_grid:
            # Re-read the row for every combination so each one starts from
            # the unscaled values.
            noisy_row = noise_row(frame.iloc[idx][1:], x_a, y_a, x_s_a, y_s_a)
            process_single_item(noisy_row, next_image_id, letter)
            next_image_id += 1

# Dry run with no rows: only checks that the function is callable.
create_dataset(indexes_to_use=[])

0it [00:00, ?it/s]


[0.3333333333333333, 0.6666666666666666, 1.0, 1.3333333333333333, 1.6666666666666667, 2.0]


In [69]:
def fill_cnt_tensor(t, group_size=None):
    """Count the non-empty boxes in each of the 6 attribute groups of a row.

    A box counts as present when its first value (position ``4 * i`` inside
    the group, for ``i`` in 0..14) is not equal to 0.

    Parameters
    ----------
    t : pandas.Series or sequence
        Flat row of box values, addressed by position.
    group_size : int, optional
        Number of values between the starts of two consecutive groups.
        Defaults to the notebook-level ``sz``.

    Returns
    -------
    tuple of int
        Six counts (each 0..15), one per attribute group, in group order.

    Raises
    ------
    IndexError
        If ``t`` is too short to hold all inspected positions.
    """
    if group_size is None:
        group_size = sz

    # Plain array access is positional; integer keys on a label-indexed
    # Series are deprecated for that purpose.
    values = np.asarray(t)

    counts = []
    for group in range(6):
        start = group * group_size
        present = sum(1 for box in range(15) if values[start + box * 4] != 0)
        counts.append(present)
    return tuple(counts)
# Box counts per attribute group for sample row 2.
fill_cnt_tensor(t=frame.iloc[2][1:])

(1, 0, 1, 0, 1, 0)

In [70]:
# Group the frame rows by their box-count signature:
# signature -> (number of rows, list of row positions).
map_tuple_to_indexes = {}

for idx in range(len(frame.index)):
    tpl = fill_cnt_tensor(frame.iloc[idx][1:])
    rows_so_far, row_positions = map_tuple_to_indexes.get(tpl, (0, []))
    row_positions.append(idx)
    map_tuple_to_indexes[tpl] = (rows_so_far + 1, row_positions)

# Flatten to (signature, count, row positions), most frequent signature first.
list_tuple_to_indexes = [
    (signature, count, positions)
    for signature, (count, positions) in map_tuple_to_indexes.items()
]
list_tuple_to_indexes.sort(key=lambda entry: entry[1], reverse=True)

[(signature, count) for signature, count, _ in list_tuple_to_indexes]

[((1, 0, 0, 0, 0, 0), 493),
 ((1, 0, 0, 0, 1, 0), 398),
 ((1, 0, 0, 0, 2, 0), 141),
 ((1, 0, 0, 0, 3, 0), 79),
 ((1, 1, 0, 0, 0, 0), 70),
 ((1, 0, 2, 0, 0, 0), 56),
 ((1, 0, 1, 0, 0, 0), 55),
 ((1, 0, 0, 0, 4, 0), 54),
 ((1, 0, 1, 0, 1, 0), 42),
 ((1, 0, 3, 0, 0, 0), 34),
 ((1, 0, 0, 0, 5, 0), 24),
 ((1, 1, 0, 0, 1, 0), 24),
 ((1, 0, 0, 1, 0, 0), 21),
 ((1, 0, 2, 0, 1, 0), 20),
 ((1, 2, 0, 0, 1, 0), 18),
 ((1, 2, 0, 0, 0, 0), 18),
 ((1, 4, 0, 0, 0, 0), 17),
 ((1, 0, 5, 0, 0, 0), 16),
 ((1, 0, 0, 0, 6, 0), 16),
 ((1, 3, 0, 0, 0, 0), 14),
 ((1, 0, 1, 0, 2, 0), 14),
 ((1, 0, 4, 0, 0, 0), 14),
 ((2, 0, 0, 0, 1, 0), 13),
 ((1, 0, 1, 0, 3, 0), 13),
 ((1, 0, 3, 0, 1, 0), 13),
 ((2, 0, 0, 0, 0, 0), 12),
 ((1, 0, 6, 0, 0, 0), 11),
 ((1, 0, 2, 0, 2, 0), 11),
 ((1, 0, 7, 0, 0, 0), 11),
 ((1, 5, 0, 0, 0, 0), 10),
 ((0, 0, 0, 0, 0, 0), 10),
 ((1, 0, 0, 1, 1, 0), 9),
 ((1, 1, 0, 0, 2, 0), 9),
 ((1, 1, 1, 0, 0, 0), 9),
 ((1, 4, 0, 0, 1, 0), 8),
 ((1, 3, 0, 0, 1, 0), 8),
 ((1, 0, 0, 0, 7, 0), 8),
 ((1

In [71]:
# Short label for each position of a box-count signature, in group order.
lbls = [
    "segm",
    "globules",
    "milia_like_cyst",
    "negative_network",
    "pigment_network",
    "streaks",
]

In [72]:
# For every label, the number of rows that contain at least one box of it.
distr_cnt = [
    sum(count for signature, count, _ in list_tuple_to_indexes if signature[position] > 0)
    for position in range(len(lbls))
]

sorted(zip(lbls, distr_cnt), key=lambda pair: pair[1], reverse=True)

[('segm', 2536),
 ('pigment_network', 1504),
 ('milia_like_cyst', 681),
 ('globules', 603),
 ('negative_network', 189),
 ('streaks', 100)]

In [73]:
# Signatures (with row counts) that contain at least one "streaks" box.
[
    (signature, count)
    for signature, count, _ in list_tuple_to_indexes
    if signature[lbls.index("streaks")] > 0
]

[((1, 0, 0, 0, 1, 1), 4),
 ((1, 1, 0, 0, 1, 1), 4),
 ((1, 0, 0, 0, 0, 1), 4),
 ((1, 0, 0, 0, 0, 2), 3),
 ((1, 2, 0, 0, 0, 2), 3),
 ((1, 1, 0, 0, 0, 1), 3),
 ((1, 0, 0, 0, 0, 3), 2),
 ((1, 2, 0, 0, 1, 2), 2),
 ((1, 3, 0, 0, 3, 4), 2),
 ((1, 0, 0, 0, 0, 4), 2),
 ((1, 4, 0, 0, 0, 4), 2),
 ((1, 0, 0, 0, 2, 1), 2),
 ((1, 0, 0, 0, 1, 2), 2),
 ((1, 0, 0, 0, 0, 5), 2),
 ((1, 0, 0, 0, 1, 4), 2),
 ((1, 1, 0, 0, 2, 2), 2),
 ((1, 1, 0, 0, 2, 3), 1),
 ((1, 12, 0, 0, 6, 7), 1),
 ((1, 6, 0, 0, 4, 4), 1),
 ((1, 3, 0, 0, 1, 1), 1),
 ((1, 3, 0, 0, 3, 3), 1),
 ((1, 12, 0, 0, 2, 3), 1),
 ((2, 0, 0, 0, 1, 7), 1),
 ((1, 0, 0, 0, 3, 11), 1),
 ((1, 11, 0, 0, 9, 5), 1),
 ((1, 1, 0, 0, 4, 3), 1),
 ((1, 6, 0, 0, 3, 3), 1),
 ((1, 0, 0, 0, 2, 2), 1),
 ((1, 0, 0, 0, 3, 3), 1),
 ((1, 0, 1, 0, 2, 2), 1),
 ((1, 6, 0, 0, 2, 2), 1),
 ((1, 11, 0, 0, 2, 8), 1),
 ((2, 4, 0, 0, 4, 4), 1),
 ((1, 0, 0, 0, 1, 3), 1),
 ((1, 0, 15, 0, 3, 1), 1),
 ((1, 4, 0, 5, 4, 6), 1),
 ((1, 5, 0, 0, 1, 1), 1),
 ((1, 1, 0, 0, 2, 4), 1),
 ((2, 

In [74]:
# Signatures (with row counts) that contain at least one "negative_network" box.
[
    (signature, count)
    for signature, count, _ in list_tuple_to_indexes
    if signature[lbls.index("negative_network")] > 0
]

[((1, 0, 0, 1, 0, 0), 21),
 ((1, 0, 0, 1, 1, 0), 9),
 ((1, 0, 0, 2, 0, 0), 7),
 ((1, 0, 0, 1, 2, 0), 6),
 ((1, 0, 0, 1, 3, 0), 5),
 ((1, 0, 0, 3, 0, 0), 3),
 ((1, 0, 0, 3, 1, 0), 3),
 ((1, 0, 5, 1, 0, 0), 3),
 ((1, 0, 0, 2, 1, 0), 3),
 ((1, 0, 0, 2, 3, 0), 3),
 ((1, 0, 0, 1, 4, 0), 3),
 ((1, 4, 0, 5, 0, 0), 2),
 ((0, 0, 0, 1, 1, 0), 2),
 ((1, 0, 0, 4, 0, 0), 2),
 ((1, 0, 1, 2, 0, 0), 2),
 ((1, 0, 4, 1, 0, 0), 2),
 ((1, 0, 1, 1, 3, 0), 2),
 ((1, 1, 1, 1, 0, 0), 2),
 ((1, 0, 1, 1, 0, 0), 2),
 ((1, 0, 0, 2, 4, 0), 2),
 ((1, 3, 0, 2, 4, 0), 1),
 ((1, 13, 0, 1, 10, 0), 1),
 ((1, 0, 3, 1, 6, 0), 1),
 ((2, 0, 3, 1, 1, 0), 1),
 ((4, 4, 0, 2, 5, 0), 1),
 ((3, 0, 0, 3, 8, 0), 1),
 ((1, 4, 0, 5, 4, 6), 1),
 ((3, 0, 0, 1, 2, 0), 1),
 ((1, 2, 0, 1, 1, 0), 1),
 ((1, 1, 0, 1, 2, 0), 1),
 ((1, 5, 0, 3, 0, 0), 1),
 ((2, 0, 0, 1, 1, 0), 1),
 ((1, 5, 0, 2, 2, 4), 1),
 ((1, 8, 0, 3, 0, 0), 1),
 ((1, 6, 0, 1, 0, 0), 1),
 ((1, 1, 2, 2, 0, 0), 1),
 ((1, 0, 2, 2, 0, 0), 1),
 ((1, 5, 0, 1, 0, 0), 1),
 ((1, 0, 

In [75]:
# Signatures (with row counts) that contain at least one "globules" box.
[
    (signature, count)
    for signature, count, _ in list_tuple_to_indexes
    if signature[lbls.index("globules")] > 0
]

[((1, 1, 0, 0, 0, 0), 70),
 ((1, 1, 0, 0, 1, 0), 24),
 ((1, 2, 0, 0, 1, 0), 18),
 ((1, 2, 0, 0, 0, 0), 18),
 ((1, 4, 0, 0, 0, 0), 17),
 ((1, 3, 0, 0, 0, 0), 14),
 ((1, 5, 0, 0, 0, 0), 10),
 ((1, 1, 0, 0, 2, 0), 9),
 ((1, 1, 1, 0, 0, 0), 9),
 ((1, 4, 0, 0, 1, 0), 8),
 ((1, 3, 0, 0, 1, 0), 8),
 ((1, 6, 0, 0, 1, 0), 7),
 ((1, 5, 0, 0, 1, 0), 7),
 ((1, 2, 0, 0, 3, 0), 7),
 ((1, 1, 0, 0, 3, 0), 6),
 ((1, 5, 0, 0, 2, 0), 6),
 ((1, 3, 0, 0, 2, 0), 6),
 ((1, 2, 0, 0, 4, 0), 5),
 ((1, 3, 0, 0, 3, 0), 5),
 ((1, 2, 0, 0, 2, 0), 5),
 ((1, 7, 0, 0, 1, 0), 4),
 ((1, 1, 0, 0, 1, 1), 4),
 ((1, 1, 0, 0, 4, 0), 4),
 ((1, 1, 3, 0, 0, 0), 4),
 ((2, 1, 0, 0, 0, 0), 4),
 ((1, 7, 0, 0, 0, 0), 4),
 ((1, 1, 0, 0, 5, 0), 3),
 ((1, 2, 0, 0, 5, 0), 3),
 ((1, 10, 0, 0, 0, 0), 3),
 ((1, 5, 0, 0, 3, 0), 3),
 ((1, 2, 0, 0, 0, 2), 3),
 ((1, 2, 1, 0, 1, 0), 3),
 ((1, 4, 0, 0, 4, 0), 3),
 ((1, 2, 1, 0, 0, 0), 3),
 ((1, 1, 0, 0, 0, 1), 3),
 ((1, 1, 2, 0, 1, 0), 3),
 ((1, 2, 4, 0, 1, 0), 3),
 ((1, 4, 0, 0, 3, 0), 2),
 ((1

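Let's take up to 100 elements for each of these three attributes.

=> 300 * 256 = 76_800 imgs

(Note: 256 = 4^4 corresponds to four scale factors per parameter; `args` above currently holds six values, i.e. 6^4 = 1_296 variants per element.)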

In [76]:
def populate(lbl_list, max_cnt=100):
    """Pick up to ``max_cnt`` distinct signatures per label, one row for each.

    Walks ``list_tuple_to_indexes`` in its existing order for every label in
    ``lbl_list`` and keeps the signatures whose count for that label is
    positive. A signature already picked for an earlier label is skipped and
    does not count towards the limit.

    Returns
    -------
    list of (signature, row position)
        The row position is the first one recorded for that signature.
    """
    picked = []
    seen_signatures = set()
    for label in lbl_list:
        taken = 0
        for signature, _, row_positions in list_tuple_to_indexes:
            if not signature[lbls.index(label)] > 0:
                continue
            if taken == max_cnt:
                # Quota for this label is full; nothing further can be added.
                break
            if signature in seen_signatures:
                continue
            taken += 1
            seen_signatures.add(signature)
            picked.append((signature, row_positions[0]))
    return picked

# Select the rows to augment, then cut the selection into 10 jobs.
tuple_index_to_noise = populate(["streaks", "negative_network", "globules"])
t_i_n_s = len(tuple_index_to_noise) // 10 + 1

# Each job is (job number as text, its slice of the selection); the last
# slices may be shorter than `t_i_n_s` or empty.
split_to_jobs = [
    (str(i), tuple_index_to_noise[chunk_start: chunk_start + t_i_n_s])
    for i, chunk_start in enumerate(range(0, 10 * t_i_n_s, t_i_n_s))
]

# Chunk size, number of selected rows, and the total covered by the jobs.
print(
    t_i_n_s,
    len(tuple_index_to_noise),
    sum(len(stj) for _, stj in split_to_jobs),
    sep="\n",
)


28
275
275


In [77]:
from joblib import Parallel, delayed

def print_some(x):
    """Run ``create_dataset`` for one job and return the row positions it used.

    ``x`` is a ``(letter, pairs)`` job as built in ``split_to_jobs``: the
    letter is forwarded to ``create_dataset`` and the second element of every
    pair is taken as a row position.
    """
    letter, signature_row_pairs = x[0], x[1]
    idxes = [row_position for _, row_position in signature_row_pairs]
    create_dataset(idxes, letter)
    return idxes

# Run the jobs on 10 joblib workers; each returns the row positions it processed.
job_calls = (delayed(print_some)(job) for job in tqdm(split_to_jobs))
results = Parallel(n_jobs=10)(job_calls)

print(results)

100%|██████████| 10/10 [00:00<00:00, 98.44it/s]
  0%|          | 0/23 [00:00<?, ?it/s]Process LokyProcess-22:
Traceback (most recent call last):
  File "/Users/nduginets/.conda/envs/newexorders/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/nduginets/.conda/envs/newexorders/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/nduginets/.conda/envs/newexorders/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 446, in _process_worker
    _process_reference_size = _get_memory_usage(pid, force_gc=True)
  File "/Users/nduginets/.conda/envs/newexorders/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 116, in _get_memory_usage
    gc.collect()
KeyboardInterrupt
Process LokyProcess-20:
Traceback (most recent call last):
  File "/Users/nduginets/.conda/envs/newexorders/lib/python3.7/multiprocessing/process.py", line 297, in _

KeyboardInterrupt: 