In [1]:
from os import listdir
from sys import argv

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import f1_score, roc_curve, roc_auc_score, precision_recall_curve

from sklearn.decomposition import PCA

from skimage import feature
from skimage.feature import greycomatrix
from skimage.feature import greycoprops
from scipy.stats import entropy
from skimage.feature import local_binary_pattern

from sklearn.svm import SVC
from skimage import measure
from skimage.io import imread

import xgboost as xgb

import pandas as pd

from sklearn.preprocessing import minmax_scale as mms
from skimage.util.shape import view_as_blocks
import scipy.stats as stats
from scipy.misc import imresize

from multiprocessing import Pool, Process, Array
import multiprocessing as mp



# Inicialização de algumas variáveis

In [2]:
# Table of usable images; the 'pic' and 'gt' columns are read below as the
# image path and the ground-truth path of each row.
paths = pd.read_csv('../IC2017_DATA/valid_images.csv')
n_images = paths.shape[0]

# Labels of the vegetation indices referred to by the analysis section.
names_VI = ["ExG", "ExGR", "CIVE", "VEG", "WI", "NGRDI"]

# Detached copies so later edits never write back into the DataFrame.
path_imgs, path_gts = (paths[column].values.copy() for column in ('pic', 'gt'))


In [3]:
# Number of tiles per image side used when aggregating: 8, 16, 32, 64, 128.
resizing_factors = [2 ** exponent for exponent in range(3, 8)]
# Per-tile statistics; their order fixes the order of the feature columns.
functions = [np.mean, np.max, np.min, np.std]
nr, nf = len(resizing_factors), len(functions)

# Laço de criação das Features

In [4]:
"""
Ground Truth (GT) is generated here.
"""

GT = np.zeros((n_images*512*512, ), dtype = "uint8")
for i in range(n_images):
    gt = imread(path_gts[i], as_gray=True)
    _max = np.max(gt)
    #print(i, _max)
    _max = _max if _max != 0 else 1
    #print(i, _max)
    gt = gt // _max
    GT[i*512*512: (i+1)*512*512] = gt.reshape(512*512)

In [5]:
"""
These variables are shared arrays to parallelize the feature creation
"""

shared = Array("d", (n_images * (512 * 512) * (1 + nr * nf)), lock=False)

In [6]:
# View the shared buffer as a (pixels, features) matrix without copying it,
# then preview its last rows.
feature_shape = (n_images * (512 * 512), 1 + nr * nf)
npshared = np.frombuffer(shared).reshape(feature_shape)
data = pd.DataFrame(npshared)
data.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
15466491,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15466492,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15466493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15466494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15466495,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
"""
This function is requied to use the shared variables as global variables in the shared enviroment.
"""

def _init(init_args):
    global shared
#     shared = init_args[0]

In [8]:
def worker(p):
    """Compute every feature column for image ``p`` and write it into ``shared``.

    The image is reduced to a single index ``x = 2*g - r - b`` computed from
    the channels normalized by their per-pixel sum. For each resizing factor
    the index is cut into ``factor x factor`` tiles, each statistic in
    ``functions`` is taken per tile, and the small statistic map is resized
    back to 512 x 512 and stored as one column. The raw index itself goes in
    the last column (``nr * nf``).

    Parameters
    ----------
    p : int
        Position of the image in ``path_imgs``; also selects the block of
        ``512 * 512`` rows of the shared matrix that is written.

    Returns
    -------
    int
        ``p`` unchanged, so the caller can tell which images were processed.
    """
    offset = 512 * 512
    out = np.frombuffer(shared).reshape(n_images * offset, (1 + nr * nf))
    rows = slice(p * offset, (p + 1) * offset)

    print(p, end = ' ')
    pic = imread(path_imgs[p], False)

    # NOTE(review): channels 0, 1, 2 are labelled B, G, R here; confirm this
    # matches the order imread returns.  The index below is symmetric in the
    # first and third channel, so the result does not depend on that order.
    B, G, R = [np.float32(pic[:, :, c]) for c in range(3)]

    # Pixels whose channels are all zero would give 0/0 = NaN, and the NaN
    # would then leak into every tile statistic.  Dividing by 1 there yields
    # normalized channels of 0 and therefore an index of 0.
    total = R + G + B
    total[total == 0] = 1
    r_norm = R / total
    g_norm = G / total
    b_norm = B / total

    x = 2 * g_norm - r_norm - b_norm

    for i, factor in enumerate(resizing_factors):
        tile = 512 // factor
        x_block = view_as_blocks(x, (tile, tile))
        for j, f in enumerate(functions):
            x_stat = f(x_block, axis = (2, 3))
            # imresize is reported as deprecated by SciPy (removal announced
            # for 1.3.0); it is kept so the produced features do not change.
            x_rsz = imresize(x_stat, (512, 512))
            out[rows, i * nf + j] = x_rsz.ravel()

    out[rows, nr * nf] = x.ravel()

    print("done")
    return p

In [9]:
"""
Define pool of processes
"""
pool = Pool(processes=4, initializer=_init, initargs=([shared],))

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.


4 done
0 done
12 done
5 done
8 done
13 done
1 done
6 done
9 done
14 done
7 done
2 done
10 done
15 done
16 done
3 done
11 done
20 done
17 done
24 done
21 done
18 done
28 done
25 done
22 done
26 done
19 done
29 done
23 done
32 done
27 done
30 done
36 done
31 done
40 done
33 done
37 done
44 done
41 done
34 done
38 done
45 done
35 done
42 done
39 done
46 done
52 done
43 done
48 done
47 done
53 done
49 done
54 done
56 done
55 done
50 done
57 done
58 done
51 done


In [10]:
# %%time
# Run `worker` once per image index across the pool; the cell displays the
# list of indices returned by the workers.
image_indices = range(n_images)
pool.map(worker, image_indices)

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58]

In [11]:
# Re-wrap the shared buffer, now filled by the workers, and preview its tail.
total_pixels = n_images * (512 * 512)
npshared = np.frombuffer(shared).reshape((total_pixels, 1 + nr * nf))
data = pd.DataFrame(npshared)
data.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
15466491,99.0,139.0,137.0,200.0,13.0,42.0,133.0,19.0,21.0,9.0,...,11.0,40.0,16.0,100.0,22.0,49.0,30.0,79.0,16.0,0.067416
15466492,99.0,139.0,137.0,200.0,13.0,42.0,133.0,19.0,21.0,9.0,...,11.0,40.0,17.0,100.0,23.0,48.0,29.0,77.0,15.0,0.05029
15466493,99.0,139.0,137.0,200.0,13.0,42.0,133.0,19.0,21.0,9.0,...,11.0,40.0,17.0,100.0,23.0,46.0,28.0,75.0,15.0,0.043222
15466494,99.0,139.0,137.0,200.0,13.0,42.0,133.0,19.0,21.0,9.0,...,11.0,40.0,17.0,100.0,23.0,45.0,27.0,74.0,15.0,0.039139
15466495,99.0,139.0,137.0,200.0,13.0,42.0,133.0,19.0,21.0,9.0,...,11.0,40.0,17.0,100.0,23.0,45.0,27.0,74.0,15.0,0.041045


In [12]:
# Per-pixel image identifier: every pixel of image i carries that row's 'num'.
pixels_per_image = 512 * 512
IMG = np.zeros(n_images * pixels_per_image, dtype="uint8")
for i in range(len(paths)):
    first = i * pixels_per_image
    IMG[first:first + pixels_per_image] = paths.iloc[i]['num']

In [13]:
# Attach the ground-truth label and the image identifier of every pixel.
for column_name, column_values in (("GT", GT), ("IMG", IMG)):
    data[column_name] = column_values

In [14]:
# 'solo' groups pixels by image number: 0 for IMG <= 17, 2 for IMG >= 40,
# and 1 for everything in between.
low_images = data["IMG"] <= 17
high_images = data["IMG"] >= 40
data["solo"] = 1
data.loc[low_images, "solo"] = 0
data.loc[high_images, "solo"] = 2

In [16]:
# Persist the full per-pixel feature table, without the index column.
dataset_csv = "../IC2017_DATA/FancyApproach_dataset.csv"
data.to_csv(dataset_csv, index=False)

# Análises

# Análise das VIs (curva ROC)

In [None]:
# ROC curve and AUC of every column of `data`, each scored against GT.
fig = plt.figure(figsize=(12, 9))
ax = fig.gca()
scores = pd.Series(None, index=data.columns)
for col in data.columns:
    column_values = data[col]
    score = roc_auc_score(GT, column_values)
    scores[col] = score
    print("%-30s AUC =" % col, score)
    # roc_curve returns (fpr, tpr, thresholds); only the first two are drawn.
    curve = roc_curve(GT, column_values)
    ax.plot(curve[0], curve[1], label = col)
ax.legend()

CIVE                           AUC = 0.18612729362208746
ExG                            AUC = 0.809777961025575
ExGR                           AUC = 0.836831508304779


In [37]:
scores.sort_values(ascending=False)

GT       1.000000
WI       0.958257
ExG      0.940711
VEG      0.934285
ExGR     0.900862
NGRDI    0.858256
base     0.500000
solo     0.344400
IMG      0.315439
CIVE     0.061969
dtype: float64