# PTexture

A package to compute a total of 261 texture features from voxel lists.

## News

* 9/9/2018. ZLNU is renamed to ZSNU standing for zone-size non-uniformity.
* 9/9/2018. Test codes are added for robustness.


## How to use

1. Open ptexture_with_test.ipynb from Jupyter lab or Jupyter Notebook
2. 'Edit' -> 'Clear All Outputs'
3. 'Run' -> 'Run All Cells'
4. Move to the bottom part of the file. If any errors are found, please contact me.
5. Follow the advice at the bottom of the file.




Kenji Hirata, MD, PhD

Hokkaido University, Sapporo, Japan

khirata@med.hokudai.ac.jp

In [1]:
################################################
#
# ptexture project
# 'PET texture analysis with Python'
#
# Kenji Hirata, MD, PhD
# Hokkaido University, Sapporo, Japan
# khirata@med.hokudai.ac.jp
# Rev 8/30/2018
# 1/1/2018
#
################################################

import numpy as np
import scipy.stats
import scipy.ndimage
import pandas as pd
from IPython.display import display
import os
import datetime

def no_error():
    '''Print a fixed confirmation message; called at the end of each cell.'''
    message = 'No error.'
    print(message)
    
no_error()

No error.


In [2]:
################################################
#
# Report function
# Kenji Hirata, 1/1/2018
#
################################################

def do_not_report(obj):
    '''Ignore *obj* and do nothing (silent reporting function).'''
    return None

def do_report(obj):
    '''Show *obj*: DataFrames go through IPython's display(), anything else is printed.

    isinstance() is used so that DataFrame subclasses are also rendered
    with display() instead of falling through to print().
    '''
    if isinstance(obj, pd.DataFrame):
        display(obj)
    else:
        print(obj)

report = do_not_report
#report = do_report

no_error()

No error.


In [3]:
################################################
#
# Test data
# Kenji Hirata, 11/21/2017
# 8/31/2018
#
################################################

def data1():
    '''Return a 3x5x5 integer test volume (three 5x5 slices).'''
    return np.array([
        [[3,3,4,4,5],
         [3,3,4,3,5],
         [7,3,4,3,5],
         [3,3,4,4,5],
         [3,3,4,4,5]],

        [[6,2,2,4,5],
         [2,2,4,4,9],
         [2,7,9,9,4],
         [1,2,4,9,2],
         [2,1,4,2,9]],

        [[3,3,4,4,5],
         [3,9,4,4,5],
         [3,3,9,4,5],
         [3,8,4,4,5],
         [3,8,4,4,5]],
    ])


def data2():
    '''Return a 5x7 integer test image.'''
    return np.array([
        [3,3,4,4,5,7,3],
        [3,3,4,3,5,5,3],
        [7,3,4,3,5,4,4],
        [3,3,4,4,5,4,4],
        [3,3,4,4,5,6,1],
    ])
    
    
def data3():
    data = np.array([
        2,2,1,1,1,1,1,
        1,1,1,1,7,1,2,
        1,1,0,0,7,7,2,
        2,2,2,2,2,3,3,
        2,2,3,np.nan,3,4,4,
        5,5,5,3,3,3,3,
        3,3,3,3,3,2,3
    ])
    m = data.reshape((7,7))
    return m


def data4():
    '''Return a 5x5 float test image containing two NaN cells.'''
    nan = np.nan
    return np.array([
        [1,1,1,1,7],
        [1,0,0,1,7],
        [1,1,2,2,nan],
        [2,2,3,nan,3],
        [5,5,5,3,3],
    ])


def data5():
    '''Return an 8x8 array of uniform random numbers drawn after seeding numpy with 123.

    Note that this reseeds numpy's global random generator.
    '''
    np.random.seed(123)
    return np.random.rand(64).reshape((8,8))


def data6():
    '''Return an 8-row x/y/v voxel list on a 3x3 grid with one position absent.'''
    xs = [1,1,1,2,2,3,3,3]
    ys = [1,2,3,1,2,1,2,3]
    vs = [11,12,13,14,15,16,17,18]
    return pd.DataFrame({'x': xs, 'y': ys, 'v': vs})


def data7():
    '''Return the 16 values i**2/10 + 5 for i = 0..15.'''
    squares = np.arange(16) ** 2
    return squares / 10 + 5


def data8():
    '''Return a 15-row x/y/v voxel list with integer values.'''
    xs = [1,1,1,1,2,2,2,3,3,3,3,4,4,4,4]
    ys = [1,2,3,4,1,2,3,1,2,3,4,1,2,3,4]
    vs = [11,15,18,23,9,8,6,4,7,6,2,8,9,12,14]
    return pd.DataFrame({'x': xs, 'y': ys, 'v': vs})


def data9():
    '''Return a 15-row x/y/v voxel list with non-integer values.'''
    columns = {
        'x': [1,1,1,1,2,2,2,3,3,3,3,4,4,4,4],
        'y': [1,2,3,4,1,2,3,1,2,3,4,1,2,3,4],
        'v': [11.5,15.7,18.9,23.2,9.4,8.7,6.1,4.8,7.1,6.2,2.9,8.7,9.5,12.4,14.3],
    }
    return pd.DataFrame(columns)


def data10():
    '''Return an 8-row x/y/v voxel list with float values.'''
    columns = {
        'x': [1,1,1,2,2,2,3,3],
        'y': [1,2,3,1,2,3,1,2],
        'v': [3.5, 5.5, 7.0, 3.0, 4.0, 2.0, 6, 5],
    }
    return pd.DataFrame(columns)


def data11():
    '''Return an x/y/z/v voxel list for a 5x4x3 grid with three voxels removed.

    v is 0.27 times the row number of the full grid (x varies fastest,
    then y, then z); rows 18, 20 and 59 are dropped afterwards, so the
    original index labels are kept.
    '''
    nx, ny, nz = 5, 4, 3
    coords = [(x,y,z) for z in range(nz) for y in range(ny) for x in range(nx)]
    full = pd.DataFrame(coords, columns=['x','y','z'])
    full['v'] = full.index * 0.27
    return full.drop([18,20,59])


def data12():
    '''Return the z == 0 rows of data11() with the z column dropped.'''
    volume = data11()
    first_slice = volume[volume.z==0]
    return first_slice.drop(['z'],axis=1)


def data13():
    '''Return an 8-row x/y/v1 voxel list (v1 holds already-discretized levels).'''
    xs = [1,1,1,2,2,3,3,3]
    ys = [1,2,3,1,2,1,2,3]
    levels = [3,4,3,3,5,4,3,5]
    return pd.DataFrame({'x': xs, 'y': ys, 'v1': levels})


def data14():
    '''Return a 3x3x3 integer test volume.'''
    return np.array([
        [[1,1,1],
         [1,2,2],
         [1,1,1]],

        [[3,2,1],
         [1,1,1],
         [2,2,1]],

        [[1,1,1],
         [1,3,1],
         [1,1,1]],
    ])


def test_datax():
    '''Sanity-check the test fixtures by their sums, shapes and NaN counts.

    Several checks compare floating-point sums with ==, so they rely on
    the fixtures being built exactly as written.
    '''
    assert data1().sum() == 320
    assert data2().sum() == 138
    assert data3().shape == (7,7)
    assert np.sum(np.isnan(data3())) == 1
    assert data5().sum() == 32.87392062325199
    assert data6().x.sum() == 16
    assert data7().sum() == 204
    assert data8().v.sum() == 152
    assert data9().sum().sum() == 233.4
    assert data10().sum().sum() == 66
    assert data11().sum().sum() == 705.71
    # data12 is checked with a tolerance window rather than exact equality
    assert 110.43 < data12().sum().sum() < 110.45
    assert data13().sum().sum() == 61
    assert data14().sum() == 36

    
test_datax()

no_error()

No error.


In [4]:
################################################
#
# generate_ndarray
# takes x,y,v DataFrame to generate numpy 2D array
# takes x,y,z,v DataFrame to generate numpy 3D array
#
# Kenji Hirata, 1/1/2018
# 8/31/2018
#
################################################


def _voxels_to_array(df, axes, value_column):
    '''Scatter a voxel list into a dense, NaN-padded ndarray.

    df           -- DataFrame holding one row per voxel
    axes         -- coordinate column names, first array axis first
                    (e.g. ['y', 'x'] or ['z', 'y', 'x'])
    value_column -- name of the column whose values fill the array

    The array spans min..max of every coordinate; positions with no
    voxel stay NaN. If a position occurs more than once, the last row wins.
    Raises ValueError (from np.min) when df has no rows.
    '''
    # Cast once up front so that float-typed (integer-valued) coordinate
    # columns can be used both for the array shape and as indices.
    coords = [np.asarray(df[axis]).astype(int) for axis in axes]
    origins = [np.min(c) for c in coords]
    shape = tuple(int(np.max(c) - o) + 1 for c, o in zip(coords, origins))

    m = np.full(shape, np.nan)
    # One vectorized assignment replaces the row-by-row iloc loop.
    m[tuple(c - o for c, o in zip(coords, origins))] = np.asarray(df[value_column])
    return m


def generate_2darray(df):
    '''Build a 2-D array indexed [y, x] from the x, y, v columns of df.'''
    return _voxels_to_array(df, ['y', 'x'], 'v')

def generate_3darray(df):
    '''Build a 3-D array indexed [z, y, x] from the x, y, z, v columns of df.'''
    return _voxels_to_array(df, ['z', 'y', 'x'], 'v')

def generate_2darray_v1(df):
    '''Build a 2-D array indexed [y, x] from the x, y, v1 columns of df.'''
    report('Generating 2-dimensional array')
    return _voxels_to_array(df, ['y', 'x'], 'v1')

def generate_3darray_v1(df):
    '''Build a 3-D array indexed [z, y, x] from the x, y, z, v1 columns of df.'''
    report('Generating 3-dimensional array')
    return _voxels_to_array(df, ['z', 'y', 'x'], 'v1')


def test_generate_2darray():
    '''generate_2darray on data6: 3x3 grid, one NaN hole, values preserved.'''
    grid = generate_2darray(data6())
    assert grid.shape == (3,3)
    assert np.isnan(grid[2,1])
    # NaN cells count as zero in the total
    assert np.nansum(grid) == 11+12+13+14+15+16+17+18

def test_generate_3darray():
    '''generate_3darray on data11: shape, the three NaN holes and the value total.'''
    vol = generate_3darray(data11())
    assert vol.shape == (3,4,5)
    for hole in [(0,3,3), (1,0,0), (2,3,4)]:
        assert np.isnan(vol[hole])
    # NaN cells count as zero in the total
    assert 451.71 < np.nansum(vol) < 451.72

def test_generate_2darray_v1():
    '''generate_2darray_v1 on data6 with v1 = v % 3.'''
    voxels = data6()
    voxels['v1'] = voxels.v % 3
    grid = generate_2darray_v1(voxels)
    assert grid.shape == (3,3)
    assert np.isnan(grid[2,1])
    # NaN cells count as zero in the total
    assert np.nansum(grid) == 2+1+2+1+2

def test_generate_3darray_v1():
    '''generate_3darray_v1 on data11 with v1 = floor(v / 3).'''
    voxels = data11()
    voxels['v1'] = np.floor(voxels.v / 3)
    vol = generate_3darray_v1(voxels)
    assert vol.shape == (3,4,5)
    for hole in [(0,3,3), (1,0,0), (2,3,4)]:
        assert np.isnan(vol[hole])
    # NaN cells count as zero in the total
    assert np.nansum(vol) == 123

    

test_generate_2darray()
test_generate_3darray()
test_generate_2darray_v1()
test_generate_3darray_v1()

no_error()

No error.


In [5]:
################################################
#
# discritize
# takes an ndarray of continuous values and returns an ndarray of the same size, discretized into nbin levels.
# The input ndarray may contain NaN.
# The result type is 'ndarray of float' rather than int because NaN does not exist in int type.
#
# Kenji Hirata, 11/21/2017
# modified on 12/04
#
################################################

def discritize(data, nbin, lo, hi, getCategories = False):
    '''Map continuous values onto nbin integer-valued levels 0 .. nbin-1.

    data          -- array-like supporting .ravel() and boolean-mask
                     assignment; may contain NaN, which stays NaN
    nbin          -- number of levels
    lo, hi        -- value range covered by the bins; pass np.nan for
                     either one to use the data minimum / maximum
    getCategories -- when True, also return the nbin+1 bin edges

    Values below lo go to level 0 and values at or above hi go to level
    nbin-1. Raises ValueError when lo > hi. Note that lo == hi is not
    rejected and leads to a division by zero.
    '''
    lower = np.nanmin(data.ravel()) if np.isnan(lo) else lo
    upper = np.nanmax(data.ravel()) if np.isnan(hi) else hi

    if lower > upper:
        raise ValueError('lo must not be higher than hi.')

    span = upper - lower
    R = np.floor((nbin * (data - lower) / span))
    # clamp out-of-range levels into the first / last bin
    R[R < 0] = 0
    R[R >= nbin] = nbin -1

    edges = []
    for i in range(nbin+1):
        edges.append((upper - lower) / nbin * i + lower)
    cats = np.array(edges)
    report(('bins', nbin))
    report(('categories', cats))

    if not getCategories:
        return R
    return R, cats

    
def test_discritize():
    '''Check discritize level sums for an ndarray ramp and for the data11 v column.'''
    ramp = np.arange(1,2,0.1)
    levels, edges = discritize(ramp,5,1,2,True)
    assert levels.sum() == 20
    assert edges.sum() == 9
    assert discritize(ramp,10,0,3).sum() == 45
    assert discritize(ramp,10,1.3,1.7).sum() == 41

    df = data11()
    # (nbin, lo, hi) -> expected sum of the discretized column
    cases = [((8, 0, 20), 153),
             ((10, 5, 10), 276),
             ((8, np.nan, np.nan), 203)]
    for (nbin, lo, hi), expected in cases:
        df['v1'] = discritize(df.v, nbin, lo, hi)
        assert df.v1.sum() == expected

test_discritize()

no_error()

No error.


In [6]:
################################################
#
# complement_crosstab
#
# Kenji Hirata, 11/21/2017
#
################################################

def complement_crosstab2(cr, index_hi=np.nan, column_hi=np.nan):
    '''Pad a crosstab with zeros so rows 0..index_hi-1 and columns 0..column_hi-1 all exist.

    cr        -- DataFrame with numeric index and column labels
    index_hi  -- number of rows to guarantee; NaN means max index label + 1
    column_hi -- number of columns to guarantee; NaN means max column label + 1

    Returns cr added to an all-zero frame of that size, so absent
    rows/columns appear filled with 0.
    '''
    n_rows = int(np.max(cr.index.values)+1) if np.isnan(index_hi) else index_hi
    n_cols = int(np.max(cr.columns.values)+1) if np.isnan(column_hi) else column_hi
    zeros = pd.DataFrame(np.zeros((n_rows, n_cols)),
                         index=np.arange(n_rows),
                         columns=np.arange(n_cols))
    return cr.add(zeros, fill_value=0)

def test_complement_crosstab2():
    '''A sparse crosstab is padded to a full 6x5 table and keeps its counts.'''
    rows = np.array([1,1,1,2,5])
    cols = np.array([1,2,2,2,4])
    padded = complement_crosstab2(pd.crosstab(rows, cols))
    assert padded.shape == (6,5)
    assert np.allclose(padded.columns, pd.Index([0,1,2,3,4]))
    assert np.allclose(padded.index, pd.Index([0,1,2,3,4,5]))
    assert padded.iloc[1,2] == 2
    assert padded.iloc[5,4] == 1
    
test_complement_crosstab2()

no_error()

No error.


In [7]:
################################################
#
# Usual parameters in Python
# Kenji Hirata, 11/22/2017
# 9/1/2018
#
################################################

def usualParams(v):
    '''Compute the conventional parameters of a list of voxel values.

    v -- 1-D array-like of voxel values

    Returns a one-row DataFrame with the columns SUVmax, SUVmean,
    NumOfVoxels (simply len(v)) and SUVsum.
    '''
    report('### Calculating usual parameters')

    # np.max instead of the builtin max: the builtin iterates in Python and
    # gives an order-dependent answer when NaN is present; np.max treats the
    # input the same way np.mean and np.sum below do.
    SUVmax = np.max(v)
    SUVmean = np.mean(v)
    n = len(v)
    s = np.sum(v)
    cols = ['SUVmax', 'SUVmean', 'NumOfVoxels', 'SUVsum']
    vals = [[SUVmax, SUVmean, n, s]]
    results = pd.DataFrame(data=vals, columns=cols)

    report(results)
    return results


def test_usualParams():
    '''usualParams on the data11 v column.'''
    params = usualParams(data11().v)
    suv_max, suv_mean = params.SUVmax[0], params.SUVmean[0]
    assert suv_max == 15.66
    assert 7.924736 < suv_mean < 7.924738
    assert params.NumOfVoxels[0] == 57
    assert 451.70 < params.SUVsum[0] < 451.72
    
    
test_usualParams()

no_error()

No error.


In [8]:
################################################
#
# Histogram parameters in Python
# Kenji Hirata, 11/22/2017
#
################################################

def histParams(hist):
    '''Compute histogram-based features of discretized voxel values.

    hist -- 1-D collection of gray levels (as produced by discritize)

    Returns a one-row DataFrame with the columns SDhist (sample standard
    deviation, ddof=1), Skewness and Kurtosis (scipy.stats.skew /
    scipy.stats.kurtosis with default arguments), EnergyHist (sum of p**2)
    and EntropyHist (-sum of p*log(p) over non-zero p), where p is the
    relative frequency of each gray level from 0 to the largest level present.
    '''
    report('### Calculating histogram parameters')

    # Count per gray level, zero-filled for levels that do not occur.
    counts = complement_crosstab2(pd.crosstab(hist,0)).values.ravel()
    p = counts/len(hist)
    p_nz = p[p != 0]  # log(0) must be avoided in the entropy

    features = [
        ('SDhist', np.std(hist, ddof=1)),
        ('Skewness', scipy.stats.skew(hist)),
        ('Kurtosis', scipy.stats.kurtosis(hist)),
        ('EnergyHist', np.sum(p*p)),
        ('EntropyHist', -np.sum(p_nz*np.log(p_nz))),
    ]
    cols = [name for name, _ in features]
    vals = [[value for _, value in features]]
    results = pd.DataFrame(data=vals, columns=cols)

    report(results)
    return results


def test_histParams():
    '''histParams on data11 discretized into 8 bins over [0, 20].'''
    levels = discritize(data11().v, 8,0,20)
    h = histParams(levels)
    # feature name -> (lower bound, upper bound), both exclusive
    bounds = {
        'SDhist': (1.872334, 1.872336),
        'Skewness': (0.02134, 0.02136),
        'Kurtosis': (-1.175234, -1.175232),
        'EnergyHist': (0.153585, 0.153587),
        'EntropyHist': (1.899741, 1.899743),
    }
    for name, (low, high) in bounds.items():
        assert low < h[name][0] < high

test_histParams()

no_error()

No error.


In [9]:
################################################
#
# Define 4 directions for 2-d
#        13 directions for 3-d
# In preparation for calculating GLCM and GLRLM
#
# Kenji Hirata, 1/2/2018
# bug fixed 1/9/2018 (13 directions)
#
################################################

def fourDirections():
    '''Return the 4 unique neighbour offsets of a 2-D grid.

    Of the 8 non-zero offsets with components in {-1, 0, 1}, exactly one
    of each opposite pair (d, -d) is kept: the lexicographically smaller
    one. The result is [(-1,-1), (-1,0), (-1,1), (0,-1)].
    '''
    steps = (-1, 0, 1)
    # Filtering in a comprehension avoids removing items from a list
    # while iterating over it; the result and its order are unchanged.
    return [(i, j) for i in steps for j in steps if (i, j) < (0, 0)]

    
def thirteenDirections():
    '''Return the 13 unique neighbour offsets of a 3-D grid.

    Of the 26 non-zero offsets with components in {-1, 0, 1}, exactly one
    of each opposite pair (d, -d) is kept: the lexicographically smaller
    one. The result is ordered lexicographically.
    '''
    steps = (-1, 0, 1)
    # Filtering in a comprehension avoids removing items from a list
    # while iterating over it; the result and its order are unchanged.
    return [(i, j, k) for i in steps for j in steps for k in steps
            if (i, j, k) < (0, 0, 0)]


def test_fourDirections():
    '''fourDirections must match the reference set up to sign.'''
    got = fourDirections()
    reference = [(0,1), (1,0), (1,1), (1,-1)]

    def covered(offset, pool):
        # an offset counts as present if it or its opposite is in pool
        opposite = (-offset[0], -offset[1])
        return (offset in pool) or (opposite in pool)

    for f in got:
        assert covered(f, reference)
    for f in reference:
        assert covered(f, got)
    
def test_thirteenDirections():
    '''thirteenDirections must match the reference set up to sign.'''
    got = thirteenDirections()
    reference = [(0,0,1), (0,1,1), (0,1,0), (0,1,-1),
                 (1,0,0), (1,0,1), (1,1,1), (1,1,0),
                 (1,1,-1), (1,0,-1),(1,-1,-1),(1,-1,0),(1,-1,1)]

    def covered(offset, pool):
        # an offset counts as present if it or its opposite is in pool
        opposite = (-offset[0], -offset[1], -offset[2])
        return (offset in pool) or (opposite in pool)

    for f in got:
        assert covered(f, reference)
    for f in reference:
        assert covered(f, got)

    # The 13 reference directions, read as positions on a clock face:
    #
    # (0,0,1)  3 o'clock
    # (0,1,1)  4:30
    # (0,1,0)  6 o'clock
    # (0,1,-1) 7:30
    #  9 o'clock and later are omitted because they have already appeared.
    #
    #  Downward direction (delta z=1)
    # (1,0,0) center of the clock
    # (1,0,1) 3 o'clock
    # (1,1,1) 4:30
    # (1,1,0) 6 o'clock
    # (1,1,-1) 7:30
    # (1,0,-1) 9 o'clock
    # (1,-1,-1) 10:30
    # (1,-1,0) 12 o'clock
    # (1,-1,1) 1:30


test_fourDirections()
test_thirteenDirections()

no_error()

No error.


In [10]:
################################################
#
# Generate gray-level co-occurrence matrix (GLCM) based texture parameters in Python
# Kenji Hirata, 1/2/2018
# bug fixed 9/2/2018 in comat2d()
#
################################################

def comat2d(d, offset, nbin):
    '''Build the 2-D gray-level co-occurrence matrix for one offset.

    d      -- voxel DataFrame with columns x, y and v1 (discretized level)
    offset -- (dy, dx) displacement
    nbin   -- number of gray levels; the matrix is padded to nbin x nbin

    Returns (matrix, pairs): the zero-padded crosstab of v1 against v2,
    and the merged DataFrame in which v2 is the level found at the
    position displaced by offset from the v1 voxel.
    '''
    dy, dx = offset[0], offset[1]
    # Subtracting the offset makes the merge pair each voxel (v1)
    # with the voxel at position + offset (v2).
    shifted = pd.DataFrame({'v2': d.v1, 'y': d.y - dy,  'x': d.x - dx})
    pairs = d.merge(shifted)
    cooccurrence = complement_crosstab2(pd.crosstab(pairs.v1, pairs.v2), nbin, nbin)
    return cooccurrence, pairs

def comat3d(d, offset, nbin):
    '''Build the 3-D gray-level co-occurrence matrix for one offset.

    d      -- voxel DataFrame with columns x, y, z and v1 (discretized level)
    offset -- (dz, dy, dx) displacement
    nbin   -- number of gray levels; the matrix is padded to nbin x nbin

    Returns (matrix, pairs): the zero-padded crosstab of v1 against v2,
    and the merged DataFrame in which v2 is the level found at the
    position displaced by offset from the v1 voxel.
    '''
    dz, dy, dx = offset[0], offset[1], offset[2]
    # Subtracting the offset may look odd; it makes the merge pair the
    # current voxel (v1) with the voxel at current + offset (v2).
    shifted = pd.DataFrame({'v2': d.v1, 'z': d.z - dz, 'y': d.y - dy, 'x': d.x - dx})
    pairs = d.merge(shifted)
    cooccurrence = complement_crosstab2(pd.crosstab(pairs.v1, pairs.v2), nbin, nbin)
    return cooccurrence, pairs


def test_comat2d():
    '''Sum of the data13 co-occurrence matrices over the four 2-D directions.'''
    df = data13()
    total = np.zeros((10,10))
    for offset in fourDirections():
        com, _pairs = comat2d(df, offset, 10)
        total += com

    # Only levels 3..5 occur, so everything outside that block is zero.
    answer = np.zeros((10,10))
    answer[3:6, 3:6] = [[2, 2, 2],
                        [3, 0, 0],
                        [3, 2, 1]]
    assert np.allclose(total, answer)

    
def test_comat3d():
    '''Sum of the data11 co-occurrence matrices over the thirteen 3-D directions.'''
    df = data11()
    df['v1'] = discritize(df.v, 8, 0, 20)
    total = np.zeros((8,8))
    for offset in thirteenDirections():
        com, _pairs = comat3d(df, offset, 8)
        total += com

    answer = [[21,  0,  0,  0,  0,  0,  0,  0],
              [13, 14,  0,  0,  0,  0,  0,  0],
              [38, 10, 11,  0,  0,  0,  0,  0],
              [23, 41, 16, 20,  0,  0,  0,  0],
              [ 0,  6, 29, 16, 11,  0,  0,  0],
              [ 0,  0, 19, 43, 19, 16,  0,  0],
              [ 0,  0,  0, 15,  3, 10,  2,  0],
              [ 0,  0,  0,  0,  0,  0,  0,  0]]
    assert np.allclose(total, answer)
    

test_comat2d()
test_comat3d()

no_error()

No error.


In [11]:
################################################
#
# Calculate texture features from GLCM
# Kenji Hirata, 1/2/2018
#
################################################

def comatParams2d(d, nbin):
    '''Compute GLCM features of d over the four 2-D directions.'''
    return _comatParams(d, nbin, fourDirections())
    
def comatParams3d(d, nbin):
    '''Compute GLCM features of d over the thirteen 3-D directions.'''
    return _comatParams(d, nbin, thirteenDirections())

def _comatParams(d, nbin, offsets):
    '''Compute GLCM texture features for every offset, plus their mean.

    d       -- voxel DataFrame accepted by comat2d / comat3d
    nbin    -- number of gray levels
    offsets -- list of 2-tuples or 3-tuples; the tuple length selects
               comat2d or comat3d

    Returns a DataFrame with one row per offset (labelled by the offset
    components joined with '_') and a final row labelled 'mean'. Columns:
    HomogeneityGLCM, EnergyGLCM, CorrelationGLCM, ContrastGLCM,
    EntropyGLCM, DissimilarityGLCM.

    Raises ValueError when the offsets are neither 2- nor 3-dimensional.
    '''
    report('### Generating Co-occurence matrix')

    dim = len(offsets[0])
    if dim not in (2, 3):
        raise ValueError('offsets must be 2- or 3-dimensional.')
    build = comat2d if dim == 2 else comat3d

    seq = np.arange(nbin)
    ii = np.tile(seq, (nbin,1))
    jj = ii.T
    # |i - j| does not depend on the offset, so compute it once.
    diff = np.abs(ii-jj)
    ids = ['HomogeneityGLCM','EnergyGLCM','CorrelationGLCM','ContrastGLCM','EntropyGLCM','DissimilarityGLCM']
    lst = []

    for o in offsets:
        com, d2 = build(d, o, nbin)

        report(('offset', o))
        report(com)

        # normalize counts to joint probabilities
        cm = com.values
        cm = cm / np.sum(cm)
        cm_nz = cm[cm != 0]  # log(0) must be avoided in the entropy

        homogeneity = np.sum(cm / (1+diff))
        energy = np.sum(cm**2)
        correlation = np.corrcoef(d2.v1, d2.v2)[0,1]
        contrast = np.sum(diff**2*cm)
        entropy = -np.sum(cm_nz * np.log(cm_nz))
        dissimilarity = np.sum(diff*cm)

        se = pd.Series([homogeneity, energy, correlation, contrast, entropy, dissimilarity], ids)
        se.name = '_'.join(str(c) for c in o)
        lst.append(se)

    df = pd.DataFrame(lst)
    # DataFrame.append was removed in pandas 2.0; adding the row through
    # .loc works on old and new pandas alike.
    df.loc['mean'] = df.mean()
    report(df)
    return df



def test_comatParams2d():
    '''Mean GLCM features of data13 with 8 bins.'''
    expected_mean = [0.559722, 0.319167, -0.250945, 1.758333, 1.159610, 1.100000]
    features = comatParams2d(data13(), 8)
    assert np.allclose(features.loc['mean'], expected_mean)
    
    
def test_comatParams3d():
    '''Mean GLCM features of data11 discretized into 8 bins over [0, 20].'''
    expected_mean = [0.474600, 0.148728, 0.948662, 3.592544, 2.019743, 1.615889]
    voxels = data11()
    voxels['v1'] = discritize(voxels.v, 8, 0, 20)
    features = comatParams3d(voxels, 8)
    assert np.allclose(features.loc['mean'], expected_mean)

    
test_comatParams2d()
test_comatParams3d()
   
no_error()

No error.


In [12]:
################################################
#
# Define xline, yline for 2d and xplain, yplain, zplain for 3d
# in preparation of gray-level run-length matrix
# Kenji Hirata, 11/23/2017
# bug fixed 1/9/2018
#
################################################

def xline(x, m):
    '''Return the (y, x) points of column x of the 2-D array m, one per row.'''
    n_rows, _ = m.shape
    return list(zip(range(n_rows), [x] * n_rows))

def yline(y, m):
    '''Return the (y, x) points of row y of the 2-D array m, one per column.'''
    _, n_cols = m.shape
    return list(zip([y] * n_cols, range(n_cols)))

def xplain(x, m):
    '''Return the (z, y, x) points of the plane with fixed x in the 3-D array m.'''
    nz, ny, _ = m.shape
    points = []
    for z in range(nz):
        for y in range(ny):
            points.append((z, y, x))
    return points

def yplain(y, m):
    '''Return the (z, y, x) points of the plane with fixed y in the 3-D array m.'''
    nz, _, nx = m.shape
    points = []
    for z in range(nz):
        for x in range(nx):
            points.append((z, y, x))
    return points

def zplain(z, m):
    '''Return the (z, y, x) points of the plane with fixed z in the 3-D array m.'''
    _, ny, nx = m.shape
    points = []
    for y in range(ny):
        for x in range(nx):
            points.append((z, y, x))
    return points


def test_xline():
    '''xline on a 3x4 array: 3 points per column and a fixed coordinate checksum.'''
    d = np.zeros((3,4))
    checksum = 0
    for col in range(d.shape[1]):
        pts = xline(col, d)
        assert len(pts) == 3
        checksum += np.sum(pts)
    assert checksum == 30


def test_yline():
    '''yline on a 3x4 array: 4 points per call and a fixed coordinate checksum.

    The loop runs over range(d.shape[1]); the expected checksum is
    pinned to that range.
    '''
    d = np.zeros((3,4))
    checksum = 0
    for row in range(d.shape[1]):
        pts = yline(row, d)
        assert len(pts) == 4
        checksum += np.sum(pts)
    assert checksum == 48
    
    
def test_xplain():
    '''xplain on a 3x4x5 array: point count per plane and a fixed coordinate checksum.

    The loop runs over range(d.shape[1]); the expected checksum is
    pinned to that range.
    '''
    d = np.zeros((3,4,5))
    nz, ny = d.shape[0], d.shape[1]
    checksum = 0
    for fixed in range(d.shape[1]):
        pts = xplain(fixed, d)
        assert len(pts) == nz * ny
        checksum += np.sum(pts)
    assert checksum == 192

    
def test_yplain():
    '''yplain on a 3x4x5 array: point count per plane and a fixed coordinate checksum.'''
    d = np.zeros((3,4,5))
    nz, nx = d.shape[0], d.shape[2]
    checksum = 0
    for fixed in range(d.shape[1]):
        pts = yplain(fixed, d)
        assert len(pts) == nz * nx
        checksum += np.sum(pts)
    assert checksum == 270
    
    
def test_zplain():
    '''zplain on a 3x4x5 array: point count per plane and a fixed coordinate checksum.

    The loop runs over range(d.shape[1]); the expected checksum is
    pinned to that range.
    '''
    d = np.zeros((3,4,5))
    ny, nx = d.shape[1], d.shape[2]
    checksum = 0
    for fixed in range(d.shape[1]):
        pts = zplain(fixed, d)
        assert len(pts) == ny * nx
        checksum += np.sum(pts)
    assert checksum == 400
    

test_xline()
test_yline()
test_xplain()
test_yplain()
test_zplain()
    
no_error()

No error.


In [13]:
################################################
#
# Run length encoding (RLE) for gray-level run-length matrix
# Kenji Hirata, 11/23/2017
# bug fixed 1/9/2018
#
################################################

def rle(vec):
    '''Run-length encode vec, treating consecutive NaNs as equal.

    Returns (values, lengths): one entry per run, in order.
    An empty input gives ([], []); a one-element input is returned
    as-is together with [1].
    '''
    n = len(vec)
    if n == 0:
        return [],[]
    if n == 1:
        return vec,[1]

    vallst, lenlst = [], []
    current, run = vec[0], 1
    for item in vec[1:]:
        same = (item == current) or (np.isnan(item) and np.isnan(current))
        if same:
            run += 1
            continue
        # the run ended: store it and start a new one at item
        vallst.append(current)
        lenlst.append(run)
        current, run = item, 1

    # close the final run; its value is recorded from the last element seen
    vallst.append(item)
    lenlst.append(run)
    return vallst,lenlst


def test_rle():
    '''Decoding the output of rle must reproduce the input, NaNs included.'''
    original = [1,1,4,6,6,np.nan,2,2,2,2,3,1,3,np.nan,np.nan,1,1,np.nan]
    vals, lens = rle(original)
    decoded = []
    for value, length in zip(vals, lens):
        decoded.extend([value] * length)
    assert np.allclose(original, decoded, equal_nan=True)
    
test_rle()

no_error()

No error.


In [14]:
################################################
#
# Generate gray-level run-length matrix (GLRLM)
# Kenji Hirata, 11/23/2017
#
################################################

def getGlrlm2d(discritizedArray, initialpoints, dy, dx):
    '''Build the gray-level run-length matrix of a 2-D array for one direction.

    discritizedArray -- 2-D array of gray levels, NaN where no voxel exists
    initialpoints    -- iterable of (y, x) start points
    dy, dx           -- step applied repeatedly from each start point

    Each start point is walked in steps of (dy, dx) until the array is
    left; a NaN is appended after every walk so runs never continue from
    one line into the next. Returns the zero-padded crosstab of gray level
    (rows) against run length (columns); NaN runs are excluded.
    '''
    grid = discritizedArray
    n_rows, n_cols = grid.shape
    sequence = []
    for row, col in initialpoints:
        while 0 <= row < n_rows and 0 <= col < n_cols:
            sequence.append(grid[row, col])
            row += dy
            col += dx
        sequence.append(np.nan)  # separator between lines
    run_values, run_lengths = rle(sequence)

    keep = ~np.isnan(run_values)
    levels = np.array(run_values)[keep].astype(int)
    lengths = np.array(run_lengths)[keep]
    return complement_crosstab2(pd.crosstab(levels, lengths))

def getGlrlm3d(discritizedArray, initialpoints, dz, dy, dx):
    '''Build the gray-level run-length matrix of a 3-D array for one direction.

    discritizedArray -- 3-D array of gray levels, NaN where no voxel exists
    initialpoints    -- iterable of (z, y, x) start points
    dz, dy, dx       -- step applied repeatedly from each start point

    Each start point is walked in steps of (dz, dy, dx) until the array is
    left; a NaN is appended after every walk so runs never continue from
    one line into the next. Returns the zero-padded crosstab of gray level
    (rows) against run length (columns); NaN runs are excluded.
    '''
    volume = discritizedArray
    n_slices, n_rows, n_cols = volume.shape
    sequence = []
    for sl, row, col in initialpoints:
        while 0 <= sl < n_slices and 0 <= row < n_rows and 0 <= col < n_cols:
            sequence.append(volume[sl, row, col])
            sl += dz
            row += dy
            col += dx
        sequence.append(np.nan)  # separator between lines
    run_values, run_lengths = rle(sequence)

    keep = ~np.isnan(run_values)
    levels = np.array(run_values)[keep].astype(int)
    lengths = np.array(run_lengths)[keep]
    return complement_crosstab2(pd.crosstab(levels, lengths))


def test_getGlrlm2d():
    '''getGlrlm2d on a modified data2 image, for the (0, 1) and (-1, 1) directions.'''
    # The builtin float replaces np.float, an alias that was removed from
    # NumPy in 1.24; a float array is needed so cells can be set to NaN.
    m = np.array(data2(), dtype = float)
    m[1,2:4]=np.nan
    m[2,3]=np.nan
    m[3,2]=3

    # horizontal runs, started from the x=0 column
    pt = xline(0,m)
    g = getGlrlm2d(m,pt,0,1)
    answer = [[0, 0, 0, 0],
              [0, 1, 0, 0],
              [0, 0, 0, 0],
              [0, 3, 3, 1],
              [0, 2, 4, 0],
              [0, 4, 1, 0],
              [0, 1, 0, 0],
              [0, 2, 0, 0]]
    assert np.allclose(g.values, answer)

    # diagonal runs (dy=-1, dx=1), started from the x=0 column and the bottom row
    pt = set(xline(0, m)) | set(yline(m.shape[0]-1, m))
    g = getGlrlm2d(m,pt,-1,1)
    answer = [[0, 0, 0],
              [0, 1, 0],
              [0, 0, 0],
              [0, 4, 4],
              [0, 6, 2],
              [0, 4, 1],
              [0, 1, 0],
              [0, 2, 0]]
    assert np.allclose(g.values, answer)

    
def test_getGlrlm3d():
    '''getGlrlm3d on data11 and data14 for several directions and start-point sets.'''
    voxels = data11()
    voxels['v1'] = discritize(voxels.v, 8, 0, 20)
    vol = generate_3darray_v1(voxels)

    # runs along +x, started from the x=0 plane
    glrlm = getGlrlm3d(vol, xplain(0, vol), 0,0,1)
    expected = [[0, 0, 0, 0, 0, 2],
                [0, 0, 0, 1, 0, 1],
                [0, 1, 0, 1, 1, 0],
                [0, 0, 1, 1, 0, 1],
                [0, 0, 2, 0, 0, 1],
                [0, 1, 0, 1, 0, 1],
                [0, 0, 0, 1, 0, 0]]
    assert np.allclose(glrlm.values, expected)

    # runs along the (1,1,1) diagonal after overwriting two cells with level 6
    vol[0,1,1]=6
    vol[1,2,2]=6
    starts = set(xplain(0, vol)) | set(yplain(0, vol)) | set(zplain(0, vol))
    glrlm = getGlrlm3d(vol, starts, 1,1,1)
    expected = [[0, 9, 0, 0],
                [0, 8, 0, 0],
                [0, 8, 0, 0],
                [0, 9, 0, 0],
                [0, 9, 0, 0],
                [0, 9, 0, 0],
                [0, 2, 0, 1]]
    assert np.allclose(glrlm.values, expected)

    # Start planes for the (-1,-1,1) direction on data14: x=0, y=2 and z=2.
    # Marking their union with -1 shows which cells the start points cover.
    marked = data14()
    xp = xplain(0, marked)
    yp = yplain(2, marked)
    zp = zplain(2, marked)
    starts = set(xp) | set(yp) | set(zp)
    for p in starts:
        marked[p] = -1

    expected = [[[-1,  1,  1],
                 [-1,  2,  2],
                 [-1, -1, -1]],

                [[-1,  2,  1],
                 [-1,  1,  1],
                 [-1, -1, -1]],

                [[-1, -1, -1],
                 [-1, -1, -1],
                 [-1, -1, -1]]]
    assert np.allclose(marked, expected)

    glrlm = getGlrlm3d(data14(), starts, -1,-1,1)
    expected = [[0, 0, 0, 0],
                [0,13, 2, 1],
                [0, 1, 2, 0],
                [0, 2, 0, 0]]
    assert np.allclose(glrlm.values, expected)


test_getGlrlm2d()
test_getGlrlm3d()

no_error()    

No error.


In [15]:
################################################
#
# Calculate features from GLRLM or GLZSM
# Kenji Hirata, 11/23/2017
#
# Major Modification on 9/8/2018
# 'ZLNU' may not be an appropriate name; the new name is 'ZSNU', standing for zone-size non-uniformity
#
################################################

def calcGlrlmParams(glrlm_or_glszm, mode):
    '''Compute the 11 run-length (or zone-size) features of a matrix.

    glrlm_or_glszm -- DataFrame whose rows are gray levels and whose
                      columns are run lengths / zone sizes starting at 0
    mode           -- 'R' for GLRLM feature names, 'Z' for GLZSM names

    The formulas are identical for both modes; only the index of the
    returned pd.Series differs. Gray levels and lengths enter the
    formulas 1-based (first row -> i=1, length-1 column -> j=1).

    Raises ValueError when mode is neither 'R' nor 'Z'.
    '''
    names_by_mode = {
        'R': ['SRE', 'LRE', 'LGRE', 'HGRE', 'SRLGE', 'SRHGE', 'LRLGE', 'LRHGE', 'GLNUr', 'RLNU', 'RP'],
        'Z': ['SZE', 'LZE', 'LGZE', 'HGZE', 'SZLGE', 'SZHGE', 'LZLGE', 'LZHGE', 'GLNUz', 'ZSNU', 'ZP'],
    }
    if mode not in names_by_mode:
        raise ValueError("mode must be either 'R' or 'Z'.")
    ids = names_by_mode[mode]

    # The left-most column (run length / size = 0) is dropped: it is unused
    # and would cause a division by zero.
    mat = glrlm_or_glszm.values[:,1:]

    sh = mat.shape
    jj = np.tile(np.arange(sh[1])+1, (sh[0],1))      # run length of each cell
    ii = np.tile(np.arange(sh[0])+1, (sh[1],1)).T    # gray level (1-based) of each cell

    total = np.sum(mat)
    inv_total = 1/total

    SRE   = inv_total * np.sum(mat/jj**2)
    LRE   = inv_total * np.sum(mat*jj**2)
    LGRE  = inv_total * np.sum(mat/ii**2)
    HGRE  = inv_total * np.sum(mat*ii**2)
    SRLGE = inv_total * np.sum(mat/ii**2/jj**2)
    SRHGE = inv_total * np.sum(mat*ii**2/jj**2)
    LRLGE = inv_total * np.sum(mat/ii**2*jj**2)
    LRHGE = inv_total * np.sum(mat*ii**2*jj**2)
    GLNUr = inv_total * np.sum(np.sum(mat, axis=1)**2)
    RLNU  = inv_total * np.sum(np.sum(mat, axis=0)**2)
    RP    = total / np.sum(jj * mat)

    se = pd.Series([SRE, LRE, LGRE, HGRE, SRLGE, SRHGE, LRLGE, LRHGE, GLNUr, RLNU, RP], ids)
    return se



def test_calcGlrlmParams():
    '''Run-length features of data11 (8 bins over [0, 20]) along +x.'''
    voxels = data11()
    voxels['v1'] = discritize(voxels.v, 8, 0, 20)
    vol = generate_3darray_v1(voxels)
    glrlm = getGlrlm3d(vol, xplain(0, vol), 0,0,1)
    r = calcGlrlmParams(glrlm, 'R')

    # feature name -> (lower bound, upper bound), both exclusive
    bounds = {
        'SRE': (0.212238, 0.212240),
        'LRE': (13.235293, 13.235295),
        'LGRE': (0.190856, 0.190858),
        'HGRE': (18.647058, 18.647060),
        'SRLGE': (0.019357, 0.019359),
        'SRHGE': (4.591126, 4.591128),
        'LRLGE': (3.896457, 3.896459),
        'LRHGE': (209.058823, 209.058825),
        'GLNUr': (2.647058, 2.647060),
        'RLNU': (4.411764, 4.411766),
        'RP': (0.298245, 0.298246),
    }
    for name, (low, high) in bounds.items():
        assert low < r[name] < high
    
test_calcGlrlmParams()

print('No error.')


No error.


In [16]:
################################################
#
# Retired functions
# Combine features from GLRLM or GLZSM
# Kenji Hirata, 11/23/2017
#
################################################
def combineGlrlm2d_retired(discritizedArray, test_mode = False):
    '''Compute GLRLM features of a 2-D array for four directions, plus their mean.

    discritizedArray -- 2-D array of gray levels
    test_mode        -- when True, also return the list of
                        (start points, (dy, dx), GLRLM) used per direction

    Returns a DataFrame with one row per direction (labelled 'dy_dx')
    and a final row labelled 'mean'; in test mode returns
    (DataFrame, test_points).
    '''
    m = discritizedArray
    ny,nx = m.shape
    lst = []
    test_points = []

    def sub(points, dy, dx):
        # one direction: build the GLRLM, compute its features, record them
        g = getGlrlm2d(m, points, dy, dx)
        report(g)
        se = calcGlrlmParams(g, 'R')
        se.name = str(dy)+'_'+str(dx)
        lst.append(se)
        if test_mode:
            test_points.append((points, (dy,dx), g))

    sub(xline(0, m), 0, 1)
    sub(yline(0, m), 1, 0)
    sub(set(xline(0, m)) | set(yline(0, m)), 1, 1)
    # Walking with dy=-1 must start from the bottom row, i.e. y = ny-1.
    # (nx-1 was used here before, which is only right for square arrays.)
    sub(set(xline(0, m)) | set(yline(ny-1, m)), -1, 1)

    df = pd.DataFrame(lst)
    # DataFrame.append was removed in pandas 2.0; adding the row through
    # .loc works on old and new pandas alike.
    df.loc['mean'] = df.mean()
    report(df)

    if test_mode:
        return df, test_points
    else:
        return df

    
def combineGlrlm3d_retired(discritizedArray, test_mode = False):
    '''Compute GLRLM features of a 3-D array for thirteen directions, plus their mean.

    discritizedArray -- 3-D array of gray levels
    test_mode        -- when True, also return the list of
                        (start points, (dz, dy, dx), GLRLM) used per direction

    Returns a DataFrame with one row per direction (labelled 'dz_dy_dx')
    and a final row labelled 'mean'; in test mode returns
    (DataFrame, test_points).
    '''
    m = discritizedArray
    nz,ny,nx = m.shape
    lst =[]
    test_points = []

    def sub(points, dz, dy, dx):
        # one direction: build the GLRLM, compute its features, record them
        g = getGlrlm3d(m, points, dz, dy, dx)
        report(g)
        se = calcGlrlmParams(g, 'R')
        se.name = str(dz)+'_'+str(dy)+'_'+str(dx)
        lst.append(se)
        if test_mode:
            test_points.append((points, (dz,dy,dx), g))

    # Start points are the faces through which a line of the given
    # direction enters the volume: index 0 for a positive step,
    # the last index for a negative step.

    ### axis-aligned directions
    sub(xplain(0, m), 0, 0, 1)
    sub(yplain(0, m), 0, 1, 0)
    sub(zplain(0, m), 1, 0, 0)

    ### in-plane diagonals
    sub(set(xplain(0, m)) | set(yplain(0,m)), 0, 1, 1)
    sub(set(yplain(0, m)) | set(zplain(0,m)), 1, 1, 0)
    sub(set(zplain(0, m)) | set(xplain(0,m)), 1, 0, 1)
    sub(set(xplain(0, m)) | set(yplain(ny-1,m)), 0, -1, 1)
    sub(set(yplain(0, m)) | set(zplain(nz-1,m)), -1, 1, 0)
    sub(set(zplain(0, m)) | set(xplain(nx-1,m)), 1, 0, -1)

    ### space diagonals
    sub(set(xplain(0, m)) | set(yplain(0,m)) | set(zplain(0,m)), 1, 1, 1)
    sub(set(xplain(0, m)) | set(yplain(0,m)) | set(zplain(nz-1,m)), -1, 1, 1)
    sub(set(xplain(0, m)) | set(yplain(ny-1,m)) | set(zplain(0,m)), 1, -1, 1)
    sub(set(xplain(0, m)) | set(yplain(ny-1,m)) | set(zplain(nz-1,m)), -1, -1, 1)

    df = pd.DataFrame(lst)
    # DataFrame.append was removed in pandas 2.0; adding the row through
    # .loc works on old and new pandas alike.
    df.loc['mean'] = df.mean()

    if test_mode:
        return df, test_points
    else:
        return df


    
def test_combineGlrlm2d_retired():
    '''
    Check the retired 2-D combiner on a 3x3 example: four directions must be
    reported, and in every matrix sum(run length * number of runs) must equal
    the number of pixels.
    '''
    original = np.array([[1,2,3],[1,1,2],[1,1,1]]).reshape(3,3)
    _, records = combineGlrlm2d_retired(np.copy(original), test_mode = True)
    assert len(records) == 4

    for start_points, _direction, glrlm in records:
        # Mark the reported points on a scratch copy (useful when debugging
        # by printing); only the size of the copy enters the assertion.
        marked = np.copy(original)
        for point in start_points:
            marked[point] = -1

        runs_per_length = np.sum(glrlm, axis=0)
        assert np.inner(runs_per_length.index, runs_per_length) == marked.size
    
    
def test_combineGlrlm3d_retired():
    '''
    Check the retired 3-D combiner on a 2x2x3 example: thirteen directions
    must be reported, and in every matrix sum(run length * number of runs)
    must equal the number of voxels.
    '''
    original = np.array([[[1,1,2],[1,1,1]],[[1,2,2],[1,1,2]]]).reshape(2,2,3)
    _, records = combineGlrlm3d_retired(np.copy(original), test_mode = True)
    assert len(records) == 13

    for start_points, _direction, glrlm in records:
        # Mark the reported points on a scratch copy (useful when debugging
        # by printing); only the size of the copy enters the assertion.
        marked = np.copy(original)
        for point in start_points:
            marked[point] = -1

        runs_per_length = np.sum(glrlm, axis=0)
        assert np.inner(runs_per_length.index, runs_per_length) == marked.size

# Run the checks for the retired combiners as soon as this cell executes.
# no_error() is only reached when none of the asserts above has failed.
test_combineGlrlm2d_retired()
test_combineGlrlm3d_retired()

no_error()

No error.


In [17]:
################################################
#
# Currently working
# Combine features from GLRLM or GLZSM
# Kenji Hirata, 11/23/2017
#
################################################
        
def combineGlrlm2d(discritizedArray, test_mode = False):
    '''
    Compute GLRLM features of a 2-D array for every direction yielded by
    fourDirections() and append their column-wise mean.

    Parameters
    discritizedArray: 2-dimensional array (its shape is unpacked as ny,nx).
    test_mode: when True, the per-direction inputs are returned as well.

    Returns
    DataFrame with one row per direction, named 'dy_dx', plus a 'mean' row.
    When test_mode is True: (DataFrame, list of (points, (dy,dx), matrix)).
    '''
    m = discritizedArray
    ny,nx = m.shape
    lst = []
    test_points = []

    def sub(m, dy, dx):
        # Collect the points handed to getGlrlm2d: one line per non-zero
        # direction component, taken from index 0 for +1 and from the last
        # index for -1 (xline/yline are defined elsewhere in the file).
        points = []
        if (dx == 1):
            points += xline(0, m)
        if (dx == -1):
            points += xline(nx-1, m)
        if (dy == 1):
            points += yline(0, m)
        if (dy == -1):
            points += yline(ny-1, m)
        points = set(points)

        g = getGlrlm2d(m, points, dy, dx)
        report(g)
        se = calcGlrlmParams(g, 'R')
        se.name = str(dy)+'_'+str(dx)
        lst.append(se)
        ## test purpose
        if test_mode:
            test_points.append((points, (dy,dx), g))

    for dy,dx in fourDirections():
        sub(m, dy, dx)

    df = pd.DataFrame(lst)
    ave = df.mean()
    ave.name = 'mean'
    # DataFrame.append was removed in pandas 2.0; concat of the one-row frame
    # appends the same 'mean' row.
    df = pd.concat([df, ave.to_frame().T])
    report(df)

    if test_mode:
        return df, test_points
    else:
        return df

    
    
def combineGlrlm3d(discritizedArray, test_mode = False):
    '''
    Compute GLRLM features of a 3-D array for every direction yielded by
    thirteenDirections() and append their column-wise mean.

    Parameters
    discritizedArray: 3-dimensional array (its shape is unpacked as nz,ny,nx).
    test_mode: when True, the per-direction inputs are returned as well.

    Returns
    DataFrame with one row per direction, named 'dz_dy_dx', plus 'mean'.
    When test_mode is True: (DataFrame, list of (points, (dz,dy,dx), matrix)).
    '''
    m = discritizedArray
    nz,ny,nx = m.shape
    lst = []
    test_points = []

    def sub(m, dz, dy, dx):
        # Collect the points handed to getGlrlm3d: one plane per non-zero
        # direction component, taken from index 0 for +1 and from the last
        # index for -1 (xplain/yplain/zplain are defined elsewhere).
        points = []
        if (dx == 1):
            points += xplain(0, m)
        if (dx == -1):
            points += xplain(nx-1, m)
        if (dy == 1):
            points += yplain(0, m)
        if (dy == -1):
            points += yplain(ny-1, m)
        if (dz == 1):
            points += zplain(0, m)
        if (dz == -1):
            points += zplain(nz-1, m)
        points = set(points)

        g = getGlrlm3d(m, points, dz, dy, dx)
        report(g)
        se = calcGlrlmParams(g, 'R')
        se.name = str(dz)+'_'+str(dy)+'_'+str(dx)
        lst.append(se)
        ## test purpose
        if test_mode:
            test_points.append((points,(dz,dy,dx),g))

    for dz,dy,dx in thirteenDirections():
        sub(m, dz, dy, dx)

    df = pd.DataFrame(lst)
    ave = df.mean()
    ave.name = 'mean'
    # DataFrame.append was removed in pandas 2.0; concat of the one-row frame
    # appends the same 'mean' row.
    df = pd.concat([df, ave.to_frame().T])

    if test_mode:
        return df, test_points
    else:
        return df


def combineGlrlm(discritizedArray):
    '''
    Dispatch to combineGlrlm2d or combineGlrlm3d by number of dimensions,
    report the resulting table and return it.

    Raises
    ValueError when the array is neither 2- nor 3-dimensional.
    '''
    report('### Generating GLRLM')

    ndim = len(discritizedArray.shape)
    if ndim not in (2, 3):
        raise ValueError('discritizedArray must be 2- or 3-dimensional.')

    combiner = combineGlrlm2d if ndim == 2 else combineGlrlm3d
    features = combiner(discritizedArray)

    report(features)
    return features

    
def test_combineGlrlm2d():
    '''
    Compare the matrices reported by combineGlrlm2d for a 3x3 example with
    hand-computed answers for the listed directions.
    '''
    original = np.array([[1,2,3],[1,1,2],[1,1,1]]).reshape(3,3)

    # (dy, dx) -> expected matrix
    answers = [
        ((-1,-1), [[0,0,0,0],[0,1,1,1],[0,0,1,0],[0,1,0,0]]),
        ((-1,0),  [[0,0,0,0],[0,1,1,1],[0,2,0,0],[0,1,0,0]]),
        ((-1,1),  [[0,0,0],[0,4,1],[0,2,0],[0,1,0]]),
        ((0,-1),  [[0,0,0,0],[0,1,1,1],[0,2,0,0],[0,1,0,0]]),
    ]

    _, records = combineGlrlm2d(np.copy(original), test_mode = True)
    assert len(records) == 4

    for start_points, direction, glrlm in records:
        # Scratch copy with the reported points marked; handy when printing.
        marked = np.copy(original)
        for point in start_points:
            marked[point] = -1

        for expected_direction, expected in answers:
            if direction == expected_direction:
                assert np.allclose(glrlm, np.array(expected))

    
def test_combineGlrlm3d_1():
    '''
    Compare the matrices reported by combineGlrlm3d for a 2x2x3 example with
    hand-computed answers for the listed directions.
    '''
    original = np.array([[[1,1,2],[1,1,1]],[[1,2,2],[1,1,2]]]).reshape(2,2,3)

    # (dz, dy, dx) -> expected matrix
    answers = [
        ((-1,-1,-1), [[0,0,0],[0,6,1],[0,4,0]]),
        ((-1,-1,0),  [[0,0,0],[0,4,2],[0,2,1]]),
        ((-1,-1,1),  [[0,0,0],[0,6,1],[0,4,0]]),
        ((-1,0,-1),  [[0,0,0],[0,6,1],[0,4,0]]),
        ((-1,0,0),   [[0,0,0],[0,2,3],[0,2,1]]),
        ((-1,0,1),   [[0,0,0],[0,2,3],[0,2,1]]),
        ((-1,1,-1),  [[0,0],[0,8],[0,4]]),
        ((-1,1,0),   [[0,0,0],[0,6,1],[0,4,0]]),
        ((-1,1,1),   [[0,0,0],[0,6,1],[0,4,0]]),
        ((0,-1,-1),  [[0,0,0],[0,2,3],[0,2,1]]),
        ((0,-1,0),   [[0,0,0],[0,2,3],[0,2,1]]),
        ((0,-1,1),   [[0,0,0],[0,6,1],[0,4,0]]),
        ((0,0,1),    [[0,0,0,0],[0,1,2,1],[0,2,1,0]]),
    ]

    _, records = combineGlrlm3d(np.copy(original), test_mode = True)
    assert len(records) == 13

    for start_points, direction, glrlm in records:
        # Scratch copy with the reported points marked; handy when printing.
        marked = np.copy(original)
        for point in start_points:
            marked[point] = -1

        for expected_direction, expected in answers:
            if direction == expected_direction:
                assert np.allclose(glrlm, np.array(expected))


def test_combineGlrlm3d_2():
    '''
    Check combineGlrlm3d on a 4x2x3 example: every matrix must account for
    all voxels, and the listed directions must match hand-computed answers.
    '''
    original = np.array([[[1,1,2,2],[1,1,1,1]],[[1,2,2,3],[1,1,2,3]],[[1,2,2,3],[1,1,2,3]]]).reshape(4,2,3)

    # (dz, dy, dx) -> expected matrix
    answers = [
        ((-1,-1,-1), [[0,0,0],[0,10,1],[0,8,0],[0,4,0]]),
        ((-1,-1,0),  [[0,0,0],[0,8,2],[0,8,0],[0,4,0]]),
        ((-1,-1,1),  [[0,0,0],[0,8,2],[0,4,2],[0,4,0]]),
        ((-1,0,-1),  [[0,0,0,0],[0,5,2,1],[0,5,0,1],[0,4,0,0]]),
        ((-1,0,0),   [[0,0,0,0],[0,6,0,2],[0,6,1,0],[0,4,0,0]]),
        ((-1,0,1),   [[0,0,0],[0,4,4],[0,6,1],[0,4,0]]),
        ((-1,1,-1),  [[0,0,0],[0,8,2],[0,6,1],[0,2,1]]),
        ((-1,1,0),   [[0,0,0],[0,8,2],[0,8,0],[0,4,0]]),
        ((-1,1,1),   [[0,0,0],[0,8,2],[0,8,0],[0,4,0]]),
        ((0,-1,-1),  [[0,0,0],[0,6,3],[0,6,1],[0,2,1]]),
        ((0,-1,0),   [[0,0,0],[0,8,2],[0,6,1],[0,4,0]]),
        ((0,-1,1),   [[0,0],[0,12],[0,8],[0,4]]),
        ((0,0,1),    [[0,0,0,0],[0,3,3,1],[0,6,1,0],[0,4,0,0]]),
    ]

    _, records = combineGlrlm3d(np.copy(original), test_mode = True)
    assert len(records) == 13

    for start_points, direction, glrlm in records:
        # Scratch copy with the reported points marked; handy when printing.
        marked = np.copy(original)
        for point in start_points:
            marked[point] = -1

        # sum(run length * number of runs) must cover every voxel
        runs_per_length = np.sum(glrlm, axis=0)
        assert np.inner(runs_per_length.index, runs_per_length) == marked.size

        for expected_direction, expected in answers:
            if direction == expected_direction:
                assert np.allclose(glrlm, np.array(expected))
        
def test_combineGlrlm3d_3():
    '''
    Same data as test_combineGlrlm3d_2 except that m0[1,1,0] is set to 4.
    Every matrix must account for all voxels, and direction (-1,0,0) must
    match the hand-computed answer (its run-length-2 column is all zero).
    The other directions are not checked individually here.
    '''
    original = np.array([[[1,1,2,2],[1,1,1,1]],[[1,2,2,3],[1,1,2,3]],[[1,2,2,3],[1,1,2,3]]]).reshape(4,2,3)
    original[1,1,0]=4

    expected_direction = (-1,0,0)
    expected = [[0,0,0,0],[0,6,0,2],[0,7,0,0],[0,4,0,0],[0,1,0,0]]

    _, records = combineGlrlm3d(np.copy(original), test_mode = True)
    assert len(records) == 13

    for start_points, direction, glrlm in records:
        # Scratch copy with the reported points marked; handy when printing.
        marked = np.copy(original)
        for point in start_points:
            marked[point] = -1

        # sum(run length * number of runs) must cover every voxel
        runs_per_length = np.sum(glrlm, axis=0)
        assert np.inner(runs_per_length.index, runs_per_length) == marked.size

        if direction == expected_direction:
            assert np.allclose(glrlm, np.array(expected))

        
def all_rows_are_distinct(m):
    '''
    Return True when no two rows of m are element-wise close (np.allclose),
    False as soon as one such pair is found.
    '''
    n_rows = m.shape[0]
    return not any(np.allclose(m[upper], m[lower])
                   for upper in range(0, n_rows-1)
                   for lower in range(upper+1, n_rows))

def test_all_rows_are_distinct():
    '''Check all_rows_are_distinct on matrices with and without a repeated row.'''
    cases = [
        ([[1,2,3],[2,3,4],[3,4,5]], True),
        ([[1,2,3],[2,3,4],[3,4,5],[3,4,5]], False),
        ([[1,2,3],[2,3,4],[3,4,5],[3,4,6]], True),
    ]
    for rows, distinct in cases:
        outcome = all_rows_are_distinct(np.array(rows))
        if distinct:
            assert outcome
        else:
            assert not outcome

    
def test_combineGlrlm():
    '''
    For five random seeds, check on a 12x12 and then a 12x12x12 random array
    (values 10..19, with 15 replaced by NaN) that combineGlrlm agrees with
    the dimension-specific combiner and that no two result rows coincide.
    '''
    n = 12
    for seed in range(5):
        np.random.seed(seed=seed)
        # The 2-D array is drawn before the 3-D one, as the random stream
        # depends on the order of the draws.
        for ndim, direct_combiner in ((2, combineGlrlm2d), (3, combineGlrlm3d)):
            m = np.floor(np.random.uniform(10,20,n**ndim).reshape((n,)*ndim))
            m[m==15] = np.nan

            via_dispatcher = combineGlrlm(m)
            via_direct = direct_combiner(m)
            assert np.allclose(via_dispatcher, via_direct)

            assert all_rows_are_distinct(via_dispatcher.values)
    
    
# Run this cell's GLRLM checks as soon as the cell executes.
# no_error() is only reached when none of the asserts above has failed.
test_combineGlrlm2d()
test_combineGlrlm3d_1()
test_combineGlrlm3d_2()
test_combineGlrlm3d_3()
test_all_rows_are_distinct()
test_combineGlrlm()

no_error()

No error.


In [18]:
################################################
#
# gray-level zone-size matrix in Python
# Kenji Hirata, 11/24/2017
#
# Major Modification on 9/8/2018
# 'ZLNU' may not be an appropriate name; new name is 'ZSNU' standing for zone-size non-uniformity
#
# Major Modification on 9/8/2018
# The old version of getGlzsm() retired because it did not match new versions of python libraries.
#
################################################
    
def getGlzsm_retired(discritizedArray, connection):
    '''
    Retired builder of the gray-level zone-size matrix (superseded by
    getGlzsm). According to the header of this cell it was retired because it
    did not match newer versions of the Python libraries.

    Parameters
    discritizedArray: 2- or 3-dimensional array of gray levels.
    connection: 4 or 8 for 2-D input; 6, 18 or 26 for 3-D input.

    Returns
    DataFrame of ints indexed by gray level, with zone sizes as columns.

    Raises
    ValueError for an unsupported dimensionality or connection value.

    Note: the print()/display() calls below emit output unconditionally.
    '''
    m = discritizedArray
    # Choose the structuring element handed to scipy.ndimage.label.
    if len(m.shape) == 2:
        if connection == 4:
            s = [[0,1,0],[1,1,1],[0,1,0]]
        elif connection == 8:
            s = [[1]*3]*3
        else:
            raise ValueError('connection must be 4 or 8 in case of 2-dimensional.')
    elif len(m.shape) == 3:
        if connection == 6:
            s = [[[0,0,0], [0,1,0], [0,0,0]],
                 [[0,1,0], [1,1,1], [0,1,0]],
                 [[0,0,0], [0,1,0], [0,0,0]]]
        elif connection == 18:
            s = [[[0,1,0], [1,1,1], [0,1,0]],
                 [[1,1,1], [1,1,1], [1,1,1]],
                 [[0,1,0], [1,1,1], [0,1,0]]]
        elif connection == 26:
            s = [[[1,1,1], [1,1,1], [1,1,1]],
                 [[1,1,1], [1,1,1], [1,1,1]],
                 [[1,1,1], [1,1,1], [1,1,1]]]
        else:
            raise ValueError('connection must be 6 or 18 or 26 in case of 3-dimensional.')
    else:
        raise ValueError('discritizedArray must be 2- or 3-dimensional.')

    lst = []
    max_graylevel = np.nanmax(m).astype(int)
    max_sizezone = 0
    
    #print(max_graylevel)
    
    for i in range(0, max_graylevel+1):  #[2,3]:

        #print(i)
        # Mask of the voxels at gray level i, then label its connected zones.
        m1 = m==i
        #print(m1)
        a,_ = scipy.ndimage.label(m1, s)
        #print(a)
        # All labels zero: no voxel has this gray level.
        if np.sum(a)==0:
            continue
        # First cross-tabulation: number of voxels per label id.
        b=pd.crosstab(a.flatten(), 0)
        #display(b)
        # b[1:] drops the first row (presumably label 0, the background);
        # the second cross-tabulation counts how many zones have each size.
        c = pd.crosstab(b[1:].values.ravel(), 0)
        c.columns = [[i]]
        # After transposing: one row for gray level i, one column per size.
        c = c.T

        max_sizezone1 = np.max(c.columns.values)
        if max_sizezone < max_sizezone1:
            max_sizezone = max_sizezone1
            
        print(c)

        lst.append(c)

    # Zero-filled table into which the per-level rows are added.
    df = pd.DataFrame(np.zeros((max_graylevel+1, max_sizezone+1)),
                      index = np.arange(max_graylevel+1),
                      columns=np.arange(max_sizezone+1))
    display(df)
    for l in lst:
        display(l)
        df = df.add(l, fill_value = 0)
    display(df)
    df = df.astype(int)
    return df


def getGlzsm(discritizedArray, connection):
    '''
    Build the gray-level zone-size matrix of a 2-D or 3-D array.

    For every integer gray level from 0 to the largest non-NaN value, the
    connected zones of voxels having that level are labelled and the number
    of zones of each size is counted.

    Parameters
    discritizedArray: 2- or 3-dimensional numpy array of gray levels; NaN
        entries never match a gray level and are therefore ignored.
    connection: 4 or 8 for 2-D input; 6, 18 or 26 for 3-D input.

    Returns
    DataFrame whose entry [g, k] is the number of zones of gray level g that
    contain k voxels. Rows run over 0..max gray level, columns over
    0..largest zone size.

    Raises
    ValueError for an unsupported dimensionality or connection value.
    '''
    m = discritizedArray
    ndim = len(m.shape)

    # 'reach' is the connectivity argument of generate_binary_structure:
    # 1 joins face neighbours only, 2 adds edge neighbours, 3 adds corners.
    if ndim == 2:
        if connection == 4:
            reach = 1
        elif connection == 8:
            reach = 2
        else:
            raise ValueError('connection must be 4 or 8 in case of 2-dimensional.')
    elif ndim == 3:
        if connection == 6:
            reach = 1
        elif connection == 18:
            reach = 2
        elif connection == 26:
            reach = 3
        else:
            raise ValueError('connection must be 6 or 18 or 26 in case of 3-dimensional.')
    else:
        raise ValueError('discritizedArray must be 2- or 3-dimensional.')
    s = scipy.ndimage.generate_binary_structure(ndim, reach)

    max_graylevel = np.nanmax(m).astype(int)
    max_zonesize = 0

    lst = []
    for i in range(0, max_graylevel+1):
        labeled,_ = scipy.ndimage.label(m==i, s)
        # bincount always reserves slot 0 for the background label, so
        # dropping it is safe even when the gray level fills the whole array
        # (np.unique would then return only label 1 and the zone was lost).
        zone_sizes = np.bincount(labeled.ravel())[1:]
        sizes,counts = np.unique(zone_sizes, return_counts=True)

        for size,count in zip(sizes,counts):
            lst.append((i,size,count))
            if max_zonesize < size:
                max_zonesize = size

    zsm = np.zeros((max_graylevel+1, max_zonesize+1))

    for level,size,count in lst:
        zsm[level,size] = count

    df = pd.DataFrame(zsm,
                      index = np.arange(max_graylevel+1),
                      columns=np.arange(max_zonesize+1))

    return df



def combineGlzsm(discritizedArray, connection):
    '''
    Build the zone-size matrix with getGlzsm, derive its features with
    calcGlrlmParams(..., 'Z') and return them as a one-row DataFrame.
    The matrix and the features are passed to report() on the way.
    '''
    report('### Generating GLZSM')
    zone_matrix = getGlzsm(discritizedArray, connection)
    report(zone_matrix)
    features = pd.DataFrame([calcGlrlmParams(zone_matrix, 'Z')])
    report(features)
    return features



def test_getGlzsm():
    '''
    Compare getGlzsm with hand-computed zone-size matrices for 2-D input
    (4- and 8-connection) and 3-D input (6-, 18- and 26-connection).
    '''
    def expect(matrix, connection, rows):
        assert np.allclose(getGlzsm(matrix, connection), np.array(rows))

    # 2d, 4-connection
    m = data2()*1.0
    expect(m, 4, [[0,0,0,0,0,0,0,0,0,0],
                  [0,1,0,0,0,0,0,0,0,0],
                  [0,0,0,0,0,0,0,0,0,0],
                  [0,0,2,0,0,0,0,0,0,1],
                  [0,0,0,0,1,0,0,0,1,0],
                  [0,0,0,0,0,0,1,0,0,0],
                  [0,1,0,0,0,0,0,0,0,0],
                  [0,2,0,0,0,0,0,0,0,0]])

    # 2d, 8-connection (two pixels set to 0 and one to NaN)
    m = data2()*1.0
    m[2,5:7] = 0
    m[3,2] = np.nan
    expect(m, 8, [[0,0,1,0,0,0,0,0,0,0],
                  [0,1,0,0,0,0,0,0,0,0],
                  [0,0,0,0,0,0,0,0,0,0],
                  [0,0,2,0,0,0,0,0,0,1],
                  [0,0,1,0,0,0,0,1,0,0],
                  [0,0,0,0,0,0,1,0,0,0],
                  [0,1,0,0,0,0,0,0,0,0],
                  [0,2,0,0,0,0,0,0,0,0]])

    # 3d, 6-connection
    m = data14()
    m[1,1,0] = 0
    m[1,1,2] = 0
    expect(m, 6, [[0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
                  [0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],
                  [0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
                  [0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])

    # 3d, 18-connection
    m = data14()
    m[1,1,0] = 0
    m[1,1,2] = 0
    m[0,1,1] = 0
    expect(m, 18, [[0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
                   [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],
                   [0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
                   [0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])

    # 3d, 26-connection
    m = data14()
    m[1,1,0] = 0
    m[1,1,2] = 0
    m[0,1,1] = 0
    expect(m, 26, [[0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
                   [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],
                   [0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
                   [0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])

    # 3d, 6,18,26-connection on the same small array
    m = np.array([[[1,2],[0,1]],[[0,0],[2,0]]])
    expect(m, 6, [[0,1,0,1],
                  [0,2,0,0],
                  [0,2,0,0]])
    expect(m, 18, [[0,0,0,0,1],
                   [0,0,1,0,0],
                   [0,2,0,0,0]])
    expect(m, 26, [[0,0,0,0,1],
                   [0,0,1,0,0],
                   [0,0,1,0,0]])

    # 3d, 26-connection again, on the discretized data11 voxel list
    df = data11()
    df['v1'] = discritize(df.v, 8, 0, 20)
    m = generate_3darray_v1(df)
    expect(m, 26, [[0,0,0,0,0,0,0,0,0,0,1],
                   [0,0,0,0,0,0,0,0,1,0,0],
                   [0,1,0,0,0,0,0,1,0,0,0],
                   [0,0,0,0,0,0,0,0,0,0,1],
                   [0,0,1,0,0,0,0,1,0,0,0],
                   [0,0,0,0,0,0,0,0,0,1,0],
                   [0,0,0,1,0,0,0,0,0,0,0]])


    
def test_combineGlzsm():
    '''
    Check every GLZSM feature of the discretized data11 voxel list
    (26-connection) against a narrow interval around its reference value.
    '''
    # (feature name, lower bound, upper bound); both bounds are exclusive
    bounds = [
        ('SZE',   0.1610,     0.1612),
        ('LZE',   50.777777,  50.777779),
        ('LGZE',  0.184767,   0.184769),
        ('HGZE',  19.333332,  19.333334),
        ('SZLGE', 0.015703,   0.015705),
        ('SZHGE', 2.451695,   2.451697),
        ('LZLGE', 14.706580,  14.706582),
        ('LZHGE', 787.555555, 787.555557),
        ('GLNUz', 1.444443,   1.444445),
        ('ZSNU',  1.444443,   1.444445),
        ('ZP',    0.157894,   0.157896),
    ]

    df = data11()
    df['v1'] = discritize(df.v, 8, 0, 20)
    r = combineGlzsm(generate_3darray_v1(df), 26)

    for feature, lower, upper in bounds:
        assert lower < r[feature][0] < upper
        
    
# Run this cell's GLZSM checks as soon as the cell executes.
# no_error() is only reached when none of the asserts above has failed.
test_getGlzsm()
test_combineGlzsm()

no_error()

No error.


In [19]:
################################################
#
# neighborhood gray-level difference matrix in Python
# Kenji Hirata, 11/27/2017
#
################################################

def getNgldm(discritizedArray, mode):
    '''
    Dispatch to getNgldm2d or getNgldm3d by number of dimensions.

    Raises
    ValueError when the array is neither 2- nor 3-dimensional.
    '''
    ndim = len(discritizedArray.shape)
    if ndim == 2:
        return getNgldm2d(discritizedArray, mode)
    if ndim == 3:
        return getNgldm3d(discritizedArray, mode)
    raise ValueError('discritizedArray must be 2- or 3-dimensional.')

def getNgldm2d(discritizedArray, mode):
    '''
    For each gray level i, sum |i - average of the 8 neighbours| over all
    pixels whose value is i.

    The array is surrounded by a one-pixel NaN border so that every pixel has
    eight neighbour slots.

    Parameters
    discritizedArray: 2-dimensional array of gray levels (NaN = no pixel).
    mode: 1 averages with np.nanmean (NaN neighbours are ignored);
          2 averages with np.mean (a pixel with any NaN neighbour yields NaN
          and therefore does not contribute to the sum).

    Returns
    Series indexed by gray level 0..int(max) holding the per-level sums
    (0 for levels that do not occur).

    Raises
    ValueError when mode is neither 1 nor 2.
    '''
    if mode == 1:
        average = np.nanmean
    elif mode == 2:
        average = np.mean
    else:
        # Previously an unknown mode surfaced as an unbound-variable error.
        raise ValueError('mode must be 1 or 2.')

    ny,nx = discritizedArray.shape

    #fill periphery with numpy.nan
    padded = np.full((ny+2, nx+2), np.nan)
    padded[1:-1,1:-1] = discritizedArray

    n_levels = int(np.nanmax(padded)+1)
    # positions of the 8 neighbours inside a flattened 3x3 window (centre = 4)
    neighbor_idx = np.delete(np.arange(9), 4)

    # One pass over the image; differences are collected per gray level in
    # scan order, so each level sees the same values in the same order as a
    # separate scan per level would produce.
    diffs = [[] for _ in range(n_levels)]
    for y in range(1, ny+1):
        for x in range(1, nx+1):
            v = padded[y,x]
            # only non-negative integer-valued pixels belong to a gray level
            if np.isnan(v) or v < 0 or v != int(v):
                continue
            neighbors = padded[y-1:y+2, x-1:x+2].ravel()[neighbor_idx]
            diffs[int(v)].append(np.abs(v - average(neighbors)))

    # not mean but sum; nansum skips pixels whose neighbour average is NaN
    return pd.Series([np.nansum(d) for d in diffs],
                     index=np.arange(n_levels), dtype=float)

def getNgldm3d(discritizedArray, mode):
    '''
    For each gray level i, sum |i - average of the 26 neighbours| over all
    voxels whose value is i.

    The array is surrounded by a one-voxel NaN border so that every voxel has
    twenty-six neighbour slots.

    Parameters
    discritizedArray: 3-dimensional array of gray levels (NaN = no voxel).
    mode: 1 averages with np.nanmean (NaN neighbours are ignored);
          2 averages with np.mean (a voxel with any NaN neighbour yields NaN
          and therefore does not contribute to the sum).

    Returns
    Series indexed by gray level 0..int(max) holding the per-level sums
    (0 for levels that do not occur).

    Raises
    ValueError when mode is neither 1 nor 2.
    '''
    if mode == 1:
        average = np.nanmean
    elif mode == 2:
        average = np.mean
    else:
        # Previously an unknown mode surfaced as an unbound-variable error.
        raise ValueError('mode must be 1 or 2.')

    nz,ny,nx = discritizedArray.shape

    #fill periphery with numpy.nan
    padded = np.full((nz+2, ny+2, nx+2), np.nan)
    padded[1:-1,1:-1,1:-1] = discritizedArray

    n_levels = int(np.nanmax(padded)+1)
    # positions of the 26 neighbours inside a flattened 3x3x3 window
    # (centre = 13)
    neighbor_idx = np.delete(np.arange(27), 13)

    # One pass over the volume; differences are collected per gray level in
    # scan order, so each level sees the same values in the same order as a
    # separate scan per level would produce.
    diffs = [[] for _ in range(n_levels)]
    for z in range(1, nz+1):
        for y in range(1, ny+1):
            for x in range(1, nx+1):
                v = padded[z,y,x]
                # only non-negative integer-valued voxels belong to a level
                if np.isnan(v) or v < 0 or v != int(v):
                    continue
                neighbors = padded[z-1:z+2, y-1:y+2, x-1:x+2].ravel()[neighbor_idx]
                diffs[int(v)].append(np.abs(v - average(neighbors)))

    # not mean but sum; nansum skips voxels whose neighbour average is NaN
    return pd.Series([np.nansum(d) for d in diffs],
                     index=np.arange(n_levels), dtype=float)


def calcNgldmParams(discritizedArray, ngldm, nbin):
    '''
    Derive coarseness, contrast and busyness from the per-level NGLDM sums.

    Parameters
    discritizedArray: array of gray levels; its value counts are normalised
        into the probability vector p.
    ngldm: per-gray-level sums as returned by getNgldm.
    nbin: number of gray levels; both vectors are passed through
        complement_crosstab2(..., nbin) before use.

    Returns
    One-row DataFrame with the columns 'CoarsenessNGLDM', 'ContrastNGLDM'
    and 'BusynessNGLDM'.
    '''
    counts = complement_crosstab2(pd.crosstab(discritizedArray.ravel(),0), nbin).values.ravel()
    N = np.sum(counts)
    p = counts/N

    s = complement_crosstab2(pd.DataFrame(ngldm), nbin).values.ravel()

    # level_gap[i, j] == i - j
    levels = np.arange(nbin)
    level_gap = np.subtract.outer(levels, levels)

    report('NGLDM')
    report(s)
    report('probability')
    report(p)

    ps_sum = np.sum(p*s)

    # the tiny constant keeps the division finite when ps_sum is 0
    Coarseness = 1 / (1e-20 + ps_sum)

    spread = np.sum(np.outer(p,p) * level_gap**2)
    Contrast = spread * np.sum(s) / (N*nbin*(nbin-1))

    #busyness: only gray levels that actually occur take part
    present = [i for i in range(len(p)) if not(p[i]==0)]
    total = 0
    for i in present:
        for j in present:
            total += np.abs(i * p[i] - j * p[j])
    Busyness = ps_sum / total

    se = pd.Series([Coarseness,Contrast,Busyness],
                   index=['CoarsenessNGLDM','ContrastNGLDM','BusynessNGLDM'])
    return pd.DataFrame([se])

def combineNgldm(mat, nbin, mode):
    '''
    Compute the NGLDM of mat with getNgldm, turn it into features with
    calcNgldmParams, report the features and return them.
    '''
    report('### Generating NGLDM')
    features = calcNgldmParams(mat, getNgldm(mat, mode), nbin)
    report(features)
    return features



def test_getNgldm():
    '''Check that getNgldm gives the same result as the 2-D / 3-D function it dispatches to.'''
    for make_data, direct in ((data2, getNgldm2d), (data1, getNgldm3d)):
        m = make_data()
        assert np.allclose(getNgldm(m, mode=1), direct(m, mode=1))
    
    
def test_getNgldm2d():
    '''Compare getNgldm2d (mode 1) on the discretized data10 voxel list with reference sums.'''
    voxels = data10()
    voxels['v1'] = discritize(voxels.v, 5, 0, 10)
    computed = getNgldm2d(generate_2darray_v1(voxels), mode=1)
    reference = np.array([0.000000, 2.916667, 0.792857, 2.666667])
    assert np.allclose(computed, reference)

def test_getNgldm3d():
    '''Compare getNgldm3d (mode 1) on the discretized data11 voxel list with reference sums.'''
    voxels = data11()
    voxels['v1'] = discritize(voxels.v, 8, 0, 20)
    computed = getNgldm3d(generate_3darray_v1(voxels), mode=1)
    reference = np.array([13.294481, 8.204421, 2.799930, 2.254031, 6.488474, 10.949799, 5.718182])
    assert np.allclose(computed, reference)

    
def test_calcNgldmParams():
    '''Placeholder: calcNgldmParams is exercised through test_combineNgldm().'''
    return None
    
    
def test_combineNgldm():
    '''
    Check the three NGLDM features of the data10 (2-D) and data11 (3-D)
    voxel lists against narrow intervals around their reference values.
    '''
    def within(result, bounds):
        # both bounds are exclusive
        for feature, lower, upper in bounds:
            assert lower < result[feature][0] < upper

    # 2d
    df = data10()
    df['v1'] = discritize(df.v, nbin=5, lo=0, hi=10)
    r = combineNgldm(generate_2darray_v1(df), nbin=5, mode=1)
    within(r, [('CoarsenessNGLDM', 0.48596,  0.48598),
               ('ContrastNGLDM',   0.048568, 0.048570),
               ('BusynessNGLDM',   1.371824, 1.371826)])

    # 3d
    df = data11()
    df['v1'] = discritize(df.v, 8, 0, 20)
    r = combineNgldm(generate_3darray_v1(df), nbin=8, mode=1)
    within(r, [('CoarsenessNGLDM', 0.136487, 0.136489),
               ('ContrastNGLDM',   0.107271, 0.107273),
               ('BusynessNGLDM',   0.509290, 0.509292)])
    
    
# Run this cell's NGLDM checks as soon as the cell executes.
# no_error() is only reached when none of the asserts above has failed.
test_getNgldm()
test_getNgldm2d()
test_getNgldm3d()
test_calcNgldmParams()
test_combineNgldm()

no_error()

No error.


In [27]:
################################################
#
# Get all params
# Takes DataFrame with columns of x,y,v --- v is continuous
#       nbin = 16, 32, 64, etc
# Returns DataFrame
#
# Kenji Hirata, 11/27/2017
#
################################################

def getAllParams2d(df, nbin, lo, hi, connection, ngldm_mode):
    '''
    Compute every feature group for a 2-D voxel list.

    Parameters
    df: DataFrame with a continuous value column v (the position columns are
        consumed by comatParams2d / generate_2darray_v1). It is not modified.
    nbin, lo, hi: passed to discritize() to derive the discrete column v1.
    connection: passed to combineGlzsm.
    ngldm_mode: passed to combineNgldm.

    Returns
    List of six results, in this order: usualParams, histParams,
    comatParams2d, combineGlrlm, combineGlzsm, combineNgldm.
    '''
    results = [usualParams(df.v)]

    # assign() returns a new frame, so the caller's DataFrame is left
    # untouched and no SettingWithCopyWarning is raised when df is a slice.
    df = df.assign(v1=discritize(df.v, nbin, lo, hi))
    results.append(histParams(df.v1))

    # comatParams requires the v1 column, the discretized version of v.
    results.append(comatParams2d(df, nbin))

    mat = generate_2darray_v1(df)
    results.append(combineGlrlm(mat))

    results.append(combineGlzsm(mat, connection))

    results.append(combineNgldm(mat, nbin, ngldm_mode))

    return results

def getAllParams3d(df, nbin, lo, hi, connection, ngldm_mode):
    '''
    Compute every feature group for a 3-D voxel list.

    Parameters
    df: DataFrame with a continuous value column v (the position columns are
        consumed by comatParams3d / generate_3darray_v1). It is not modified.
    nbin, lo, hi: passed to discritize() to derive the discrete column v1.
    connection: passed to combineGlzsm.
    ngldm_mode: passed to combineNgldm.

    Returns
    List of six results, in this order: usualParams, histParams,
    comatParams3d, combineGlrlm, combineGlzsm, combineNgldm.
    '''
    results = [usualParams(df.v)]

    # assign() returns a new frame, so the caller's DataFrame is left
    # untouched and no SettingWithCopyWarning is raised when df is a slice.
    df = df.assign(v1=discritize(df.v, nbin, lo, hi))
    results.append(histParams(df.v1))

    # comatParams requires the v1 column, the discretized version of v.
    results.append(comatParams3d(df, nbin))

    mat = generate_3darray_v1(df)

    results.append(combineGlrlm(mat))

    results.append(combineGlzsm(mat, connection))

    results.append(combineNgldm(mat, nbin, ngldm_mode))

    return results


def test_getAllParams2d():
    '''
    Regression test: the string form of every result of getAllParams2d on
    data10 must equal the stored reference file, ignoring line breaks.
    '''
    res = getAllParams2d(data10(), nbin=5, lo=0, hi=10, connection=4, ngldm_mode=1)

    s = ''.join(r.to_string()+'\r\n' for r in res)

    # the context manager closes the file even if reading fails
    with open('standard_test_getAllParams2d_20180909.txt') as f:
        ans = f.read()

    # line endings differ between platforms, so they are stripped on both sides
    s = s.replace('\n','').replace('\r','')
    ans = ans.replace('\n','').replace('\r','')
    assert s == ans
    
def test_getAllParams3d():
    '''
    Regression test: the string form of every transposed result of
    getAllParams3d on data11 must equal the stored reference file, ignoring
    line breaks. Each result is also displayed.
    '''
    res = getAllParams3d(data11(), nbin=8, lo=0, hi=20, connection=26, ngldm_mode=1)
    for r in res:
        display(r)

    s = ''.join(r.T.to_string()+'\r\n' for r in res)

    # the context manager closes the file even if reading fails
    with open('standard_test_getAllParams3d_20180909.txt') as f:
        ans = f.read()

    # line endings differ between platforms, so they are stripped on both sides
    s = s.replace('\n','').replace('\r','')
    ans = ans.replace('\n','').replace('\r','')
    assert s == ans
    

# Run the regression tests of getAllParams2d/3d as soon as the cell executes.
# no_error() is only reached when none of the asserts above has failed.
test_getAllParams2d()
test_getAllParams3d()

no_error()

Unnamed: 0,SUVmax,SUVmean,NumOfVoxels,SUVsum
0,15.66,7.924737,57,451.71


Unnamed: 0,SDhist,Skewness,Kurtosis,EnergyHist,EntropyHist
0,1.872335,0.02135,-1.175233,0.153586,1.899742


Unnamed: 0,HomogeneityGLCM,EnergyGLCM,CorrelationGLCM,ContrastGLCM,EntropyGLCM,DissimilarityGLCM
-1_-1_-1,0.268939,0.194215,0.944109,7.863636,1.774868,2.772727
-1_-1_0,0.279762,0.160714,0.922362,7.214286,1.941434,2.642857
-1_-1_1,0.288194,0.142361,0.92043,6.708333,2.004637,2.541667
-1_0_-1,0.310345,0.155767,0.94068,5.37931,1.94792,2.275862
-1_0_0,0.321759,0.182099,0.961318,4.694444,1.825564,2.138889
-1_0_1,0.327778,0.195556,0.980735,4.333333,1.730192,2.066667
-1_1_-1,0.369565,0.179584,0.940878,3.347826,1.813262,1.782609
-1_1_0,0.392857,0.147959,0.920189,2.928571,1.979845,1.642857
-1_1_1,0.409091,0.140496,0.916394,2.636364,2.011476,1.545455
0_-1_-1,0.69697,0.105601,0.967319,0.606061,2.346953,0.606061


Unnamed: 0,SRE,LRE,LGRE,HGRE,SRLGE,SRHGE,LRLGE,LRHGE,GLNUr,RLNU,RP
-1_-1_-1,1.0,1.0,0.248862,17.017544,0.248862,17.017544,0.248862,17.017544,8.754386,57.0,1.0
-1_-1_0,1.0,1.0,0.248862,17.017544,0.248862,17.017544,0.248862,17.017544,8.754386,57.0,1.0
-1_-1_1,1.0,1.0,0.248862,17.017544,0.248862,17.017544,0.248862,17.017544,8.754386,57.0,1.0
-1_0_-1,1.0,1.0,0.248862,17.017544,0.248862,17.017544,0.248862,17.017544,8.754386,57.0,1.0
-1_0_0,1.0,1.0,0.248862,17.017544,0.248862,17.017544,0.248862,17.017544,8.754386,57.0,1.0
-1_0_1,1.0,1.0,0.248862,17.017544,0.248862,17.017544,0.248862,17.017544,8.754386,57.0,1.0
-1_1_-1,1.0,1.0,0.248862,17.017544,0.248862,17.017544,0.248862,17.017544,8.754386,57.0,1.0
-1_1_0,1.0,1.0,0.248862,17.017544,0.248862,17.017544,0.248862,17.017544,8.754386,57.0,1.0
-1_1_1,1.0,1.0,0.248862,17.017544,0.248862,17.017544,0.248862,17.017544,8.754386,57.0,1.0
0_-1_-1,0.778409,1.886364,0.211158,18.272727,0.127735,15.443182,0.544851,29.590909,6.636364,25.681818,0.77193


Unnamed: 0,SZE,LZE,LGZE,HGZE,SZLGE,SZHGE,LZLGE,LZHGE,GLNUz,ZSNU,ZP
0,0.1611,50.777778,0.184768,19.333333,0.015704,2.451696,14.706581,787.555556,1.444444,1.444444,0.157895


Unnamed: 0,CoarsenessNGLDM,ContrastNGLDM,BusynessNGLDM
0,0.136488,0.107272,0.509291


No error.


In [21]:
################################################
#
# Convert ugly table to pretty table
# Takes list of DataFrame
# Returns DataFrame
#
# Kenji Hirata, 1/2/2018
#
################################################


def convertToPrettyTable(res):
    '''
    Merge a list of DataFrames into one wide single-row DataFrame.

    One-row frames are used as they are; from every other frame only the
    last row is kept, re-indexed to 0. The pieces are joined column-wise.
    '''
    def as_single_row(table):
        if len(table) == 1:
            return table
        return pd.DataFrame([table.iloc[-1,:]], index=[0])

    return pd.concat([as_single_row(r) for r in res], axis=1)

def convertToPrettyTable2(res):
    '''
    Merge a list of DataFrames into one wide single-row DataFrame, keeping
    every row: one-row frames are used as they are, all others are flattened
    with stack1(). The pieces are joined column-wise.
    '''
    flattened = [r if len(r)==1 else stack1(r) for r in res]
    return pd.concat(flattened, axis=1)


def stack1(df):
    '''
    Flatten a DataFrame into a single row.

    The values are taken in the order produced by df.stack(); each new column
    is named <column label><row label>, e.g. column 'A' of row '1_1' becomes
    'A1_1'.
    '''
    stacked = df.stack()
    # each index entry is (row label, column label)
    names = [key[1]+key[0] for key in stacked.index]
    return pd.DataFrame([stacked.values], columns=names)


def test_convertToPrettyTable():
    '''
    Regression test: the string form of convertToPrettyTable applied to the
    data10 results must equal the stored reference file, ignoring line breaks.
    '''
    res = getAllParams2d(data10(), nbin=5, lo=0, hi=10, connection=4, ngldm_mode=1)
    s = convertToPrettyTable(res).to_string()

    # the context manager closes the file even if reading fails
    with open('standard_test_convertToPrettyTable_20180909.txt') as f:
        ans = f.read()

    # line endings differ between platforms, so they are stripped on both sides
    s = s.replace('\n','').replace('\r','')
    ans = ans.replace('\n','').replace('\r','')
    assert s == ans


def test_convertToPrettyTable2():
    '''
    Regression test: the string form of convertToPrettyTable2 applied to the
    data10 results must equal the stored reference file, ignoring line breaks.
    '''
    res = getAllParams2d(data10(), nbin=5, lo=0, hi=10, connection=4, ngldm_mode=1)
    s = convertToPrettyTable2(res).to_string()

    # the context manager closes the file even if reading fails
    with open('standard_test_convertToPrettyTable2_20180909.txt') as f:
        ans = f.read()

    # line endings differ between platforms, so they are stripped on both sides
    s = s.replace('\n','').replace('\r','')
    ans = ans.replace('\n','').replace('\r','')
    assert s == ans
    

def test_stack1():
    '''Check the column names stack1 produces for a 2x3 frame.'''
    frame = pd.DataFrame(data=np.arange(6).reshape(2,3), index=['1_1','-1_1'], columns=['A','B','C'])
    expected_names = ['A1_1','B1_1','C1_1','A-1_1','B-1_1','C-1_1']
    for produced, expected in zip(stack1(frame).columns, expected_names):
        assert produced == expected

        
# Run this cell's table-conversion checks as soon as the cell executes.
# no_error() is only reached when none of the asserts above has failed.
test_convertToPrettyTable()
test_convertToPrettyTable2()
test_stack1()

no_error()

No error.


In [22]:
###################################################
#
# Super batch for texture analyses in Python
# Kenji Hirata, 1/2/2018
#
###################################################

def superBatch(directory, dim, nbin, lo, hi, connection, ngldm_mode, full = False, report_progress = True, column_renamer = None):
    '''
    Compute texture features for every voxel text file in a directory.

    Each regular file in `directory` is read as a tab-delimited table and
    passed to getAllParams2d (dim == 2) or getAllParams3d (dim == 3). The
    per-file results are flattened to one row and stacked into one table.

    Parameters
    directory: directory holding the voxel text files. Sub-directories are
        skipped and the files are processed in sorted name order.
    dim: 2 or 3. With 2 the columns x, y, v are required; with 3 the
        columns x, y, z, v are required.
    nbin, lo, hi, connection, ngldm_mode: forwarded unchanged to
        getAllParams2d / getAllParams3d.
    full: if True the rows are built with convertToPrettyTable2, otherwise
        with convertToPrettyTable.
    report_progress: if True, print the file list and a timestamp for each
        file as it is processed.
    column_renamer: dict used to rename columns right after reading, or
        None. For example, if the table has columns x, y, z, v1, then v1
        can be renamed to v with column_renamer = {'v1':'v'}.

    Returns
    A pandas DataFrame with one row per file, indexed by file name.

    Raises
    ValueError: if dim is not 2 or 3, if the directory contains no files,
        or if a table lacks a required column.
    '''
    if dim == 2:
        cols = ['x','y','v']
    elif dim == 3:
        cols = ['x','y','z','v']
    else:
        raise ValueError('dim (dimension) must be 2 or 3.')

    # os.listdir returns entries in arbitrary order and includes
    # sub-directories (e.g. Jupyter's .ipynb_checkpoints). Keep regular
    # files only and sort them so the row order is reproducible.
    filenames = sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )

    if report_progress:
        print("Files:", filenames)

    if not filenames:
        # Fail with a clear message instead of pd.concat's generic error.
        raise ValueError('No files found in directory: ' + str(directory))

    lst = []
    for f in filenames:

        if report_progress:
            print(str(datetime.datetime.now()), f)

        fullname = os.path.join(directory, f)

        df = pd.read_csv(fullname, delimiter='\t')

        if column_renamer is not None:
            df = df.rename(columns = column_renamer)

        for char in cols:
            if char not in df.columns:
                raise ValueError('Table does not have '+ char+ ' column. Consider use of column_renamer parameter.')

        # Pass only the required columns so that extra columns in the file
        # cannot affect the computation.
        if dim == 2:
            results = getAllParams2d(df[cols], nbin, lo, hi, connection, ngldm_mode)
        else:
            results = getAllParams3d(df[cols], nbin, lo, hi, connection, ngldm_mode)

        if full:
            results_pretty = convertToPrettyTable2(results)
        else:
            results_pretty = convertToPrettyTable(results)

        # Label the single result row with the file it came from.
        results_pretty.index = [f]

        lst.append(results_pretty)

    return pd.concat(lst)


def test_superbatch():
    '''
    Regression test for superBatch.

    Part 1 runs superBatch on 'testdata1' and compares the to_string()
    rendering with 'standard_test_superBatch_20180909.txt'. Line breaks
    are removed from both texts before the comparison.

    Part 2 runs superBatch on 'testdata2' with column_renamer and checks
    only the shape of the result.
    '''
    directory = r'testdata1'
    df = superBatch(directory, dim = 3, nbin = 64, lo = 0, hi = 20, connection = 26, ngldm_mode = 1, full = True, report_progress = False)
    #display(df)
    s = df.to_string()

    # 'with' closes the file even if reading raises.
    with open('standard_test_superBatch_20180909.txt') as f:
        ans = f.read()

    s = s.replace('\n','').replace('\r','')
    ans = ans.replace('\n','').replace('\r','')
    assert s == ans


    # testdata2 has dirty table that has columns of roinum,x,y,z,v1,v2,v3,v4.
    # v column must be generated.
    # v1 and v2 will disturb GLCM computation, thus they must be removed before.
    directory = r'testdata2'
    df = superBatch(directory, dim = 3, nbin = 64, lo = np.nan, hi = np.nan, connection = 26, ngldm_mode = 1, full = True, report_progress = False, column_renamer={'v1':'v'})
    #display(df.T)
    #print(df.to_string())
    #print(df.shape)
    assert df.shape == (1,261)

    
# Run the superBatch test; it raises AssertionError on a mismatch,
# so reaching no_error() means it passed.
test_superbatch()

no_error()

No error.


## To compute your data, please modify the below cell and then run the cell by Shift + Enter.

### superBatch takes parameters

directory: the directory (or folder) where you have voxel textfile(s) consisting of x,y,z,v columns.

dim: dimension. Can be 2 or 3. Choose 3 for volume data, 2 for polar map.

nbin: the number of discretization bins, typically 64.

lo and hi: the lower and upper limits used for discretization. If you want to use the global minimum and maximum as the limits, give np.nan to both. See the example.

connection: Voxel connectivity. In 2-d (i.e., dim = 2), it can be 2 or 4. In 3-d (i.e., dim = 3), it can be 6, 18, or 26.

ngldm_mode: 1 uses np.nanmean() and 2 uses np.mean() to calculate NGLDM.

full: True or False. Default is False. If full = True, the output includes the features of all 13 different directions (in 3-d) in addition to the mean of them for GLCM and GLRLM. If full = False, the output includes only the features of the mean of the 13 directions for GLCM and GLRLM. Thus, if full = True, the output has 261 features. If full = False, the output has 40 features.

report_progress: True or False. Default is True. If True, the progress is reported for each voxel textfile.

column_renamer: dict to rename columns. Default is None. See the example.


In [23]:
# Template for running ptexture on your own data.
# Edit the arguments of superBatch below, then run this cell.

df = superBatch(directory = r'testdata1', dim = 3, nbin = 64, lo = 0, hi = 20, connection = 26, ngldm_mode = 1, full = False, report_progress = True, column_renamer = None)

# Save the results to an Excel file
df.to_excel('results.xlsx')


# Some examples

# If you want to use the global minimum and maximum for discretization,
#df = superBatch(directory = r'testdata1', dim = 3, nbin = 64, lo = np.nan, hi = np.nan, connection = 26, ngldm_mode = 1, full = False, report_progress = True, column_renamer = None)

# If you want to use the 'v1' column instead of the 'v' column,
#df = superBatch(directory = r'testdata2', dim = 3, nbin = 64, lo = 0, hi = 20, connection = 26, ngldm_mode = 1, full = False, report_progress = True, column_renamer={'v1':'v'})


Files: ['test1.txt', 'test2.txt']
2018-09-09 10:23:02.449797 test1.txt
2018-09-09 10:23:02.744123 test2.txt
