In [11]:
import os
from typing import Sequence, Tuple

import numpy as np
import pandas as pd
from scipy.spatial import distance
from sklearn import mixture

In [12]:
# Load the CONTROL group: every entry of the directory is parsed as a
# comma-separated matrix and reshaped to 256 x 256.
p_matrices_control = []

dir_path_control = r"D:\Documents\Processed matrices\Control"
# os.listdir returns entries in arbitrary order; sorting makes the order of the
# loaded matrices (and of every row derived from them) reproducible.
entries_control = sorted(os.listdir(dir_path_control))


for file_name in entries_control:
    # os.path.join replaces the hand-written "\" separator, which only works on Windows.
    new_matr = np.loadtxt(os.path.join(dir_path_control, file_name), delimiter=',').reshape(256, 256)
    p_matrices_control.append(new_matr)

In [13]:
# Sanity check: number of control matrices that were loaded.
len(p_matrices_control)

1529

In [14]:
# Load the BC group: every entry of the directory is parsed as a
# comma-separated matrix and reshaped to 256 x 256.
p_matrices_bc = []

dir_path_bc = r"D:\Documents\Processed matrices\BC"
# os.listdir returns entries in arbitrary order; sorting makes the order of the
# loaded matrices (and of every row derived from them) reproducible.
entries_bc = sorted(os.listdir(dir_path_bc))


for file_name in entries_bc:
    # os.path.join replaces the hand-written "\" separator, which only works on Windows.
    new_matr = np.loadtxt(os.path.join(dir_path_bc, file_name), delimiter=',').reshape(256, 256)
    p_matrices_bc.append(new_matr)

In [15]:
# Sanity check: number of BC matrices that were loaded.
len(p_matrices_bc)

813

In [16]:
# Load the FAM group: every entry of the directory is parsed as a
# comma-separated matrix and reshaped to 256 x 256.
p_matrices_fam = []

dir_path_fam = r"D:\Documents\Processed matrices\FAM"
# os.listdir returns entries in arbitrary order; sorting makes the order of the
# loaded matrices (and of every row derived from them) reproducible.
entries_fam = sorted(os.listdir(dir_path_fam))


for file_name in entries_fam:
    # os.path.join replaces the hand-written "\" separator, which only works on Windows.
    new_matr = np.loadtxt(os.path.join(dir_path_fam, file_name), delimiter=',').reshape(256, 256)
    p_matrices_fam.append(new_matr)

In [17]:
# Sanity check: number of FAM matrices that were loaded.
len(p_matrices_fam)

1194

In [18]:
def barycenter(points: Sequence[Tuple[float, float]]) -> Tuple[float, float]:
    """Return the barycenter (arithmetic mean) of a collection of 2-D points.

    Parameters
    ----------
    points
        Sized collection whose items are indexable pairs; only ``item[0]`` and
        ``item[1]`` are read. An ``(n, 2)`` numpy array works as well as a
        list of tuples.

    Returns
    -------
    tuple
        ``(mean of first coordinates, mean of second coordinates)``.

    Raises
    ------
    ValueError
        If ``points`` is empty (previously an uninformative ZeroDivisionError).
    """
    n = len(points)
    if n == 0:
        raise ValueError("barycenter() requires at least one point")

    x_sum, y_sum = 0, 0
    for p in points:
        x_sum += p[0]
        y_sum += p[1]

    return (x_sum / n, y_sum / n)

In [19]:
def segmentation(p_matrices, flag, threshold=0.05, random_state=None):
    """Cluster the bright pixels of each matrix in two and measure the clusters.

    For every matrix, values below ``threshold * matrix.max()`` are discarded,
    the (row, col) coordinates of the remaining positive entries are split into
    two groups by a two-component Gaussian mixture, and five Euclidean
    distances are recorded.

    Parameters
    ----------
    p_matrices : sequence of 2-D numpy arrays
        Input matrices. They are not modified (a thresholded copy is used).
    flag : scalar
        Label written to the ``"group"`` entry once per input matrix.
    threshold : float, optional
        Fraction of the matrix maximum below which values are zeroed.
        Defaults to 0.05.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``GaussianMixture``. The default ``None`` leaves the fit
        unseeded; pass an int for reproducible clusters.

    Returns
    -------
    dict
        Column name -> values, one value per input matrix:

        * ``"background_max_distance"``: largest pairwise distance among the
          points predicted as mixture component 1.
        * ``"kernel_max_distance"``: largest pairwise distance among the
          points predicted as mixture component 0.
        * ``"textures_max_distance"``: largest pairwise distance among all
          retained points.
        * ``"textures_min_distance"``: smallest distance between a
          component-1 point and a component-0 point.
        * ``"barycenter_distance"``: distance between the two clusters'
          barycenters.
        * ``"group"``: numpy array filled with ``flag``.

        For empty ``p_matrices`` the lists and the array are empty.

    Raises
    ------
    ValueError
        If one of the two clusters is empty, because its distance matrix then
        has no maximum/minimum.
    """
    # NOTE(review): mixture component indices are arbitrary, so nothing here
    # guarantees that component 1 is really the "background" and component 0
    # the "kernel" - confirm how the labels are meant to be assigned.
    back_max_distances = []
    ker_max_distances = []
    max_distances = []
    min_distances = []
    bary_distances = []

    for matrix in p_matrices:
        # Threshold a copy so the caller's matrices are left untouched.
        p = np.array(matrix, copy=True)
        p[p < threshold * p.max()] = 0

        # (row, col) of every surviving pixel, in row-major order.
        coords = np.argwhere(p > 0)

        gmm = mixture.GaussianMixture(
            n_components=2, covariance_type='full', random_state=random_state
        ).fit(coords)
        # Builtin bool: the np.bool alias no longer exists in current NumPy.
        in_component_1 = gmm.predict(coords).astype(bool)
        segm1 = coords[in_component_1]
        segm2 = coords[~in_component_1]

        # The extreme entries of the pairwise distance matrices are the wanted
        # distances themselves; no need to look the point pairs up again.
        # Note: each cdist call allocates a full len(a) x len(b) matrix.
        dist_max_red = distance.cdist(segm1, segm1, 'euclidean').max()
        dist_max_green = distance.cdist(segm2, segm2, 'euclidean').max()
        dist_max = distance.cdist(coords, coords, 'euclidean').max()
        dist_min = distance.cdist(segm1, segm2, 'euclidean').min()

        dist_bary = distance.euclidean(barycenter(segm1), barycenter(segm2))

        back_max_distances.append(dist_max_red)
        ker_max_distances.append(dist_max_green)
        max_distances.append(dist_max)
        min_distances.append(dist_min)
        bary_distances.append(dist_bary)

    # Built once, after the loop, so an empty input still yields a valid result.
    d = {"background_max_distance": back_max_distances,
         "kernel_max_distance": ker_max_distances,
         "textures_max_distance": max_distances,
         "textures_min_distance": min_distances,
         "barycenter_distance": bary_distances,
         "group": np.full(len(p_matrices), flag)}

    return d

In [20]:
# ?np.full

In [21]:
# Extract the distance features per group; the second argument is the integer
# label written to the "group" column (0 = control, 1 = BC).
control = segmentation(p_matrices_control, 0)
bc = segmentation(p_matrices_bc, 1)

In [22]:
# Same feature extraction for the FAM matrices, labelled 2 in the "group" column.
fam = segmentation(p_matrices_fam, 2)

In [23]:
# Tabulate the control features: each key of the segmentation() dict becomes a column.
df_control = pd.DataFrame(control)

In [24]:
# Persist the control features. The path is assembled with os.path.join (the
# literal "csvs\..." form is only a directory path on Windows) and the output
# directory is created if it does not exist yet.
os.makedirs('csvs', exist_ok=True)
df_control.to_csv(os.path.join('csvs', 'df_control.csv'), index = False)

In [25]:
# Preview the first rows of the control feature table.
df_control.head()

Unnamed: 0,background_max_distance,kernel_max_distance,textures_max_distance,textures_min_distance,barycenter_distance,group
0,12.369317,14.142136,22.671568,1.0,10.189276,0
1,16.155494,17.204651,31.240999,1.0,13.991094,0
2,16.552945,15.297059,30.413813,1.0,13.52594,0
3,17.0,15.132746,29.832868,1.0,12.837341,0
4,16.155494,15.811388,30.413813,1.0,13.578361,0


In [26]:
# Tabulate the BC features: each key of the segmentation() dict becomes a column.
df_bc = pd.DataFrame(bc)

In [27]:
# Persist the BC features. The path is assembled with os.path.join (the
# literal "csvs\..." form is only a directory path on Windows) and the output
# directory is created if it does not exist yet.
os.makedirs('csvs', exist_ok=True)
df_bc.to_csv(os.path.join('csvs', 'df_bc.csv'), index = False)

In [28]:
# Preview the first rows of the BC feature table.
df_bc.head()

Unnamed: 0,background_max_distance,kernel_max_distance,textures_max_distance,textures_min_distance,barycenter_distance,group
0,23.853721,24.083189,47.507894,1.0,22.96142,1
1,17.492856,17.720045,33.970576,1.0,15.357888,1
2,18.601075,22.022716,40.311289,1.0,18.739477,1
3,22.671568,17.204651,40.360872,1.0,20.796279,1
4,15.652476,14.764823,29.0,1.0,12.828382,1


In [29]:
# Tabulate the FAM features: each key of the segmentation() dict becomes a column.
df_fam = pd.DataFrame(fam)

In [30]:
# Persist the FAM features. The path is assembled with os.path.join (the
# literal "csvs\..." form is only a directory path on Windows) and the output
# directory is created if it does not exist yet.
os.makedirs('csvs', exist_ok=True)
df_fam.to_csv(os.path.join('csvs', 'df_fam.csv'), index = False)

In [31]:
# Preview the first rows of the FAM feature table.
df_fam.head()

Unnamed: 0,background_max_distance,kernel_max_distance,textures_max_distance,textures_min_distance,barycenter_distance,group
0,19.416488,19.723083,38.897301,1.0,18.194518,2
1,20.808652,19.235384,38.948684,1.0,18.358038,2
2,19.104973,17.691806,37.48333,1.0,20.117416,2
3,17.492856,18.439089,35.383612,1.0,16.464861,2
4,14.866069,14.56022,27.658633,1.0,12.669002,2


In [32]:
# Per-group feature tables, in label order (control = 0, BC = 1, FAM = 2).
all_data = [df_control, df_bc, df_fam]

In [33]:
# Stack the three groups into one table. ignore_index=True gives the result a
# fresh 0..n-1 index; without it every group keeps its own 0-based labels and
# the combined index contains duplicates, which makes label-based lookups
# (res.loc[...]) ambiguous.
res = pd.concat(all_data, ignore_index=True)

In [34]:
# Preview the first rows of the combined table (these come from the control group,
# which is first in the concatenated list).
res.head()

Unnamed: 0,background_max_distance,kernel_max_distance,textures_max_distance,textures_min_distance,barycenter_distance,group
0,12.369317,14.142136,22.671568,1.0,10.189276,0
1,16.155494,17.204651,31.240999,1.0,13.991094,0
2,16.552945,15.297059,30.413813,1.0,13.52594,0
3,17.0,15.132746,29.832868,1.0,12.837341,0
4,16.155494,15.811388,30.413813,1.0,13.578361,0


In [35]:
# Summary statistics over all three groups. In the saved output the maxima lie far
# above the 75% quantiles (e.g. textures_min_distance: 1.0 vs. ~118.5), so a few
# matrices look like outliers worth inspecting.
res.describe()

Unnamed: 0,background_max_distance,kernel_max_distance,textures_max_distance,textures_min_distance,barycenter_distance,group
count,3536.0,3536.0,3536.0,3536.0,3536.0,3536.0
mean,16.383306,16.457061,30.682541,1.08551,14.114617,0.90526
std,3.43547,3.398518,7.824125,2.06838,4.483109,0.872536
min,3.0,10.440307,15.0,1.0,5.138974,0.0
25%,13.928388,14.035669,24.758837,1.0,11.007116,0.0
50%,15.652476,15.811388,29.068884,1.0,13.17562,1.0
75%,18.027756,18.35756,35.468296,1.0,16.37595,2.0
max,42.449971,39.623226,141.014184,118.511603,129.504252,2.0


In [36]:
# Persist the combined table. The path is assembled with os.path.join (the
# literal "csvs\..." form is only a directory path on Windows) and the output
# directory is created if it does not exist yet.
os.makedirs('csvs', exist_ok=True)
res.to_csv(os.path.join('csvs', 'df_all.csv'), index = False)