In [1]:
%gui wx
%matplotlib inline

#### load libraries, functions, colormap

In [2]:
from __future__ import division

In [3]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
from mayavi import mlab
import seaborn as sns
from vtk_rw import read_vtk, write_vtk

In [100]:
def make_cmap(c):
    """Build a discrete RGBA colormap with ``c`` entries.

    The first entry is a fixed grey (0.4, 0.4, 0.4); the remaining ``c - 1``
    entries come from seaborn's 'cubehelix' palette. An alpha column of ones
    is appended to every entry.

    Parameters
    ----------
    c : int
        Total number of colours, including the leading grey.

    Returns
    -------
    cmap : ndarray, shape (c, 4)
        RGBA colours as floats.
    cmap_255 : ndarray of int, shape (c, 4)
        The same colours multiplied by 255 and floored.
    """
    grey = np.array([[0.4, 0.4, 0.4]])
    helix = np.asarray(sns.color_palette('cubehelix', c - 1))
    rgb = np.concatenate((grey, helix), axis=0)
    alpha = np.ones((c, 1))
    cmap = np.concatenate((rgb, alpha), axis=1)

    # Scale the whole table at once instead of element by element.
    cmap_255 = np.floor(cmap * 255).astype(int)

    return cmap, cmap_255

#### load relevant files for subject and hemisphere

In [106]:
# --- Analysis parameters ---
hemi = 'rh'   # hemisphere label substituted into every file name below
embed = 3     # value substituted into the embedding / k-means file names
k = 7         # value substituted into the k-means file name; also sets the palette size

# One palette entry per cluster plus the leading grey entry from make_cmap.
pal, pal_255 = make_cmap(k + 1)

# --- Input files ---
mesh_file = '/scr/ilz3/myelinconnect/all_data_on_simple_surf/surfs/lowres_%s_d.vtk' % hemi
mask_file = '/scr/ilz3/myelinconnect/all_data_on_simple_surf/masks/%s_mask.1D.roi' % hemi
# '%s' formats the integers exactly as str() would, so no explicit conversion is needed.
embed_file = '/scr/ilz3/myelinconnect/all_data_on_simple_surf/clust_old/raw/%s_embed_%s.csv' % (hemi, embed)
kmeans_file = '/scr/ilz3/myelinconnect/all_data_on_simple_surf/clust_old/raw/%s_kmeans_%s_embed_%s.csv' % (hemi, k, embed)

# NOTE(review): later cells use `t1`, `var_file`, `t_file`, `p_file` and
# `fig_file`, but none of them is assigned in the active code of this notebook
# (the `*_file` names appear only in commented-out lines). They must be
# defined before the notebook can run from a fresh kernel.


#func_file=data_dir+'rest1_1_meshsmooth_%s/%s_%s_mid_simple_0.01_rest_%s_smoothdata.vtk'%(smooth, sub, hemi, hemi)
#t1_file=data_dir+'t1_smooth_1.5_mesh/%s_%s_mid_simple_0.01_t1_smooth1.5_%s.vtk'%(sub, hemi, hemi)
#var_file=data_dir+'/results_k%s_s%s_other/%s_%s_cluster_var.csv'%(str(n_components_kmeans),smooth, sub, hemi)
#t_file=data_dir+'/results_k%s_s%s_other/%s_%s_cluster_avg_t.csv'%(str(n_components_kmeans),smooth, sub, hemi)
#p_file=data_dir+'/results_k%s_s%s_other/%s_%s_cluster_avg_p.csv'%(str(n_components_kmeans),smooth, sub, hemi)
#fig_file=data_dir+'/results_k%s_s%s_other/%s_%s_cluster_violins.svg'%(str(n_components_kmeans),smooth, sub, hemi)

In [107]:
# Surface mesh: read_vtk returns three values; the first two are used below
# as vertex coordinates and triangle indices.
v, f, d = read_vtk(mesh_file)

# Only the first column of the mask file is kept.
mask = np.loadtxt(mask_file)[:, 0]
# NOTE: this rebinds `embed` from the integer setting used to build the file
# names to the loaded array.
embed = np.loadtxt(embed_file, delimiter=',')
clust = np.loadtxt(kmeans_file, delimiter=',')

# Split the vertex array into coordinate columns for mlab.triangular_mesh.
x, y, z = v[:, 0], v[:, 1], v[:, 2]
triangles = f

In [108]:
# Render the cluster labels on the surface, then swap in the custom palette.
mlab.figure(bgcolor=(1, 1, 1))
img = mlab.triangular_mesh(x, y, z, triangles, scalars=clust, colormap='spectral')
lut_manager = img.module_manager.scalar_lut_manager
# Copy of the table created for 'spectral'; not read again in the cells shown.
lut = lut_manager.lut.table.to_array()
lut_manager.lut.table = pal_255
mlab.draw()
mlab.show()

#### Get T1 values in all clusters

In [None]:
# Collect the T1 values of every cluster in a dictionary of lists keyed
# 'k0', 'k1', ... up to the largest label in `clust`
# (label 0 presumably marks masked-out vertices, per the original note "0=mask").
# NOTE(review): `t1` is not assigned in any cell of this notebook as shown;
# it has to be loaded (one value per entry of `clust`) before this cell runs.
t1_clust = {}
for c in range(int(clust.max() + 1)):
    t1_clust['k' + str(c)] = []

# Entries whose T1 value is not above this threshold are skipped.
t1_min = 1000

# The per-vertex label is deliberately not named `k`: that name holds the
# cluster-count setting from the configuration cell and must keep its value.
for i in range(len(t1)):
    if t1[i] > t1_min:
        label = int(clust[i])
        t1_clust['k' + str(label)].append(t1[i])

#### Compare the variance of each k-means cluster against all other clusters, and the mean between all pairs of clusters

In [None]:
# One-vs-rest comparison of T1 variance for every cluster label 1..max(clust).
# Rows of levene_array: Levene W, p value, variance of the cluster, variance of
# the pooled other clusters, significance flag, "variance is lower" flag.
n_clust = int(clust.max())
levene_array = np.zeros((6, n_clust))
col = []
sigk = []   # labels that are both significant and have lower variance than the rest

for idx in range(n_clust):
    label = idx + 1
    a = t1_clust['k' + str(label)]

    # Pool the values of all other clusters (label 0 is never included).
    b = []
    for other in range(1, n_clust + 1):
        if other != label:
            b += t1_clust['k' + str(other)]

    W, p = stats.levene(a, b)
    var_a = np.var(a)
    var_b = np.var(b)
    # 0.05 divided by the number of clusters tested.
    is_sig = p < (0.05 / n_clust)
    is_lower = var_a < var_b

    col.append(int(label))
    # Flags are stored as 1.0 / 0.0 in the float array.
    levene_array[:, idx] = (W, p, var_a, var_b, float(is_sig), float(is_lower))

    if is_sig and is_lower:
        sigk.append(label)

levene_df = pd.DataFrame(levene_array, columns=col, index=['W', 'p', 'var k', 'var other', 'p<0.05/k', 'var decrease'])
# NOTE(review): `var_file` is not assigned in the active code of this notebook.
levene_df.to_csv(var_file)

In [None]:
# Pairwise independent-samples t-tests between clusters 1..max(clust).
n_clust = int(clust.max())
t_array = np.zeros((n_clust, n_clust))
p_array = np.zeros((n_clust, n_clust))
col2 = list(range(1, n_clust + 1))

# Only the upper triangle (diagonal included) is computed; the lower triangle
# keeps its initial zeros.
for row in range(n_clust):
    a = t1_clust['k' + str(row + 1)]
    for column in range(row, n_clust):
        b = t1_clust['k' + str(column + 1)]
        t, p = stats.ttest_ind(a, b)
        t_array[row, column] = t
        p_array[row, column] = p

t_df = pd.DataFrame(t_array, columns=col2, index=col2)
p_df = pd.DataFrame(p_array, columns=col2, index=col2)

# NOTE(review): `t_file` and `p_file` are not assigned in the active code of
# this notebook.
t_df.to_csv(t_file)
p_df.to_csv(p_file)

#### Plot clustering, T1 values, Levene's statistics and T statistics

In [None]:
# One array of T1 values and one tick label per cluster 1..max(clust);
# label 0 is left out of the plot.
cluster_labels = list(range(1, int(clust.max()) + 1))
plot_list = [np.array(t1_clust['k' + str(label)]) for label in cluster_labels]
names = [str(label) for label in cluster_labels]

In [None]:
# Preview the palette built by make_cmap (grey entry first) as a colour strip.
sns.palplot(pal)

In [None]:
# Violin plot of the T1 distribution in each cluster.
sns.set_context('notebook', font_scale=1.8)
fig = plt.figure(figsize=(10, 10))
# pal[0] is the grey entry; the plotted clusters start at label 1, so skip it.
sns.violinplot(plot_list, color=pal[1:], names=names, saturation=1)
sns.axlabel('cluster #', 'T1 value', fontsize=22)
# A bare `savefig` is not defined in this notebook (only `plt` is imported),
# so save through the figure object created above.
# NOTE(review): `fig_file` is not assigned in the active code of this notebook.
fig.savefig(fig_file)

Variance comparison

In [None]:
# Display the full Levene table in scientific notation with two decimals.
# This changes the pandas float format globally, until it is set again.
pd.set_option('display.float_format', '{:,.2e}'.format)
levene_df

In [None]:
# Display only the last two rows ('p<0.05/k' and 'var decrease'), which hold
# 0/1 flags, without decimals.
pd.set_option('display.float_format', '{:,.0f}'.format)
levene_df.tail(2)

In [None]:
# Copy of the cluster map that keeps only the labels listed in `sigk`;
# every other entry stays 0.
sig = np.zeros_like(clust)
for label in sigk:
    members = (clust == label)
    sig[members] = label

In [None]:
# Labels of the clusters flagged by the Levene cell: p below 0.05 divided by
# the number of clusters, and variance lower than that of the pooled other clusters.
sigk

In [None]:
# Fraction of labelled entries (clust != 0) that belong to a cluster listed in `sigk`.
allcort = int(np.count_nonzero(clust != 0))
sigcort = int(np.count_nonzero(sig != 0))
# float() makes this a true division regardless of the `__future__` import
# executed in an earlier cell.
sigpart = float(sigcort) / allcort
# The call form with a single argument prints the same text under the Python 2
# print statement and is also valid Python 3.
print(float(sigpart))

In [None]:
# Render only the clusters listed in `sigk`, using the custom palette.
mlab.figure(bgcolor=(1, 1, 1))
# `sig` has one value per entry of `clust`, so it is drawn on the same mesh
# (x, y, z, triangles) as the cluster map. The names xt, yt, zt and trianglest
# used here before are not defined anywhere in this notebook.
# NOTE(review): the colour range presumably follows the values in `sig`, so
# colours may not match the full cluster map unless vmin/vmax are fixed - confirm.
img = mlab.triangular_mesh(x, y, z, triangles, scalars=sig, colormap='Set2')
lut = img.module_manager.scalar_lut_manager.lut.table.to_array()
img.module_manager.scalar_lut_manager.lut.table = pal_255
mlab.draw()
mlab.show()

T test (T values, p values)

In [None]:
# Display the t statistics with two decimals (global pandas display setting).
pd.set_option('display.float_format', '{:,.2f}'.format)
t_df

In [None]:
# Display the p values in scientific notation with two decimals
# (global pandas display setting).
pd.set_option('display.float_format', '{:,.2e}'.format)
p_df