# Segmentation of simulated images:

## K-means image segmentation

A simple K-means image segmentation based on peak areas:

In [None]:
# Segment every lipid's peak-area image into `nclust` intensity classes with K-means.
# The result has the same shape as `integrated_peak_areas`; each entry is the rank of
# the pixel's cluster (0 = cluster with the lowest centre).
peak_kmeans_segmentation = np.zeros(integrated_peak_areas.shape)
nclust = 2
# Take the image dimensions from the data instead of hard-coding 40 x 40.
image_shape = integrated_peak_areas.shape[:2]
for lipid_id, lipid_name in enumerate(lipid_names):
    peak_intensities = integrated_peak_areas[:,:,lipid_id]
    # KMeans expects a (n_samples, n_features) matrix: one pixel per row, one feature.
    peak_intensities = peak_intensities.reshape((-1,1))
    # Fixed seed so the clustering is reproducible after a kernel restart.
    peak_kmeans = KMeans(n_clusters=nclust, random_state=0)
    region_classification = peak_kmeans.fit_predict(peak_intensities)
    region_classification = region_classification.reshape(image_shape)
    # K-means label ids are arbitrary; the double argsort maps each label to the rank
    # of its cluster centre so that labels are comparable between lipids and methods.
    region_classification = np.argsort(np.argsort(peak_kmeans.cluster_centers_.reshape((-1,))))[region_classification]
    peak_kmeans_segmentation[..., lipid_id] = region_classification
    

K-means image segmentation based on estimated signals:

In [None]:
# K-means segmentation of the estimated lipid signals, one lipid at a time.
# The output is sized from the array that is actually segmented here.
masserstein_kmeans_segmentation = np.zeros(estimated_lipid_signals.shape)
# Take the image dimensions from the data instead of hard-coding 40 x 40.
signal_image_shape = estimated_lipid_signals.shape[:2]
for lipid_id, lipid_name in enumerate(lipid_names):
    # Kept as a 2-D image; only the copy handed to KMeans is flattened.
    lipid_signals = estimated_lipid_signals[:,:,lipid_id]
    # Fixed seed so the clustering is reproducible after a kernel restart.
    peak_kmeans = KMeans(n_clusters=nclust, random_state=0)
    # KMeans expects a (n_samples, n_features) matrix: one pixel per row, one feature.
    region_classification = peak_kmeans.fit_predict(lipid_signals.reshape((-1,1)))
    region_classification = region_classification.reshape(signal_image_shape)
    # K-means label ids are arbitrary; the double argsort maps each label to the rank
    # of its cluster centre (0 = cluster with the lowest centre).
    region_classification = np.argsort(np.argsort(peak_kmeans.cluster_centers_.reshape((-1,))))[region_classification]
    masserstein_kmeans_segmentation[..., lipid_id] = region_classification
    

## spatial-DGMM image segmentation

Now we'll perform spatial-DGMM segmentations based on peak intensities and on Masserstein estimates.  
First, convert the signal and intensity arrays to long-format data frames.

In [None]:
# One [x, y] pair per pixel in row-major order (x = column index, y = row index).
# The grid size is read from the image array instead of being hard-coded to 40 x 40.
n_rows, n_cols = integrated_peak_areas.shape[:2]
coords = [[j, i] for i in range(n_rows) for j in range(n_cols)]

In [None]:
# Flatten each (rows, cols, lipids) array into a list with one per-lipid vector per pixel.
# Pixels are visited row by row (i outer, j inner); the extents come from the arrays
# themselves rather than a hard-coded 40 x 40 grid.
peak_rows, peak_cols = integrated_peak_areas.shape[:2]
signal_rows, signal_cols = estimated_lipid_signals.shape[:2]
integrated_peak_areas_melt = [integrated_peak_areas[i,j,...] for i in range(peak_rows) for j in range(peak_cols)]
estimated_lipid_signals_melt = [estimated_lipid_signals[i,j,...] for i in range(signal_rows) for j in range(signal_cols)]

In [None]:
# Long-format tables: one row per pixel, one column per lipid.
integrated_peak_areas_df, estimated_lipid_signals_df = (
    pd.DataFrame(data=melted_pixels, columns=lipid_names)
    for melted_pixels in (integrated_peak_areas_melt, estimated_lipid_signals_melt)
)

Import the prepared data frames into the R environment

In [None]:
%R -i integrated_peak_areas_df 
%R -i estimated_lipid_signals_df
%R -i coords
%R -i lipid_names

Prepare the `MSImagingExperiment` objects from the `Cardinal` package

In [None]:
%%R 

# Transpose the imported coordinates into a data frame and label its two columns x and y.
# NOTE(review): this overwrites `coords`, so the cell should not be re-run without re-importing it.
coords <- setNames(as.data.frame(t(coords)), c('x', 'y'))

In [None]:
%%R

# The pixel x lipid tables are transposed before being used as spectraData
# (presumably Cardinal expects features in rows and pixels in columns -- confirm).
intsy_data <- t(integrated_peak_areas_df)
signal_data <- t(estimated_lipid_signals_df)

# Feature metadata: one placeholder integer m/z per lipid, rows named after the lipids.
fdata <- MassDataFrame(mz=seq_along(lipid_names), row.names=as.character(lipid_names))

# Pixel metadata: a single run, with one label per pixel. Pixels are the ROWS of
# `coords` (its columns are only x and y), so the length must come from nrow().
run <- factor(rep("run0", nrow(coords)))
pdata <- PositionDataFrame(run=run, coord=coords)

# Experiment built from the estimated (Masserstein) lipid signals.
masserstein_image <- MSImagingExperiment(spectraData=signal_data,
                                         featureData=fdata,
                                         pixelData=pdata,
                                         centroided=TRUE)

# Experiment built from the integrated peak areas.
intensity_image <- MSImagingExperiment(spectraData=intsy_data,
                                       featureData=fdata,
                                       pixelData=pdata,
                                       centroided=TRUE)

In [None]:
%%R

# Print the peak-intensity MSImagingExperiment as a quick sanity check of the import.
intensity_image

Visualize the obtained images to verify that the imports and conversions were successful.  

In [None]:
%%R 
# Ion images for features 1-3 of the peak-intensity experiment.
image(intensity_image, i=1:3)

In [None]:
%%R 
# Ion images for features 1-3 of the Masserstein-signal experiment.
image(masserstein_image,i=1:3)

Do the segmentation:

In [None]:
%%R 

# Fit a spatial-DGMM with k=2 components to each experiment; k=2 matches the two
# classes (nclust = 2) used for the K-means segmentations.
# NOTE(review): no set.seed() precedes these calls -- if the fit is stochastic the
# segmentation may differ between runs; confirm and seed if needed.
peak_sdgmm <- spatialDGMM(intensity_image, k=2)
masserstein_sdgmm <- spatialDGMM(masserstein_image, k=2)

Visualize segmentations in R prior to importing back to Python:

In [None]:
%%R 

# Class maps of the peak-intensity spatial-DGMM for features 1-3.
image(peak_sdgmm, values='class', i=1:3)

In [None]:
%%R

# Default plot of the peak-intensity spatial-DGMM for features 1-3
# (presumably the fitted mixture components -- confirm against the Cardinal docs).
plot(peak_sdgmm, i=1:3)

In [None]:
%%R

# Class maps of the Masserstein-signal spatial-DGMM for features 1-3.
image(masserstein_sdgmm, values='class', i=1:3)

In [None]:
%%R

# Default plot of the Masserstein-signal spatial-DGMM for features 1-3
# (presumably the fitted mixture components -- confirm against the Cardinal docs).
plot(masserstein_sdgmm, i=1:3)

In [None]:
%%R

# Convert the class labels of each fitted model to numeric codes so they can be
# pulled back into Python. The Python side indexes the result as [pixel, feature],
# so sapply is expected to simplify to a pixels x features matrix -- verify.
masserstein_sdgmm_segmentation_melt <- sapply(masserstein_sdgmm$class, as.numeric)
peak_sdgmm_segmentation_melt <- sapply(peak_sdgmm$class, as.numeric)


In [None]:
# Fetch the numeric class codes computed in R back into the Python namespace
# under the same variable names.
masserstein_sdgmm_segmentation_melt = %Rget masserstein_sdgmm_segmentation_melt
peak_sdgmm_segmentation_melt = %Rget peak_sdgmm_segmentation_melt

In [None]:
def _sdgmm_classes_to_image(class_codes, image_shape):
    """Reshape flattened spatial-DGMM class codes into an image-shaped label array.

    Parameters
    ----------
    class_codes : array-like, shape (n_pixels, n_lipids)
        Numeric class codes with pixels stored row by row, i.e. pixel (i, j)
        sits at row ``n_cols * i + j``.
    image_shape : tuple of int
        Target ``(n_rows, n_cols, n_lipids)`` shape.

    Returns
    -------
    numpy.ndarray
        Array of shape ``image_shape`` holding ``2 - code``, so code 1 becomes 1
        and code 2 becomes 0.

    Raises
    ------
    ValueError
        If ``class_codes`` does not contain exactly ``prod(image_shape)`` values.
    """
    # A C-order reshape puts row n_cols * i + j of the flat table at pixel (i, j).
    label_image = np.asarray(class_codes).reshape(image_shape)
    # Flip the two class codes (1 <-> 2) and shift them to 0/1 in one step.
    return 2 - label_image


# Image dimensions come from the arrays that were segmented, not from hard-coded 40 x 40 x 3.
masserstein_sdgmm_segmentation = _sdgmm_classes_to_image(
    masserstein_sdgmm_segmentation_melt, estimated_lipid_signals.shape
)

peak_sdgmm_segmentation = _sdgmm_classes_to_image(
    peak_sdgmm_segmentation_melt, integrated_peak_areas.shape
)

Verify a successful conversion:

In [None]:
# Top row: spatial-DGMM segments from peak intensities; bottom row: from estimated signals.
fig, axes = plt.subplots(2, 3, figsize=(8, 4))
panel_rows = (
    (peak_sdgmm_segmentation, ' peak'),
    (masserstein_sdgmm_segmentation, ' signal'),
)
for lipid_id, lipid_name in enumerate(lipid_names):
    for row_axes, (segmentation, title_suffix) in zip(axes, panel_rows):
        ax = row_axes[lipid_id]
        ax.set_title(lipid_name + title_suffix)
        ax.imshow(segmentation[..., lipid_id])
        ax.axis('off')
fig.tight_layout()

## Comparison of segmentation results

Accuracy of different segmentation approaches

In [None]:
nclust = 2

algorithms = ['Peak K-means', 'WS K-means', 'Peak sDGMM', 'WS sDGMM']
# Segmentation arrays listed in the same order as `algorithms`.
segmentations = [
    peak_kmeans_segmentation,
    masserstein_kmeans_segmentation,
    peak_sdgmm_segmentation,
    masserstein_sdgmm_segmentation,
]
accuracy = np.zeros((len(lipid_names), len(algorithms)))
correlation = np.zeros((len(lipid_names), len(algorithms)))

for lipid_id, lipid_name in enumerate(lipid_names):
    true_mask = enrichment_mask[..., lipid_id]
    for algorithm_id, segmentation in enumerate(segmentations):
        region_classification = segmentation[..., lipid_id]
        # Accuracy: fraction of pixels whose label equals the enrichment mask.
        accuracy[lipid_id, algorithm_id] = np.mean(region_classification == true_mask)
        # Correlation: Pearson coefficient between the flattened labels and the mask.
        correlation[lipid_id, algorithm_id] = pearsonr(region_classification.flatten(), true_mask.flatten())[0]

# Per-algorithm averages over all lipids.
average_accuracy = accuracy.mean(axis=0)
average_correlation = correlation.mean(axis=0)

In [None]:
# Tab-separated tables: one row per lipid plus an average row, one column per algorithm.
table_header = ('Lipid     ', 'Peak K-means', 'WS K-means', 'Peak sDGMM', 'WS sDGMM')

print('Accuracy of segmentation:')
print(*table_header, sep='\t')
for row_name, row_values in zip(lipid_names, accuracy):
    print(row_name, *('{:2.2f}%     '.format(100*value) for value in row_values), sep='\t')
print('Average     ', *('{:2.2f}%     '.format(100*value) for value in average_accuracy), sep='\t')

print('Correlation of segmentation:')
print(*table_header, sep='\t')
for row_name, row_values in zip(lipid_names, correlation):
    print(row_name, *('{:0.4f}     '.format(value) for value in row_values), sep='\t')
print('Average     ', *('{:0.4f}     '.format(value) for value in average_correlation), sep='\t')

Visualize segments obtained with the four approaches:

In [None]:
# Show the raw accuracy matrix (rows: lipids, columns: algorithms).
accuracy

In [None]:
# 4 x 3 grid: one row per segmentation approach, one column per lipid.
fig, axes = plt.subplots(4, 3, figsize=(4.6, 6.3))

# (segmentation array, column of `accuracy` for that approach, row label)
panel_rows = [
    (peak_kmeans_segmentation, 0, 'Peak intensity\nK-means'),
    (peak_sdgmm_segmentation, 2, 'Peak intensity\nspatial-DGMM'),
    (masserstein_kmeans_segmentation, 1, 'Masserstein\nK-means'),
    (masserstein_sdgmm_segmentation, 3, 'Masserstein\nspatial-DGMM'),
]
badge_style = dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.2')

for row_id, (segmentation, algorithm_id, row_label) in enumerate(panel_rows):
    for lipid_id, lipid_name in enumerate(lipid_names):
        ax = axes[row_id, lipid_id]
        if row_id == 0:
            # Only the top row carries the lipid names as column headers.
            ax.set_title(lipid_name)
        ax.imshow(segmentation[..., lipid_id])
        # Round to the nearest percent. Truncating with int() can under-report by one
        # point because of float error (e.g. 100 * 0.57 == 56.99999999999999).
        ax.text(38, 1, '{:.0f}%'.format(100 * accuracy[lipid_id, algorithm_id]),
                bbox=badge_style, ha='right', va='top')
        ax.axis('off')
    # Row label, rotated and anchored at the right edge of the last panel in the row.
    axes[row_id, -1].text(40, 20, row_label, rotation=-90, rotation_mode='anchor',
                          ha='center', va='bottom')

fig.tight_layout()
# NOTE(review): assumes the Figs/simulations directory already exists.
fig.savefig('Figs/simulations/segmentation_comparison.png', dpi=600)

Visualize the distribution of signals in clusters

In [None]:
# 6 x 3 grid of stacked histograms: one column per lipid, one row per
# (signal source, labelling) combination listed below.
fig, axes = plt.subplots(6, 3, figsize=(12, 14))

true_legend = ['Low', 'High']
cluster_legend = ['Cluster 0', 'Cluster 1']
# (values to histogram, labels used to split them, legend entries, title suffix)
histogram_rows = [
    (integrated_peak_areas, enrichment_mask, true_legend, ', peak intensity\nTrue segments'),
    (estimated_lipid_signals, enrichment_mask, true_legend, ', Masserstein\nTrue segments'),
    (integrated_peak_areas, peak_kmeans_segmentation, cluster_legend, ', peak intensity\nK-means'),
    (estimated_lipid_signals, masserstein_kmeans_segmentation, cluster_legend, ', Masserstein\nK-means'),
    (integrated_peak_areas, peak_sdgmm_segmentation, cluster_legend, ', peak intensity\nspatial-DGMM'),
    (estimated_lipid_signals, masserstein_sdgmm_segmentation, cluster_legend, ', Masserstein\nspatial-DGMM'),
]

for lipid_id, lipid_name in enumerate(lipid_names):
    for ax, (values, labels, legend_entries, title_suffix) in zip(axes[:, lipid_id], histogram_rows):
        label_image = labels[..., lipid_id]
        # One group of pixel values per class label 0 .. nclust-1.
        grouped_values = [values[label_image == cluster, lipid_id] for cluster in range(nclust)]
        ax.hist(grouped_values, stacked=True, bins=40)
        ax.legend(legend_entries)
        ax.set_title(lipid_name + title_suffix)

fig.tight_layout()
fig.savefig('Figs/simulations/intensity_distributions_in_clusters.png', dpi=600)

# Selecting an ion image to save

In [None]:
# Pick the cerebellum spectrum with the smallest confs[0][0] value
# (that entry is used as the m/z of the spectrum further down in the notebook).
spectrum_of_interest = min(lipid_spectra['cerebellum'], key = lambda x: x.confs[0][0])

# The export below was indented under the assignment above, which opens no block and
# made the whole cell fail with an IndentationError; it now sits at top level.
# FIXME(review): `n`, `deconv_img`, `mass_axis_order` and `sorted_mass_axis` are not
# defined in this section -- confirm that earlier cells provide them (with `n` being a
# key of `image_shapes`) before running this cell.
with ImzMLWriter('%s_deconvolved_image.imzML' % n) as writer:
    for i in range(image_shapes[n][0]):
        for j in range(image_shapes[n][1]):
            # Intensities of pixel (i, j), reordered to follow the sorted mass axis.
            sorted_intensity = [deconv_img[i,j,iid] for iid in mass_axis_order]
            # Coordinates are written as (x, y) = (column, row).
            writer.addSpectrum(sorted_mass_axis, sorted_intensity, (j, i))

In [18]:
# Pair every cerebellum spectrum with its list index and keep the (spectrum, index)
# pair whose confs[0][0] value lies closest to 755.47.
spectrum_of_interest = min(
    ((spectrum, index) for index, spectrum in enumerate(lipid_spectra['cerebellum'])),
    key=lambda pair: abs(pair[0].confs[0][0]-755.47),
)

In [25]:
# Write a single-ion image (one m/z, one intensity per pixel) for the selected spectrum.
n = 'cerebellum'
lid = spectrum_of_interest[1]
mz = spectrum_of_interest[0].confs[0][0]

cerebellum_image = regression_images['cerebellum']
n_rows, n_cols = image_shapes[n][0], image_shapes[n][1]
with ImzMLWriter('test_image.imzML') as writer:
    for row in range(n_rows):
        for col in range(n_cols):
            # Coordinates are written as (x, y) = (column, row).
            writer.addSpectrum([mz], [cerebellum_image[row, col, lid]], (col, row))

In [24]:
# Index of the selected spectrum within lipid_spectra['cerebellum'].
lid

35