In [36]:
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import msfeastPipeline as msfeast
import os

In [37]:
test_data_directory = "test_data"
filepath_test_spectra = os.path.join(test_data_directory, "test_spectra.mgf")
filepath_test_quant_table = os.path.join(test_data_directory, "test_quant_table.csv")
filepath_test_treat_table = os.path.join(test_data_directory, "test_treat_table.csv")

In [38]:
treat_table = pd.read_csv(filepath_test_treat_table)
quant_table = pd.read_csv(filepath_test_quant_table)
pipeline = msfeast.Msfeast()
pipeline.attach_spectral_data_from_file(filepath_test_spectra, identifier_key="scans")
pipeline.attach_quantification_table(quant_table)
pipeline.attach_treatment_table(treat_table)

In [39]:
pipeline.run_spectral_similarity_computations("ModifiedCosine")

In [40]:
pipeline.run_and_attach_kmedoid_grid([3,4,5,6,7,8,9,10,12,14,16,18,20])
pipeline.run_and_attach_tsne_grid([5,10,15,20,25])

Kmedoid grid results. Use to inform kmedoid classification selection ilocs.
    iloc   k  silhouette_score  random_seed_used
0      0   3          0.248863                 0
1      1   4          0.217128                 0
2      2   5          0.217015                 0
3      3   6          0.215879                 0
4      4   7          0.245240                 0
5      5   8          0.247320                 0
6      6   9          0.246809                 0
7      7  10          0.453825                 0
8      8  12          0.388331                 0
9      9  14          0.358659                 0
10    10  16          0.323834                 0
11    11  18          0.268399                 0
12    12  20          0.223671                 0
T-sne grid results. Use to inform t-sne embedding selection.
   iloc  perplexity  pearson_score  spearman_score  random_seed_used
0     0           5       0.655730        0.567051                 0
1     1          10       0.692365     

In [41]:
pipeline.select_kmedoid_settings(iloc = 7)
pipeline.select_tsne_settings(iloc = 1)

Plot Similarity matrix

In [42]:
import numpy as np
import plotly.express as px
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage, optimal_leaf_ordering
from scipy.cluster import hierarchy
import copy
# Assume 'similarity_array' is your pairwise similarity array
similarity_array = copy.deepcopy(pipeline.similarity_array)
# Compute the distances for hierarchical clustering
#distances = msfeast._convert_similarity_to_distance(similarity_array)

linkage_matrix = hierarchy.ward(similarity_array) # hierarchical clustering using ward linkage
ordered_index = hierarchy.leaves_list(
    hierarchy.optimal_leaf_ordering(linkage_matrix, similarity_array)
)
ordered_similarity_array = similarity_array[ordered_index,:][:,ordered_index]
ordered_index = np.array(ordered_index, dtype=str)
# Draw the heatmap
fig = px.imshow(ordered_similarity_array)
fig.update_layout(
  width=700,height=700,
  xaxis = dict(tickmode = 'array', tickvals = np.arange(0, ordered_similarity_array.shape[0]), ticktext = ordered_index,),
  yaxis = dict(tickmode = 'array',  tickvals = np.arange(0, ordered_similarity_array.shape[0]), ticktext = ordered_index),
)

Plot distance matrix

In [43]:
import numpy as np
import plotly.express as px
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage, optimal_leaf_ordering
from scipy.cluster import hierarchy
import copy
# Assume 'similarity_array' is your pairwise similarity array
similarity_array = copy.deepcopy(pipeline.similarity_array)
# Compute the distances for hierarchical clustering
distances = msfeast._convert_similarity_to_distance(similarity_array)

linkage_matrix = hierarchy.ward(distances) # hierarchical clustering using ward linkage
ordered_index = hierarchy.leaves_list(
    hierarchy.optimal_leaf_ordering(linkage_matrix, distances)
)
distances = distances[ordered_index,:][:,ordered_index]
ordered_index = np.array(ordered_index, dtype=str)
# Draw the heatmap
fig = px.imshow(distances)
fig.update_layout(
  width=700,height=700,
  xaxis = dict(tickmode = 'array', tickvals = np.arange(0, distances.shape[0]), ticktext = ordered_index,),
  yaxis = dict(tickmode = 'array',  tickvals = np.arange(0, distances.shape[0]), ticktext = ordered_index),
)


scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix


scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix



Compute t-SNE embedding

In [47]:
pipeline.assignment_table["set_id"]

0     group_km_2
1     group_km_4
2     group_km_4
3     group_km_6
4     group_km_4
5     group_km_4
6     group_km_8
7     group_km_4
8     group_km_3
9     group_km_0
10    group_km_6
11    group_km_9
12    group_km_9
13    group_km_2
14    group_km_9
15    group_km_7
16    group_km_4
17    group_km_8
18    group_km_5
19    group_km_9
20    group_km_6
21    group_km_4
22    group_km_9
23    group_km_0
24    group_km_4
25    group_km_4
26    group_km_4
27    group_km_6
28    group_km_1
29    group_km_2
Name: set_id, dtype: object

In [58]:
tsnedf = pipeline.embedding_coordinates_table
tsnedf["k-medoid cluster"] = pipeline.assignment_table["set_id"]
tsnedf["ordered_index"] = ordered_index
fig = px.scatter(
  tsnedf, x="x", y="y", text="ordered_index",
  color_discrete_sequence=px.colors.qualitative.Set1
)
fig.update_traces(textposition='top center')
fig.update_layout(
  height=600, 
  width =700, 
  title_text='t-SNE Embedding of Similarity Data',
  xaxis_title = "t-sne embedding x coordinate",
  yaxis_title = "t-sne embedding y coordinate")
fig.update_traces(marker={'size': 15})
fig.show()

In [59]:
tsnedf = pipeline.embedding_coordinates_table
tsnedf["k-medoid cluster"] = pipeline.assignment_table["set_id"]
tsnedf["ordered_index"] = ordered_index
fig = px.scatter(
  tsnedf, x="x", y="y", text="ordered_index", color="k-medoid cluster",
  color_discrete_sequence=px.colors.qualitative.Set1
)
fig.update_traces(textposition='top center')
fig.update_layout(
  height=600, 
  width =700, 
  title_text='t-SNE Embedding of Similarity Data With K-medoid Clusters',
  xaxis_title = "t-sne embedding x coordinate",
  yaxis_title = "t-sne embedding y coordinate")
fig.update_traces(marker={'size': 15})
fig.show()