In [None]:
import pandas as pd
import numpy as np
# Seed NumPy's global random generator once, before any other cell runs.
np.random.seed(42)
# Upper slice bound used by calculate_fourier_coefficients: FFT bins
# 1 .. num_coefficients-1 are kept (bin 0 is skipped), i.e. 13 complex bins here.
num_coefficients = 14
from BeyondBlooms2024.config import name_dict, color_dict
from BeyondBlooms2024.config_file import (ABUNDANCES_FILE, CCMN_CON_MAP_PATH, CON_LOUVAIN_META_PATH,CON_LOUVAIN_NETWORK_PATH, ENRICH,
NUM_PERMUTATIONS, NUM_SAMPLES, NUM_CORES, METADATA_FILE, PRUNED_PVAL_CCMN_PATH,PVAL_CCMN_PATH,ENRICHED_META_PATH, RANDOM_PVAL_CCMN_PATH)

In [None]:
def calculate_fourier_coefficients(series, n_coefficients=None):
    """Return the leading FFT bins of a 1-D series as a flat real-valued vector.

    The series is transformed with ``np.fft.fft``; bin 0 is skipped and bins
    ``1 .. n_coefficients - 1`` are kept, so ``n_coefficients - 1`` complex
    values are retained. Their real parts are returned first, followed by
    their imaginary parts.

    Parameters
    ----------
    series : array-like
        One-dimensional sequence of numeric values (e.g. a DataFrame row).
    n_coefficients : int, optional
        Exclusive upper bound of the retained FFT bins. When omitted, the
        notebook-level ``num_coefficients`` constant is used, which keeps
        existing one-argument calls unchanged.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``2 * (n_coefficients - 1)`` (shorter if the
        series has fewer bins than requested).
    """
    # Resolve the default at call time so edits to the config cell take effect
    # without redefining this function.
    if n_coefficients is None:
        n_coefficients = num_coefficients
    spectrum = np.fft.fft(series)
    kept_bins = spectrum[1:n_coefficients]
    # No per-call printing: this runs once per row under DataFrame.apply and
    # would otherwise flood the cell output.
    return np.concatenate([np.real(kept_bins), np.imag(kept_bins)], axis=0)

In [None]:
# --- Run configuration -------------------------------------------------------
hellinger = True
kind = "euk"
locations = ["f4"]  # ["f4", "egc", "hgiv"]
dic_loc = {"f4": "F4", "egc": "EGC", "hgiv": "HG"}
# chosen_method = "Pearson_FFT"
chosen_method = "NMI"
# Only the Pearson-based methods are flagged as symmetric.
sym = chosen_method in ["Pearson_FFT", "Pearson"]

# --- Load the abundance table (once per configured location) ------------------
for loc in locations:
    df_spec = pd.read_csv(ABUNDANCES_FILE, sep=";", index_col=0)
    if hellinger:
        # Square-root every entry, then scale each row to unit Euclidean length.
        df_sqrt = np.sqrt(df_spec)
        row_norms = np.linalg.norm(df_sqrt, axis=1)
        df_normalized = df_sqrt.div(row_norms, axis=0)
        df_spec = df_normalized
df_spec.shape

In [None]:
# Swap rows and columns of the abundance table; the FFT step further down
# is applied to each row of this transposed frame.
df_spec_T = df_spec.T

In [None]:
# Display the transposed abundance table for a visual sanity check.
df_spec_T

In [None]:
# One FFT feature vector per row; 'expand' spreads the returned array over columns.
df_fft_spec = df_spec_T.apply(
    calculate_fourier_coefficients,
    axis=1,
    result_type="expand",
)

In [None]:
# Display the per-row FFT feature table.
df_fft_spec

In [None]:
# Create column names for the FFT components.
# calculate_fourier_coefficients keeps FFT bins 1 .. num_coefficients-1 and emits
# their real parts followed by their imaginary parts, so the column count is
# derived from the same constant instead of being hard-coded.
n_fft_columns = 2 * (num_coefficients - 1)
fft_column_names = [f'fft_component_{i+1}' for i in range(n_fft_columns)]

# Rename the columns in the new DataFrame (raises if the width does not match).
df_fft_spec.columns = fft_column_names

In [None]:
# Load the comma-separated table stored at ENRICH and preview its first rows.
meta = pd.read_csv(ENRICH, sep=",")
meta.head()

In [None]:
# Reload the metadata, expose cluster name / colour under the short aliases
# 'clu' and 'colors', and order the rows by cluster label.
meta = (
    pd.read_csv(ENRICH, sep=",")
    .assign(
        clu=lambda frame: frame["cluster_names"],
        colors=lambda frame: frame["cluster_color"],
    )
    .sort_values(by="clu")
)

# Node -> cluster lookup, plus the node order used to align the feature table.
node_label = meta[["Nodes", "clu"]].set_index("Nodes")
clu_dict = node_label["clu"].to_dict()
list_off_con = meta["Nodes"]

In [None]:
# Distinct cluster labels in ascending order.
clu_l = sorted(meta["clu"].unique().tolist())
clu_l

In [None]:
# Keep only the rows listed in the metadata, in the metadata's (cluster-sorted) order.
df_fft_spec = df_fft_spec.loc[list_off_con].copy()

In [None]:
import numpy as np
from sklearn.decomposition import PCA
# X is a reference to df_fft_spec, not a copy: columns that later cells add to
# df_fft_spec would be fed into the PCA if this cell were re-run afterwards.
X = df_fft_spec
# Reduce the FFT feature rows to their first 10 principal components.
pca = PCA(n_components=10)
abc = pca.fit_transform(X)

In [None]:
# Display the PCA scores returned by pca.fit_transform.
abc

In [None]:
import umap
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Standardise the PCA scores, then embed them with UMAP's default settings.
scaled_data = StandardScaler().fit_transform(abc)
reducer = umap.UMAP()
U_embedding = reducer.fit_transform(scaled_data)
print(U_embedding.shape)

# Keep the 2-D coordinates next to the FFT features.
df_fft_spec["x_2d"] = U_embedding[:, 0]
df_fft_spec["y_2d"] = U_embedding[:, 1]

# Quick-look scatter, one point per row, coloured by the metadata colour column.
fig_2d, ax_2d = plt.subplots()
ax_2d.scatter(U_embedding[:, 0], U_embedding[:, 1], c=meta["colors"].values, s=5)
fig_2d.savefig("figures/PCA_Umap2.png")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Requires U_embedding (2-D UMAP coordinates) and meta from the cells above.

# Set the style for the plot
sns.set(style="whitegrid")

# Pair every cluster label with its colour explicitly. A positional palette built
# from the unique colours drifts out of step with the hue levels as soon as two
# clusters share a colour; a label -> colour mapping cannot.
cluster_palette = dict(zip(meta["clu"], meta["colors"]))

# Create a scatter plot using seaborn
plt.figure(figsize=(10, 8))
sns.scatterplot(x=U_embedding[:, 0], y=U_embedding[:, 1], hue=meta["clu"].values, palette=cluster_palette, s=50)

# Axis labels and legend
plt.xlabel('Umap x')
plt.ylabel('Umap y')
plt.legend(title='Colors', loc='upper right')

# Save the plot
plt.savefig("figures/PCA_Umap2_seaborn.png")

# Show the plot
plt.show()

In [None]:
import plotly.express as px
import umap
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Three-component UMAP fitted directly on the PCA scores (no standardisation here).
scaled_data = abc
reducer = umap.UMAP(n_components=3)
V_embedding = reducer.fit_transform(scaled_data)

# Store the three embedding axes next to the FFT features.
for _axis, _column in enumerate(("x_3d", "y_3d", "z_3d")):
    df_fft_spec[_column] = V_embedding[:, _axis]

print(V_embedding.shape)
d = {}

In [None]:
# List the distinct cluster labels present in the metadata.
meta["clu"].unique()

In [None]:
# Write the FFT features plus the UMAP coordinate columns added in earlier cells
# to a ';'-separated CSV, then display the table.
df_fft_spec.to_csv("tables/latentspacelatent_space.csv", sep=";")
df_fft_spec

In [None]:
# Attach node ids and their cluster label (a node missing from clu_dict raises KeyError).
df_fft_spec["Nodes"] = df_fft_spec.index
df_fft_spec["clu"] = [clu_dict[node] for node in df_fft_spec["Nodes"]]

# Drop unlabeled nodes before computing per-cluster statistics.  # ToDo. Fix
is_labeled = df_fft_spec["clu"] != "no_label"
df_fft_spec = df_fft_spec[is_labeled]

# 3-D coordinates plus cluster label, the input for the centroid computation.
umap3d_matrix = df_fft_spec[["x_3d", "y_3d", "z_3d", "clu"]]

In [None]:
# Mean x_3d / y_3d / z_3d per cluster label: one centroid row per 'clu' value.
centroids = umap3d_matrix.groupby('clu').mean()

In [None]:
# Display the per-cluster centroids.
centroids

In [None]:
# Create a 3D scatter plot using Plotly
# Colours are looked up by cluster label rather than by position. The centroid
# table excludes 'no_label' rows while meta may still contain them, so a positional
# list of unique colours can be longer than, and shifted against, the centroids.
cluster_colors = dict(zip(meta["clu"], meta["colors"]))

fig = px.scatter_3d(
    x=V_embedding[:, 0],
    y=V_embedding[:, 1],
    z=V_embedding[:, 2],
    color=meta["clu"].values,
    color_discrete_map=cluster_colors,
    opacity=0.5,
)

# One 'x' marker per cluster centroid, drawn in that cluster's own colour.
centroids_trace = px.scatter_3d(x=centroids["x_3d"], y=centroids["y_3d"], z=centroids["z_3d"])
centroids_trace.update_traces(
    marker=dict(
        color=[cluster_colors[label] for label in centroids.index],
        symbol="x",
        line=dict(color='black', width=1000,),
    ),
    name='Centroids',
)
fig.add_trace(centroids_trace.data[0])

fig.update_layout(scene=dict(aspectmode="data"), height=1000, width=1000)
fig.write_image("figures/Sup_Figure_S4_PCA_Umap3d_seaborn.png")
fig.show()

In [None]:
# Display the centroids again as a reference for the distance matrix built next.
centroids

In [None]:
import numpy as np
from scipy.spatial.distance import pdist, squareform

# Condensed vector of pairwise distances between centroid rows (pdist's default metric).
distance_vector = pdist(centroids)

# Expand to a square, symmetric matrix; rows and columns follow the order of `centroids`.
distance_matrix = squareform(distance_vector)

# Distances rounded to whole numbers; note that `m` is a plain array without cluster labels.
m = distance_matrix.round(0)

In [None]:
# Node -> cluster-name lookup taken from the metadata table.
meta_file_dict = meta[["Nodes", "cluster_names"]].set_index("Nodes")
mfd = meta_file_dict["cluster_names"].to_dict()

# Load the edge list and tag both endpoints of every edge with their cluster
# (endpoints that are absent from the lookup become NaN).
df_ccm = pd.read_csv(PRUNED_PVAL_CCMN_PATH, sep=";").assign(
    from_clu=lambda edges: edges["from"].map(mfd),
    to_clu=lambda edges: edges["to"].map(mfd),
)

In [None]:
# Mean 'corr' per (to_clu, from_clu) pair. The built-in groupby mean replaces
# .agg(np.mean): passing the NumPy callable is deprecated in recent pandas and
# resolves to this same groupby mean anyway.
matrix_3b = (
    df_ccm[["from_clu", "to_clu", "corr"]]
    .groupby(["to_clu", "from_clu"])
    .mean()
    .reset_index()
)

# Create a matrix using pivot: rows = target cluster, columns = source cluster.
matrix_pivot = matrix_3b.pivot(index='to_clu', columns='from_clu', values='corr')
matrix_pivot

In [None]:
# Export the to_clu x from_clu matrix of mean 'corr' values as a ';'-separated CSV.
matrix_pivot.to_csv("tables/Main_Figure_S3_D_table.csv",sep=";")

In [None]:
# Cluster-by-cluster mean 'corr' matrix; pairs with no entry in the pivot are set to 0.
# ToDo replace with ccm aggregated ADD HERE
dc = pd.DataFrame(matrix_pivot.fillna(0))


In [None]:
# Show the column labels (from_clu values) of the cluster matrix.
dc.columns

In [None]:
# Display the zero-filled cluster matrix.
dc

In [None]:
#from config import name_dict_new
#name_clu = [i for i in centroids.index]
#n_dict = {col: name_clu[col] for col in dc.columns}
#dc.rename(columns=n_dict, inplace=True)
#dc.index = dc.index.map(n_dict)

In [None]:
# Binary mask: 0 where the cluster pair has value 0 in dc, 1 everywhere else.
# NOTE: the name shadows Python's built-in bin(); it is kept because later cells use it.
bin = dc.where(dc == 0, other=1)
bin

In [None]:
# All-ones mask carrying dc's row/column labels, so that every cluster pair is kept.
# Built explicitly instead of through the sentinel comparison `dc == 10`, which
# only gives all ones as long as no entry of dc happens to equal 10.
bin_one = pd.DataFrame(1.0, index=dc.index, columns=dc.columns)
bin_one

In [None]:
# All distances
# `m` is an unlabeled array ordered like `centroids`, whereas `bin_one` carries the
# cluster labels of the pivot table. Line the distances up by cluster label before
# multiplying, so that a differing order or membership cannot attach a distance
# to the wrong cluster pair (pairs without a centroid become NaN).
m_by_cluster = pd.DataFrame(m, index=centroids.index, columns=centroids.index)
m_aligned = m_by_cluster.reindex(index=bin_one.index, columns=bin_one.columns)
dist_map = bin_one * m_aligned.to_numpy()
print(dist_map.sum().sum())
dist_map

In [None]:
# Export the labelled cluster-to-cluster distance matrix as a ';'-separated CSV.
dist_map.to_csv("tables/Sup_Figures_4_5_6_Latentspace_distance_matrix.csv",sep=";")

In [None]:
# distance masked with CCMNs
# `m` has no labels and follows the order of `centroids`; align it to the labels of
# `bin` first, so each distance is multiplied with the mask entry of the same
# cluster pair (pairs without a centroid become NaN).
m_by_cluster = pd.DataFrame(m, index=centroids.index, columns=centroids.index)
m_aligned = m_by_cluster.reindex(index=bin.index, columns=bin.columns)
result = bin * m_aligned.to_numpy()
print(result.sum().sum())
result

In [None]:
# Shift every distance by one so that zero distances stay visible after masking.
m_inv = m+1
# 0/1 indicator of the entries whose shifted distance differs from 1.
n = np.where(m_inv!=1, 1, 0)

# `m_inv` is unlabeled and ordered like `centroids`; align it to the labels of `bin`
# before multiplying, so each value meets the mask entry of the same cluster pair.
m_inv_by_cluster = pd.DataFrame(m_inv, index=centroids.index, columns=centroids.index)
m_inv_aligned = m_inv_by_cluster.reindex(index=bin.index, columns=bin.columns)
result_inv = bin * m_inv_aligned.to_numpy()

print(result_inv.sum().sum())
result_inv

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# A: the 0/1 connection mask; D: 1 where the rounded distance lies strictly between 0 and 4.
A = bin.values
D = np.where((m > 0) & (m < 4), 1, 0)

# Show both matrices side by side.
fig_cmp, (ax_adjacency, ax_distance) = plt.subplots(1, 2)

ax_adjacency.imshow(A, cmap='viridis')
ax_adjacency.set_title('Adjacency Matrix')

ax_distance.imshow(D, cmap='viridis')
ax_distance.set_title('Distance Matrix')

plt.show()


In [None]:
import matplotlib.pyplot as plt

# Hide the cells whose masked distance is exactly 0.
pivot_table_mask = (result == 0.00)

# clustermap builds its own figure, so no separate plt.figure() is opened first:
# that figure stayed empty and its figsize never reached the heatmap.
# Clustering is switched off, so rows and columns keep the order of `result`.
heatmap_grid = sns.clustermap(result, annot=True, cmap='coolwarm', fmt='.0f', cbar=True, square=True, linewidths=1, mask=pivot_table_mask, col_cluster=False, row_cluster=False)

# NOTE(review): called after plotting, so this presumably only affects later figures.
sns.set_style('white')

# Save through the returned grid rather than relying on the implicit current figure.
save_path_temp = 'figures/Sup_Figure_S6_Distance_Connections_heatmap_ALL.png'
heatmap_grid.savefig(save_path_temp, dpi=200, bbox_inches='tight')

plt.show()

In [None]:
import matplotlib.pyplot as plt

# Hide the cells whose distance is exactly 0.
pivot_table_mask = (dist_map == 0.00)

# clustermap builds its own figure, so no separate plt.figure() is opened first:
# that figure stayed empty and its figsize never reached the heatmap.
# Rows and columns are reordered by hierarchical clustering here.
heatmap_grid = sns.clustermap(dist_map, annot=True, cmap='coolwarm', fmt='.0f', cbar=True, square=True, linewidths=1, mask=pivot_table_mask, col_cluster=True, row_cluster=True,)

# NOTE(review): called after plotting, so this presumably only affects later figures.
sns.set_style('white')

# Save through the returned grid rather than relying on the implicit current figure.
save_path_temp = 'figures/Sup_Figure_S5_Distance_ALL_Connections_heatmap_ALL.png'
heatmap_grid.savefig(save_path_temp, dpi=200, bbox_inches='tight')

plt.show()