In [None]:
# Copyright 2025 Sony Corporation

# Notebook for drawing Figure 4
Note: Before running this notebook, please run generate_data.ipynb

In [None]:
import subprocess
import os
import glob
import shutil
import sys
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Root directory for locating result files: the current working directory of
# the running process. The result directories are joined onto this path later.
workdir = os.getcwd()

In [None]:
# Datasets to load and plot, in the left-to-right order used on the x axis.
# Each name must be unique and must match a key of BL_Fval and best_metas;
# "FP7000_34c-pca" is the PCA-initialised variant drawn as the green box.
samplenames = [
    "FP7000_34c",
    "FP7000_34c-pca",
    "Levine_32dim",
    "Levine_13dim",
    "Samusik_01",
    "Samusik_all",
]


In [None]:
# Best F-value of BL-FlowSOM per dataset, stored as a single-row table whose
# column names are the dataset names.
BL_Fval = pd.DataFrame([
    {
        "Levine_32dim": 0.799,
        "Levine_13dim": 0.520,
        "Samusik_01": 0.684,
        "Samusik_all": 0.694472,
        "FP7000_34c": 0.516,
        "FP7000_34c-pca": 0.516,
    }
])

In [None]:
# Best meta-division number for each dataset. The value is used as a path
# component when the result files of a dataset are located.
best_metas = dict([
    ("Levine_32dim", 28),
    ("Levine_13dim", 48),
    ("Samusik_01", 37),
    ("Samusik_all", 38),
    ("FP7000_34c", 20),
    ("FP7000_34c-pca", 20),
])

In [None]:
# Name of the FlowSOM results directory; it is joined onto `workdir` when the
# per-dataset result files are read.
fsom_experiment_name = "Flowsom"

In [None]:
# Build `resdf`, a wide table with one column per dataset. Rows are indexed by
# the number parsed from each result file name.
#
# For every dataset the files are read from
#   <workdir>/<fsom_experiment_name>/<sample_name>/<best meta number>/10
# and only files named "vseed_f1hang_<number>...resh" are used.
sample_frames = []
for sample_name in samplenames:
    print(sample_name)
    # Direct lookup: an unknown dataset raises a KeyError naming the dataset
    # instead of failing later while formatting None into the path.
    best_m = best_metas[sample_name]
    print(best_m)
    directory = os.path.join(workdir, fsom_experiment_name, sample_name, "%d" % best_m, "10")
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    files = sorted(
        file for file in os.listdir(directory)
        if file.startswith("vseed_f1hang_") and file.endswith("resh")
    )
    if not files:
        # Without this check the dataset would silently be missing from resdf.
        raise FileNotFoundError(
            "no vseed_f1hang_*resh result files found in %s" % directory
        )

    per_file_frames = []
    for file in files:
        file_path = os.path.join(directory, file)
        # Get the numeric part from the file name and use it as an index
        index = int(file.split('_')[2].split('.')[0])
        temp_df = pd.read_csv(file_path, header=None)
        per_file_frames.append(temp_df.assign(index=index).set_index('index'))

    # Concatenate once per dataset instead of growing a frame inside the loop.
    df = pd.concat(per_file_frames)
    t_df = df.rename(columns={0: sample_name})
    sample_frames.append(t_df)

# Align all datasets side by side on the parsed index.
resdf = pd.concat(sample_frames, axis=1) if sample_frames else pd.DataFrame()

In [None]:
# Reshape the wide results table into long form: every value becomes one row
# with the originating column name in "variable" and the number in "value".
f_df = pd.melt(resdf)

In [None]:
# Print descriptive statistics of the loaded values, one dataset at a time.
for sample in samplenames:
    summary = resdf[sample].describe()
    print(summary)

In [None]:
# Long-form view of the results ("variable" = dataset name, "value" = loaded
# number); this is the table the box plot reads.
f_df = pd.melt(resdf)

In [None]:
import matplotlib.patches as mpatches
import matplotlib.lines as mlines

# Figure 4: box plots of the per-dataset FlowSOM values (blue), the
# "FP7000_34c-pca" dataset re-drawn in green, and one red marker per dataset
# for the BL-FlowSOM value taken from BL_Fval.
sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(8, 4), dpi=300)

# A single explicit category order is shared by every layer, so boxes and
# markers always land on the same x position regardless of how seaborn would
# infer the order from the data. dict.fromkeys drops repeated names while
# keeping the first-seen order.
plot_order = list(dict.fromkeys(samplenames))
pca_sample = "FP7000_34c-pca"

sns.boxplot(x="variable", y="value", showmeans=False, data=f_df,
            order=plot_order, color="b", width=0.3, ax=ax)

# Overlay the PCA-initialised dataset in green. Passing `order` keeps the box
# in its own slot even though the filtered data holds just one category.
pca_df = f_df[f_df["variable"] == pca_sample]
if not pca_df.empty:
    sns.boxplot(x="variable", y="value", showmeans=False, data=pca_df,
                order=plot_order, color="g", width=0.3, ax=ax)

for position, sample_name in enumerate(plot_order):
    # BL_Fval holds one row; take the scalar rather than plotting a Series.
    baseline = BL_Fval[sample_name].iloc[0]
    ax.plot(position, baseline, marker='o', markersize=5, color='red', zorder=10)

ax.set_xlabel('Dataset', fontsize=12)
ax.set_ylim(0, 1)

# The legend is built from proxy artists, so the plotted artists need no labels.
blue_patch = mpatches.Patch(color='b', label='Flowsom random init.\n(average)')
green_patch = mpatches.Patch(color='g', label='Flowsom pca init.\n(average)')
red_marker = mlines.Line2D([], [], color='red', marker='o', linestyle='None',
                           markersize=5, label='BL-FlowSOM')
ax.legend(handles=[red_marker, blue_patch, green_patch], loc="lower right")


In [None]:
# Save the figure as "Fig4.png"; the relative path is resolved against the
# current working directory.
fig.savefig("Fig4.png")