In [None]:
# Copyright 2025 Sony Corporation

# Plot the F value at iteration 10 of BL-Flowsom and Flowsom with error bars and values

In [None]:
import subprocess
import os
import glob
import shutil
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
import matplotlib.lines as mlines

In [None]:
# Root directory of the analysis: experiment results are read from
# <workdir>/<experiment name>/<sample name>/... and figures are saved into workdir.
# It is captured from the process's current directory, so (re-)run this cell only
# while the kernel is still in the intended directory.
workdir = os.getcwd()

In [None]:
# Echo the resolved root directory so it can be checked before any file is read or written.
workdir

In [None]:
# Directory names (below workdir) that hold the results of each method.
fsom_experiment_name = "Flowsom"
BL_experiment_name = "BL_FlowSOM"

# Default range of meta-cluster counts: 10 to 60 inclusive.
meta_list = range(10, 61)

In [None]:
# Datasets used for the FlowSOM-only box plots.
samplenames = [
    "Levine_32dim",
    "Levine_13dim",
    "Samusik_01",
    "Samusik_all",
]

# Datasets used for the BL-FlowSOM vs. FlowSOM comparison.
samplenames_BL = [
    "Levine_32dim",
    "Levine_13dim",
    "Samusik_01",
    "Samusik_all",
]

# FP dataset, plotted separately.
samplenames_FP = ["FP34c"]

In [None]:
# Meta-cluster counts shown on the x axis, per dataset (upper bound exclusive).
xrange_dict = {
    "Levine_32dim": range(14, 37),
    "Levine_13dim": range(24, 50),
    "Samusik_01": range(24, 39),
    "Samusik_all": range(24, 40),
    "FP34c-pca": range(14, 21),
    "FP34c": range(14, 21),
}

In [None]:
# FlowSOM only: for every dataset, draw one box per meta-cluster count showing the
# spread of the F values found in the per-seed result files, and save the figure
# into workdir as "<sample>_meta_F.png".
for sample_name in samplenames:
    print(sample_name)
    fsom_exp_path = os.path.join(workdir, fsom_experiment_name, sample_name)
    # Index instead of .get(): an unconfigured dataset fails right here with a KeyError
    # naming it rather than with "NoneType is not iterable" further down. A dedicated
    # name also keeps the notebook-level `meta_list` default from being overwritten.
    sample_metas = xrange_dict[sample_name]
    meta_labels = ["%d" % meta for meta in sample_metas]

    # One frame per meta-cluster count (column named after the count), one row per file.
    per_meta_frames = []
    for meta_label in meta_labels:
        directory = os.path.join(fsom_exp_path, meta_label, "10")
        seed_frames = []
        # sorted() makes the row order independent of the filesystem's listing order.
        for file in sorted(os.listdir(directory)):
            if not (file.startswith("vseed_f1hang_") and file.endswith("resh")):
                continue
            # The third "_"-separated field (up to the first ".") becomes the row index.
            seed = int(file.split('_')[2].split('.')[0])
            seed_df = pd.read_csv(os.path.join(directory, file), header=None)
            seed_frames.append(seed_df.assign(index=seed).set_index('index'))
        if not seed_frames:
            print("no vseed_f1hang_*resh files in %s; that box is left empty" % directory)
            continue
        # Concatenate once per directory instead of growing a DataFrame file by file.
        per_meta_frames.append(pd.concat(seed_frames).rename(columns={0: meta_label}))

    if not per_meta_frames:
        print("no FlowSOM results found for %s; plot skipped" % sample_name)
        continue
    resdf = pd.concat(per_meta_frames, axis=1)
    d = pd.melt(resdf)  # long format: "variable" = meta-cluster count, "value" = F

    sns.set(style="whitegrid")
    fig, ax = plt.subplots(figsize=(12, 6), dpi=300)
    # order= reserves one x slot per configured meta-cluster count, so a count without
    # results shows up as a gap instead of silently shifting the remaining boxes.
    sns.boxplot(x="variable", y="value", showmeans=False, data=d, width=0.3,
                order=meta_labels, ax=ax)
    ax.set_ylim(0, 1)
    ax.set_xlabel('meta', fontsize=12)
    ax.set_ylabel('F', fontsize=12)
    ax.set_title('%s meta vs F(flowsom 100 seed)' % sample_name, fontsize=14)
    # Explicit target path: the output location no longer depends on the current directory.
    fig.savefig(os.path.join(workdir, "%s_meta_F.png" % sample_name))
    plt.close(fig)

### BL-FlowSOM and FlowSOM


In [None]:
# BL-FlowSOM vs. FlowSOM: for every dataset, draw the FlowSOM F1-score distribution as
# one box per meta-cluster count, overlay the BL-FlowSOM iteration-10 value as a red
# marker, and save the figure into workdir as "BL-FSOM_Fval_<sample>_meta_F.png".
for sample_name in samplenames_BL:
    print(sample_name)
    fsom_exp_path = os.path.join(workdir, fsom_experiment_name, sample_name)
    BL_exp_path = os.path.join(workdir, BL_experiment_name, sample_name)
    # Index instead of .get(): an unconfigured dataset fails right here with a KeyError
    # naming it rather than with "NoneType is not iterable" further down. A dedicated
    # name also keeps the notebook-level `meta_list` default from being overwritten.
    sample_metas = xrange_dict[sample_name]
    meta_labels = ["%d" % meta for meta in sample_metas]

    # ---- FlowSOM results: one column per meta-cluster count, one row per file ----
    per_meta_frames = []
    for meta_label in meta_labels:
        directory = os.path.join(fsom_exp_path, meta_label, "10")
        seed_frames = []
        # sorted() makes the row order independent of the filesystem's listing order.
        for file in sorted(os.listdir(directory)):
            if not (file.startswith("vseed_f1hang_") and file.endswith("resh")):
                continue
            # The third "_"-separated field (up to the first ".") becomes the row index.
            seed = int(file.split('_')[2].split('.')[0])
            seed_df = pd.read_csv(os.path.join(directory, file), header=None)
            seed_frames.append(seed_df.assign(index=seed).set_index('index'))
        if not seed_frames:
            print("no vseed_f1hang_*resh files in %s; that box is left empty" % directory)
            continue
        # Concatenate once per directory instead of growing a DataFrame file by file.
        per_meta_frames.append(pd.concat(seed_frames).rename(columns={0: meta_label}))

    if not per_meta_frames:
        print("no FlowSOM results found for %s; plot skipped" % sample_name)
        continue
    resdf = pd.concat(per_meta_frames, axis=1)
    d = pd.melt(resdf)  # long format: "variable" = meta-cluster count, "value" = F1
    # Meta-cluster count whose mean over all rows is highest.
    max_index = resdf.mean().idxmax()
    print(max_index)

    # ---- BL-FlowSOM results, read via absolute paths (no os.chdir) so a failure ----
    # ---- in this cell cannot leave the kernel in another working directory      ----
    bl_frames = []
    for file in sorted(os.listdir(BL_exp_path)):
        if not file.endswith('.hres'):
            continue
        name_fields = file.split("_")
        if int(name_fields[4]) != 10:  # iteration = 10 only
            continue
        # The third "_"-separated field is the meta-cluster count of this result.
        bl_meta = int(name_fields[2])
        if bl_meta not in sample_metas:
            continue
        bl_df = pd.read_csv(os.path.join(BL_exp_path, file), header=None)
        bl_frames.append(bl_df.assign(index=bl_meta).set_index('index'))
    # Column 0 is the plotted value; the index is the meta-cluster count.
    bl_scores = pd.concat(bl_frames)[0] if bl_frames else pd.Series(dtype=float)

    print("BL:%s" % sample_name)
    if bl_scores.empty:
        print("no iteration-10 .hres results found in %s" % BL_exp_path)
    else:
        # Meta-cluster count with the highest BL-FlowSOM value.
        print(bl_scores.idxmax())

    # ---- Figure ----
    sns.set(style="whitegrid")
    fig, ax = plt.subplots(figsize=(12, 6), dpi=300)
    # order= reserves one x slot per configured meta-cluster count, so the markers
    # placed by position below always sit on the matching box. color="b" makes the
    # boxes agree with the blue legend patch.
    sns.boxplot(x="variable", y="value", showmeans=False, data=d, width=0.3,
                color="b", order=meta_labels, ax=ax)

    marker_x = []
    marker_y = []
    for xpos, meta in enumerate(sample_metas):
        if meta not in bl_scores.index:
            print("no BL-FlowSOM result for meta=%d; marker omitted" % meta)
            continue
        # .loc[[meta]] always yields a Series, so several results for one count are
        # all drawn instead of breaking the plot call.
        values = bl_scores.loc[[meta]]
        marker_x.extend([xpos] * len(values))
        marker_y.extend(values.tolist())
    ax.plot(marker_x, marker_y, linestyle="None", marker="o", markersize=5,
            color='red', zorder=10)

    ax.set_ylim(0, 1)
    ax.set_xlabel('No. of meta clusters', fontsize=12)
    ax.set_ylabel('F1-score', fontsize=12)
    ax.set_title('%s' % sample_name, fontsize=14)
    flowsom_handle = mpatches.Patch(color='b', label='Flowsom\n(average)')
    bl_handle = mlines.Line2D([], [], color='red', marker='o', linestyle='None',
                              markersize=5, label='BL-FlowSOM')
    ax.legend(handles=[flowsom_handle, bl_handle], loc="lower right")
    # Explicit target path: the output location no longer depends on the current directory.
    fig.savefig(os.path.join(workdir, "BL-FSOM_Fval_%s_meta_F.png" % sample_name))
    plt.close(fig)

# FP Dataset

In [None]:
# FP dataset: draw the FlowSOM F1-score distribution as one box per meta-cluster count,
# overlay the BL-FlowSOM iteration-10 value as a red marker, and save the figure into
# workdir as "BL-FSOM_Fval_<sample>_meta_F.png".
for sample_name in samplenames_FP:
    print(sample_name)
    fsom_exp_path = os.path.join(workdir, fsom_experiment_name, sample_name)
    BL_exp_path = os.path.join(workdir, BL_experiment_name, sample_name)
    # Index instead of .get(): an unconfigured dataset fails right here with a KeyError
    # naming it rather than with "NoneType is not iterable" further down. A dedicated
    # name also keeps the notebook-level `meta_list` default from being overwritten.
    sample_metas = xrange_dict[sample_name]
    meta_labels = ["%d" % meta for meta in sample_metas]

    # ---- FlowSOM results: one column per meta-cluster count, one row per file ----
    per_meta_frames = []
    for meta_label in meta_labels:
        directory = os.path.join(fsom_exp_path, meta_label, "10")
        seed_frames = []
        # sorted() makes the row order independent of the filesystem's listing order.
        for file in sorted(os.listdir(directory)):
            if not (file.startswith("vseed_f1hang_") and file.endswith("resh")):
                continue
            # The third "_"-separated field (up to the first ".") becomes the row index.
            seed = int(file.split('_')[2].split('.')[0])
            seed_df = pd.read_csv(os.path.join(directory, file), header=None)
            seed_frames.append(seed_df.assign(index=seed).set_index('index'))
        if not seed_frames:
            print("no vseed_f1hang_*resh files in %s; that box is left empty" % directory)
            continue
        # Concatenate once per directory instead of growing a DataFrame file by file.
        per_meta_frames.append(pd.concat(seed_frames).rename(columns={0: meta_label}))

    if not per_meta_frames:
        print("no FlowSOM results found for %s; plot skipped" % sample_name)
        continue
    resdf = pd.concat(per_meta_frames, axis=1)
    d = pd.melt(resdf)  # long format: "variable" = meta-cluster count, "value" = F1
    # Meta-cluster count whose median over all rows is highest.
    max_index2 = resdf.median().idxmax()
    print(max_index2)

    # ---- BL-FlowSOM results, read via absolute paths (no os.chdir) so a failure ----
    # ---- in this cell cannot leave the kernel in another working directory      ----
    bl_frames = []
    for file in sorted(os.listdir(BL_exp_path)):
        if not file.endswith('.hres'):
            continue
        name_fields = file.split("_")
        if int(name_fields[4]) != 10:  # iteration = 10 only
            continue
        # The third "_"-separated field is the meta-cluster count of this result.
        bl_meta = int(name_fields[2])
        if bl_meta not in sample_metas:
            continue
        bl_df = pd.read_csv(os.path.join(BL_exp_path, file), header=None)
        bl_frames.append(bl_df.assign(index=bl_meta).set_index('index'))
    # Column 0 is the plotted value; the index is the meta-cluster count.
    bl_scores = pd.concat(bl_frames)[0] if bl_frames else pd.Series(dtype=float)
    if bl_scores.empty:
        print("no iteration-10 .hres results found in %s" % BL_exp_path)

    # ---- Figure ----
    sns.set(style="whitegrid")
    fig, ax = plt.subplots(figsize=(5, 6), dpi=300)
    # order= reserves one x slot per configured meta-cluster count, so the markers
    # placed by position below always sit on the matching box. color="b" makes the
    # boxes agree with the blue legend patch.
    sns.boxplot(x="variable", y="value", showmeans=False, data=d, width=0.3,
                color="b", order=meta_labels, ax=ax)

    marker_x = []
    marker_y = []
    for xpos, meta in enumerate(sample_metas):
        if meta not in bl_scores.index:
            print("no BL-FlowSOM result for meta=%d; marker omitted" % meta)
            continue
        # .loc[[meta]] always yields a Series, so several results for one count are
        # all drawn instead of breaking the plot call.
        values = bl_scores.loc[[meta]]
        marker_x.extend([xpos] * len(values))
        marker_y.extend(values.tolist())
    ax.plot(marker_x, marker_y, linestyle="None", marker="o", markersize=5,
            color='red', zorder=10)

    ax.set_ylim(0, 1)
    ax.set_xlabel('No. of meta clusters', fontsize=12)
    ax.set_ylabel('F1-score', fontsize=12)
    ax.set_title('FP_34dim', fontsize=14)
    flowsom_handle = mpatches.Patch(color='b', label='Flowsom\n(average)')
    bl_handle = mlines.Line2D([], [], color='red', marker='o', linestyle='None',
                              markersize=5, label='BL-FlowSOM')
    ax.legend(handles=[flowsom_handle, bl_handle], loc="lower right")
    # Explicit target path: the output location no longer depends on the current directory.
    fig.savefig(os.path.join(workdir, "BL-FSOM_Fval_%s_meta_F.png" % sample_name))
    plt.close(fig)

In [None]:
# FP dataset, PCA variant: the FlowSOM results are read from the "<sample>-pca"
# directory, the BL-FlowSOM results from the plain "<sample>" directory. Boxes are drawn
# in green and the figure is saved into workdir as "BL-FSOM_Fval_<sample>-pca_meta_F.png".
for sample_name in samplenames_FP:
    print(sample_name)
    fsom_exp_path = os.path.join(workdir, fsom_experiment_name, sample_name + "-pca")
    BL_exp_path = os.path.join(workdir, BL_experiment_name, sample_name)
    # Index instead of .get(): an unconfigured dataset fails right here with a KeyError
    # naming it rather than with "NoneType is not iterable" further down. A dedicated
    # name also keeps the notebook-level `meta_list` default from being overwritten.
    # NOTE(review): the range is looked up under the plain sample name; xrange_dict also
    # has a "<sample>-pca" entry for FP34c with the same range - confirm which key is meant.
    sample_metas = xrange_dict[sample_name]
    meta_labels = ["%d" % meta for meta in sample_metas]

    # ---- FlowSOM (PCA) results: one column per meta-cluster count, one row per file ----
    per_meta_frames = []
    for meta_label in meta_labels:
        directory = os.path.join(fsom_exp_path, meta_label, "10")
        seed_frames = []
        # sorted() makes the row order independent of the filesystem's listing order.
        for file in sorted(os.listdir(directory)):
            if not (file.startswith("vseed_f1hang_") and file.endswith("resh")):
                continue
            # The third "_"-separated field (up to the first ".") becomes the row index.
            seed = int(file.split('_')[2].split('.')[0])
            seed_df = pd.read_csv(os.path.join(directory, file), header=None)
            seed_frames.append(seed_df.assign(index=seed).set_index('index'))
        if not seed_frames:
            print("no vseed_f1hang_*resh files in %s; that box is left empty" % directory)
            continue
        # Concatenate once per directory instead of growing a DataFrame file by file.
        per_meta_frames.append(pd.concat(seed_frames).rename(columns={0: meta_label}))

    if not per_meta_frames:
        print("no FlowSOM results found for %s-pca; plot skipped" % sample_name)
        continue
    resdf = pd.concat(per_meta_frames, axis=1)
    d = pd.melt(resdf)  # long format: "variable" = meta-cluster count, "value" = F1
    # Meta-cluster count whose median over all rows is highest.
    max_index2 = resdf.median().idxmax()
    print(max_index2)

    # ---- BL-FlowSOM results, read via absolute paths (no os.chdir) so a failure ----
    # ---- in this cell cannot leave the kernel in another working directory      ----
    bl_frames = []
    for file in sorted(os.listdir(BL_exp_path)):
        if not file.endswith('.hres'):
            continue
        name_fields = file.split("_")
        if int(name_fields[4]) != 10:  # iteration = 10 only
            continue
        # The third "_"-separated field is the meta-cluster count of this result.
        bl_meta = int(name_fields[2])
        if bl_meta not in sample_metas:
            continue
        bl_df = pd.read_csv(os.path.join(BL_exp_path, file), header=None)
        bl_frames.append(bl_df.assign(index=bl_meta).set_index('index'))
    # Column 0 is the plotted value; the index is the meta-cluster count.
    bl_scores = pd.concat(bl_frames)[0] if bl_frames else pd.Series(dtype=float)
    if bl_scores.empty:
        print("no iteration-10 .hres results found in %s" % BL_exp_path)

    # ---- Figure ----
    sns.set(style="whitegrid")
    fig, ax = plt.subplots(figsize=(5, 6), dpi=300)
    # order= reserves one x slot per configured meta-cluster count, so the markers
    # placed by position below always sit on the matching box.
    sns.boxplot(x="variable", y="value", showmeans=False, data=d, color="g", width=0.3,
                order=meta_labels, ax=ax)

    marker_x = []
    marker_y = []
    for xpos, meta in enumerate(sample_metas):
        if meta not in bl_scores.index:
            print("no BL-FlowSOM result for meta=%d; marker omitted" % meta)
            continue
        # .loc[[meta]] always yields a Series, so several results for one count are
        # all drawn instead of breaking the plot call.
        values = bl_scores.loc[[meta]]
        marker_x.extend([xpos] * len(values))
        marker_y.extend(values.tolist())
    ax.plot(marker_x, marker_y, linestyle="None", marker="o", markersize=5,
            color='red', zorder=10)

    ax.set_ylim(0, 1)
    ax.set_xlabel('No. of meta clusters', fontsize=12)
    ax.set_ylabel('F1-score', fontsize=12)
    ax.set_title('FP_34dim', fontsize=14)
    flowsom_pca_handle = mpatches.Patch(color='g', label='Flowsom(PCA init.)\n(average)')
    bl_handle = mlines.Line2D([], [], color='red', marker='o', linestyle='None',
                              markersize=5, label='BL-FlowSOM')
    ax.legend(handles=[flowsom_pca_handle, bl_handle], loc="lower right")
    # Explicit target path: the output location no longer depends on the current directory.
    fig.savefig(os.path.join(workdir, "BL-FSOM_Fval_%s-pca_meta_F.png" % sample_name))
    plt.close(fig)