In [1]:
import os

import pandas as pd

import numpy as np

from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Input / output locations, relative to the notebook's working directory.
DATA_DIR = "./radiomicsFeaturesICC"  # holds the "<aug_type>_internal.csv" / "<aug_type>_external.csv" ICC tables
OUT_DIR = "./figures"                # root folder under which every figure is written

# Augmentation scenarios; each name is the file-name prefix of one internal/external CSV pair.
AUG_TYPES = [
    "in_plane_random",
    "in_plane_systematic",
    "out_plane",
    "inout_plane_random",
    "inout_plane_systematic",
]

# Values of the `sequence` column that are analysed (presumably "sequences of interest" -- TODO confirm).
SOI = [
    "t2w",
    "adc",
    "sub_win",
    "sub_wout",
]

# ICC Plots

In [3]:
# Sub-folder of OUT_DIR that receives the ICC plots (one further sub-folder per sequence).
OUT_FOLDER = "plots"

In [4]:
# ICC plots: for every augmentation type, sequence and dataset (internal / external) draw the
# ICC point estimate with its confidence interval for the "original" features and for the
# filtered features selected through the internal data, then save one PNG per combination.
for aug_type in AUG_TYPES:

    # Sorting fixes the row order, and therefore the x-axis order of the plots.
    int_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_internal.csv"), index_col=0).sort_values(by=["feat_family","feature", "filter"])
    ext_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_external.csv"), index_col=0).sort_values(by=["feat_family","feature", "filter"])

    # "stable" is 1 when the lower bound of the ICC confidence interval exceeds 0.9.
    int_df["full_feature"] = int_df["feat_family"]+"_"+int_df["feature"]
    int_df["stable"] = (int_df["ci_down"]>0.9).astype(int)

    ext_df["full_feature"] = ext_df["feat_family"]+"_"+ext_df["feature"]
    ext_df["stable"] = (ext_df["ci_down"]>0.9).astype(int)

    for sequence in SOI:

        out_path = os.path.join(OUT_DIR,OUT_FOLDER,sequence)
        os.makedirs(out_path, exist_ok=True)

        # External rows are matched on the part of the sequence name before the first
        # underscore (e.g. "sub_win" -> "sub").
        seq_df = {
            "internal": int_df[int_df.sequence==sequence].reset_index(drop=True),
            "external": ext_df[ext_df.sequence==sequence.split("_")[0]].reset_index(drop=True),
        }

        # Rows whose stable flag equals the per-feature maximum over all filters in the
        # internal data: selects the best filter(s); duplicates can exist.
        # The string "max" replaces the builtin callable, which pandas dispatches to the same
        # groupby reduction but has deprecated as an argument.
        idx = seq_df["internal"].groupby(["feat_family","feature"])["stable"].transform("max")==seq_df["internal"]["stable"]

        for ds_type in seq_df:

            ds_df = seq_df[ds_type]

            # NOTE(review): idx is indexed like the internal frame; applying it to the external
            # frame assumes both frames list the same rows in the same order -- confirm.
            uf_df = ds_df[ds_df["filter"]=="original"]
            bf_df = ds_df[idx&(ds_df["filter"]!="original")]

            merged_df = pd.concat([uf_df,bf_df])

            # Legend order: "original" first, then the filters alphabetically.
            groups = ["original"] + sorted(bf_df["filter"].unique())

            fig, ax = plt.subplots(figsize=(12,8))

            for group in groups:

                group_df = merged_df[merged_df["filter"]==group]

                x = group_df.full_feature
                y = group_df.icc_value

                lower_ci = group_df.ci_down
                upper_ci = group_df.ci_up

                # errorbar expects distances from the point, not absolute bounds.
                lower_error = y - lower_ci
                upper_error = upper_ci - y

                ax.errorbar(x, y, yerr=[lower_error, upper_error], fmt='o', capsize=4, label=group)

                # Drawn on the explicit axes instead of the implicit pyplot "current" axes.
                ax.fill_between(x, lower_ci, upper_ci, alpha=0.2)

            # Dashed line at the 0.9 cut-off.
            ax.axhline(y=0.9, linestyle="--",color='green')
            ax.set_title('Original v/s Best Filtered Features')

            ax.set_xlabel('Features')
            ax.tick_params(axis='x', labelrotation=90, labelsize=10)

            ax.set_ylabel('ICC Value')
            ax.set_ylim([0,1])
            ax.legend(title="Filters",loc='center left', bbox_to_anchor=(1.0, 0.5))

            fig.savefig(os.path.join(out_path,f"{aug_type}_{ds_type}.png"),bbox_inches = 'tight',
                pad_inches = 0,dpi=300)

            # close the figure to avoid memory leak
            plt.close(fig)
   

# Overlap ICC Plots

In [5]:
# Sub-folder of OUT_DIR that receives the overlap ICC plots (one further sub-folder per sequence).
OUT_FOLDER = "overlap_plots"

In [6]:
# Overlap ICC plots: for every augmentation type and sequence, combine the internal and the
# external ICC rows by taking the column-wise minimum per (feat_family, feature, filter), then
# plot the "original" features next to the filtered variant with the highest combined ci_down.
for aug_type in AUG_TYPES:

    # Sorting fixes the row order, and therefore the x-axis order of the plots.
    int_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_internal.csv"), index_col=0).sort_values(by=["feat_family","feature", "filter"])
    ext_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_external.csv"), index_col=0).sort_values(by=["feat_family","feature", "filter"])

    # "stable" is 1 when the lower bound of the ICC confidence interval exceeds 0.9.
    int_df["full_feature"] = int_df["feat_family"]+"_"+int_df["feature"]
    int_df["stable"] = (int_df["ci_down"]>0.9).astype(int)

    ext_df["full_feature"] = ext_df["feat_family"]+"_"+ext_df["feature"]
    ext_df["stable"] = (ext_df["ci_down"]>0.9).astype(int)

    for sequence in SOI:

        out_path = os.path.join(OUT_DIR,OUT_FOLDER,sequence)
        os.makedirs(out_path, exist_ok=True)

        # External rows are matched on the part of the sequence name before the first
        # underscore (e.g. "sub_win" -> "sub").
        seq_df = {
            "internal": int_df[int_df.sequence==sequence].reset_index(drop=True),
            "external": ext_df[ext_df.sequence==sequence.split("_")[0]].reset_index(drop=True),
        }

        # Unfiltered features: column-wise minimum over the internal and the external row.
        uf_int = seq_df["internal"][seq_df["internal"]["filter"]=="original"]
        uf_ext = seq_df["external"][seq_df["external"]["filter"]=="original"]
        uf_overlap = pd.concat([uf_int,uf_ext]).groupby(["feat_family","feature","filter"]).min().reset_index()

        # Rows whose stable flag equals the per-feature maximum over all filters in the
        # internal data: selects the best filter(s); duplicates can exist.
        # The string "max" replaces the builtin callable, which pandas has deprecated here.
        idx = seq_df["internal"].groupby(["feat_family","feature"])["stable"].transform("max")==seq_df["internal"]["stable"]

        # NOTE(review): idx is indexed like the internal frame; applying it to the external
        # frame assumes both frames list the same rows in the same order -- confirm.
        bf_int = seq_df["internal"][idx&(seq_df["internal"]["filter"]!="original")]
        bf_ext = seq_df["external"][idx&(seq_df["external"]["filter"]!="original")]

        bf_overlap = pd.concat([bf_int,bf_ext]).groupby(["feat_family","feature","filter"]).min().reset_index()

        # Per feature, keep the filter(s) with the highest combined ci_down; ties are all kept.
        is_best = bf_overlap.groupby(["feat_family","feature"])["ci_down"].transform("max")==bf_overlap["ci_down"]
        max_bf_overlap = bf_overlap[is_best]

        overlap_df = pd.concat([uf_overlap, max_bf_overlap])
        # NOTE(review): the group list is built from bf_overlap, so filters that were not
        # retained in max_bf_overlap still get an (empty) series and a legend entry.
        groups = ["original"] + sorted(bf_overlap["filter"].unique())

        fig, ax = plt.subplots(figsize=(12,8))

        for group in groups:

            group_df = overlap_df[overlap_df["filter"]==group]

            x = group_df.full_feature
            y = group_df.icc_value

            lower_ci = group_df.ci_down
            upper_ci = group_df.ci_up

            # errorbar expects distances from the point, not absolute bounds.
            lower_error = y - lower_ci
            upper_error = upper_ci - y

            ax.errorbar(x, y, yerr=[lower_error, upper_error], fmt='o', capsize=4, label=group)

            # Drawn on the explicit axes instead of the implicit pyplot "current" axes.
            ax.fill_between(x, lower_ci, upper_ci, alpha=0.2)

        # Dashed line at the 0.9 cut-off.
        ax.axhline(y=0.9, linestyle="--",color='green')
        ax.set_title('Original v/s Robust Filtered Features')

        ax.set_xlabel('Features')
        ax.tick_params(axis='x', labelrotation=90, labelsize=10)

        ax.set_ylabel('ICC Value')
        ax.set_ylim([0,1])
        ax.legend(title="Filters",loc='center left', bbox_to_anchor=(1.0, 0.5))

        fig.savefig(os.path.join(out_path,f"{aug_type}.png"),bbox_inches = 'tight',
            pad_inches = 0,dpi=300)

        # close the figure to avoid memory leak
        plt.close(fig)
  


# Heatmaps

In [7]:
# Sub-folder of OUT_DIR that receives the heatmaps (one further sub-folder per sequence).
OUT_FOLDER = "heatmaps"

# Cut-off applied to the heatmap values: cells above it are drawn as stable.
ICC_THRESHOLD = 0.90

In [8]:
# Shape of the heatmap array: one row per feature family, one column per feature slot.
MAP_DIM = (4,22)# 4 feature families, 72 features in total; 22 is the maximum possible number of members in a feature family

In [9]:
# Heatmaps: for every augmentation type and sequence, draw a 3x2 grid of heatmaps
# (rows: internal / external / overlap, columns: unfiltered / filtered) of the ci_down values,
# one heatmap row per feature family. Cells above ICC_THRESHOLD use the green palette, the
# remaining filled cells use the gray palette, and unused slots (-1) stay masked.

# The colormaps do not depend on the data, so they are built once.
cmap_stable = sns.light_palette("seagreen", as_cmap=True)
cmap_unstable = sns.dark_palette("gray",as_cmap=True)

for aug_type in AUG_TYPES:

    int_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_internal.csv"), index_col=0).sort_values(by=["feat_family","feature", "filter"])
    ext_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_external.csv"), index_col=0).sort_values(by=["feat_family","feature", "filter"])

    # The stable flag uses the same ICC_THRESHOLD as the heatmap colouring below, so the two
    # cannot drift apart.
    int_df["full_feature"] = int_df["feat_family"]+"_"+int_df["feature"]
    int_df["stable"] = (int_df["ci_down"]>ICC_THRESHOLD).astype(int)

    ext_df["full_feature"] = ext_df["feat_family"]+"_"+ext_df["feature"]
    ext_df["stable"] = (ext_df["ci_down"]>ICC_THRESHOLD).astype(int)

    for sequence in SOI:

        out_path = os.path.join(OUT_DIR,OUT_FOLDER,sequence)
        os.makedirs(out_path, exist_ok=True)

        # External rows are matched on the part of the sequence name before the first
        # underscore (e.g. "sub_win" -> "sub").
        seq_df = {
            "internal": int_df[int_df.sequence==sequence].reset_index(drop=True),
            "external": ext_df[ext_df.sequence==sequence.split("_")[0]].reset_index(drop=True),
        }

        # Unfiltered features: column-wise minimum over the internal and the external row.
        uf_int = seq_df["internal"][seq_df["internal"]["filter"]=="original"]
        uf_ext = seq_df["external"][seq_df["external"]["filter"]=="original"]
        uf_overlap = pd.concat([uf_int,uf_ext]).groupby(["feat_family","feature","filter"]).min().reset_index()

        # Rows whose stable flag equals the per-feature maximum over all filters in the
        # internal data: selects the best filter(s); duplicates can exist.
        # The string "max" replaces the builtin callable, which pandas has deprecated here.
        idx = seq_df["internal"].groupby(["feat_family","feature"])["stable"].transform("max")==seq_df["internal"]["stable"]

        # One row per feature: column-wise maximum over the selected filtered rows.
        # NOTE(review): idx is indexed like the internal frame; applying it to the external
        # frame assumes both frames list the same rows in the same order -- confirm.
        bf_int = seq_df["internal"][idx&(seq_df["internal"]["filter"]!="original")].groupby(["feat_family","feature"]).max().reset_index()
        bf_ext = seq_df["external"][idx&(seq_df["external"]["filter"]!="original")].groupby(["feat_family","feature"]).max().reset_index()
        bf_overlap = pd.concat([bf_int,bf_ext]).groupby(["feat_family","feature","filter"]).min().reset_index()

        map_dict = {
            "unfiltered":{"internal":uf_int,"external":uf_ext, "overlap":uf_overlap},
            "filtered":{"internal":bf_int, "external":bf_ext, "overlap":bf_overlap}
        }

        fig, axes = plt.subplots(3,2, figsize = (15,10))

        for i,stat in enumerate(map_dict):

            for j,ds_type in enumerate(map_dict[stat]):

                axes[j,i].set_title(stat+"_"+ds_type)

                temp_df = map_dict[stat][ds_type].sort_values(by = ["feat_family","feature"])

                # -1 marks slots without a feature.
                map_arr = np.full(MAP_DIM, -1.0)

                feat_families = sorted(temp_df.feat_family.unique())

                for k,feat_family in enumerate(feat_families):

                    arr = temp_df[temp_df.feat_family==feat_family].ci_down.to_numpy()
                    map_arr[k,:len(arr)] = arr

                # Collapse every value above the threshold to 1 so the two masks below can
                # separate stable cells (==1) from the rest.
                map_arr[map_arr>ICC_THRESHOLD] = 1

                # First pass draws only the stable cells, second pass only the filled,
                # non-stable cells; both passes share the same axes.
                sns.heatmap(map_arr, vmin=0, vmax=1, linewidths=2, mask=map_arr<1, cmap=cmap_stable, square=True, cbar=False, ax=axes[j,i])
                sns.heatmap(map_arr, vmin=0, vmax=1, linewidths=2, mask=(map_arr==-1)|(map_arr==1), square=True, cmap=cmap_unstable, cbar=False, yticklabels=feat_families, ax=axes[j,i])

        fig.tight_layout()
        fig.savefig(os.path.join(out_path,f"{aug_type}.png"),bbox_inches = 'tight',
            pad_inches = 0,dpi=300)

        # close the figure to avoid memory leak
        plt.close(fig)
  
            
    

# Bar Plot for Filters & Sequences

In [10]:
# Sub-folder of OUT_DIR that receives the bar plots.
OUT_FOLDER = "barplots"

In [11]:
# Bar plots: for every augmentation type, count per filter family and per filter how many of
# the rows selected through the internal data are robust (stable in the internal AND the
# external table), pooled over all sequences in SOI.
out_path = os.path.join(OUT_DIR,OUT_FOLDER)
os.makedirs(out_path, exist_ok=True)

# Theme and column selection do not depend on the loop variable, so they are set once.
sns.set_theme(style="whitegrid", palette="muted")
cols_to_keep = ["sequence","filter_family","filter","feature","stable"]

for aug_type in AUG_TYPES:

    int_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_internal.csv"), index_col=0).sort_values(by=["sequence","feat_family","feature","filter"])
    int_df["stable"] = (int_df["ci_down"]>0.9).astype(int)
    # The filter family is the part of the filter name before the first "-".
    int_df["filter_family"] = [name.split("-")[0] for name in int_df["filter"].values]

    ext_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_external.csv"), index_col=0).sort_values(by=["sequence","feat_family","feature","filter"])
    ext_df["stable"] = (ext_df["ci_down"]>0.9).astype(int)
    ext_df["filter_family"] = [name.split("-")[0] for name in ext_df["filter"].values]

    ov_dfs = []

    for sequence in SOI:

        # External rows are matched on the part of the sequence name before the first
        # underscore (e.g. "sub_win" -> "sub").
        seq_int_df = int_df[int_df.sequence==sequence].sort_values(by=["feat_family","feature","filter"]).reset_index(drop=True)
        seq_ext_df = ext_df[ext_df.sequence==sequence.split("_")[0]].sort_values(by=["feat_family","feature", "filter"]).reset_index(drop=True)

        # Rows whose stable flag equals the per-feature maximum over all filters in the
        # internal data: selects the best filter(s); duplicates can exist.
        # The string "max" replaces the builtin callable, which pandas has deprecated here.
        idx = seq_int_df.groupby(["feat_family","feature"])["stable"].transform("max")==seq_int_df["stable"]

        # NOTE(review): idx is indexed like the internal frame; applying it to the external
        # frame assumes both frames list the same rows in the same order -- confirm.
        _int_df = seq_int_df[idx][cols_to_keep]
        _ext_df = seq_ext_df[idx][cols_to_keep]

        # Robust = stable in both tables (index-aligned element-wise AND).
        bRobust = _int_df.stable&_ext_df.stable

        seq_ov_df = _int_df.copy()
        seq_ov_df["robust"] = bRobust.astype(int)

        ov_dfs.append(seq_ov_df)

    ov_df = pd.concat(ov_dfs)

    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12,6))

    # "original" is listed first, the remaining categories alphabetically.
    filter_families = list(set(ov_df.filter_family)-{"original"})
    sns.countplot(data=ov_df, y="filter_family", hue="robust", ax=ax1, order=["original"]+sorted(filter_families))
    filters = list(set(ov_df["filter"])-{"original"})
    sns.countplot(data=ov_df, y="filter", hue="robust", ax=ax2, order=["original"] + sorted(filters))

    fig.tight_layout()
    fig.savefig(os.path.join(out_path,f"{aug_type}.png"),bbox_inches = 'tight',
        pad_inches = 0,dpi=300)

    # close the figure to avoid memory leak
    plt.close(fig)
    
    
    #idx = (_int_df.stable==1)&(_ext_df.stable==1)
    
  
    
    
 


# Table: Dice Summary

In [12]:
# Root folder for the CSV tables written by the cells below.
# NOTE(review): this rebinds OUT_DIR, which the plotting cells above also read; re-running
# those cells after this one would write the figures under "./csv_files".
OUT_DIR = "./csv_files"

In [13]:
# Folder holding the "<aug_type>_<ds_type>.csv" feature tables read by the dice summary cell.
# The location can be overridden with the RADIOMICS_FEAT_DIR environment variable so the
# notebook also runs on other machines; the default keeps the original absolute path.
FEAT_DIR = os.environ.get(
    "RADIOMICS_FEAT_DIR",
    r"C:\Users\sithi\Research\INT-Projects\stability\radiomicsFeatures",
)

In [14]:
# Sub-folder of OUT_DIR that receives the dice summary tables.
OUT_FOLDER = "dice_summary"

In [15]:
# Dice summary: for each dataset type write one CSV with a row per augmentation type and, per
# sequence, the mean and standard deviation of the `dice` column (rounded to 2 decimals).
out_path = os.path.join(OUT_DIR, OUT_FOLDER)
os.makedirs(out_path, exist_ok=True)

for ds_type in ["internal","external"]:
    # Maps (ds_type, sequence, statistic) -> list with one value per augmentation type.
    out_df = {}

    for aug_type in AUG_TYPES:

        dice_df = pd.read_csv(os.path.join(FEAT_DIR, f"{aug_type}_{ds_type}.csv"), index_col=0)

        for sequence in dice_df.sequence.unique():

            # Select the sequence once and reuse it for both statistics.
            seq_dice = dice_df[dice_df.sequence==sequence].dice

            out_df.setdefault((ds_type, sequence, "mean"),[]).append(seq_dice.mean().round(2))
            out_df.setdefault((ds_type, sequence, "std"),[]).append(seq_dice.std().round(2))

    # NOTE(review): every list must end up with len(AUG_TYPES) entries, i.e. each file is
    # expected to contain the same set of sequences -- confirm.
    summary_df = pd.DataFrame(out_df,index=AUG_TYPES)
    summary_df.index.name = "augmentation"

    summary_df.to_csv(os.path.join(out_path,f"{ds_type}.csv"))
    

# Table - Overall across all the augmentation scenarios

In [16]:
# ICC cut-off value.
ICC_THRESHOLD = 0.90

# Feature families reported in the overall table, in column order.
FEAT_FAMILIES = ["firstorder", "glcm", "glrlm", "glszm"]
# Root folder and sub-folder for the overall tables.
OUT_DIR = "./csv_files"
OUT_FOLDER = "overall"

In [17]:
# Overall table: for every sequence write one CSV with a row per augmentation type and, per
# feature family plus "Overall", the fraction of features that are
#   O/S  - stable when unfiltered (internal table),
#   O/R  - stable when unfiltered in the internal/external overlap,
#   BF/S - stable for at least one selected filter (internal table),
#   BF/R - stable for at least one selected filter in the internal/external overlap.
int_dfs = []
ext_dfs = []

for aug_type in AUG_TYPES:

    int_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_internal.csv"), index_col=0)
    int_df["aug_type"] = aug_type
    int_df["stable"] = (int_df["ci_down"]>ICC_THRESHOLD).astype(int)
    int_dfs.append(int_df)

    ext_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_external.csv"), index_col=0)
    ext_df["aug_type"] = aug_type
    ext_df["stable"] = (ext_df["ci_down"]>ICC_THRESHOLD).astype(int)
    ext_dfs.append(ext_df)

merged_int_df = pd.concat(int_dfs).sort_values(by=["sequence","aug_type","feat_family","feature", "filter"]).reset_index(drop=True)
merged_ext_df = pd.concat(ext_dfs).sort_values(by=["sequence","aug_type","feat_family","feature", "filter"]).reset_index(drop=True)

out_path = os.path.join(OUT_DIR, OUT_FOLDER)
os.makedirs(out_path, exist_ok=True)

for sequence in SOI:

    dfs = []

    for aug_type in AUG_TYPES:

        # External rows are matched on the part of the sequence name before the first
        # underscore (e.g. "sub_win" -> "sub").
        int_df = merged_int_df[(merged_int_df.sequence==sequence)&(merged_int_df.aug_type==aug_type)].drop(["sequence","aug_type"], axis=1).reset_index(drop=True)
        ext_df = merged_ext_df[(merged_ext_df.sequence==sequence.split("_")[0])&(merged_ext_df.aug_type==aug_type)].drop(["sequence","aug_type"], axis=1).reset_index(drop=True)

        uf_int = int_df[int_df["filter"]=="original"].reset_index()
        uf_ext = ext_df[ext_df["filter"]=="original"].reset_index()

        # Column-wise minimum over the internal and the external row of each feature.
        uf_overlap = pd.concat([uf_int,uf_ext]).groupby(["feat_family","feature","filter"]).min().reset_index()

        # Best filter(s) or non-filter for each feature, judged on the internal data.
        # The string "max" replaces the builtin callable, which pandas has deprecated here.
        idx = int_df.groupby(["feat_family","feature"])["stable"].transform("max")==int_df["stable"]

        # The comparison must be parenthesised: `&` binds tighter than `!=`, so the previous
        # `idx&df["filter"]!="original"` was evaluated as `(idx & df["filter"]) != "original"`
        # and never applied the intended mask.
        # NOTE(review): idx is indexed like the internal frame; applying it to the external
        # frame assumes both frames list the same rows in the same order -- confirm.
        bf_int = int_df[idx&(int_df["filter"]!="original")]
        bf_ext = ext_df[idx&(ext_df["filter"]!="original")]
        bf_overlap = pd.concat([bf_int,bf_ext]).groupby(["feat_family","feature","filter"]).min().reset_index()

        # One row per feature: a feature counts as stable if any of its selected filters is.
        max_bf_overlap = bf_overlap.groupby(["feat_family","feature"]).max().reset_index()
        max_bf_int = bf_int.groupby(["feat_family","feature"]).max().reset_index()

        total_feats = 0

        row = {}

        # Running counts over all feature families, turned into fractions after the loop.
        overall_counts = {("O","S"):0, ("O","R"):0, ("BF","S"):0, ("BF","R"):0}

        for feat_family in FEAT_FAMILIES:

            # The number of unfiltered internal features is the denominator of every fraction.
            num_feats = len(uf_int[uf_int.feat_family==feat_family])

            family_counts = {
                ("O","S"): uf_int[uf_int.feat_family==feat_family].stable.sum(),
                ("O","R"): uf_overlap[uf_overlap.feat_family==feat_family].stable.sum(),
                ("BF","S"): max_bf_int[max_bf_int.feat_family==feat_family].stable.sum(),
                ("BF","R"): max_bf_overlap[max_bf_overlap.feat_family==feat_family].stable.sum(),
            }

            for (variant, measure), count in family_counts.items():
                row[(sequence, feat_family, variant, measure)] = [np.round(count/num_feats,2)]
                overall_counts[(variant, measure)] += count

            total_feats += num_feats

        for (variant, measure), count in overall_counts.items():
            row[(sequence, "Overall", variant, measure)] = np.round(count/total_feats,2)

        # Family columns first (insertion order), "Overall" columns last.
        columns = row.keys()
        column_order = [key for key in columns if "Overall" not in str(key)] + [key for key in columns if "Overall" in str(key)]

        dfs.append(pd.DataFrame(row,index=[aug_type]))

    out_df = pd.concat(dfs)
    out_df = out_df.reindex(columns=column_order)

    out_df.index.name = "augmentation"
    out_df.to_csv(os.path.join(out_path,f"{sequence}.csv"))
    
    #display(out_df)

        


# Table - Extracted Feature Overview

In [18]:
# Sub-folder of OUT_DIR that receives the extracted-feature overview table.
OUT_FOLDER = "feats_overview"

In [19]:
# Output folder for the feature overview; created (with parents) if missing.
# exist_ok=True replaces the separate existence check, which could race with another process.
out_path = os.path.join(OUT_DIR, OUT_FOLDER)
os.makedirs(out_path, exist_ok=True)

In [20]:
# Load one ICC table (internal, inout_plane_systematic) as the source of the feature overview.
overview_csv = os.path.join(DATA_DIR, "inout_plane_systematic_internal.csv")
df = pd.read_csv(overview_csv, index_col=0)

In [21]:
# Collect, per (feat_family, feature), the distinct filters present in the table ...
out_df = (
    df.groupby(["feat_family", "feature"])["filter"]
    .apply(set)
    .reset_index()
)
# ... store them as an alphabetically sorted list, and export the overview.
out_df["filter"] = out_df["filter"].apply(sorted)
out_df.to_csv(os.path.join(out_path, "output.csv"))

# Table - Robust Features for each sequence, exported as CSV

In [22]:
# ICC cut-off value.
ICC_THRESHOLD = 0.90
# Root folder for the CSV tables.
OUT_DIR = "./csv_files"

# Sub-folder of OUT_DIR that receives the robust-feature tables.
OUT_FOLDER = "robust_feats"

In [23]:
# Robust features: for every augmentation type write one CSV listing, per sequence and
# feature, the filters (including "original") whose stable flag is 1 in the column-wise
# minimum over the internal and the external table.
out_path = os.path.join(OUT_DIR, OUT_FOLDER)
os.makedirs(out_path, exist_ok=True)

for aug_type in AUG_TYPES:

    int_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_internal.csv"), index_col=0).sort_values(by=["feat_family","feature", "filter"])
    ext_df = pd.read_csv(os.path.join(DATA_DIR, f"{aug_type}_external.csv"), index_col=0).sort_values(by=["feat_family","feature", "filter"])

    # "stable" is 1 when the lower bound of the ICC confidence interval exceeds ICC_THRESHOLD.
    int_df["full_feature"] = int_df["feat_family"]+"_"+int_df["feature"]
    int_df["stable"] = (int_df["ci_down"]>ICC_THRESHOLD).astype(int)

    ext_df["full_feature"] = ext_df["feat_family"]+"_"+ext_df["feature"]
    ext_df["stable"] = (ext_df["ci_down"]>ICC_THRESHOLD).astype(int)

    dfs = []

    for sequence in SOI:

        # External rows are matched on the part of the sequence name before the first
        # underscore (e.g. "sub_win" -> "sub").
        seq_df = {
            "internal": int_df[int_df.sequence==sequence].reset_index(drop=True),
            "external": ext_df[ext_df.sequence==sequence.split("_")[0]].reset_index(drop=True),
        }

        uf_int = seq_df["internal"][seq_df["internal"]["filter"]=="original"]
        uf_ext = seq_df["external"][seq_df["external"]["filter"]=="original"]
        uf_overlap = pd.concat([uf_int,uf_ext]).groupby(["feat_family","feature","filter"]).min().reset_index()

        # Rows whose stable flag equals the per-feature maximum over all filters in the
        # internal data: selects the best filter(s); duplicates can exist.
        # The string "max" replaces the builtin callable, which pandas has deprecated here.
        idx = seq_df["internal"].groupby(["feat_family","feature"])["stable"].transform("max")==seq_df["internal"]["stable"]

        # NOTE(review): idx is indexed like the internal frame; applying it to the external
        # frame assumes both frames list the same rows in the same order -- confirm.
        bf_int = seq_df["internal"][idx&(seq_df["internal"]["filter"]!="original")]
        bf_ext = seq_df["external"][idx&(seq_df["external"]["filter"]!="original")]

        bf_overlap = pd.concat([bf_int,bf_ext]).groupby(["feat_family","feature","filter"]).min().reset_index()
        cf_overlap = pd.concat([bf_overlap, uf_overlap])

        # The min() above also reduces the `sequence` strings, so the internal name
        # (e.g. "sub_win") is replaced by the shorter external one ("sub") and the two
        # subtraction sequences become indistinguishable in the export. Restore the label of
        # the sequence actually being processed.
        cf_overlap["sequence"] = sequence

        out_df = pd.DataFrame(cf_overlap[cf_overlap.stable==1].groupby(["sequence","feat_family","feature"])["filter"].apply(set)).reset_index()
        out_df["filter"] = out_df["filter"].apply(sorted)
        dfs.append(out_df)

    out_df = pd.concat(dfs)
    out_df.to_csv(os.path.join(out_path,f"{aug_type}.csv"))


# WIP

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np

# # Generate some sample data
# x = np.array([1, 2, 3, 4, 5])
# y = np.array([1.2, 2.3, 3.5, 4.1, 5.2])
# lower_error = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
# upper_error = np.array([0.2, 0.3, 0.4, 0.1, 0.5])

# # Calculate the confidence intervals
# lower_ci = y - lower_error
# upper_ci = y + upper_error

# # Plot the error bars
# plt.errorbar(x, y, yerr=[lower_error, upper_error], fmt='o', capsize=4)

# # Plot the confidence intervals
# plt.fill_between(x, lower_ci, upper_ci, alpha=0.2)

# # Set the plot title and axis labels
# plt.title('Error Bar Plot with Confidence Intervals')
# plt.xlabel('X')
# plt.ylabel('Y')

# # Show the plot
# plt.show()

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np

# # Generate some example data
# np.random.seed(1)
# x = np.arange(0, 10, 1)
# y1 = np.random.normal(loc=0, scale=1, size=10)
# y2 = np.random.normal(loc=2, scale=1, size=10)
# y3 = np.random.normal(loc=4, scale=1, size=10)
# y_err1 = np.random.normal(loc=0, scale=0.5, size=10)
# y_err2 = np.random.normal(loc=0, scale=0.5, size=10)
# y_err3 = np.random.normal(loc=0, scale=0.5, size=10)

# # Create a line plot with error bars for each group
# fig, ax = plt.subplots()
# ax.errorbar(x, y1, yerr=y_err1, fmt='-o', label='Group 1')
# ax.errorbar(x, y2, yerr=y_err2, fmt='-o', label='Group 2')
# ax.errorbar(x, y3, yerr=y_err3, fmt='-o', label='Group 3')
# ax.set_xlabel('X-axis label')
# ax.set_ylabel('Y-axis label')
# ax.set_title('Line plot with error bars for multiple groups')
# ax.legend()

# plt.show()


In [None]:
# import matplotlib.pyplot as plt
# import numpy as np

# # Generate some example data
# np.random.seed(1)
# x = np.arange(0, 10, 1)
# y1 = np.random.normal(loc=0, scale=1, size=10)
# y2 = np.random.normal(loc=2, scale=1, size=10)
# y3 = np.random.normal(loc=4, scale=1, size=10)
# y_err1 = np.random.normal(loc=0, scale=0.5, size=10)
# y_err2 = np.random.normal(loc=0, scale=0.5, size=10)
# y_err3 = np.random.normal(loc=0, scale=0.5, size=10)

# # Create a stacked line plot with confidence intervals and different markers
# fig, ax = plt.subplots()
# ax.plot(x, y1, marker='o', label='Group 1')
# ax.fill_between(x, y1-y_err1, y1+y_err1, alpha=0.2)
# ax.plot(x, y2, marker='s', label='Group 2')
# ax.fill_between(x, y2-y_err2, y2+y_err2, alpha=0.2)
# ax.plot(x, y3, marker='^', label='Group 3')
# ax.fill_between(x, y3-y_err3, y3+y_err3, alpha=0.2)
# ax.set_xlabel('X-axis label')
# ax.set_ylabel('Y-axis label')
# ax.set_title('Stacked line plot with confidence intervals and different markers')
# ax.legend()

# plt.show()




In [None]:
# groups = max_seq_df["filter"].unique()

# # Plot the error bars

# fig, ax = plt.subplots(figsize=(8, 12))

# for group in groups:
    
#     x = max_seq_df[max_seq_df["filter"]==group].icc_value
#     y = max_seq_df[max_seq_df["filter"]==group].full_feature
    
#     lower_ci = max_seq_df[max_seq_df["filter"]==group].ci_down
#     upper_ci = max_seq_df[max_seq_df["filter"]==group].ci_up
    
#     lower_error = x - lower_ci
#     upper_error = upper_ci - x
    
#     fmt = 'o' if group=="original" else '^'
#     ax.errorbar(x, y, xerr=[lower_error, upper_error], fmt=fmt, capsize=4, label=group)
#     #ax.errorbar(x, y, yerr=[lower_error, upper_error], fmt='o', capsize=4, label=group)

#     #Plot the confidence intervals
    
#     #if group=="original":
#     #plt.fill_between(y, lower_ci, upper_ci, alpha=0.2)
    
# ax.set_xlabel('ICC Value')
# ax.set_ylabel('ICC Value')
# ax.set_title('Line plot with error bars for multiple groups')
# ax.legend()

# # # Set the plot title and axis labels
# # plt.title('Error Bar Plot with Confidence Intervals')
# # plt.xlabel('X')
# # plt.ylabel('Y')

# # Show the plot

# plt.show()

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np

# # Generate some sample data
# x = np.array(["f1", "f2", "f3", "f4", "f5"])
# y1 = np.array([0.83, 0.93, 0.93, 0.89, 1.0])
# y2 = np.array([0.83, 0.92, 0.94, 0.90, 0.9])

# lower_ci = np.array([0.83, 0.90, 0.93, 0.84, 0.95])
# upper_ci = np.array([0.89, 0.95, 0.94, 0.91, 1.0])

# lower_error = y1 - lower_ci
# upper_error = upper_ci - y1


# # # Calculate the confidence intervals
# # lower_ci = y - lower_error
# # upper_ci = y + upper_error

# # Plot the error bars
# plt.errorbar(x, y1, yerr=[lower_error, upper_error], fmt='o', capsize=4, label="Group 1")

# #Plot the confidence intervals
# plt.fill_between(x, lower_ci, upper_ci, alpha=0.2)

# # Set the plot title and axis labels
# plt.title('Error Bar Plot with Confidence Intervals')
# plt.xlabel('X')
# plt.ylabel('Y')

# # Show the plot
# plt.show()

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np

# # Generate some sample data
# x = np.array(['A', 'B', 'C', 'D', 'E'])
# y = np.array([1.2, 2.3, 3.5, 4.1, 5.2])
# lower_ci = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
# upper_ci = np.array([0.2, 0.3, 0.4, 0.1, 0.5])

# # Set the figure size and font style
# plt.figure(figsize=(8, 6))
# plt.rcParams['font.family'] = 'sans-serif'
# plt.rcParams['font.size'] = 12

# # Plot the error bars
# plt.errorbar(x, y, yerr=[lower_ci, upper_ci], fmt='o', capsize=4,
#              color='black', ecolor='gray', elinewidth=1, capthick=1)

# # Set the plot title and axis labels
# plt.title('Error Bars with Categorical X-Axis')
# plt.xlabel('Category')
# plt.ylabel('Value')

# # Adjust the plot limits and ticks
# plt.ylim([np.min(y) - 0.5, np.max(y) + 0.5])
# plt.yticks(np.arange(np.min(y), np.max(y) + 0.5, 0.5))

# # Show the plot
# plt.show()