In [None]:
from autogluon.tabular import TabularPredictor
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Integer class code -> human-readable class name.
# Insertion order is kept as originally written (not sorted by code).
label_codes = {
    8: 'retinoid receptor agonist',
    9: 'topoisomerase inhibitor',
    0: 'ATPase inhibitor',
    10: 'tubulin polymerization inhibitor',
    6: 'dmso',
    7: 'protein synthesis inhibitor',
    5: 'PARP inhibitor',
    1: 'Aurora kinase inhibitor',
    3: 'HSP inhibitor',
    2: 'HDAC inhibitor',
    4: 'JAK inhibitor',
}

In [None]:
# Load the saved AutoGluon predictor stored under "AutoGluon_training_full_BF_good".
predictor = TabularPredictor.load("/home/jovyan/share/data/analyses/benjamin/Single_cell_supervised/AutoGluon_training_full_BF_good")

In [None]:
# Load the saved predictor stored under "AutoGluon_training_full_BF_3".
# NOTE(review): predictor_best is not referenced by any other cell visible in this notebook — confirm it is still needed.
predictor_best = TabularPredictor.load("/home/jovyan/share/data/analyses/benjamin/Single_cell_supervised/AutoGluon_training_full_BF_3")

In [None]:
# Load the saved predictor stored under "AutoGluon_training_full_BF_CP_good".
# Presumably "CP" stands for CellProfiler features (later output paths contain "CellProfiler") — TODO confirm.
predictor_CP = TabularPredictor.load("/home/jovyan/share/data/analyses/benjamin/Single_cell_supervised/AutoGluon_training_full_BF_CP_good")

In [None]:
# Display the leaderboard of the models held by `predictor` (no test data passed).
predictor.leaderboard()

In [None]:
# Display the leaderboard of the models held by `predictor_CP` (no test data passed).
predictor_CP.leaderboard()

In [None]:
# Split the leaderboard into models with and without a validation score.
leaderboard = predictor.leaderboard(silent=True)
has_val_score = leaderboard['score_val'].notna()
models_to_keep = leaderboard.loc[has_val_score]

# Model names as sets: everything on the leaderboard vs. the scored subset.
all_models = set(leaderboard['model'])
models_to_keep_set = set(models_to_keep['model'])

# Whatever has no validation score is a deletion candidate.
models_to_delete = list(all_models.difference(models_to_keep_set))

In [None]:
# Restrict `predictor` to the models that have a validation score (models_to_keep_set).
# This mutates `predictor`, so later cells see the reduced model set.
# delete_from_disk=False is passed, so the saved artifacts are presumably left untouched on disk — confirm against the AutoGluon docs.
predictor.delete_models(models_to_keep= list(models_to_keep_set),  delete_from_disk=False, dry_run= False)

In [None]:
# Fit summaries of both predictors; plot_top_n_tabular_models_matplotlib reads the
# "model_performance" and "leaderboard" entries of such a summary.
fit_summary = predictor.fit_summary()
fit_summary_CP = predictor_CP.fit_summary()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np 

def plot_top_n_tabular_models_matplotlib(results, n_models=10, show_plot=True, save_file=None):
    """
    Scatter-plot validation score against inference latency for the best-scoring models.

    Parameters:
    - results: Mapping with a "model_performance" entry (model name -> validation
      score, None for unscored models) and a "leaderboard" DataFrame that has
      'model' and 'pred_time_val' columns (the shape produced by
      `TabularPredictor.fit_summary()`).
    - n_models: Maximum number of models to plot, ranked by validation score.
    - show_plot: If True, call `plt.show()` after the figure has been saved.
    - save_file: Optional path; when given, the figure is written there.

    Returns:
    - None. The figure is shown and/or saved as a side effect.
    """
    # Only models that actually have a validation score can be ranked.
    model_performance = {k: v for k, v in results["model_performance"].items() if v is not None}

    # Index the leaderboard by model name so latencies are looked up per model
    # instead of relying on the leaderboard's row order matching the ranking.
    leaderboard = results["leaderboard"].drop_duplicates(subset="model").set_index("model")

    # Rank by validation score; skip models the leaderboard has no timing for.
    candidates = [name for name in model_performance if name in leaderboard.index]
    model_names = sorted(candidates, key=model_performance.get, reverse=True)[:n_models]

    val_perfs = [model_performance[name] for name in model_names]
    inference_latency = leaderboard['pred_time_val'].reindex(model_names).to_numpy()

    # One distinct viridis colour per plotted model.
    colors = plt.cm.viridis(np.linspace(0, 1, len(model_names)))

    fig, ax = plt.subplots(figsize=(10, 6))

    # Each model is its own scatter call so it gets its own legend entry.
    for color, model_name, latency, score in zip(colors, model_names, inference_latency, val_perfs):
        ax.scatter(latency, score, color=color, label=model_name, s=100)

    ax.set_xlabel('Inference Latency [s]')
    ax.set_ylabel('Accuracy')
    # Report the number of models actually plotted, which may be below n_models.
    ax.set_title(f"Top {len(model_names)} Models during fit()")

    # Legend outside the axes; skipped when there is nothing to list.
    if model_names:
        ax.legend(title="Model", bbox_to_anchor=(1.05, 1), loc='upper left')

    fig.tight_layout()

    # Save BEFORE showing: with some backends plt.show() releases the figure,
    # so a later savefig would write an empty canvas.
    if save_file:
        fig.savefig(save_file, bbox_inches="tight")

    if show_plot:
        plt.show()


In [None]:
# Plot the 10 best-scoring models from the predictor_CP fit summary.
# NOTE(review): both plotting cells use fit_summary_CP; `fit_summary` is never plotted — confirm this is intended.
plot_top_n_tabular_models_matplotlib(fit_summary_CP,  n_models=10)

In [None]:
# Same plot as the previous cell, restricted to the 8 best-scoring models.
plot_top_n_tabular_models_matplotlib(fit_summary_CP,  n_models=8)

In [None]:
# Test split CSV stored alongside the "AutoGluon_training_full_BF_good" predictor.
# NOTE(review): test_set is not used by any other cell visible in this notebook.
test_set = pd.read_csv("/home/jovyan/share/data/analyses/benjamin/Single_cell_supervised/AutoGluon_training_full_BF_good/test_split_ALL.csv")

In [None]:
# Test split CSV stored alongside the "AutoGluon_training_full_BF_CP_good" predictor.
test_set_CP = pd.read_csv("/home/jovyan/share/data/analyses/benjamin/Single_cell_supervised/AutoGluon_training_full_BF_CP_good/test_split_ALL.csv")

In [None]:
# Leaderboard of predictor_CP with test_set_CP passed as evaluation data.
predictor_CP.leaderboard(test_set_CP, silent = True)

In [None]:
def evaluate_models_and_display_summary(test_data, label_column, predictor, top_n=10, model_name=None, output_dir=None):
    """
    Evaluate selected models on the test set, show their metrics, and save the results.

    For every evaluated model this prints the accuracy, displays the
    classification report, and saves a row-normalised (percentage) confusion
    matrix as a PNG. The reports of all models are concatenated, written to
    one CSV file, and returned.

    Parameters:
    - test_data: The test DataFrame (features plus the label column).
    - label_column: The name of the label column in test_data.
    - predictor: The trained AutoGluon TabularPredictor object.
    - top_n: Number of leaderboard models to evaluate (default is 10). Only used
      when model_name is None.
    - model_name: Optional model name or list of model names to evaluate instead
      of the first top_n leaderboard entries.
    - output_dir: Directory receiving the confusion-matrix PNGs and the CSV
      report. Defaults to the project's autogluon_results directory.

    Returns:
    - DataFrame with one row per (model, metric); the 'metric' column holds the
      class label or aggregate name and 'model' holds the model name.

    Raises:
    - ValueError: if there are no models to evaluate.
    """
    if output_dir is None:
        output_dir = "/home/jovyan/share/data/analyses/benjamin/Single_cell_supervised/BF_MOA/CellProfiler/autogluon_results"

    y_test = test_data[label_column]
    X_test = test_data.drop(labels=[label_column], axis=1)

    # Decide which models to evaluate.
    if model_name is None:
        leaderboard = predictor.leaderboard(test_data, silent=True)
        model_names = leaderboard['model'][:top_n].to_list()
    elif isinstance(model_name, str):
        # A bare string would otherwise be iterated character by character.
        model_names = [model_name]
    else:
        model_names = list(model_name)

    if not model_names:
        raise ValueError("No models to evaluate.")

    # The confusion matrix is built with labels=predictor.class_labels, so the
    # tick labels must follow that same order and length, not the classes
    # that happen to occur in y_test.
    class_labels = list(predictor.class_labels)
    string_labels = [label_codes.get(label, str(label)) for label in class_labels]

    all_reports = []
    for model in model_names:
        y_pred = predictor.predict(X_test, model=model)
        report = classification_report(y_test, y_pred, output_dict=True)
        report_df = pd.DataFrame(report).transpose()
        accuracy = accuracy_score(y_test, y_pred)

        print(f"Model: {model}, Accuracy: {accuracy:.4f}")
        display(report_df)

        # Tag the rows with the model only after displaying the plain report.
        all_reports.append(report_df.assign(model=model))

        # Row-normalise to percentages; classes with no true samples stay 0
        # instead of producing NaN through a division by zero.
        cm = confusion_matrix(y_test, y_pred, labels=class_labels)
        row_totals = cm.sum(axis=1, keepdims=True)
        cm_percentage = np.divide(
            cm * 100.0,
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )

        disp = ConfusionMatrixDisplay(confusion_matrix=cm_percentage, display_labels=string_labels)
        fig, ax = plt.subplots(figsize=(10, 7))
        disp.plot(cmap=plt.cm.Blues, ax=ax, values_format='.2f')
        ax.set_title(f'Confusion Matrix for {model}')
        ax.set_xlabel('Predicted label')
        ax.set_ylabel('True label')
        fig.savefig(f"{output_dir}/specs5k_all_confusion_{model}.png")

        # Render the figure next to its report, then release it so figures do
        # not accumulate in memory across models.
        display(fig)
        plt.close(fig)

    all_reports_df = pd.concat(all_reports, axis=0).reset_index().rename(columns={'index': 'metric'})
    output_path = f'{output_dir}/classification_report_specs5k.csv'
    all_reports_df.to_csv(output_path, index=False)
    return all_reports_df

In [None]:
# Evaluate three explicitly named models of predictor_CP on the CP test split.
# Because model_name is given, the function does not use top_n (see its if/else on model_name).
autogl_res = evaluate_models_and_display_summary(test_set_CP, "label", predictor= predictor_CP,top_n=10, model_name= ["LightGBMXT_BAG_L1_FULL", "XGBoost_BAG_L2_FULL", "WeightedEnsemble_L3_FULL"])

## Compare MLP to AutoGluon

In [None]:
# Per-split MLP classification reports, keyed "split_1" .. "split_3".
# All three files share one naming pattern; only the split directory differs.
mlp_res = {
    f"split_{split_no}": pd.read_csv(
        f"/share/data/analyses/benjamin/Single_cell_supervised/BF_MOA/CellProfiler/mlp_results/split_{split_no}/classification_report_lr0.001_ep100_layers[200, 100]_dropout0.6.csv"
    )
    for split_no in (1, 2, 3)
}

In [None]:
# The label column of each report was read in as "Unnamed: 0"; rename it to
# "metric" so it matches the column name used in the AutoGluon report frame.
for split_name in list(mlp_res):
    mlp_res[split_name] = mlp_res[split_name].rename(columns={"Unnamed: 0": "metric"})

In [None]:
# Average the per-split MLP reports into one report with a row per metric.
keep_column = "metric"

# Stack all splits and average per metric name, so rows are matched by their
# label rather than by row position (which silently misaligns if one split's
# report has its rows in a different order). sort=False keeps the first-seen
# row order; numeric_only=True restricts the mean to the numeric columns.
average_df = (
    pd.concat(list(mlp_res.values()), ignore_index=True)
    .groupby(keep_column, sort=False)
    .mean(numeric_only=True)
    .reset_index()
)

# Keep the label column first, followed by the averaged numeric columns.
average_mlp = average_df[[keep_column] + [col for col in average_df.columns if col != keep_column]]


In [None]:
# Display the averaged MLP classification report.
average_mlp

In [None]:
def plot_f1_scores(df1, df2, label_codes, df1_label='DataFrame 1', save_path=None):
    """
    Plots per-class F1 scores from df1 next to those of every model in df2.

    Rows whose 'metric' value is not a class index ('accuracy', 'macro avg',
    'weighted avg') are left out. The class order is taken from df1, and each
    model in df2 is aligned to it by 'metric', so a model missing a class
    simply gets no bar for that class.

    Parameters:
    - df1: DataFrame with 'metric' and 'f1-score' columns for one model.
    - df2: DataFrame with 'metric', 'f1-score' and 'model' columns for
      multiple models.
    - label_codes: Dictionary mapping class indices to class names.
    - df1_label: Label for the bars corresponding to df1.
    - save_path: File path to save the figure. If None, the figure is not saved.
    """
    # Keep only the per-class rows; astype(str) makes the check safe for
    # integer or missing metric values as well.
    df1_metric = df1['metric'].astype(str)
    df1_filtered = df1[df1_metric.str.isnumeric()]
    class_ids = df1_filtered['metric'].astype(str).tolist()

    models_df2 = df2['model'].unique()

    fig, ax = plt.subplots(figsize=(14, 8), dpi=120)
    total_width = 0.8
    width_per_model = total_width / (len(models_df2) + 1)
    indices = np.arange(len(class_ids))

    # Centre of bar slot 0; with this offset the whole group of bars is
    # centred on its x tick instead of sitting half a bar width to the left.
    first_center = indices - total_width / 2 + width_per_model / 2

    # Plot F1 scores for df1
    ax.bar(first_center, df1_filtered['f1-score'].to_numpy(), width_per_model, label=df1_label, color='skyblue')

    # Plot F1 scores for each model in df2, aligned to df1's class order
    for i, model in enumerate(models_df2, start=1):
        model_rows = df2[df2['model'] == model]
        model_f1 = (
            model_rows.assign(metric=model_rows['metric'].astype(str))
            .drop_duplicates(subset='metric')
            .set_index('metric')['f1-score']
            .reindex(class_ids)
        )
        ax.bar(first_center + i * width_per_model, model_f1.to_numpy(), width_per_model, label=model)

    # Set plot details
    ax.set_xlabel('Class')
    ax.set_ylabel('F1 Score')
    ax.set_title('F1 Scores by Class')
    ax.set_xticks(indices)
    ax.set_xticklabels([label_codes.get(int(idx), f"Class {idx}") for idx in class_ids], rotation=90)
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    fig.tight_layout()

    # Save before showing; the file is written at 300 DPI, independent of the
    # 120 DPI used for the on-screen figure.
    if save_path:
        fig.savefig(save_path, bbox_inches='tight', dpi=300)

    plt.show()

In [None]:
# Compare the split-averaged MLP F1 scores with the evaluated AutoGluon models, class by class.
plot_f1_scores(average_mlp, autogl_res, label_codes, "Average MLP")

In [None]:
# Display the class code -> class name mapping for reference.
label_codes

In [None]:
# Display the combined classification reports returned by evaluate_models_and_display_summary.
autogl_res