## **Labelling algorithm**

### **Imports and Installs**

In [None]:
import os
import pandas as pd
import numpy as np
import networkx as nx
import seaborn as sns
from numpy.linalg import norm
import plotly.graph_objs as go
import matplotlib.pyplot as plt
from scipy.spatial import distance_matrix
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, AdaBoostClassifier, HistGradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
import xgboost as xgb
import lightgbm as lgb
# import catboost as cb
import lightgbm as lgba
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.linear_model import RidgeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


from scipy.spatial.distance import cdist

import warnings
# Setting the warnings to be ignored
warnings.filterwarnings('ignore')

### **Google Drive connection**

In [None]:
# Mount Google Drive at /content/drive so the data folder used below is reachable.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime — confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### **Upload files**

In [None]:
# Folder on the mounted Drive; it is passed to load_and_combine_data as the directory holding the CSV files.
path = '/content/drive/Shared drives/TFGs Coronarias 2024_25/Maren/Data/Skeletonization/Dataframes/'

### **Functions**

In [None]:
def load_and_combine_data(output_dir, segment_type):
    """
    Loads and combines all CSV files for a given segment type ('rca' or 'lca'),
    keeping the case number and condition (normal or diseased).

    Only files named ``df_<segment_type>_*.csv`` are read. Files are processed in
    sorted name order so the row order of the result is reproducible between runs
    (``os.listdir`` returns entries in arbitrary order).

    Parameters:
    - output_dir (str): Directory containing the CSV files.
    - segment_type (str): 'rca' or 'lca'.

    Returns:
    - pd.DataFrame: Combined DataFrame with all loaded data. The columns
      "Px", "Py", "Pz", "label", "case", "condition" come first (filled with NaN
      if a file lacks one of them), followed by any other columns found in the
      files. An empty DataFrame with those six columns is returned when no file
      matches.

    Raises:
    - ValueError: If a matching file name contains no digits to use as case number.
    """
    base_columns = ["Px", "Py", "Pz", "label", "case", "condition"]
    prefix = f"df_{segment_type}_"
    suffix = ".csv"

    # Collect the per-file frames and concatenate once at the end: concatenating
    # onto an empty object-dtype frame inside the loop copies the data repeatedly
    # and can degrade the numeric columns to object dtype.
    frames = []

    for file_name in sorted(os.listdir(output_dir)):
        if not (file_name.startswith(prefix) and file_name.endswith(suffix)):
            continue

        df = pd.read_csv(os.path.join(output_dir, file_name))

        # Extract the case number (e.g., df_rca_n5.csv -> 5, df_lca_d12.csv -> 12).
        # Only the part after the prefix is inspected so that digits appearing in
        # the prefix can never end up in the case number.
        stem = file_name[len(prefix):-len(suffix)]
        case_digits = ''.join(filter(str.isdigit, stem))
        if not case_digits:
            raise ValueError(f"Cannot extract a case number from file name '{file_name}'")
        df["case"] = int(case_digits)

        # Determine the condition (e.g., 'n' -> Normal, 'd' -> Diseased)
        if f"_{segment_type}_n" in file_name:
            condition = "Normal"
        elif f"_{segment_type}_d" in file_name:
            condition = "Diseased"
        else:
            condition = "Unknown"  # Fallback for unexpected cases

        df["condition"] = condition
        frames.append(df)

    if not frames:
        return pd.DataFrame(columns=base_columns)

    combined_df = pd.concat(frames, ignore_index=True)

    # Keep the documented columns first, then whatever else the CSV files carried.
    extra_columns = [col for col in combined_df.columns if col not in base_columns]
    return combined_df.reindex(columns=base_columns + extra_columns)

In [None]:
def visualize_case(df, case, condition):
    """
    Draws the points of one case/condition as a 3D scatter plot, one trace per true label.

    Only the 'label' column is plotted; no predictions are shown by this function.

    Parameters:
    df (pd.DataFrame): DataFrame with the columns 'Px', 'Py', 'Pz', 'label', 'case' and 'condition'.
    case (str or int): The case identifier to visualize.
    condition (str): The condition identifier to visualize.

    Returns:
    None. If no row matches, a message is printed and nothing is plotted.
    """
    # Keep only the rows of the requested case and condition
    selection_mask = (df['case'] == case) & (df['condition'] == condition)
    subset = df[selection_mask]

    if subset.empty:
        print(f"No data found for case {case} and condition {condition}.")
        return

    # One evenly spaced value in [0, 0.5] per label, in order of first appearance
    unique_labels = subset['label'].unique()
    shade_by_label = dict(zip(unique_labels, np.linspace(0, 0.5, len(unique_labels))))

    fig = go.Figure()

    for lbl in unique_labels:
        pts = subset[subset['label'] == lbl]
        marker_style = dict(size=3, color=shade_by_label[lbl], colorscale='Blues', opacity=0.6)
        trace = go.Scatter3d(
            x=pts['Px'],
            y=pts['Py'],
            z=pts['Pz'],
            mode='markers',
            marker=marker_style,
            name=f'True Label {lbl}'
        )
        fig.add_trace(trace)

    def _axis(axis_title):
        # Shared styling for the three scene axes
        return dict(title=axis_title, backgroundcolor='white', gridcolor='lightgrey', showbackground=True)

    scene_config = dict(
        xaxis=_axis('Px'),
        yaxis=_axis('Py'),
        zaxis=_axis('Pz'),
        aspectmode='data'
    )
    fig.update_layout(
        title=f'True Labels for Case {case} ({condition})',
        scene=scene_config,
        margin=dict(l=10, r=10, b=10, t=50),
        height=800
    )

    fig.show()


In [None]:
def visualize_changes(df, case, condition):
    """
    Draws the points of one case/condition as a 3D scatter plot, one trace per 'label_change' value.

    Parameters:
    df (pd.DataFrame): DataFrame with the columns 'Px', 'Py', 'Pz', 'label_change', 'case' and 'condition'.
    case (str or int): The case identifier to visualize.
    condition (str): The condition identifier to visualize.

    Returns:
    None. If no row matches, a message is printed and nothing is plotted.
    """
    # Keep only the rows of the requested case and condition
    is_selected = (df['case'] == case) & (df['condition'] == condition)
    selected = df[is_selected]

    if selected.empty:
        print(f"No data found for case {case} and condition {condition}.")
        return

    # One evenly spaced value in [0, 0.5] per change id, in order of first appearance
    change_ids = selected['label_change'].unique()
    shade_by_change = dict(zip(change_ids, np.linspace(0, 0.5, len(change_ids))))

    fig = go.Figure()

    for change_id in change_ids:
        pts = selected[selected['label_change'] == change_id]
        marker_style = dict(size=3, color=shade_by_change[change_id], colorscale='Blues', opacity=0.6)
        fig.add_trace(go.Scatter3d(
            x=pts['Px'],
            y=pts['Py'],
            z=pts['Pz'],
            mode='markers',
            marker=marker_style,
            name=f'Change Point {change_id}'
        ))

    def _axis(axis_title):
        # Shared styling for the three scene axes
        return dict(title=axis_title, backgroundcolor='white', gridcolor='lightgrey', showbackground=True)

    fig.update_layout(
        title=f'Labels for Case {case} ({condition})',
        scene=dict(xaxis=_axis('Px'), yaxis=_axis('Py'), zaxis=_axis('Pz'), aspectmode='data'),
        margin=dict(l=10, r=10, b=10, t=50),
        height=800
    )

    fig.show()

In [None]:
def visualize_changes_predictions(df, case, condition):
    """
    Draws true and predicted label changes of one case/condition in a single 3D scatter plot.

    True changes ('label_change') use the 'Blues' colourscale with values in [0, 0.5];
    predicted changes ('pred_change') use the 'Reds' colourscale with values in [0.5, 1].

    NOTE: a later cell of this notebook defines another function with the same name;
    once that cell has run, it replaces this definition.

    Parameters:
    df (pd.DataFrame): DataFrame with the columns 'Px', 'Py', 'Pz', 'label_change', 'pred_change', 'case' and 'condition'.
    case (str or int): The case identifier to visualize.
    condition (str): The condition identifier to visualize.

    Returns:
    None. If no row matches, a message is printed and nothing is plotted.
    """
    # Keep only the rows of the requested case and condition
    is_selected = (df['case'] == case) & (df['condition'] == condition)
    selected = df[is_selected]

    if selected.empty:
        print(f"No data found for case {case} and condition {condition}.")
        return

    # Distinct change ids, in order of first appearance
    true_ids = selected['label_change'].unique()
    pred_ids = selected['pred_change'].unique()

    # Separate value ranges so the two groups do not share shades
    true_shades = dict(zip(true_ids, np.linspace(0, 0.5, len(true_ids))))
    pred_shades = dict(zip(pred_ids, np.linspace(0.5, 1, len(pred_ids))))

    fig = go.Figure()

    # Ground-truth change points
    for change_id in true_ids:
        pts = selected[selected['label_change'] == change_id]
        marker_style = dict(size=3, color=true_shades[change_id], colorscale='Blues', opacity=0.6)
        fig.add_trace(go.Scatter3d(
            x=pts['Px'],
            y=pts['Py'],
            z=pts['Pz'],
            mode='markers',
            marker=marker_style,
            name=f'True Label Change {change_id}'
        ))

    # Predicted change points
    for change_id in pred_ids:
        pts = selected[selected['pred_change'] == change_id]
        marker_style = dict(size=3, color=pred_shades[change_id], colorscale='Reds', opacity=0.6)
        fig.add_trace(go.Scatter3d(
            x=pts['Px'],
            y=pts['Py'],
            z=pts['Pz'],
            mode='markers',
            marker=marker_style,
            name=f'Predicted Label Change {change_id}'
        ))

    def _axis(axis_title):
        # Shared styling for the three scene axes
        return dict(title=axis_title, backgroundcolor='white', gridcolor='lightgrey', showbackground=True)

    fig.update_layout(
        title=f'Predictions vs True Labels changes for Case {case} ({condition})',
        scene=dict(xaxis=_axis('Px'), yaxis=_axis('Py'), zaxis=_axis('Pz'), aspectmode='data'),
        margin=dict(l=10, r=10, b=10, t=50),
        height=800
    )

    fig.show()


In [None]:
# Function to identify label changes
def add_label_change_column(df):
    """
    Adds a column 'label_change' that marks the rows where 'label' changes.

    Within each ('case', 'condition') group, every row whose 'label' differs from
    the previous row of the same group is numbered 1, 2, 3, ... in row order.
    All other rows (including the first row of each group) get 0.

    Parameters:
    - df (pd.DataFrame): Must contain the columns 'case', 'condition' and 'label'.
      The frame is modified in place.

    Returns:
    - pd.DataFrame: The same DataFrame object, with the 'label_change' column added.
    """
    # Initialize the column with zeros
    df['label_change'] = 0

    # Iterate over each combination of 'case' and 'condition'
    for _, group in df.groupby(['case', 'condition']):
        # diff() is NaN on the first row of the group; fillna(0) turns that into
        # "no change", so the mask is True only at real label transitions.
        is_change = group['label'].diff().fillna(0) != 0
        change_indices = group.index[is_change].tolist()

        # Number every transition, starting at 1 for each group. The first row of
        # the group is already excluded by the mask, so nothing must be skipped
        # here (skipping one more entry would lose the first real transition).
        for position, idx in enumerate(change_indices, start=1):
            df.loc[idx, 'label_change'] = position

    return df

In [None]:
def plot_classification_report_heatmap(y_true, y_pred, label_encoder, figsize=(12, 8)):
    """
    Generate and visualize a classification report as a heatmap.

    The heatmap shows precision, recall and f1-score for every class and for the
    averaged rows of the report. The 'accuracy' row and the 'support' column are
    left out.

    Parameters:
    - y_true (array-like): True labels of the test set.
    - y_pred (array-like): Predicted labels of the test set.
    - label_encoder (LabelEncoder): The label encoder used to encode the labels;
      its classes_ are used as display names.
    - figsize (tuple): Size of the figure for the heatmap.

    Returns:
    - None: Displays the heatmap.
    """
    # Generate the classification report as a dictionary
    report_dict = classification_report(
        y_true,
        y_pred,
        target_names=label_encoder.classes_.astype(str),
        output_dict=True
    )

    # Convert the classification report to a DataFrame (one row per report entry)
    report_df = pd.DataFrame(report_dict).transpose()

    # Select by name rather than by position: 'accuracy' is not the last row of
    # the report (the averaged rows follow it), so positional slicing would drop
    # the wrong row. errors="ignore" keeps this working if a key is not present.
    heatmap_df = (
        report_df
        .drop(index=["accuracy"], errors="ignore")
        .drop(columns=["support"], errors="ignore")
    )

    # Visualize the report using a heatmap
    plt.figure(figsize=figsize)
    sns.heatmap(
        heatmap_df,
        annot=True,
        cmap="coolwarm",
        cbar=True,
        fmt=".2f",
        linewidths=1,
        linecolor='white',  # White lines to separate cells
        annot_kws={"size": 12, "weight": 'bold', "color": 'black'}  # Larger and bold annotation text
    )

    # Set title and labels
    plt.title("Classification Report Heatmap", fontsize=16, weight='bold')
    plt.ylabel("Classes", fontsize=14)
    plt.xlabel("\nMetrics", fontsize=14)

    # Keep the tick labels horizontal for readability
    plt.xticks(rotation=0, fontsize=12)
    plt.yticks(rotation=0, fontsize=12)

    # Show the plot
    plt.tight_layout()
    plt.show()

In [None]:
def create_mst_with_basic_and_node_features(df):
    """
    Given a dataframe with columns ['Px', 'Py', 'Pz', 'case', 'condition'],
    returns a dataframe with additional features computed per (case, condition):
    - visit_order (BFS traversal order over the minimum spanning tree, root = 0)
    - node (position of the point inside its group)
    - distance_to_origin (Euclidean distance to the root point)
    - num_neighbors (degree in the minimum spanning tree)
    - path_length_to_root (graph hops)
    - geodesic_distance_to_root (sum of edge weights)

    The root of each group is the point with the largest Pz. The returned frame
    also carries 'case', 'condition', 'Px', 'Py', 'Pz'; its index is reset, so
    rows cannot be matched back to the input by index. An empty input yields an
    empty frame with the same columns.
    """
    feature_columns = [
        'visit_order', 'node', 'distance_to_origin', 'num_neighbors',
        'path_length_to_root', 'geodesic_distance_to_root',
        'case', 'condition', 'Px', 'Py', 'Pz',
    ]

    feature_dfs = []

    # Process every case independently
    for (case, condition), group in df.groupby(['case', 'condition']):
        points = group[['Px', 'Py', 'Pz']].values
        indices = group.index
        n_points = len(points)

        # Pairwise Euclidean distances
        dist_matrix = distance_matrix(points, points)

        # Build the complete weighted graph and extract its MST
        G_full = nx.complete_graph(n_points)
        for i in range(n_points):
            for j in range(i + 1, n_points):
                G_full.add_edge(i, j, weight=dist_matrix[i, j])

        MST = nx.minimum_spanning_tree(G_full)

        # Root = point with the largest Pz (the "highest" one)
        root_idx = points[:, 2].argmax()

        visit_order = np.zeros(n_points)
        path_length_to_root = np.zeros(n_points)
        geodesic_distance_to_root = np.zeros(n_points)
        num_neighbors = np.zeros(n_points)

        # The root keeps visit order 0, 0 hops and 0 distance.
        num_neighbors[root_idx] = len(MST[root_idx])

        # Single BFS pass: in a tree the path from the root to a node is unique,
        # so hops and geodesic distance of a child follow directly from its
        # parent. This avoids a separate shortest-path search for every node.
        for order, (parent, child) in enumerate(nx.bfs_edges(MST, root_idx), start=1):
            visit_order[child] = order
            num_neighbors[child] = len(MST[child])
            path_length_to_root[child] = path_length_to_root[parent] + 1
            geodesic_distance_to_root[child] = (
                geodesic_distance_to_root[parent] + MST[parent][child]['weight']
            )

        # Euclidean distance to the root point
        root_point = points[root_idx]
        distance_to_origin = np.linalg.norm(points - root_point, axis=1)

        # Assemble the feature frame for this group
        feature_df = pd.DataFrame({
            'visit_order': visit_order,
            'node': np.arange(n_points),
            'distance_to_origin': distance_to_origin,
            'num_neighbors': num_neighbors,
            'path_length_to_root': path_length_to_root,
            'geodesic_distance_to_root': geodesic_distance_to_root
        }, index=indices)

        # Add the case/condition and coordinate columns
        feature_df['case'] = case
        feature_df['condition'] = condition
        feature_df['Px'] = group['Px'].values
        feature_df['Py'] = group['Py'].values
        feature_df['Pz'] = group['Pz'].values

        feature_dfs.append(feature_df)

    # pd.concat raises on an empty list, so handle "no groups" explicitly
    if not feature_dfs:
        return pd.DataFrame(columns=feature_columns)

    return pd.concat(feature_dfs).reset_index(drop=True)


In [None]:
# Fixed hex colour per segment id (1-19). The comment on each entry gives the
# segment abbreviation and the colour name.
# NOTE(review): presumably keyed by the values of the 'label' column — confirm.
SEGMENT_COLORS = {
    1: '#006400',   # pRCA → dark green
    2: '#FFFF00',   # mRCA → yellow
    3: '#8B4513',   # dRCA → brown
    4: '#0000FF',   # R-PDA → blue
    5: '#FF0000',   # LM → red
    6: '#FFA500',   # pLAD → orange
    7: '#6B8E23',   # mLAD → olive drab (muted, earthy)
    8: '#A52A2A',   # dLAD → reddish brown
    9: '#7CFC00',   # D1 → lime green / neon green
    10: '#2F1B0C',  # D2 → very dark brown
    11: '#F5F5DC',  # pLCx → beige
    12: '#DFFF00',  # OM1 → lemon/lime yellow
    13: '#C8A2C8',  # LCx → lilac
    14: '#FFFFE0',  # OM2 → very light yellow
    15: '#DAA520',  # L-PDA → goldenrod
    16: '#CCFF00',  # R-PLB → phosphorescent yellow
    17: '#00008B',  # RI → dark blue
    18: '#B22222',  # L-PLB → firebrick red
    19: '#FF4500'   # tertiary → reddish orange
}


In [None]:
import plotly.graph_objects as go

# Fixed hex colour per change id, looked up by visualize_changes_predictions for
# both 'label_change' and 'pred_change' values; ids missing here fall back to black there.
CHANGE_COLORS = {
    0: '#40E0D0',  # turquoise (no change)
    1: '#1f77b4',  # blue
    2: '#ff7f0e',  # orange
    3: '#2ca02c',  # green
    4: '#d62728',  # red
    5: '#9467bd',  # purple
    6: '#8c564b',  # brown
    7: '#e377c2',  # pink
    8: '#7f7f7f',  # grey
    9: '#bcbd22',  # olive
    10: '#17becf'  # cyan
}


def visualize_changes_predictions(df, case, condition):
    """
    Plots true and predicted label changes of one case/condition in a 3D scatter plot.

    Every change id is drawn with the fixed colour from CHANGE_COLORS (black when the
    id is not listed there). True changes ('label_change') use large markers (size 12),
    predicted changes ('pred_change') use small markers (size 4).

    Parameters:
    df (pd.DataFrame): DataFrame with the columns 'Px', 'Py', 'Pz', 'label_change', 'pred_change', 'case' and 'condition'.
    case (str or int): The case identifier to visualize.
    condition (str): The condition identifier to visualize.

    Returns:
    None. If no row matches, a message is printed and nothing is plotted.
    """
    is_selected = (df['case'] == case) & (df['condition'] == condition)
    selected = df[is_selected]

    if selected.empty:
        print(f"No data found for case {case} and condition {condition}.")
        return

    fig = go.Figure()

    # Distinct, non-missing change ids in ascending order
    true_ids = sorted(selected['label_change'].dropna().unique())
    pred_ids = sorted(selected['pred_change'].dropna().unique())

    # Ground-truth transitions
    for change_id in true_ids:
        pts = selected[selected['label_change'] == change_id]
        marker_style = dict(size=12, color=CHANGE_COLORS.get(change_id, 'black'), opacity=0.8)
        fig.add_trace(go.Scatter3d(
            x=pts['Px'],
            y=pts['Py'],
            z=pts['Pz'],
            mode='markers',
            marker=marker_style,
            name=f'Transition {change_id}'
        ))

    # Predicted transitions
    for change_id in pred_ids:
        pts = selected[selected['pred_change'] == change_id]
        marker_style = dict(size=4, color=CHANGE_COLORS.get(change_id, 'black'), opacity=0.8)
        fig.add_trace(go.Scatter3d(
            x=pts['Px'],
            y=pts['Py'],
            z=pts['Pz'],
            mode='markers',
            marker=marker_style,
            name=f'Pred Change {change_id}'
        ))

    def _axis(axis_title):
        # Shared styling for the three scene axes
        return dict(title=axis_title, backgroundcolor='white', gridcolor='lightgrey', showbackground=True)

    fig.update_layout(
        title=f'Predictions vs True Label Changes for Case {case} ({condition})',
        scene=dict(xaxis=_axis('Px'), yaxis=_axis('Py'), zaxis=_axis('Pz'), aspectmode='data'),
        margin=dict(l=10, r=10, b=10, t=50),
        height=800,
        showlegend=True
    )

    fig.show()


#### **RCA LOGISTIC REGRESSION**

In [None]:
# Load the RCA data and number the label transitions of every case
df_rca = load_and_combine_data(path, segment_type="rca")
df_rca = add_label_change_column(df_rca)

# Add MST and node features
df_features_rca = create_mst_with_basic_and_node_features(df_rca)
# NOTE(review): the features are re-attached by coordinates; points duplicated
# within one case would multiply rows in this merge — confirm there are none.
df_rca_full = pd.merge(df_rca, df_features_rca, on=["case", "condition", "Px", "Py", "Pz"], how="inner")

# Train/test split per condition. Whole cases are assigned to one side so that
# points of the same case never appear in both train and test.
df_rca_train_list = []
df_rca_test_list = []

for condition in df_rca_full["condition"].unique():
    df_condition = df_rca_full[df_rca_full["condition"] == condition]
    unique_cases = df_condition["case"].unique()
    train_cases, test_cases = train_test_split(unique_cases, test_size=0.3, random_state=42)
    df_rca_train_list.append(df_condition[df_condition["case"].isin(train_cases)])
    df_rca_test_list.append(df_condition[df_condition["case"].isin(test_cases)])

df_rca_train = pd.concat(df_rca_train_list).copy()
df_rca_test = pd.concat(df_rca_test_list).copy()

# Feature selection
features = [
    "Px", "Py", "Pz",
    "visit_order", "node",
    "distance_to_origin", "num_neighbors",
    "path_length_to_root", "geodesic_distance_to_root"
]

X_rca_train = df_rca_train[features].values
y_rca_train = df_rca_train["label_change"].values
X_rca_test = df_rca_test[features].values
y_rca_test = df_rca_test["label_change"].values

# Standardize features (statistics come from the training split only)
scaler = StandardScaler()
X_rca_train = scaler.fit_transform(X_rca_train)
X_rca_test = scaler.transform(X_rca_test)

# Encode labels. The encoder is fitted on the full label vocabulary so that a
# change id occurring only in test cases does not make transform() fail.
label_encoder_rca = LabelEncoder()
label_encoder_rca.fit(df_rca_full["label_change"].values)
y_rca_train = label_encoder_rca.transform(y_rca_train)
y_rca_test = label_encoder_rca.transform(y_rca_test)

# Train Logistic Regression
print("Training Logistic Regression on RCA...")
logreg_rca = LogisticRegression(max_iter=1000, random_state=42)
logreg_rca.fit(X_rca_train, y_rca_train)

# Predictions
y_rca_pred_logreg = logreg_rca.predict(X_rca_test)

df_rca_test_logreg = df_rca_test.copy()
df_rca_test_logreg["pred_change"] = label_encoder_rca.inverse_transform(y_rca_pred_logreg)

# Evaluation. Report only the classes that occur in the test labels or the
# predictions; passing every encoder class as target_names fails as soon as one
# class is absent from the test split.
labels_rca_test = np.union1d(y_rca_test, y_rca_pred_logreg)
target_names_rca = label_encoder_rca.inverse_transform(labels_rca_test).astype(str)

print(classification_report(y_rca_test, y_rca_pred_logreg, labels=labels_rca_test, target_names=target_names_rca))

Training Logistic Regression on RCA...
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      4583
           1       0.00      0.00      0.00        12
           2       0.00      0.00      0.00         9
           3       0.00      0.00      0.00         8

    accuracy                           0.99      4612
   macro avg       0.25      0.25      0.25      4612
weighted avg       0.99      0.99      0.99      4612



In [None]:
# Visualize true vs. predicted change points for case 16 (Normal) of the RCA test predictions
visualize_changes_predictions(df_rca_test_logreg, 16, 'Normal')

In [None]:
# Display the RCA training split (coordinates, labels and MST features)
df_rca_train

Unnamed: 0,Px,Py,Pz,label,case,condition,label_change,visit_order,node,distance_to_origin,num_neighbors,path_length_to_root,geodesic_distance_to_root
1300,73.691309,72.542873,79.0625,1,3,Normal,0,46.0,0,23.309883,2.0,39.0,27.104312
1301,73.691309,72.925685,79.6875,1,3,Normal,0,45.0,1,22.643585,2.0,38.0,26.371393
1302,73.691309,73.308497,80.3125,1,3,Normal,0,44.0,2,21.981528,2.0,37.0,25.638474
1303,74.074121,73.308497,80.9375,1,3,Normal,0,43.0,3,21.337848,2.0,36.0,24.905555
1304,74.074121,73.308497,81.5625,1,3,Normal,0,42.0,4,20.865806,2.0,35.0,24.280555
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15032,128.266569,177.219682,47.1875,16,20,Diseased,0,362.0,362,95.325996,2.0,255.0,172.349467
15033,128.737272,177.690385,46.5625,16,20,Diseased,0,363.0,363,96.184016,2.0,256.0,173.262565
15034,129.207975,177.690385,46.5625,16,20,Diseased,0,364.0,364,96.383064,2.0,257.0,173.733268
15035,129.678678,177.690385,45.9375,16,20,Diseased,0,365.0,365,96.880804,2.0,258.0,174.515691


#### **LCA LOGISTIC REGRESSION**

In [None]:
from sklearn.utils.multiclass import unique_labels

# Load data and generate label_change for LCA
df_lca = load_and_combine_data(path, segment_type="lca")
df_lca = add_label_change_column(df_lca)

# Add MST and node features
df_features_lca = create_mst_with_basic_and_node_features(df_lca)
# NOTE(review): the features are re-attached by coordinates; points duplicated
# within one case would multiply rows in this merge — confirm there are none.
df_lca_full = pd.merge(df_lca, df_features_lca, on=["case", "condition", "Px", "Py", "Pz"], how="inner")

# Train/test split per condition. Whole cases are assigned to one side so that
# points of the same case never appear in both train and test.
df_lca_train_list = []
df_lca_test_list = []

for condition in df_lca_full["condition"].unique():
    df_condition = df_lca_full[df_lca_full["condition"] == condition]
    unique_cases = df_condition["case"].unique()
    train_cases, test_cases = train_test_split(unique_cases, test_size=0.3, random_state=42)
    df_lca_train_list.append(df_condition[df_condition["case"].isin(train_cases)])
    df_lca_test_list.append(df_condition[df_condition["case"].isin(test_cases)])

df_lca_train = pd.concat(df_lca_train_list).copy()
df_lca_test = pd.concat(df_lca_test_list).copy()

# Feature selection
features = [
    "Px", "Py", "Pz",
    "visit_order", "node",
    "distance_to_origin", "num_neighbors",
    "path_length_to_root", "geodesic_distance_to_root"
]

X_lca_train = df_lca_train[features].values
y_lca_train = df_lca_train["label_change"].values
X_lca_test = df_lca_test[features].values
y_lca_test = df_lca_test["label_change"].values

# Standardize features (statistics come from the training split only).
# Note that this rebinds the name `scaler` to the LCA scaler.
scaler = StandardScaler()
X_lca_train = scaler.fit_transform(X_lca_train)
X_lca_test = scaler.transform(X_lca_test)

# Encode labels. The encoder is fitted on the full label vocabulary so that a
# change id occurring only in test cases does not make transform() fail.
label_encoder_lca = LabelEncoder()
label_encoder_lca.fit(df_lca_full["label_change"].values)
y_lca_train = label_encoder_lca.transform(y_lca_train)
y_lca_test = label_encoder_lca.transform(y_lca_test)

# Train Logistic Regression
print("Training Logistic Regression on LCA...")
logreg_lca = LogisticRegression(max_iter=1000, random_state=42)
logreg_lca.fit(X_lca_train, y_lca_train)

# Predictions
y_lca_pred_logreg = logreg_lca.predict(X_lca_test)

df_lca_test_logreg = df_lca_test.copy()
df_lca_test_logreg["pred_change"] = label_encoder_lca.inverse_transform(y_lca_pred_logreg)

# Evaluation: report only the classes that occur in the test labels or the predictions
labels_test = unique_labels(y_lca_test, y_lca_pred_logreg)

# Use only the target names corresponding to these labels
target_names = label_encoder_lca.inverse_transform(labels_test).astype(str)

print(classification_report(y_lca_test, y_lca_pred_logreg, labels=labels_test, target_names=target_names))

Training Logistic Regression on LCA...
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      7405
           1       0.00      0.00      0.00        12
           2       0.00      0.00      0.00        12
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00        12
           5       0.00      0.00      0.00        11
           6       0.00      0.00      0.00         9
           7       0.00      0.00      0.00         5
           8       0.00      0.00      0.00         1

    accuracy                           0.99      7479
   macro avg       0.11      0.11      0.11      7479
weighted avg       0.98      0.99      0.99      7479



In [None]:
# Visualize true vs. predicted change points for case 16 (Normal) of the LCA test predictions
visualize_changes_predictions(df_lca_test_logreg, 16, 'Normal')