In [2]:
import os
os.chdir('../')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style='white')

from scipy.stats import pearsonr, spearmanr, zscore
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

In [3]:
# Load the inference table and attach the signed residual (true minus predicted EF).
data = pd.read_csv('inference_feat.csv').assign(
    error=lambda frame: frame['true_ef'] - frame['predicted_ef']
)

# TRAIN and VAL rows are pooled for fitting; TEST rows are held out.
df = data[data['split'].isin(['TRAIN', 'VAL'])]
test = data.loc[data['split'] == 'TEST']

In [4]:
# Absolute z-score of the residual over the pooled TRAIN/VAL rows.
z = np.absolute(zscore(df['error']))
# Keep only rows whose residual lies within 2 standard deviations of the mean.
filtered_df = df.loc[z < 2]

In [5]:
# Rebind `df` to the outlier-filtered frame; both names now refer to the same object.
df = filtered_df

In [6]:
# Remove identifier / bookkeeping columns and the raw EF values so that only
# candidate features and the regression target ('error') remain.
# A non-mutating drop is used on purpose: `df` comes from boolean indexing, so
# `inplace=True` would emit SettingWithCopyWarning and would also alter
# `filtered_df`, which is the same object.
df = df.drop(columns=['filename', 'split', 'true_ef', 'predicted_ef'])

# X: every remaining column except the target; y: the residual to be predicted.
X, y = df.drop(columns='error'), df['error']

In [7]:
# Columns fed to the error-correction regressor.
features = [
    'volume_ratio',
    'length_ratio',
    'dice_overlap_std',
]

In [8]:
# 80/20 internal split of the filtered TRAIN/VAL rows, restricted to `features`.
# NOTE(review): in the visible notebook the model is later fit on all of
# X[features] and X_test / y_test are reassigned from the TEST split, so this
# split appears to be unused - confirm before relying on it.
X_train, X_test, y_train, y_test = train_test_split(
    X[features],
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Gradient-boosted regressor that predicts the EF residual from the selected features.
reg = GradientBoostingRegressor(
    n_estimators=200,
    learning_rate=0.01,
    max_depth=4,
    random_state=1,
)
# Fit on every filtered TRAIN/VAL row (X, y), not on the X_train subset.
reg.fit(X[features], y)

# Evaluate on the held-out TEST split; this replaces any earlier X_test / y_test.
X_test, y_test = test[features], test['error']

# Predicted residual for each TEST row, in the row order of `test`.
y_pred = reg.predict(X_test)

In [10]:
# Work on an independent copy of the TEST rows so new columns never touch `data`.
test = data.loc[data['split'] == 'TEST'].copy()
# Corrected EF = model EF plus the predicted residual. `y_pred` is a positional
# array, so it must have been produced from these same TEST rows in the same order.
test['corrected_ef'] = test['predicted_ef'] + y_pred

LVEF values are categorized according to the American Heart Association:
- Normal EF: Typically 55%–75%.
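- Heart Failure with Preserved Ejection Fraction (HFpEF): LVEF >= 50%. Because this range overlaps the normal range, the classifier below labels 50%–54% as HFpEF and 55%–75% as Normal EF.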
- Heart Failure with Mildly Reduced Ejection Fraction (HFmrEF): LVEF 41%–49%.
- Heart Failure with Reduced Ejection Fraction (HFrEF): LVEF <= 40%.
- Hypertrophic Cardiomyopathy: LVEF > 75% could indicate this condition.

In [11]:
def clasif_ef(ef):
    """Map a single ejection-fraction value (in percent) to its category label.

    Categories, checked from highest to lowest:
      * ef > 75   -> "Hypertrophic CM"
      * ef >= 55  -> "Normal EF"
      * ef >= 50  -> "HFpEF"
      * ef >= 41  -> "HFmrEF"
      * otherwise -> "HFrEF"
    """
    # The top band uses a strict comparison, so it is handled on its own.
    if ef > 75:
        return "Hypertrophic CM"

    # Remaining bands share an inclusive lower bound; first match wins.
    lower_bounds = (
        (55, "Normal EF"),
        (50, "HFpEF"),
        (41, "HFmrEF"),
    )
    for bound, label in lower_bounds:
        if ef >= bound:
            return label

    # Anything that matched no band above falls through to the lowest category.
    return "HFrEF"

In [18]:
# Round each EF column to whole percentage points (stored as int) before
# classification.
for ef_col in ('true_ef', 'predicted_ef', 'corrected_ef'):
    test[ef_col] = test[ef_col].round().astype(int)

# One categorical label column per EF column, produced by clasif_ef.
for label_col, ef_col in (
    ('clasif_true', 'true_ef'),
    ('clasif_orig', 'predicted_ef'),
    ('clasif_corr', 'corrected_ef'),
):
    test[label_col] = test[ef_col].apply(clasif_ef)


In [19]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score

def threshold_analysis(true_ef, predicted_ef, threshold, condition=">"):
    """Score how well a predicted EF reproduces a one-sided EF cut-off.

    Both inputs are binarised as ``value <condition> threshold`` (1 = condition
    met) and the predicted labels are compared with the true labels.

    Parameters
    ----------
    true_ef, predicted_ef : array-like of numbers
        Reference and predicted EF values, element-wise aligned.
    threshold : number
        Cut-off the values are compared against.
    condition : {">", ">=", "<", "<="}, default ">"
        Comparison operator applied to both inputs.

    Returns
    -------
    dict
        ``sensitivity``, ``specificity``, ``precision``, ``f1_score`` and the
        four confusion counts ``true_positives``, ``true_negatives``,
        ``false_positives``, ``false_negatives``. Ratios whose denominator is
        zero are reported as 0.

    Raises
    ------
    ValueError
        If ``condition`` is not one of the supported operators. Unknown
        operators are rejected instead of being silently treated as "<=".
    """
    comparators = {
        ">": np.greater,
        ">=": np.greater_equal,
        "<": np.less,
        "<=": np.less_equal,
    }
    if condition not in comparators:
        raise ValueError(
            f"Unsupported condition {condition!r}; expected one of {sorted(comparators)}"
        )
    compare = comparators[condition]

    true_bin = np.asarray(compare(true_ef, threshold)).astype(int)
    pred_bin = np.asarray(compare(predicted_ef, threshold)).astype(int)

    # labels=[0, 1] guarantees a 2x2 matrix even when one class never occurs;
    # without it the 4-way unpacking fails on single-class data.
    tn, fp, fn, tp = confusion_matrix(true_bin, pred_bin, labels=[0, 1]).ravel()

    precision = precision_score(true_bin, pred_bin, zero_division=0)
    recall = recall_score(true_bin, pred_bin, zero_division=0)
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    return {
        'sensitivity': recall,
        'specificity': specificity,
        'precision': precision,
        'f1_score': f1_score(true_bin, pred_bin, zero_division=0),
        'true_positives': tp,
        'true_negatives': tn,
        'false_positives': fp,
        'false_negatives': fn
    }

# Clinical cut-offs evaluated one at a time. The inclusive categories (>=50, >=55)
# use ">=" so that an EF of exactly 50 or 55 counts as positive, matching both
# the labels and clasif_ef. The HFmrEF entry is a two-sided range and carries no
# "condition"; it is scored by a dedicated range function instead.
thresholds = {
    "HFrEF (≤40%)": {"threshold": 40, "condition": "<="},
    "HFmrEF (41-49%)": {"threshold": (41, 49), "type": "range"},
    "HFpEF (≥50%)": {"threshold": 50, "condition": ">="},
    "Normal EF (≥55%)": {"threshold": 55, "condition": ">="},
    "Hypertrophic CM (>75%)": {"threshold": 75, "condition": ">"}
}

In [20]:
def HFmrEF(ef_real, ef_pred):
    """Score detection of the HFmrEF band (41 <= EF <= 49, both ends inclusive).

    Parameters
    ----------
    ef_real, ef_pred : array-like supporting element-wise comparison and
        ``.astype`` (e.g. pandas Series or NumPy arrays)
        Reference and predicted EF values, element-wise aligned.

    Returns
    -------
    dict
        ``sensitivity``, ``specificity``, ``precision``, ``f1_score`` and the
        four confusion counts ``true_positives``, ``true_negatives``,
        ``false_positives``, ``false_negatives``. Ratios whose denominator is
        zero are reported as 0.
    """
    # 1 = value lies inside the band, 0 = outside.
    true_bin = ((ef_real >= 41) & (ef_real <= 49)).astype(int)
    pred_bin = ((ef_pred >= 41) & (ef_pred <= 49)).astype(int)

    # labels=[0, 1] guarantees a 2x2 matrix even when no sample (or every
    # sample) falls inside the band; without it the 4-way unpacking fails.
    tn, fp, fn, tp = confusion_matrix(true_bin, pred_bin, labels=[0, 1]).ravel()
    precision = precision_score(true_bin, pred_bin, zero_division=0)
    recall = recall_score(true_bin, pred_bin, zero_division=0)
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    return {
        'sensitivity': recall,
        'specificity': specificity,
        'precision': precision,
        'f1_score': f1_score(true_bin, pred_bin, zero_division=0),
        'true_positives': tp,
        'true_negatives': tn,
        'false_positives': fp,
        'false_negatives': fn
    }

In [21]:
# For every clinical category, score the raw prediction ('original') and the
# residual-corrected prediction ('corrected') against the true EF.
results = {}

for name, params in thresholds.items():
    if "HFmrEF" in name:
        # Two-sided band: handled by the dedicated range scorer.
        orig, corr = (
            HFmrEF(test['true_ef'], test[pred_col])
            for pred_col in ('predicted_ef', 'corrected_ef')
        )
    else:
        # One-sided cut-off: threshold and operator come from the table entry.
        orig, corr = (
            threshold_analysis(
                test['true_ef'],
                test[pred_col],
                params["threshold"],
                params["condition"],
            )
            for pred_col in ('predicted_ef', 'corrected_ef')
        )

    results[name] = {'original': orig, 'corrected': corr}

In [22]:
from sklearn.metrics import classification_report, cohen_kappa_score

# Multi-class agreement between the true EF category and the category derived
# from the raw ('orig') and corrected ('corr') predictions.
cm_orig, cm_corr = (
    confusion_matrix(test['clasif_true'], test[pred_col])
    for pred_col in ('clasif_orig', 'clasif_corr')
)

accuracy_orig, accuracy_corr = (
    accuracy_score(test['clasif_true'], test[pred_col])
    for pred_col in ('clasif_orig', 'clasif_corr')
)
kappa_orig, kappa_corr = (
    cohen_kappa_score(test['clasif_true'], test[pred_col])
    for pred_col in ('clasif_orig', 'clasif_corr')
)

# NOTE(review): zero_division=1 is used here while the per-threshold metrics
# use zero_division=0 - confirm this difference is intended.
report_orig, report_corr = (
    classification_report(test['clasif_true'], test[pred_col], zero_division=1)
    for pred_col in ('clasif_orig', 'clasif_corr')
)