# Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import pandas as pd
import numpy as np
import json
import os
from scipy.stats import t
import seaborn as sns

In [None]:
# Read the run configuration stored one directory above this notebook.
with open('../params.json', 'r') as params_file:
    params = json.load(params_file)

# Pull out the settings that the path-building cells rely on.
DATASET = params['dataset']
VERSION = params['version']
DATA_FOLD = params['data_folder']

In [None]:
# Root folder that is walked later to find the comparison tables.
DIR = f'{DATA_FOLD}/{VERSION}/3.analysis/imputation_48/{DATASET}/tables/'
# Print the dataset name itself: `{DATASET}` built a one-element set, so the
# output was the set's repr rather than the plain name.
print(DATASET)

In [None]:
def calculate_regression_with_ci(x, y, confidence=0.95):
    """
    Fit ``y = slope * x + intercept`` by least squares and return the
    coefficients together with their two-sided confidence intervals.

    Parameters
    ----------
    x, y : array-like of numbers
        Samples of equal length (lists, NumPy arrays or pandas Series).
    confidence : float, default 0.95
        Confidence level of the returned intervals.

    Returns
    -------
    slope, intercept : float
        Coefficients from ``np.polyfit(x, y, 1)``.
    slope_ci, intercept_ci : tuple of (low, high)
        Student-t intervals with ``n - 2`` degrees of freedom. With fewer
        than three points the residual variance is undefined, so both
        intervals are ``(nan, nan)``.
    """
    # Work on plain float arrays so lists, integer data and Series behave alike.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)

    coeffs = np.polyfit(x, y, 1)  # [slope, intercept]
    slope, intercept = coeffs

    dof = n - 2
    if dof <= 0:
        # No degrees of freedom left: return NaN explicitly instead of
        # reaching it through divide-by-zero / invalid-sqrt warnings.
        undefined = (np.nan, np.nan)
        return slope, intercept, undefined, undefined

    # Residual standard deviation
    residuals = y - np.polyval(coeffs, x)
    residual_std = np.sqrt(np.sum(residuals**2) / dof)

    # Standard errors for slope and intercept
    mean_x = np.mean(x)
    sxx = np.sum((x - mean_x)**2)  # sum of squared deviations of x
    slope_se = residual_std / np.sqrt(sxx)
    intercept_se = residual_std * np.sqrt(1 / n + mean_x**2 / sxx)

    # Confidence intervals
    t_value = t.ppf((1 + confidence) / 2, dof)  # Two-tailed t-value
    slope_ci = (slope - t_value * slope_se, slope + t_value * slope_se)
    intercept_ci = (intercept - t_value * intercept_se, intercept + t_value * intercept_se)

    return slope, intercept, slope_ci, intercept_ci

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Updated plotting function
def plot_combined_graphs_3x1(
    variable,
    df_mimic_mean,
    df_mimic_lin,
    df_mimic_saits,
):
    """
    Draw a 3x1 figure of Bland-Altman style density plots, one panel per table.

    Panels, top to bottom:
    A) Mean imputation
    B) Linear interpolation
    C) SAITS

    Each panel shows a 2-D kernel density estimate of the ``'différence'``
    column against the ``'moyenne'`` column, plus dashed horizontal lines at
    the mean difference and at mean ± 1.96 sample standard deviations.

    Parameters
    ----------
    variable : str
        Name of the plotted variable; only referenced by the optional
        (commented-out) save call at the end.
    df_mimic_mean, df_mimic_lin, df_mimic_saits : DataFrame
        Tables providing the ``'moyenne'`` and ``'différence'`` columns.

    Notes
    -----
    Updates the global seaborn style and matplotlib rcParams, and displays
    the figure with ``plt.show()``. Returns ``None``.
    """
    # Use a clean style for publication
    sns.set_style("whitegrid")

    # Increase font sizes for a more publication-ready look
    plt.rcParams.update({
        'font.size': 12,
        'axes.titlesize': 14,
        'axes.labelsize': 12,
        'legend.fontsize': 10,
        'xtick.labelsize': 11,
        'ytick.labelsize': 11
    })

    # Set up the figure with 3 rows and 1 column
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(8, 12), sharex=False, sharey=False)

    datasets = [
        (df_mimic_mean, "A : Mean", axes[0]),
        (df_mimic_lin, "B : Linear Interpolation", axes[1]),
        (df_mimic_saits, "C : SAITS", axes[2]),
    ]

    for df, title, ax in datasets:
        x = df['moyenne']
        y = df['différence']

        # Compute mean difference and limits of agreement
        mean_diff = np.mean(y)
        std_diff = np.std(y, ddof=1)
        upper_limit = mean_diff + 1.96 * std_diff
        lower_limit = mean_diff - 1.96 * std_diff

        # Axes-level KDE so the density lands on this subplot.
        # sns.jointplot is figure-level: it builds its own figure and does
        # not draw on a supplied `ax`, which left the panels empty.
        sns.kdeplot(x=x, y=y, ax=ax)

        # Plot mean diff and ±1.96 SD lines
        ax.axhline(mean_diff, color='red', linestyle='--',
                   label=f'Mean diff = {mean_diff:.2f}')
        ax.axhline(upper_limit, color='grey', linestyle='--',
                   label=f'+1.96 SD = {upper_limit:.2f}')
        ax.axhline(lower_limit, color='grey', linestyle='--',
                   label=f'-1.96 SD = {lower_limit:.2f}')

        ax.set_title(title, loc='left')
        ax.set_xlabel('Average of Imputed and Masked Values (mmHg)')
        ax.set_ylabel('Imputed Value - Masked Values (mmHg)')
        ax.grid(True, alpha=0.3)

        # Place legend in a corner
        ax.legend(loc='lower left', frameon=True)

    # Address the figure explicitly so layout and title cannot end up on
    # whichever figure happens to be current.
    fig.tight_layout(rect=[0, 0, 1, 0.94])
    fig.suptitle(
        'Mean Blood Pressure imputation : Bland-Altman Plots',
        fontsize=16,
        y=0.98
    )

    # Uncomment to save figure (update path & filename)
    # fig.savefig(f"{variable}_bland_altman_plots.png", dpi=300, bbox_inches='tight')

    plt.show()
        

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def plot_combined_graphs_3x2(variable, df_mimic_mean, df_chu_mean, df_mimic_lin, df_chu_lin, df_mimic_saits, df_chu_saits,
                             save_dir='/data2/poette.m/dypo/v3/3.analysis/imputation_48/global/bland_altmann'):
    """
    Display and save a 3x2 figure of difference-versus-masked-value plots.

    Rows: mean imputation, linear interpolation, SAITS.
    Columns: MIMIC-IV on the left, CHU on the right.

    Each panel contains a scatter of ``'différence'`` against ``'masquées'``,
    the least-squares trend line (black), two parallel lines (blue) at
    trend ± 1.96 standard deviations of the differences, and a text box with
    the slope and intercept (95% CI) and the standard deviation.

    Parameters
    ----------
    variable : str
        Variable name, used in the figure title and the output file name.
    df_mimic_mean, df_chu_mean, df_mimic_lin, df_chu_lin, df_mimic_saits, df_chu_saits : DataFrame
        Tables providing the ``'masquées'`` and ``'différence'`` columns.
    save_dir : str
        Directory receiving ``{variable}_bland_altman.png``. Defaults to the
        location that used to be hard-coded.
    """
    fig, axes = plt.subplots(3, 2, figsize=(16, 12))

    # One (table, title, axes) triple per panel
    datasets = [
        (df_mimic_mean, "MIMIC-IV - Mean", axes[0, 0]),
        (df_chu_mean, "CHU - Mean", axes[0, 1]),
        (df_mimic_lin, "MIMIC-IV - Linear Interpolation", axes[1, 0]),
        (df_chu_lin, "CHU - Linear Interpolation", axes[1, 1]),
        (df_mimic_saits, "MIMIC-IV - SAITS", axes[2, 0]),
        (df_chu_saits, "CHU - SAITS", axes[2, 1]),
    ]

    for df, title, ax in datasets:
        x = df['masquées']
        y = df['différence']
        std_diff = np.std(y)
        limit_offset = 1.96 * std_diff

        # Regression of the difference on the masked value
        slope, intercept, slope_ci, intercept_ci = calculate_regression_with_ci(x, y)

        ax.scatter(x, y, alpha=0.5)

        # A straight line only needs its two end points.
        x_line = np.array([x.min(), x.max()])
        trend = slope * x_line + intercept
        ax.plot(x_line, trend, color='black', linestyle='-')

        # Adding a constant to y only shifts the fitted intercept, so the
        # limit lines are the trend line moved by ±1.96 SD; no refit needed.
        ax.plot(x_line, trend + limit_offset, color='blue', linestyle='-')
        ax.plot(x_line, trend - limit_offset, color='blue', linestyle='-')

        # Display slope and intercept with CI
        ax.text(0.02, 0.05,
                f"Slope: {slope:.2f} ({slope_ci[0]:.2f}, {slope_ci[1]:.2f})\n"
                f"Intercept: {intercept:.2f} ({intercept_ci[0]:.2f}, {intercept_ci[1]:.2f})\n"
                f"Standard deviation : {round(std_diff, 2)}",
                transform=ax.transAxes, fontsize=10, color='black', va='bottom')

        # Titles, labels and grid. No legend: none of the artists carries a
        # label, so ax.legend() only produced a "no artists with labels" warning.
        ax.set_title(title)
        ax.set_xlabel('Masked Value')
        ax.set_ylabel('Difference (Imputed - Masked)')
        ax.grid(alpha=0.3)

    # Adjust spacing between subplots
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    fig.suptitle(f' {variable.capitalize()} : Bland-Altman Plots with Regression Lines and CI', fontsize=16, y=0.98)
    fig.savefig(f'{save_dir}/{variable}_bland_altman.png')
    plt.show()

In [None]:
# Preview the first rows of `mimic_mean`.
# NOTE(review): in this file `mimic_mean` is only assigned inside the
# directory loop of a later cell, so that cell has to be run before this one.
mimic_mean.head()

In [None]:
# Every directory below DIR (os.walk yields (path, subdirs, files) tuples).
list_dirs = [walk_entry[0] for walk_entry in os.walk(DIR)]

# The loop variable used to be called `dir`, which shadowed the builtin for
# the rest of the notebook session; `table_dir` avoids that.
for table_dir in list_dirs:
    if table_dir.endswith('pam_comparaison'):
        print(table_dir)
        variable = table_dir.split('/')[-1]
        chu_dir = table_dir
        mimic_dir = table_dir.replace('chu', 'mimic')
        masking_scenario = 'Random'

        # Only the MIMIC tables are loaded; the CHU counterparts
        # (f'{chu_dir}/{masking_scenario}_*.xlsx') are not used by the 3x1 figure.
        mimic_mean = pd.read_excel(f'{mimic_dir}/{masking_scenario}_mean.xlsx')
        mimic_lin_interpol = pd.read_excel(f'{mimic_dir}/{masking_scenario}_lin_interpol.xlsx')
        mimic_saits = pd.read_excel(f'{mimic_dir}/{masking_scenario}_saits.xlsx')

        plot_combined_graphs_3x1(
            variable,
            mimic_mean,
            mimic_lin_interpol,
            mimic_saits,
        )
        

In [None]:
# Load the "pa_only" comparison workbooks. The five names on the left are
# bound, in order, to the five workbooks listed below.
chu_lin_interpol, chu_saits, mimic_lin_interpol, mimic_saits, chu_mean = (
    pd.read_excel(workbook)
    for workbook in (
        f'{DATA_FOLD}/{VERSION}/3.analysis/imputation_48/chu/tables/pam_comparaison/pa_only_lin_interpol.xlsx',
        f'{DATA_FOLD}/{VERSION}/3.analysis/imputation_48/chu/tables/pam_comparaison/pa_only_saits.xlsx',
        f'{DATA_FOLD}/{VERSION}/3.analysis/imputation_48/mimic/tables/pam_comparaison/pa_only_lin_interpol.xlsx',
        f'{DATA_FOLD}/{VERSION}/3.analysis/imputation_48/mimic/tables/pam_comparaison/pa_only_saits.xlsx',
        f'{DATA_FOLD}/{VERSION}/3.analysis/imputation_48/chu/tables/pam_comparaison/pa_only_mean.xlsx',
    )
)

In [None]:
# Display the `chu_saits` table (read from pa_only_saits.xlsx in the cell above).
chu_saits

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def plot_combined_graphs_2x2(df_mimic_lin, df_chu_lin, df_mimic_saits, df_chu_saits):
    """
    Show a 2x2 grid of Bland-Altman style scatter plots.

    Row 1 holds linear interpolation, row 2 holds SAITS; MIMIC-IV is in the
    left column and CHU in the right one. Only rows whose 'masquées' value
    lies in [20, 160] are drawn, and every panel gets dashed lines at the
    mean difference and at mean ± 1.96 standard deviations.
    """
    figure, grid = plt.subplots(2, 2, figsize=(16, 12))

    # (table, panel title, target axes)
    panels = (
        (df_mimic_lin, "MIMIC-IV - Linear Interpolation", grid[0, 0]),
        (df_chu_lin, "CHU - Linear Interpolation", grid[0, 1]),
        (df_mimic_saits, "MIMIC-IV - SAITS", grid[1, 0]),
        (df_chu_saits, "CHU - SAITS", grid[1, 1]),
    )

    for table, panel_title, panel in panels:
        # Restrict to the range of masked values of interest
        in_range = table[table['masquées'].between(20, 160, inclusive='both')]
        diffs = in_range['différence']

        # Bias and limits of agreement
        mean_diff = np.mean(diffs)
        std_diff = np.std(diffs)
        upper_limit = mean_diff + 1.96 * std_diff
        lower_limit = mean_diff - 1.96 * std_diff

        panel.scatter(in_range['masquées'], diffs, alpha=0.5, label='Points')
        panel.axhline(mean_diff, color='red', linestyle='--', label=f'Mean ({mean_diff:.2f})')
        panel.axhline(upper_limit, color='blue', linestyle='--', label=f'Upper Limit ({upper_limit:.2f})')
        panel.axhline(lower_limit, color='blue', linestyle='--', label=f'Lower Limit ({lower_limit:.2f})')

        # Cosmetics
        panel.set_title(panel_title)
        panel.set_xlabel('Masked Value')
        panel.set_ylabel('Difference (Imputed - Masked)')
        panel.set_ylim(-200, 200)
        panel.legend()
        panel.grid(alpha=0.3)

    # Leave room at the top for the global title
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.suptitle('Bland-Altman Plots by Dataset and Method', fontsize=16, y=0.98)
    plt.show()

# Render the 2x2 Bland-Altman figure from the linear-interpolation and SAITS
# tables (MIMIC-IV and CHU) loaded in an earlier cell.
plot_combined_graphs_2x2(mimic_lin_interpol, chu_lin_interpol, mimic_saits, chu_saits)


In [None]:
import seaborn as sns

def plot_combined_hist_2x2(df_mimic_lin, df_chu_lin, df_mimic_saits, df_chu_saits):
    """
    Display a 2x2 figure of error histograms (5-unit bins, KDE overlay):
    - Top row: Linear interpolation (MIMIC-IV on the left, CHU on the right).
    - Bottom row: SAITS (MIMIC-IV on the left, CHU on the right).

    The plotted quantity is the 'différence' column of each table.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # One (table, title, axes) triple per panel
    datasets = [
        (df_mimic_lin, "MIMIC-IV - Linear Interpolation", axes[0, 0]),
        (df_chu_lin, "CHU - Linear Interpolation", axes[0, 1]),
        (df_mimic_saits, "MIMIC-IV - SAITS", axes[1, 0]),
        (df_chu_saits, "CHU - SAITS", axes[1, 1])
    ]

    for df, title, ax in datasets:
        # The mean / limits-of-agreement values computed here before were
        # never used, and the legend call had no labelled artist to show;
        # both were removed.
        sns.histplot(x=df['différence'], kde=True, binwidth=5, ax=ax)

        ax.set_title(title)
        ax.set_xlabel('Error distribution')
        ax.grid(alpha=0.3)

    # Adjust spacing between subplots
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    # Title now names what is drawn (it was copied from the Bland-Altman figure).
    plt.suptitle('Error Distributions by Dataset and Method', fontsize=16, y=0.98)
    plt.show()

# Draw the 2x2 error-distribution figure for the four tables loaded earlier.
plot_combined_hist_2x2(mimic_lin_interpol, chu_lin_interpol, mimic_saits, chu_saits)

In [None]:
import seaborn as sns

def plot_combined_hist_2x2(df_mimic_lin, df_chu_lin, df_mimic_saits, df_chu_saits):
    """
    Display a 2x2 figure of box plots of the 'différence' column:
    - Top row: Linear interpolation (MIMIC-IV on the left, CHU on the right).
    - Bottom row: SAITS (MIMIC-IV on the left, CHU on the right).

    NOTE: this definition reuses the name of the histogram variant defined
    earlier in the file, so whichever cell ran last is the one that is called.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # One (table, title, axes) triple per panel
    datasets = [
        (df_mimic_lin, "MIMIC-IV - Linear Interpolation", axes[0, 0]),
        (df_chu_lin, "CHU - Linear Interpolation", axes[0, 1]),
        (df_mimic_saits, "MIMIC-IV - SAITS", axes[1, 0]),
        (df_chu_saits, "CHU - SAITS", axes[1, 1])
    ]

    for df, title, ax in datasets:
        # The mean / limits-of-agreement values computed here before were
        # never used, and the legend call had no labelled artist to show;
        # both were removed.
        sns.boxplot(x=df['différence'], ax=ax)

        ax.set_title(title)
        ax.set_xlabel('Error distribution')
        ax.grid(alpha=0.3)

    # Adjust spacing between subplots
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    # Title now names what is drawn (it was copied from the Bland-Altman figure).
    plt.suptitle('Error Box Plots by Dataset and Method', fontsize=16, y=0.98)
    plt.show()

# Same call as for the histograms; after the redefinition just above it
# draws the box-plot version of the figure.
plot_combined_hist_2x2(mimic_lin_interpol, chu_lin_interpol, mimic_saits, chu_saits)

In [None]:
# Keep only the CHU SAITS rows whose masked value lies between 30 and 140.
test = chu_saits.loc[chu_saits['masquées'].between(30, 140)]

In [None]:
# Mean of the 'différence' column over the rows kept in `test`.
test['différence'].mean()

In [None]:
# Fraction of rows in `test` whose difference lies within [-10, 10] (bounds included).
test['différence'].between(-10,10, inclusive='both').sum()/len(test)

In [None]:
# Standard deviation of the differences in `test` (pandas `std()` with its default settings).
test['différence'].std()

In [None]:
# Mean difference and ±1.96 SD limits for the `chu_mean` table.
# NOTE(review): none of these four values is used by the histogram below, but
# the module-level names `mean_diff`, `upper_limit` and `lower_limit` are read
# (without being assigned locally) by the regression variant of
# plot_combined_graphs_2x2 defined later in this file, so do not remove them
# without fixing that function.
mean_diff = np.mean(chu_mean['différence'])
std_diff = np.std(chu_mean['différence'])
upper_limit = mean_diff + 1.96 * std_diff
lower_limit = mean_diff - 1.96 * std_diff

# Histogram of the differences (5-unit bins) with a KDE overlay.
sns.histplot(x=chu_mean['différence'], kde = True, binwidth= 5)


plt.xlabel('Error distribution')

# Leave room at the top of the figure for the title
plt.tight_layout(rect=[0, 0, 1, 0.95])
# NOTE(review): the title says "Bland-Altman" although a single histogram is drawn.
plt.suptitle('Bland-Altman Plots by Dataset and Method', fontsize=16, y=0.98)
plt.show()

In [None]:
# Mean absolute difference over the whole `chu_mean` table.
np.abs(chu_mean['différence']).mean()

In [None]:
# Mean absolute difference over the rows currently held in `test`.
np.abs(test['différence']).mean()

In [None]:
# Mean difference and mean ± 2 SD for the CHU SAITS table, per band of masked
# values: (40, 50], (50, 60] and (60, 70] (lower bound excluded).
for n in (40, 50, 60):
    test = chu_saits[chu_saits['masquées'].between(n, n+10, inclusive='right')]
    value = test['différence']
    mean, std = value.mean(), value.std()
    print(f'agreement [{n}-{n+10}]: {round(mean,2)} ({round((mean - (2*std)),2)} - {round((mean + (2*std)),2)})')

In [None]:
# Two-column view used for the regression below: masked value and difference.
test_data = chu_saits.loc[:, ['masquées', 'différence']]


In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit of the difference on the masked value.
model = LinearRegression()

# The single predictor is reshaped into an (n, 1) column matrix for fit().
model.fit(
    X=test_data['masquées'].to_numpy().reshape(-1, 1),
    y=test_data['différence'],
)

In [None]:
# Fitted coefficient(s) of the regression above (one predictor, so one slope).
model.coef_

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t

def calculate_regression_with_ci(x, y, confidence=0.95):
    """
    Fit ``y = slope * x + intercept`` by least squares and return the
    coefficients together with their two-sided confidence intervals.

    Parameters
    ----------
    x, y : array-like of numbers
        Samples of equal length (lists, NumPy arrays or pandas Series).
    confidence : float, default 0.95
        Confidence level of the returned intervals.

    Returns
    -------
    slope, intercept : float
        Coefficients from ``np.polyfit(x, y, 1)``.
    slope_ci, intercept_ci : tuple of (low, high)
        Student-t intervals with ``n - 2`` degrees of freedom. With fewer
        than three points the residual variance is undefined, so both
        intervals are ``(nan, nan)``.
    """
    # Work on plain float arrays so lists, integer data and Series behave alike.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)

    coeffs = np.polyfit(x, y, 1)  # [slope, intercept]
    slope, intercept = coeffs

    dof = n - 2
    if dof <= 0:
        # No degrees of freedom left: return NaN explicitly instead of
        # reaching it through divide-by-zero / invalid-sqrt warnings.
        undefined = (np.nan, np.nan)
        return slope, intercept, undefined, undefined

    # Residual standard deviation
    residuals = y - np.polyval(coeffs, x)
    residual_std = np.sqrt(np.sum(residuals**2) / dof)

    # Standard errors for slope and intercept
    mean_x = np.mean(x)
    sxx = np.sum((x - mean_x)**2)  # sum of squared deviations of x
    slope_se = residual_std / np.sqrt(sxx)
    intercept_se = residual_std * np.sqrt(1 / n + mean_x**2 / sxx)

    # Confidence intervals
    t_value = t.ppf((1 + confidence) / 2, dof)  # Two-tailed t-value
    slope_ci = (slope - t_value * slope_se, slope + t_value * slope_se)
    intercept_ci = (intercept - t_value * intercept_se, intercept + t_value * intercept_se)

    return slope, intercept, slope_ci, intercept_ci

def plot_combined_graphs_2x2(df_mimic_lin, df_chu_lin, df_mimic_saits, df_chu_saits):
    """
    Display a 2x2 figure of difference-versus-masked-value plots with trend lines:
    - Top row: Linear interpolation (MIMIC-IV on the left, CHU on the right).
    - Bottom row: SAITS (MIMIC-IV on the left, CHU on the right).

    Only rows whose 'masquées' value lies in [20, 160] are used. Each panel
    shows the scatter of 'différence' against 'masquées', the least-squares
    trend line (black), the slope and intercept with their 95% CI, and two
    parallel lines (blue) at trend ± 1.96 standard deviations of that
    panel's differences (labelled ±2σ in the legend).
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # One (table, title, axes) triple per panel
    datasets = [
        (df_mimic_lin, "MIMIC-IV - Linear Interpolation", axes[0, 0]),
        (df_chu_lin, "CHU - Linear Interpolation", axes[0, 1]),
        (df_mimic_saits, "MIMIC-IV - SAITS", axes[1, 0]),
        (df_chu_saits, "CHU - SAITS", axes[1, 1])
    ]

    for df, title, ax in datasets:
        # Filter dataset within range
        ranged_df = df[df['masquées'].between(20, 160, inclusive='both')]
        x = ranged_df['masquées']
        y = ranged_df['différence']

        # Spread of THIS panel's differences. The function used to read
        # `mean_diff`, `upper_limit` and `lower_limit` without assigning them,
        # i.e. it raised NameError or reused values left at module level by
        # another cell for all four panels.
        limit_offset = 1.96 * np.std(y)

        # Calculate regression for mean difference
        slope, intercept, slope_ci, intercept_ci = calculate_regression_with_ci(x, y)

        # Scatter plot
        ax.scatter(x, y, alpha=0.5)

        # A straight line only needs its two end points.
        x_line = np.array([x.min(), x.max()])
        trend = slope * x_line + intercept
        ax.plot(x_line, trend, color='black', linestyle='-',
                label=f'Reg: y={slope:.2f}x + {intercept:.2f}')

        # Display slope and intercept with CI
        ax.text(0.02, 0.95,
                f"Slope: {slope:.2f} ({slope_ci[0]:.2f}, {slope_ci[1]:.2f})\n"
                f"Intercept: {intercept:.2f} ({intercept_ci[0]:.2f}, {intercept_ci[1]:.2f})",
                transform=ax.transAxes, fontsize=10, verticalalignment='top', color='darkgreen')

        # Adding a constant to y only shifts the fitted intercept, so the
        # limit lines are the trend line moved by ±1.96 SD; no refit needed.
        ax.plot(x_line, trend + limit_offset, color='blue', linestyle='-',
                label='+2σ')
        ax.plot(x_line, trend - limit_offset, color='blue', linestyle='-',
                label='-2σ')

        # Set titles, labels, and grid
        ax.set_title(title)
        ax.set_xlabel('Original Value')
        ax.set_ylabel('Difference (Imputed - Masked)')
        ax.set_ylim(-200, 200)
        ax.legend()
        ax.grid(alpha=0.3)

    # Adjust spacing between subplots
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.suptitle('Bland-Altman Plots with Regression Lines and CI', fontsize=16, y=0.98)
    plt.show()

# Render the regression variant of the 2x2 figure for the four tables loaded earlier.
plot_combined_graphs_2x2(mimic_lin_interpol, chu_lin_interpol, mimic_saits, chu_saits)
