<a href="https://colab.research.google.com/github/khosh90/prospect/blob/main/Imputation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imputation

Installations and functions


In [None]:
# Mount Google Drive so that files stored there are reachable from this runtime
# (the `google.colab` module only exists on Colab).
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder. The path is hard-coded to
# this particular Drive layout.
# NOTE(review): presumably this is where `utils.py` lives, since a later cell
# runs `from utils import *` — confirm.
%cd '/content/drive/MyDrive/Colab Notebooks/Mini_DIVA'

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/Mini_DIVA


In [None]:
!pip install MIDASpy
!pip install statsmodels

In [None]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from utils import *
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import MIDASpy as md
import sklearn
import tensorflow as tf
print("Tensorflow version is: ", tf.__version__)
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from sklearn.preprocessing import StandardScaler
from scipy import stats
from scipy.stats import f
from scipy import stats



Tensorflow version is:  2.11.1


In [None]:
def encode_categorical_columns(df, categorical_cols):
    """Label-encode the given columns of a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. It is copied; the caller's frame is not modified.
    categorical_cols : iterable of str
        Names of the columns to encode.

    Returns
    -------
    df_encoded : pandas.DataFrame
        Copy of ``df`` in which every listed column has been replaced by the
        output of ``LabelEncoder.fit_transform``.
    encoding_mappings : dict
        ``{column: {code: original_category}}`` built from each encoder's
        ``classes_``; this is the lookup that ``numerical_backto_categorical``
        consumes to reverse the encoding.
    """
    # Imported locally so the function does not depend on LabelEncoder leaking
    # into the namespace through `from utils import *`; the notebook's import
    # cell never imports it explicitly.
    from sklearn.preprocessing import LabelEncoder

    df_encoded = df.copy()
    encoding_mappings = {}

    for col in categorical_cols:
        # A fresh encoder per column: each column gets its own code space.
        encoder = LabelEncoder()
        df_encoded[col] = encoder.fit_transform(df_encoded[col])
        # Position in classes_ is the integer code assigned to that category.
        encoding_mappings[col] = dict(enumerate(encoder.classes_))

    return df_encoded, encoding_mappings


In [None]:

def numerical_backto_categorical(df, encoding_mappings):
    """Map integer-coded columns back to their original categories.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding encoded columns. It is modified IN PLACE and also
        returned.
    encoding_mappings : dict
        ``{column: {code: category}}``, e.g. the second value returned by
        ``encode_categorical_columns``. Columns without an entry are left
        untouched.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``.

    Notes
    -----
    Each value is truncated with ``int()`` before the lookup, so a fractional
    value such as 0.56 maps to code 0. Codes that are missing from the mapping
    are kept unchanged, and missing values (NaN/None) are passed through as-is
    instead of raising inside ``int()``.
    """
    for col_name in df.columns:
        if col_name not in encoding_mappings:
            continue
        mapping = encoding_mappings[col_name]

        def decode(value, mapping=mapping):
            # int(NaN) raises ValueError, so missing cells must bypass the lookup.
            if pd.isna(value):
                return value
            return mapping.get(int(value), value)

        df[col_name] = df[col_name].apply(decode)
    return df

In [None]:
def decode_midas(imputations, cat_cols_list, cat_var):
    """Collapse one-hot column groups of imputed datasets into single columns.

    Parameters
    ----------
    imputations : iterable of pandas.DataFrame
        Imputed datasets that contain the one-hot columns named in
        ``cat_cols_list``.
    cat_cols_list : list of list of str
        One inner list of one-hot column names per categorical variable.
    cat_var : list of str
        Name of the decoded column for each group, in the same order as
        ``cat_cols_list``.

    Returns
    -------
    list of pandas.DataFrame
        New frames in which every one-hot group has been dropped and replaced
        by one column holding, per row, the NAME of the one-hot column with the
        largest value (``idxmax``). The input frames are not modified.

    Raises
    ------
    ValueError
        If ``cat_var`` and ``cat_cols_list`` differ in length.
    """
    # The previous implementation recomputed cat_var from the global `X`,
    # silently ignoring the argument; the parameter is now used as passed.
    if len(cat_var) != len(cat_cols_list):
        raise ValueError(
            "cat_var and cat_cols_list must have the same length "
            f"(got {len(cat_var)} and {len(cat_cols_list)})"
        )

    flat_cats = [cat for variable in cat_cols_list for cat in variable]

    decoded = []
    for imputed in imputations:
        # For each group pick, row by row, the column with the highest score.
        cat_df = pd.DataFrame({
            name: imputed[columns].idxmax(axis=1)
            for name, columns in zip(cat_var, cat_cols_list)
        })
        decoded.append(pd.concat([imputed, cat_df], axis=1).drop(flat_cats, axis=1))

    return decoded


In [None]:

def no_impute_variables(df, threshold_identifier=0.9, threshold_cardinality=0.05):
    """Flag columns that may be unsuitable for imputation.

    A column can appear in several categories at once; the checks are
    independent of each other.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to inspect.
    threshold_identifier : float, default 0.9
        A column whose ratio of distinct values to rows exceeds this is listed
        as an identifier column.
    threshold_cardinality : float, default 0.05
        A column whose ratio of distinct values to rows exceeds this is listed
        as a high-cardinality column. The check does not look at the dtype, so
        numerical columns are flagged as well.

    Returns
    -------
    dict
        Keys ``"Identifier Columns"``, ``"Datetime Columns"``,
        ``"High Cardinality Columns"`` and ``"Free Text Columns"``, each
        mapping to a list of column names in column order.
    """
    identifier_columns = []
    datetime_columns = []
    high_cardinality_columns = []
    free_text_columns = []

    for col in df.columns:
        series = df[col]
        n_rows = len(series)
        # Both uniqueness checks use the same ratio. An empty frame has no
        # distinct values, so the ratio is defined as 0 instead of dividing by 0.
        unique_ratio = series.nunique() / n_rows if n_rows else 0.0

        if unique_ratio > threshold_identifier:
            identifier_columns.append(col)

        if pd.api.types.is_datetime64_any_dtype(series):
            datetime_columns.append(col)

        if unique_ratio > threshold_cardinality:
            high_cardinality_columns.append(col)

        # Every object-dtype column is reported as free text.
        if series.dtype == 'O':
            free_text_columns.append(col)

    return {
        "Identifier Columns": identifier_columns,
        "Datetime Columns": datetime_columns,
        "High Cardinality Columns": high_cardinality_columns,
        "Free Text Columns": free_text_columns
    }

# Assuming 'credit' is your DataFrame
#result = no_impute_variables(credit)

# Display identified columns
#for category, columns in result.items():
#    print(f"{category}: {columns}")


In [None]:
def preprocess_midas(input_data):
    """Prepare a DataFrame for MIDAS by converting its categorical columns.

    The categorical columns reported by ``get_num_cat_vars`` are run through
    ``md.cat_conv`` and swapped for the converted columns it returns
    (presumably one-hot indicators — confirm against MIDASpy).

    Parameters
    ----------
    input_data : pandas.DataFrame
        Raw data; it is copied and left unmodified.

    Returns
    -------
    data_in : pandas.DataFrame
        Remaining original columns followed by the converted categorical
        columns, with every missing entry set to ``np.nan``.
    data_cat : pandas.DataFrame
        The converted categorical columns as returned by ``md.cat_conv``.
    cat_cols_list : list
        Second value returned by ``md.cat_conv``.
    """
    working = input_data.copy()

    # get_num_cat_vars is defined outside this notebook; only its second return
    # value (the categorical column names) is needed here.
    _, categorical_names = get_num_cat_vars(working)
    data_cat, cat_cols_list = md.cat_conv(working[categorical_names])

    # Replace the raw categorical columns with their converted counterparts.
    remaining = working.drop(categorical_names, axis=1)
    data_in = pd.concat([remaining, data_cat], axis=1)

    # Normalise every kind of missing marker to np.nan.
    data_in[data_in.isnull()] = np.nan

    return data_in, data_cat, cat_cols_list

# Example usage:
# data_in, data_cat, cat_cols_list = preprocess_midas(credit)


In [None]:

def numerical_rubin(selected_dataframes, columns_of_interest, m=10, alpha=0.05):
    """Pool column means of multiply-imputed datasets with Rubin's rules.

    For each column the per-dataset estimate ``Q_i`` is the sample mean and its
    sampling variance ``U_i`` is ``s^2 / n``. They are combined as

        Q_bar = mean_i(Q_i)                          pooled estimate
        W     = mean_i(U_i)                          within-imputation variance
        B     = sum_i (Q_i - Q_bar)^2 / (M - 1)      between-imputation variance
        T     = W + (1 + 1/m) * B                    total variance
        lam   = (1 + 1/m) * B / T
        df    = (m - 1) / lam^2                      ("old" degrees of freedom)

    where ``M`` is the number of datasets actually supplied.

    Parameters
    ----------
    selected_dataframes : sequence of pandas.DataFrame
        The imputed datasets. They may contain extra columns; only
        ``columns_of_interest`` are used.
    columns_of_interest : sequence of str
        Columns whose means are pooled.
    m : int, default 10
        Number of imputations used in the ``(1 + 1/m)`` correction and in the
        degrees of freedom. It should equal ``len(selected_dataframes)``.
    alpha : float, default 0.05
        Significance level; the interval has coverage ``1 - alpha``.

    Returns
    -------
    pandas.DataFrame
        One row per column with ``'Variable'``, ``'Pooled Estimate'``, ``'SE'``,
        ``'t-statistic'`` (test of mean = 0), ``'old df'``, ``'p-value'``,
        ``'CI'`` (a ``(lower, upper)`` tuple) and the numeric
        ``'Confidence Interval Lower'`` / ``'Confidence Interval Upper'``.

    Raises
    ------
    ValueError
        If ``selected_dataframes`` is empty.
    """
    columns = list(columns_of_interest)
    frames = list(selected_dataframes)
    if not frames:
        raise ValueError("selected_dataframes must contain at least one imputed dataset")

    # Per-dataset point estimates (means) and their sampling variances (s^2 / n),
    # restricted to the requested columns so extra columns cannot misalign rows.
    point_estimates = np.array(
        [frame[columns].mean().to_numpy(dtype=float) for frame in frames]
    )
    sampling_variances = np.array([
        (frame[columns].var(ddof=1) / frame[columns].count()).to_numpy(dtype=float)
        for frame in frames
    ])

    pooled_point_estimate = point_estimates.mean(axis=0)
    pooled_within_var = sampling_variances.mean(axis=0)
    if len(frames) > 1:
        # Variance of the estimates ACROSS imputations, one value per column.
        pooled_between_var = point_estimates.var(axis=0, ddof=1)
    else:
        pooled_between_var = np.zeros_like(pooled_point_estimate)

    between_component = (1 + 1 / m) * pooled_between_var
    total_variance = pooled_within_var + between_component
    se = np.sqrt(total_variance)

    with np.errstate(divide="ignore", invalid="ignore"):
        df_lambda = between_component / total_variance
        # No between-imputation variance means no information lost to
        # missingness: the reference distribution degenerates to the normal.
        old_df = np.where(df_lambda > 0, (m - 1) / df_lambda ** 2, np.inf)
        t_statistic = pooled_point_estimate / se

    # Evaluate the t distribution only where df is finite; use the normal limit
    # elsewhere rather than relying on scipy's handling of df=inf.
    finite_df = np.isfinite(old_df)
    safe_df = np.where(finite_df, old_df, 1.0)
    abs_t = np.abs(t_statistic)
    p_value = np.where(
        finite_df,
        2 * stats.t.sf(abs_t, df=safe_df),
        2 * stats.norm.sf(abs_t),
    )
    t_critical = np.where(
        finite_df,
        stats.t.ppf(1 - alpha / 2, df=safe_df),
        stats.norm.ppf(1 - alpha / 2),
    )

    margin_of_error = t_critical * se
    confidence_interval_lower = pooled_point_estimate - margin_of_error
    confidence_interval_upper = pooled_point_estimate + margin_of_error

    results_summary = pd.DataFrame({
        'Variable': columns,
        'Pooled Estimate': pooled_point_estimate,
        'SE': se,
        't-statistic': t_statistic,
        'old df': old_df,
        'p-value': p_value,
        # Previously this held the return value of print(), i.e. None.
        'CI': list(zip(confidence_interval_lower, confidence_interval_upper)),
        'Confidence Interval Lower': confidence_interval_lower,
        'Confidence Interval Upper': confidence_interval_upper,
    })
    return results_summary

In [None]:


def categorical_rubin(imputation, flat_cat_cols_list, m=10, decode_func=None):
    """Pool category proportions across imputations and run the D1 Wald test.

    For every column in ``flat_cat_cols_list`` the per-dataset estimate is the
    proportion ``p_i = sum(column) / n`` with sampling variance
    ``p_i (1 - p_i) / n``. Rubin's rules then give

        W = mean_i(U_i),  B = var_i(p_i, ddof=1),  T = W + (1 + 1/m) B.

    The multivariate Wald statistic tests H0: all proportions are zero, using a
    diagonal covariance (correlations between columns are ignored):

        r_1 = (1 + 1/m) * tr(B W^-1) / k
        D_1 = theta' W^-1 theta / (k (1 + r_1))
        t   = k (m - 1)
        v_1 = 4 + (t - 4) [1 + (1 - 2/t) / r_1]^2      if t > 4
            = t (1 + 1/k) (1 + 1/r_1)^2 / 2            otherwise

    and ``D_1`` is referred to an F(k, v_1) distribution.

    Parameters
    ----------
    imputation : sequence of pandas.DataFrame
        Imputed datasets containing the columns in ``flat_cat_cols_list``.
    flat_cat_cols_list : sequence of str
        Indicator (or probability) columns to pool.
    m : int, default 10
        Number of imputations used in the ``(1 + 1/m)`` correction and in
        ``t``. It should equal ``len(imputation)``.
    decode_func : optional
        Unused; kept so existing calls that pass it keep working.

    Returns
    -------
    df_results : pandas.DataFrame
        Columns ``'Variable'``, ``'Pooled Proportion'``, ``'Pooled Variance'``
        (total variance T), ``'Within Variance'`` and ``'Between Variance'``.
    dict
        ``{'D_1': ..., 'v_1': ..., 'p_value': ...}``. All three are NaN when no
        column has a positive within-imputation variance.

    Notes
    -----
    Columns whose within-imputation variance is zero (proportion exactly 0 or 1
    in every dataset) cannot be inverted and are left out of the Wald test;
    ``k`` counts only the columns that remain. The test results are also
    printed, as before.
    """
    n_datasets = len(imputation)
    rows = []

    for variable in flat_cat_cols_list:
        proportions = []
        sampling_variances = []
        for imputed_data in imputation:
            cat_data = imputed_data[variable]
            total_obs = len(cat_data)
            proportion = cat_data.sum() / total_obs
            proportions.append(proportion)
            sampling_variances.append(proportion * (1 - proportion) / total_obs)

        within_var = float(np.mean(sampling_variances))
        between_var = float(np.var(proportions, ddof=1)) if n_datasets > 1 else 0.0
        rows.append({
            'Variable': variable,
            'Pooled Proportion': float(np.mean(proportions)),
            'Pooled Variance': within_var + (1 + 1 / m) * between_var,
            'Within Variance': within_var,
            'Between Variance': between_var,
        })

    df_results = pd.DataFrame(
        rows,
        columns=['Variable', 'Pooled Proportion', 'Pooled Variance',
                 'Within Variance', 'Between Variance'],
    )

    theta_bar = df_results['Pooled Proportion'].to_numpy(dtype=float)
    within = df_results['Within Variance'].to_numpy(dtype=float)
    between = df_results['Between Variance'].to_numpy(dtype=float)

    # Only parameters with an invertible (positive) within variance take part.
    usable = within > 0
    k = int(usable.sum())

    if k == 0:
        D_1 = v_1 = p_value = float('nan')
    else:
        theta = theta_bar[usable]  # the null value theta_0 is the zero vector
        inv_within = 1.0 / within[usable]

        # Average relative increase in variance due to missingness.
        r_1 = (1 + 1 / m) * float(np.sum(between[usable] * inv_within)) / k
        D_1 = float(np.sum(theta ** 2 * inv_within)) / (k * (1 + r_1))

        t = k * (m - 1)
        if r_1 == 0:
            # No between-imputation variance: v_1 is infinite and k * D_1
            # follows a chi-square distribution with k degrees of freedom.
            v_1 = float('inf')
            p_value = float(stats.chi2.sf(k * D_1, k))
        else:
            if t > 4:
                v_1 = 4 + (t - 4) * (1 + (1 - 2 / t) / r_1) ** 2
            else:
                v_1 = t * (1 + 1 / k) * (1 + 1 / r_1) ** 2 / 2
            p_value = float(f.sf(D_1, k, v_1))

    # Display results
    print("Multivariate Wald Test Results:")
    print(f"Multivariate Wald Statistic (D_1): {D_1}")
    print(f"Degrees of Freedom (v_1): {v_1}")
    print(f"P-Value: {p_value}")

    return df_results, {'D_1': D_1, 'v_1': v_1, 'p_value': p_value}

# Example usage:
# Assuming you have imputation as a list of DataFrames and flat_cat_cols_list defined
# df_results, wald_results = categorical_rubin(imputation, flat_cat_cols_list)
# print(df_results)
# print(wald_results)


In [None]:

def evaluate_imputed_data(actual_data, imputed_data, confidence_interval=1.96):
    """
    Compare an imputed dataset with the ground truth, cell by cell.

    Every metric is computed over ALL cells of the two frames (converted to
    NumPy arrays, so row/column labels are not aligned), not only over the
    cells that were originally missing.

    Parameters:
    - actual_data: DataFrame with the true values.
    - imputed_data: DataFrame with the imputed values; same shape as actual_data.
    - confidence_interval: half-width of the band placed around each imputed
      value (default 1.96). It is added to / subtracted from the value directly
      and is not scaled by a standard error.

    Returns:
    - evaluation_results: ``{"Results": {...}}`` with the keys 'Raw Bias',
      'Coverage Rate', 'Average Width', 'Root Mean Squared Error', 'Accuracy',
      'Mean Squared Error' and 'R-squared'.
    """
    truth = actual_data.to_numpy()
    estimate = imputed_data.to_numpy()

    error = estimate - truth
    mean_squared_error = np.mean(error ** 2)

    # Fixed-width band around every imputed value.
    band_low = estimate - confidence_interval
    band_high = estimate + confidence_interval
    inside_band = (truth >= band_low) & (truth <= band_high)

    metrics = {
        'Raw Bias': np.mean(error),
        'Coverage Rate': np.mean(inside_band),
        'Average Width': np.mean(band_high - band_low),
        'Root Mean Squared Error': np.sqrt(mean_squared_error),
        # Share of cells whose rounded imputed value equals the true value.
        'Accuracy': np.mean(np.round(estimate) == truth),
        'Mean Squared Error': mean_squared_error,
        'R-squared': 1 - mean_squared_error / np.var(truth),
    }

    return {"Results": metrics}

# Implementation

In [None]:
# Load the "credit" dataset. `read_dataset` is not defined in this notebook
# (presumably it comes from `from utils import *` — confirm). Of the five
# returned values, the cells below only use `X`.
data, X, y, Xinds, yinds = read_dataset(dataset="credit")

In [None]:
# Names of the numerical and categorical columns of X, in that order
# (`get_num_cat_vars` is defined outside this notebook).
num_var, cat_var = get_num_cat_vars(X)

In [None]:
# Create the incomplete dataset to impute: `set_fraction_missing` (defined
# outside this notebook) is called with fraction=0.2 and a fixed random_state
# so the masking is repeatable. `datam` is what gets imputed;
# `datam_ind` is presumably the mask of removed entries — confirm.
datam, datam_ind = set_fraction_missing(X, fraction=0.2, random_state=42)

In [None]:
# Convert the masked data into MIDAS input. `a` receives the converted
# categorical columns (second return value of preprocess_midas) and
# `cat_cols_list` the per-variable groups of converted column names.
data_in, a, cat_cols_list = preprocess_midas(datam)


Unnamed: 0,A1_a,A1_b,A4_u,A4_y,A5_g,A5_gg,A5_p,A6_aa,A6_c,A6_cc,...,A11_19.0,A11_20.0,A11_23.0,A11_40.0,A11_67.0,A12_f,A12_t,A13_g,A13_p,A13_s
0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,,,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
2,1.0,0.0,,,1.0,0.0,0.0,,,,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,,,1.0,0.0,1.0,0.0,0.0,,,,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
648,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
649,1.0,0.0,,,1.0,0.0,0.0,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
650,,,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
651,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0


Check whether the columns are suitable for imputation. Avoid imputing identifier columns, datetime columns, high-cardinality categorical columns, and free-text columns. High-cardinality columns that are numerical can still be imputed. Since all of the high-cardinality columns flagged below are numerical, we can run the imputation on all variables.

In [None]:
# Flag identifier, datetime, high-cardinality and object-dtype columns in the
# MIDAS input, then display the resulting dict.
not_impute = no_impute_variables(data_in)
not_impute

{'Identifier Columns': [],
 'Datetime Columns': [],
 'High Cardinality Columns': ['A2', 'A3', 'A8', 'A14', 'A15'],
 'Free Text Columns': []}

In [None]:
# Build the MIDAS network on the preprocessed data. The softmax column groups
# are the ones returned by preprocess_midas (`cat_cols_list`); this cell used
# to read an undefined name `b`, which fails on a fresh kernel.
imputer = md.Midas(layer_structure= [128], vae_layer= False, seed= 42)
imputer.build_model(data_in, softmax_columns= cat_cols_list)
#imputer.overimpute(training_epochs= 5, report_ival= 1,
 #                  report_samples= 32, skip_plot = True)

Size index: [5, 2, 2, 3, 14, 9, 2, 2, 23, 2, 3]

Computation graph constructed



<MIDASpy.midas_base.Midas at 0x7966b49561d0>

Epoch: 0 , loss: 2611.080348205566


Epoch: 1 , loss: 2419.632500457764

Epoch: 2 , loss: 2323.5426231384276


Epoch: 3 , loss: 2239.863776397705

Epoch: 4 , loss: 1582.1817073822021

Epoch: 5 , loss: 2268.323371887207


In [None]:
# Train the MIDAS model built in the previous cell for 5 epochs.
imputer.train_model(training_epochs=5)

Model initialised

Epoch: 0 , loss: 2840.6561241149902
Epoch: 1 , loss: 2598.817559814453
Epoch: 2 , loss: 2506.788185119629
Epoch: 3 , loss: 2339.7757820129395
Epoch: 4 , loss: 1735.8692935943604
Training complete. Saving file...
Model saved in file: tmp/MIDAS


<MIDASpy.midas_base.Midas at 0x7966b49561d0>

In [None]:
#imputer.train_model(training_epochs=6)


If you run this line
```
imputer.train_model(training_epochs=6)
```
you will get:


Epoch: 0 , loss: 2620.795078277588 <br>
Epoch: 1 , loss: 2434.656007385254<br>
Epoch: 2 , loss: 2319.800427246094<br>
Epoch: 3 , loss: 2161.0723258972166<br>
Epoch: 4 , loss: 1601.0904134750367<br>
Epoch: 5 , loss: 2279.7784065246583<br>
Training complete. Saving file...<br>
Model saved in file: tmp/MIDAS<br>
<MIDASpy.midas_base.Midas at 0x7802afcafa90>

In [None]:
# Draw m=10 imputed datasets from the trained model and keep them as a list
# (`output_list`); later cells index each element like a DataFrame.
imputations = imputer.generate_samples(m=10).output_list
imputations

In [None]:
# Display the numerical column names that are pooled and evaluated below.
num_var

['A2', 'A3', 'A8', 'A14', 'A15']

In [None]:
# Keep only the numerical columns of every imputed dataset (as copies, so the
# imputations themselves stay untouched) and display the resulting list.
selected_dataframes = [dataframe[num_var].copy() for dataframe in imputations]
selected_dataframes

Rubin's rule for numerical variables:

In [None]:
# Pool the numerical columns over the 10 imputations; the returned summary
# table is displayed as the cell output.
numerical_rubin(selected_dataframes, num_var, m=10, alpha=0.05)

95% Confidence Interval: ([-4975.11020101 -4995.76348533 -5000.99595723 -4863.02063592
 -9575.5321733 ], [ 5024.85239801  5003.84435676  4999.08015714  5143.03418271
 11050.89904873])


Unnamed: 0,Variable,Pooled Estimate,SE,t-statistic,old df,p-value,CI
0,A2,24.871099,2140.177339,0.011621,7.445435,0.991033,
1,A3,4.040436,2139.866458,0.001888,7.441109,0.998543,
2,A8,-0.9579,2140.276687,-0.000448,7.446817,0.999655,
3,A14,140.006773,2145.425456,0.065258,7.518734,0.94967,
4,A15,737.683438,5238.105085,0.14083,267.17009,0.88811,


In [None]:
# `imputations` (the list of 10 imputed datasets) is consumed by the cells below.
data_flat = X.copy()
# Flatten the per-variable column groups returned by preprocess_midas into one
# list of column names for categorical_rubin. This used to read an undefined
# name `b`, which fails on a fresh kernel.
flat_cat_cols_list = [col for sublist in cat_cols_list for col in sublist]
flat_cat_cols_list

In [None]:
# Display the list of imputed datasets.
imputations

And for categoricals:

In [None]:
# Pool the converted categorical columns over the 10 imputations.
# categorical_rubin prints the Wald test itself; the pooled table and the
# returned test dict are printed afterwards.
df_results, wald_results = categorical_rubin(imputations, flat_cat_cols_list,m=10, decode_func=None)
print(df_results)
print(wald_results)

Multivariate Wald Test Results:
Multivariate Wald Statistic (D_1): 255903.16187681348
Degrees of Freedom (v_1): 4.094275756558672
P-Value: 1.821975903482098e-11
   Variable  Pooled Proportion  Pooled Variance
0      A1_a           0.323519         0.000012
1      A1_b           0.676481         0.000012
2      A4_u           0.756541         0.000020
3      A4_y           0.243459         0.000020
4      A5_g           0.757542         0.000016
..      ...                ...              ...
57    A12_f           0.539825         0.000029
58    A12_t           0.460175         0.000029
59    A13_g           0.845339         0.000034
60    A13_p           0.020516         0.000005
61    A13_s           0.134145         0.000028

[62 rows x 3 columns]
{'D_1': 255903.16187681348, 'v_1': 4.094275756558672, 'p_value': 1.821975903482098e-11}


In [None]:
#s = decode_midas(imputations, cat_cols_list, cat_var )
#ss = encode_categorical_columns(s, cat_var)



Decoding the MIDAS categorical variables is complex, so here the evaluation is carried out on the numerical variables only.

In [None]:

# Numerical columns on which the MIDAS imputations are evaluated
columns_of_interest = ['A2', 'A3', 'A8', 'A14', 'A15']

# Results keyed by 'Imputation_<n>' (1-based)
all_evaluation_results = {}
# Half-width of the band evaluate_imputed_data places around each imputed value
confidence_interval = 1.96
# Evaluate every imputed dataset against the complete data X
for i, imputed_data in enumerate(imputations):
    # Restrict the imputed dataset to the evaluated columns
    imputed_data_subset = imputed_data[columns_of_interest]

    # Metrics are computed over all cells of these columns, not only the masked ones
    evaluation_results = evaluate_imputed_data(X[columns_of_interest], imputed_data_subset, confidence_interval)

    # Store the metrics of this imputation
    all_evaluation_results[f'Imputation_{i + 1}'] = evaluation_results

# Print every metric of every imputation
for imputation_name, results in all_evaluation_results.items():
    print(f"\nResults for {imputation_name}:")
    for key, value in results["Results"].items():
        print(f"{key}: {value}")



Results for Imputation_1:
Raw Bias: -65.8269281938739
Coverage Rate: 0.845635528330781
Average Width: 3.920000000000002
Root Mean Squared Error: 1020.4597303465616
Accuracy: 0.4113323124042879
Mean Squared Error: 1041338.0612589772
R-squared: 0.8162808206789844

Results for Imputation_2:
Raw Bias: -65.34054614654734
Coverage Rate: 0.8447166921898928
Average Width: 3.920000000000002
Root Mean Squared Error: 1021.8421455681176
Accuracy: 0.4104134762633997
Mean Squared Error: 1044161.3704592541
R-squared: 0.8157827153388059

Results for Imputation_3:
Raw Bias: -64.47852221024927
Coverage Rate: 0.8419601837672281
Average Width: 3.920000000000002
Root Mean Squared Error: 1020.1076673215841
Accuracy: 0.4104134762633997
Mean Squared Error: 1040619.6529282837
R-squared: 0.8164075666357901

Results for Imputation_4:
Raw Bias: -64.80671422910228
Coverage Rate: 0.8462480857580398
Average Width: 3.920000000000002
Root Mean Squared Error: 1020.1390336738639
Accuracy: 0.4098009188361409
Mean Square

# Iterative imputer

The process for investigating the performance of scikit-learn's IterativeImputer is similar to the one used for MIDAS. Only the decoding step can be chosen differently.

In [None]:
# Label-encode the categorical columns of the complete data X.
# `credit_mappings` keeps the code -> category lookup for each encoded column.
credit_num, credit_cat = get_num_cat_vars(X)
credit_encoded, credit_mappings = encode_categorical_columns(X, credit_cat)


In [None]:
# NOTE: these imports repeat the ones in the import cell near the top.
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import numpy as np

# Mask a fraction of the label-encoded data to create the values to impute.
# NOTE(review): this rebinds `data`, which earlier held the first value
# returned by read_dataset.
fraction = 0.25
data, data_ind = set_fraction_missing(credit_encoded, fraction=fraction, random_state=42)

# Iterative imputer that uses a random forest regressor as its estimator
rf_imputer = IterativeImputer(estimator=RandomForestRegressor(), random_state=42)

# Imputed datasets are collected here
imputed_datasets = []

# Fit the same imputer three times on the masked data; each result is wrapped
# back into a DataFrame with the original column names.
# NOTE(review): label-encoded categorical columns are imputed by a regressor,
# so the recorded output contains fractional codes such as 0.56.
for _ in range(3):
    imputed_data = pd.DataFrame(rf_imputer.fit_transform(data), columns=credit_encoded.columns)
    imputed_datasets.append(imputed_data)

In [None]:
# Display the three imputed datasets produced by the iterative imputer.
imputed_datasets

[       A1       A2        A3   A4   A5     A6    A7       A8    A9  A10  A11  \
 0    1.00  30.8300   0.00000  1.0  0.0  12.00  7.00  1.98805  0.63  1.0  1.0   
 1    0.56  58.6700   4.46000  1.0  0.0  10.00  3.00  3.04000  1.00  1.0  6.0   
 2    0.00  24.5000   0.50000  1.0  0.0   4.04  3.00  1.50000  1.00  0.0  0.0   
 3    1.00  27.8300   1.54000  1.0  0.0  12.00  7.00  3.75000  1.00  1.0  5.0   
 4    0.87  20.1700   3.97150  1.0  0.0   6.61  7.00  1.71000  0.31  0.0  0.0   
 ..    ...      ...       ...  ...  ...    ...   ...      ...   ...  ...  ...   
 648  1.00  23.8226  10.08500  2.0  2.0   4.00  3.00  1.25000  0.22  0.0  0.0   
 649  0.00  22.6700   3.65005  1.0  0.0   6.75  6.04  2.00000  0.63  1.0  2.0   
 650  0.67  25.2500   3.70460  2.0  2.0   5.00  2.00  2.00000  0.00  1.0  1.0   
 651  1.00  27.2682   0.20500  1.0  0.0   0.00  5.39  0.04000  0.00  0.0  0.0   
 652  1.00  36.3972   3.37500  1.0  0.0   1.00  2.91  8.29000  0.00  0.0  0.0   
 
      A12  A13     A14    

In [None]:
# Same evaluation as for MIDAS, applied to the iterative-imputer datasets.
columns_of_interest = ['A2', 'A3', 'A8', 'A14', 'A15']

# Results keyed by 'Imputation_<n>' (1-based); this replaces the MIDAS results
# stored under the same variable name.
all_evaluation_results = {}
# Half-width of the band evaluate_imputed_data places around each imputed value
confidence_interval = 1.96
# Evaluate every imputed dataset against the complete data X
for i, imputed_data in enumerate(imputed_datasets):
    # Restrict the imputed dataset to the evaluated columns
    imputed_data_subset = imputed_data[columns_of_interest]

    # Metrics are computed over all cells of these columns, not only the masked ones
    evaluation_results = evaluate_imputed_data(X[columns_of_interest], imputed_data_subset, confidence_interval)

    # Store the metrics of this imputation
    all_evaluation_results[f'Imputation_{i + 1}'] = evaluation_results

# Print every metric of every imputation
for imputation_name, results in all_evaluation_results.items():
    print(f"\nResults for {imputation_name}:")
    for key, value in results["Results"].items():
        print(f"{key}: {value}")


Results for Imputation_1:
Raw Bias: -13.765191837672285
Coverage Rate: 0.8131699846860643
Average Width: 3.920000000000003
Root Mean Squared Error: 1151.739251754756
Accuracy: 0.3816232771822358
Mean Squared Error: 1326503.3040326054
R-squared: 0.7659702382443914

Results for Imputation_2:
Raw Bias: -9.674623062787134
Coverage Rate: 0.8159264931087289
Average Width: 3.920000000000003
Root Mean Squared Error: 1183.94881589213
Accuracy: 0.38284839203675347
Mean Squared Error: 1401734.7986523765
R-squared: 0.7526974414794987

Results for Imputation_3:
Raw Bias: -10.489341393568143
Coverage Rate: 0.8134762633996937
Average Width: 3.9200000000000026
Root Mean Squared Error: 1276.2722452881478
Accuracy: 0.38254211332312404
Mean Squared Error: 1628870.8440928499
R-squared: 0.7126247221436722
