# with BMI

In [31]:
# !pip install itables

import pandas as pd
from scipy.stats import f_oneway, levene, kruskal, mannwhitneyu
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multitest import multipletests
import itables.interactive
from itables import show

# Read the with-BMI workbook from the notebook's working directory
with_bmi = pd.read_excel('./with_BMI.xlsx')

# Narrow the frame down to the four columns that are compared with each other
all_columns_with_bmi_df = with_bmi.loc[:, [
    'actual_casetime_minutes',
    'scheduled_duration',
    'with_bmi_with_scheduled_duration',
    'with_bmi_without_scheduled_duration',
]]

# Per-column descriptive statistics, rendered as an interactive table
summary_all_columns_with_bmi = all_columns_with_bmi_df.describe()
print("Summary Statistics for All Columns (with BMI):")
show(summary_all_columns_with_bmi)

# Function to perform ANOVA and Tukey's HSD test or Kruskal-Wallis test
def perform_tests_all_columns_with_bmi(df, alpha=0.05):
    """Compare the distributions of every column of ``df`` and print the results.

    Each column of ``df`` is treated as one group. Levene's test for equal
    variances selects the route:

    * Levene p-value >= ``alpha``: one-way ANOVA followed by Tukey's HSD.
    * otherwise: Kruskal-Wallis followed by pairwise two-sided Mann-Whitney U
      tests whose p-values are Holm-adjusted.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame with one numeric column per group. The groups are taken
        from ``df.columns``, so the omnibus tests and the pairwise
        comparisons always operate on exactly the same set of groups.
    alpha : float, default 0.05
        Threshold used for the Levene decision, the Tukey family-wise error
        rate and the ``reject`` column of the Mann-Whitney table.

    Returns
    -------
    None
        Everything is printed, and the pairwise table is rendered via ``show``.

    Notes
    -----
    Missing values are dropped per group before testing; otherwise a single
    NaN turns Levene's p-value into NaN, which silently selects the
    non-parametric branch and yields NaN results there as well.

    NOTE(review): every test used here assumes independent groups. If the
    columns are measurements of the same cases (paired data), a Friedman /
    Wilcoxon signed-rank analysis may be the better fit -- confirm.
    """
    group_names = list(df.columns)

    # A single long-format frame feeds both the omnibus and the post-hoc tests.
    data_long = (
        df.melt(var_name='Group', value_name='Value')
        .dropna(subset=['Value'])
    )
    samples = [
        data_long.loc[data_long['Group'] == name, 'Value']
        for name in group_names
    ]

    # Perform Levene's test for homogeneity of variances
    levene_test_result = levene(*samples)
    print("Levene's Test Result:")
    print(f"Statistic: {levene_test_result.statistic}")
    print(f"p-value: {levene_test_result.pvalue}")

    if levene_test_result.pvalue >= alpha:
        # Variances look homogeneous: parametric route
        anova_result = f_oneway(*samples)
        print("ANOVA Test Result:")
        print(f"F-statistic: {anova_result.statistic}")
        print(f"p-value: {anova_result.pvalue}")

        tukey_result = pairwise_tukeyhsd(endog=data_long['Value'], groups=data_long['Group'], alpha=alpha)
        print(f"Multiple Comparison of Means - Tukey HSD, FWER={alpha}")

        # summary().data is a list of rows whose first row is the header;
        # build the summary table once instead of twice.
        header, *rows = tukey_result.summary().data
        tukey_summary = pd.DataFrame(data=rows, columns=header)
        show(tukey_summary)
    else:
        # Variances differ: rank-based route
        kruskal_result = kruskal(*samples)
        print("Kruskal-Wallis Test Result:")
        print(f"H-statistic: {kruskal_result.statistic}")
        print(f"p-value: {kruskal_result.pvalue}")

        sample_by_group = dict(zip(group_names, samples))
        comparisons = [
            (group1, group2)
            for i, group1 in enumerate(group_names)
            for group2 in group_names[i + 1:]
        ]
        p_values = []
        meandiffs = []
        for group1, group2 in comparisons:
            data1 = sample_by_group[group1]
            data2 = sample_by_group[group2]
            # Request the two-sided test explicitly: older SciPy releases
            # defaulted to a one-sided alternative.
            _, p = mannwhitneyu(data1, data2, alternative='two-sided')
            p_values.append(p)
            meandiffs.append(data1.mean() - data2.mean())
        p_adjusted = multipletests(p_values, method='holm')[1]

        result_summary = pd.DataFrame({
            'group1': [comp[0] for comp in comparisons],
            'group2': [comp[1] for comp in comparisons],
            'meandiff': meandiffs,
            'p-adj': p_adjusted,
            'reject': p_adjusted < alpha
        })

        print("Multiple Comparison of Means - Mann-Whitney U Test with Holm-Bonferroni Correction")
        show(result_summary)

# Run the variance check, omnibus test and pairwise comparisons on the selected columns
print("\nAnalysis for All Columns (with BMI):")
perform_tests_all_columns_with_bmi(df=all_columns_with_bmi_df)


Summary Statistics for All Columns (with BMI):


Unnamed: 0,actual_casetime_minutes,scheduled_duration,with_bmi_with_scheduled_duration,with_bmi_without_scheduled_duration
Loading ITables v2.1.4 from the internet... (need help?),,,,



Analysis for All Columns (with BMI):
Levene's Test Result:
Statistic: 1.159349288957427
p-value: 0.3236833340651443
ANOVA Test Result:
F-statistic: 17.558765829878805
p-value: 2.314940204654016e-11
Multiple Comparison of Means - Tukey HSD, FWER=0.05


group1,group2,meandiff,p-adj,lower,upper,reject
Loading ITables v2.1.4 from the internet... (need help?),,,,,,


In [32]:
import pandas as pd
from scipy.stats import f_oneway, levene, kruskal, mannwhitneyu
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multitest import multipletests
import itables.interactive
from itables import show

# Read the with-BMI workbook from the notebook's working directory
with_bmi = pd.read_excel('./with_BMI.xlsx')

# Absolute error of each duration column against the actual case time
absolute_error_scheduled = with_bmi['scheduled_duration'].sub(with_bmi['actual_casetime_minutes']).abs()
absolute_error_with_scheduled = with_bmi['with_bmi_with_scheduled_duration'].sub(with_bmi['actual_casetime_minutes']).abs()
absolute_error_without_scheduled = with_bmi['with_bmi_without_scheduled_duration'].sub(with_bmi['actual_casetime_minutes']).abs()

# Squared error of each duration column against the actual case time
squared_error_scheduled = with_bmi['scheduled_duration'].sub(with_bmi['actual_casetime_minutes']).pow(2)
squared_error_with_scheduled = with_bmi['with_bmi_with_scheduled_duration'].sub(with_bmi['actual_casetime_minutes']).pow(2)
squared_error_without_scheduled = with_bmi['with_bmi_without_scheduled_duration'].sub(with_bmi['actual_casetime_minutes']).pow(2)

# Wide frames for the analysis: the actual case time is carried along for
# reference, while the remaining columns hold the error of the same-named source column.
absolute_errors_df = pd.DataFrame({
    'actual_casetime_minutes': with_bmi['actual_casetime_minutes'],
    'scheduled_duration': absolute_error_scheduled,
    'with_bmi_with_scheduled_duration': absolute_error_with_scheduled,
    'with_bmi_without_scheduled_duration': absolute_error_without_scheduled
})

squared_errors_df = pd.DataFrame({
    'actual_casetime_minutes': with_bmi['actual_casetime_minutes'],
    'scheduled_duration': squared_error_scheduled,
    'with_bmi_with_scheduled_duration': squared_error_with_scheduled,
    'with_bmi_without_scheduled_duration': squared_error_without_scheduled
})

# Function to perform ANOVA and Tukey's HSD test or Kruskal-Wallis test
def perform_tests(errors_df, error_type, alpha=0.05, reference_col='actual_casetime_minutes'):
    """Compare the error distributions held in ``errors_df`` and print the results.

    Every column except ``reference_col`` is treated as one group of errors.
    Levene's test for equal variances selects the route:

    * Levene p-value >= ``alpha``: one-way ANOVA followed by Tukey's HSD.
    * otherwise: Kruskal-Wallis followed by pairwise two-sided Mann-Whitney U
      tests whose p-values are Holm-adjusted.

    Parameters
    ----------
    errors_df : pandas.DataFrame
        Wide frame with one numeric error column per group and, optionally,
        ``reference_col``. Groups are derived from the frame's own columns, so
        the function does not depend on specific column names.
    error_type : str
        Label interpolated into the printed headings.
    alpha : float, default 0.05
        Threshold used for the Levene decision, the Tukey family-wise error
        rate and the ``reject`` column of the Mann-Whitney table.
    reference_col : str, default 'actual_casetime_minutes'
        Column that is excluded from all tests.

    Returns
    -------
    None
        Everything is printed, and the pairwise table is rendered via ``show``.

    Notes
    -----
    Missing values are dropped per group before testing; otherwise a single
    NaN turns Levene's p-value into NaN, which silently selects the
    non-parametric branch and yields NaN results there as well.

    NOTE(review): every test used here assumes independent groups. If the
    error columns come from the same cases (paired data), a Friedman /
    Wilcoxon signed-rank analysis may be the better fit -- confirm.
    """
    group_names = [col for col in errors_df.columns if col != reference_col]

    # A single long-format frame feeds both the omnibus and the post-hoc tests.
    data_long = (
        errors_df[group_names]
        .melt(var_name='Group', value_name='Error')
        .dropna(subset=['Error'])
    )
    samples = [
        data_long.loc[data_long['Group'] == name, 'Error']
        for name in group_names
    ]

    # Perform Levene's test for homogeneity of variances
    levene_test_result = levene(*samples)
    print(f"Levene's Test Result for {error_type}:")
    print(f"Statistic: {levene_test_result.statistic}")
    print(f"p-value: {levene_test_result.pvalue}")

    if levene_test_result.pvalue >= alpha:
        # Variances look homogeneous: parametric route
        anova_result = f_oneway(*samples)
        print(f"ANOVA Test Result for {error_type}:")
        print(f"F-statistic: {anova_result.statistic}")
        print(f"p-value: {anova_result.pvalue}")

        tukey_result = pairwise_tukeyhsd(endog=data_long['Error'], groups=data_long['Group'], alpha=alpha)
        print(f"Multiple Comparison of Means - Tukey HSD, FWER={alpha} for {error_type}")

        # summary().data is a list of rows whose first row is the header;
        # build the summary table once instead of twice.
        header, *rows = tukey_result.summary().data
        tukey_summary = pd.DataFrame(data=rows, columns=header)
        show(tukey_summary)
    else:
        # Variances differ: rank-based route
        kruskal_result = kruskal(*samples)
        print(f"Kruskal-Wallis Test Result for {error_type}:")
        print(f"H-statistic: {kruskal_result.statistic}")
        print(f"p-value: {kruskal_result.pvalue}")

        sample_by_group = dict(zip(group_names, samples))
        comparisons = [
            (group1, group2)
            for i, group1 in enumerate(group_names)
            for group2 in group_names[i + 1:]
        ]
        p_values = []
        meandiffs = []
        for group1, group2 in comparisons:
            data1 = sample_by_group[group1]
            data2 = sample_by_group[group2]
            # Request the two-sided test explicitly: older SciPy releases
            # defaulted to a one-sided alternative.
            _, p = mannwhitneyu(data1, data2, alternative='two-sided')
            p_values.append(p)
            meandiffs.append(data1.mean() - data2.mean())
        p_adjusted = multipletests(p_values, method='holm')[1]

        result_summary = pd.DataFrame({
            'group1': [comp[0] for comp in comparisons],
            'group2': [comp[1] for comp in comparisons],
            'meandiff': meandiffs,
            'p-adj': p_adjusted,
            'reject': p_adjusted < alpha
        })

        print(f"Multiple Comparison of Means - Mann-Whitney U Test with Holm-Bonferroni Correction for {error_type}")
        show(result_summary)

# Descriptive statistics for both error frames, computed up front
summary_absolute_errors = absolute_errors_df.describe()
summary_squared_errors = squared_errors_df.describe()

# NOTE: squaring is a monotone transform of the absolute error, so the
# rank-based Kruskal-Wallis / Mann-Whitney results are the same for both error
# types; only Levene, ANOVA/Tukey and the reported mean differences can differ.

# --- Absolute errors: summary table, then the statistical comparison ---
print("Summary Statistics for Absolute Errors:")
show(summary_absolute_errors)
print("\nAnalysis for Absolute Errors:")
perform_tests(errors_df=absolute_errors_df, error_type="Absolute Error")

# --- Squared errors: summary table, then the statistical comparison ---
print("Summary Statistics for Squared Errors:")
show(summary_squared_errors)
print("\nAnalysis for Squared Errors:")
perform_tests(errors_df=squared_errors_df, error_type="Squared Error")


Summary Statistics for Absolute Errors:


Unnamed: 0,actual_casetime_minutes,scheduled_duration,with_bmi_with_scheduled_duration,with_bmi_without_scheduled_duration
Loading ITables v2.1.4 from the internet... (need help?),,,,



Analysis for Absolute Errors:
Levene's Test Result for Absolute Error:
Statistic: 13.604775953474679
p-value: 1.2689986267208049e-06
Kruskal-Wallis Test Result for Absolute Error:
H-statistic: 174.41680199137474
p-value: 1.3362037060589006e-38
Multiple Comparison of Means - Mann-Whitney U Test with Holm-Bonferroni Correction for Absolute Error


group1,group2,meandiff,p-adj,reject
Loading ITables v2.1.4 from the internet... (need help?),,,,


Summary Statistics for Squared Errors:


Unnamed: 0,actual_casetime_minutes,scheduled_duration,with_bmi_with_scheduled_duration,with_bmi_without_scheduled_duration
Loading ITables v2.1.4 from the internet... (need help?),,,,



Analysis for Squared Errors:
Levene's Test Result for Squared Error:
Statistic: 7.024467114745199
p-value: 0.0008963961956377259
Kruskal-Wallis Test Result for Squared Error:
H-statistic: 174.41680199137474
p-value: 1.3362037060589006e-38
Multiple Comparison of Means - Mann-Whitney U Test with Holm-Bonferroni Correction for Squared Error


group1,group2,meandiff,p-adj,reject
Loading ITables v2.1.4 from the internet... (need help?),,,,


# without BMI

In [29]:
import pandas as pd
from scipy.stats import f_oneway, levene, kruskal, mannwhitneyu
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multitest import multipletests
import itables.interactive
from itables import show

# Read the without-BMI workbook from the notebook's working directory
without_bmi = pd.read_excel('./without_BMI.xlsx')

# Narrow the frame down to the four columns that are compared with each other
all_columns_df = without_bmi.loc[:, [
    'actual_casetime_minutes',
    'scheduled_duration',
    'without_bmi_with_scheduled_duration',
    'without_bmi_without_scheduled_duration',
]]

# Per-column descriptive statistics, rendered as an interactive table
summary_all_columns = all_columns_df.describe()
print("Summary Statistics for All Columns:")
show(summary_all_columns)

# Function to perform ANOVA and Tukey's HSD test or Kruskal-Wallis test
def perform_tests_all_columns(df, alpha=0.05):
    """Compare the distributions of every column of ``df`` and print the results.

    Each column of ``df`` is treated as one group. Levene's test for equal
    variances selects the route:

    * Levene p-value >= ``alpha``: one-way ANOVA followed by Tukey's HSD.
    * otherwise: Kruskal-Wallis followed by pairwise two-sided Mann-Whitney U
      tests whose p-values are Holm-adjusted.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame with one numeric column per group. The groups are taken
        from ``df.columns``, so the omnibus tests and the pairwise
        comparisons always operate on exactly the same set of groups.
    alpha : float, default 0.05
        Threshold used for the Levene decision, the Tukey family-wise error
        rate and the ``reject`` column of the Mann-Whitney table.

    Returns
    -------
    None
        Everything is printed, and the pairwise table is rendered via ``show``.

    Notes
    -----
    Missing values are dropped per group before testing; otherwise a single
    NaN turns Levene's p-value into NaN, which silently selects the
    non-parametric branch and yields NaN results there as well.

    NOTE(review): every test used here assumes independent groups. If the
    columns are measurements of the same cases (paired data), a Friedman /
    Wilcoxon signed-rank analysis may be the better fit -- confirm.
    """
    group_names = list(df.columns)

    # A single long-format frame feeds both the omnibus and the post-hoc tests.
    data_long = (
        df.melt(var_name='Group', value_name='Value')
        .dropna(subset=['Value'])
    )
    samples = [
        data_long.loc[data_long['Group'] == name, 'Value']
        for name in group_names
    ]

    # Perform Levene's test for homogeneity of variances
    levene_test_result = levene(*samples)
    print("Levene's Test Result:")
    print(f"Statistic: {levene_test_result.statistic}")
    print(f"p-value: {levene_test_result.pvalue}")

    if levene_test_result.pvalue >= alpha:
        # Variances look homogeneous: parametric route
        anova_result = f_oneway(*samples)
        print("ANOVA Test Result:")
        print(f"F-statistic: {anova_result.statistic}")
        print(f"p-value: {anova_result.pvalue}")

        tukey_result = pairwise_tukeyhsd(endog=data_long['Value'], groups=data_long['Group'], alpha=alpha)
        print(f"Multiple Comparison of Means - Tukey HSD, FWER={alpha}")

        # summary().data is a list of rows whose first row is the header;
        # build the summary table once instead of twice.
        header, *rows = tukey_result.summary().data
        tukey_summary = pd.DataFrame(data=rows, columns=header)
        show(tukey_summary)
    else:
        # Variances differ: rank-based route
        kruskal_result = kruskal(*samples)
        print("Kruskal-Wallis Test Result:")
        print(f"H-statistic: {kruskal_result.statistic}")
        print(f"p-value: {kruskal_result.pvalue}")

        sample_by_group = dict(zip(group_names, samples))
        comparisons = [
            (group1, group2)
            for i, group1 in enumerate(group_names)
            for group2 in group_names[i + 1:]
        ]
        p_values = []
        meandiffs = []
        for group1, group2 in comparisons:
            data1 = sample_by_group[group1]
            data2 = sample_by_group[group2]
            # Request the two-sided test explicitly: older SciPy releases
            # defaulted to a one-sided alternative.
            _, p = mannwhitneyu(data1, data2, alternative='two-sided')
            p_values.append(p)
            meandiffs.append(data1.mean() - data2.mean())
        p_adjusted = multipletests(p_values, method='holm')[1]

        result_summary = pd.DataFrame({
            'group1': [comp[0] for comp in comparisons],
            'group2': [comp[1] for comp in comparisons],
            'meandiff': meandiffs,
            'p-adj': p_adjusted,
            'reject': p_adjusted < alpha
        })

        print("Multiple Comparison of Means - Mann-Whitney U Test with Holm-Bonferroni Correction")
        show(result_summary)

# Run the variance check, omnibus test and pairwise comparisons on the selected columns
print("\nAnalysis for All Columns:")
perform_tests_all_columns(df=all_columns_df)


Summary Statistics for All Columns:


Unnamed: 0,actual_casetime_minutes,scheduled_duration,without_bmi_with_scheduled_duration,without_bmi_without_scheduled_duration
Loading ITables v2.1.4 from the internet... (need help?),,,,



Analysis for All Columns:
Levene's Test Result:
Statistic: 4.382916027412893
p-value: 0.004337423089827527
Kruskal-Wallis Test Result:
H-statistic: 316.1805651200377
p-value: 3.129949971413688e-68
Multiple Comparison of Means - Mann-Whitney U Test with Holm-Bonferroni Correction


group1,group2,meandiff,p-adj,reject
Loading ITables v2.1.4 from the internet... (need help?),,,,


In [28]:
import pandas as pd
from scipy.stats import f_oneway, levene, kruskal, mannwhitneyu
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multitest import multipletests
import itables.interactive
from itables import show

# Read the without-BMI workbook from the notebook's working directory
without_bmi = pd.read_excel('./without_BMI.xlsx')

# Absolute error of each duration column against the actual case time
absolute_error_scheduled = without_bmi['scheduled_duration'].sub(without_bmi['actual_casetime_minutes']).abs()
absolute_error_with_scheduled = without_bmi['without_bmi_with_scheduled_duration'].sub(without_bmi['actual_casetime_minutes']).abs()
absolute_error_without_scheduled = without_bmi['without_bmi_without_scheduled_duration'].sub(without_bmi['actual_casetime_minutes']).abs()

# Squared error of each duration column against the actual case time
squared_error_scheduled = without_bmi['scheduled_duration'].sub(without_bmi['actual_casetime_minutes']).pow(2)
squared_error_with_scheduled = without_bmi['without_bmi_with_scheduled_duration'].sub(without_bmi['actual_casetime_minutes']).pow(2)
squared_error_without_scheduled = without_bmi['without_bmi_without_scheduled_duration'].sub(without_bmi['actual_casetime_minutes']).pow(2)

# Wide frames for the analysis: the actual case time is carried along for
# reference, while the remaining columns hold the error of the same-named source column.
absolute_errors_df = pd.DataFrame({
    'actual_casetime_minutes': without_bmi['actual_casetime_minutes'],
    'scheduled_duration': absolute_error_scheduled,
    'without_bmi_with_scheduled_duration': absolute_error_with_scheduled,
    'without_bmi_without_scheduled_duration': absolute_error_without_scheduled
})

squared_errors_df = pd.DataFrame({
    'actual_casetime_minutes': without_bmi['actual_casetime_minutes'],
    'scheduled_duration': squared_error_scheduled,
    'without_bmi_with_scheduled_duration': squared_error_with_scheduled,
    'without_bmi_without_scheduled_duration': squared_error_without_scheduled
})


# Function to perform ANOVA and Tukey's HSD test or Kruskal-Wallis test
def perform_tests(errors_df, error_type, alpha=0.05, reference_col='actual_casetime_minutes'):
    """Compare the error distributions held in ``errors_df`` and print the results.

    Every column except ``reference_col`` is treated as one group of errors.
    Levene's test for equal variances selects the route:

    * Levene p-value >= ``alpha``: one-way ANOVA followed by Tukey's HSD.
    * otherwise: Kruskal-Wallis followed by pairwise two-sided Mann-Whitney U
      tests whose p-values are Holm-adjusted.

    Parameters
    ----------
    errors_df : pandas.DataFrame
        Wide frame with one numeric error column per group and, optionally,
        ``reference_col``. Groups are derived from the frame's own columns, so
        the function does not depend on specific column names.
    error_type : str
        Label interpolated into the printed headings.
    alpha : float, default 0.05
        Threshold used for the Levene decision, the Tukey family-wise error
        rate and the ``reject`` column of the Mann-Whitney table.
    reference_col : str, default 'actual_casetime_minutes'
        Column that is excluded from all tests.

    Returns
    -------
    None
        Everything is printed, and the pairwise table is rendered via ``show``.

    Notes
    -----
    Missing values are dropped per group before testing; otherwise a single
    NaN turns Levene's p-value into NaN, which silently selects the
    non-parametric branch and yields NaN results there as well.

    NOTE(review): every test used here assumes independent groups. If the
    error columns come from the same cases (paired data), a Friedman /
    Wilcoxon signed-rank analysis may be the better fit -- confirm.
    """
    group_names = [col for col in errors_df.columns if col != reference_col]

    # A single long-format frame feeds both the omnibus and the post-hoc tests.
    data_long = (
        errors_df[group_names]
        .melt(var_name='Group', value_name='Error')
        .dropna(subset=['Error'])
    )
    samples = [
        data_long.loc[data_long['Group'] == name, 'Error']
        for name in group_names
    ]

    # Perform Levene's test for homogeneity of variances
    levene_test_result = levene(*samples)
    print(f"Levene's Test Result for {error_type}:")
    print(f"Statistic: {levene_test_result.statistic}")
    print(f"p-value: {levene_test_result.pvalue}")

    if levene_test_result.pvalue >= alpha:
        # Variances look homogeneous: parametric route
        anova_result = f_oneway(*samples)
        print(f"ANOVA Test Result for {error_type}:")
        print(f"F-statistic: {anova_result.statistic}")
        print(f"p-value: {anova_result.pvalue}")

        tukey_result = pairwise_tukeyhsd(endog=data_long['Error'], groups=data_long['Group'], alpha=alpha)
        print(f"Multiple Comparison of Means - Tukey HSD, FWER={alpha} for {error_type}")

        # summary().data is a list of rows whose first row is the header;
        # build the summary table once instead of twice.
        header, *rows = tukey_result.summary().data
        tukey_summary = pd.DataFrame(data=rows, columns=header)
        show(tukey_summary)
    else:
        # Variances differ: rank-based route
        kruskal_result = kruskal(*samples)
        print(f"Kruskal-Wallis Test Result for {error_type}:")
        print(f"H-statistic: {kruskal_result.statistic}")
        print(f"p-value: {kruskal_result.pvalue}")

        sample_by_group = dict(zip(group_names, samples))
        comparisons = [
            (group1, group2)
            for i, group1 in enumerate(group_names)
            for group2 in group_names[i + 1:]
        ]
        p_values = []
        meandiffs = []
        for group1, group2 in comparisons:
            data1 = sample_by_group[group1]
            data2 = sample_by_group[group2]
            # Request the two-sided test explicitly: older SciPy releases
            # defaulted to a one-sided alternative.
            _, p = mannwhitneyu(data1, data2, alternative='two-sided')
            p_values.append(p)
            meandiffs.append(data1.mean() - data2.mean())
        p_adjusted = multipletests(p_values, method='holm')[1]

        result_summary = pd.DataFrame({
            'group1': [comp[0] for comp in comparisons],
            'group2': [comp[1] for comp in comparisons],
            'meandiff': meandiffs,
            'p-adj': p_adjusted,
            'reject': p_adjusted < alpha
        })

        print(f"Multiple Comparison of Means - Mann-Whitney U Test with Holm-Bonferroni Correction for {error_type}")
        show(result_summary)

# Descriptive statistics for both error frames, computed up front
summary_absolute_errors = absolute_errors_df.describe()
summary_squared_errors = squared_errors_df.describe()

# NOTE: squaring is a monotone transform of the absolute error, so the
# rank-based Kruskal-Wallis / Mann-Whitney results are the same for both error
# types; only Levene, ANOVA/Tukey and the reported mean differences can differ.

# --- Absolute errors: summary table, then the statistical comparison ---
print("Summary Statistics for Absolute Errors:")
show(summary_absolute_errors)
print("\nAnalysis for Absolute Errors:")
perform_tests(errors_df=absolute_errors_df, error_type="Absolute Error")

# --- Squared errors: summary table, then the statistical comparison ---
print("Summary Statistics for Squared Errors:")
show(summary_squared_errors)
print("\nAnalysis for Squared Errors:")
perform_tests(errors_df=squared_errors_df, error_type="Squared Error")


Summary Statistics for Absolute Errors:


Unnamed: 0,actual_casetime_minutes,scheduled_duration,without_bmi_with_scheduled_duration,without_bmi_without_scheduled_duration
Loading ITables v2.1.4 from the internet... (need help?),,,,



Analysis for Absolute Errors:
Levene's Test Result for Absolute Error:
Statistic: 14.417198123718949
p-value: 5.589829432521733e-07
Kruskal-Wallis Test Result for Absolute Error:
H-statistic: 255.83117819586272
p-value: 2.798759014240766e-56
Multiple Comparison of Means - Mann-Whitney U Test with Holm-Bonferroni Correction for Absolute Error


group1,group2,meandiff,p-adj,reject
Loading ITables v2.1.4 from the internet... (need help?),,,,


Summary Statistics for Squared Errors:


Unnamed: 0,actual_casetime_minutes,scheduled_duration,without_bmi_with_scheduled_duration,without_bmi_without_scheduled_duration
Loading ITables v2.1.4 from the internet... (need help?),,,,



Analysis for Squared Errors:
Levene's Test Result for Squared Error:
Statistic: 8.832860114987202
p-value: 0.00014696319261536193
Kruskal-Wallis Test Result for Squared Error:
H-statistic: 255.83117819586272
p-value: 2.798759014240766e-56
Multiple Comparison of Means - Mann-Whitney U Test with Holm-Bonferroni Correction for Squared Error


group1,group2,meandiff,p-adj,reject
Loading ITables v2.1.4 from the internet... (need help?),,,,
