# Cohen's d for DBNS (the `S/DB` column) vs SS and DBNS vs DBI for each technique

In [6]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

# Benchmark scores in long format: one row per (dataset, technique) pair,
# with techniques cycling fastest inside each dataset.
dataset_names = [
    "Pima India", "Parkinsons", "Heart", "Hepatitis",
    "Ionosphere", "Anemia", "Syn1", "Syn2",
]
technique_names = ["Proposed", "ODEM", "DGOF", "EBOD", "MFRFN", "ISBFK", "ROMD", "ODEC"]

data = {
    # Labels are derived from the two name lists so they cannot drift out of
    # step with the score rows below.
    "Dataset": [name for name in dataset_names for _ in technique_names],
    "Technique": technique_names * len(dataset_names),
    # Each row of a score list is one dataset, in `technique_names` order.
    "SS": [
        0.372, 0.150, 0.264, 0.224, 0.032, 0.027, 0.078, 0.181,  # Pima India
        0.662, 0.228, 0.379, 0.418, 0.228, 0.168, 0.272, 0.276,  # Parkinsons
        0.414, 0.153, 0.176, 0.0942, 0.133, 0.125, 0.138, 0.112,  # Heart
        0.493, 0.161, 0.251, 0.26, 0.161, 0.05, 0.1, 0.106,  # Hepatitis
        0.645, 0.242, 0.314, 0.305, 0.019, 0.259, 0.177, 0.271,  # Ionosphere
        0.346, 0.211, 0.04, 0.03, 0.014, 0.014, 0.142, 0.15,  # Anemia
        0.944, 0.877, 0.039, 0.036, 0.877, 0.398, 0.877, 0.877,  # Syn1
        0.829, 0.565, 0.037, 0.033, 0.565, 0.225, 0.531, 0.531,  # Syn2
    ],
    "DBI": [
        0.927, 1.91, 4.429, 2.167, 3.173, 3.164, 2.351, 1.678,  # Pima India
        0.544, 1.395, 1.828, 1.293, 1.395, 2.097, 1.226, 1.213,  # Parkinsons
        0.947, 2.308, 3.922, 5.987, 2.118, 2.609, 2.218, 2.544,  # Heart
        0.675, 2.265, 3.016, 2.282, 2.265, 2.568, 2.242, 2.177,  # Hepatitis
        0.844, 2.526, 4.367, 4.49, 3.107, 1.865, 2.2, 1.298,  # Ionosphere
        1.025, 1.938, 11.431, 14.847, 4.289, 4.289, 2.057, 2.01,  # Anemia
        0.08, 0.157, 10.077, 9.329, 0.157, 1.016, 0.157, 0.157,  # Syn1
        0.24, 0.719, 9.016, 8.798, 0.719, 2.973, 0.757, 0.757,  # Syn2
    ],
    # S/DB is entered as literal values here, not derived from SS / DBI.
    "S/DB": [
        0.401, 0.078, 0.059, 0.103, 0.011, 0.009, 0.033, 0.108,  # Pima India
        1.216, 0.163, 0.207, 0.323, 0.163, 0.08, 0.222, 0.228,  # Parkinsons
        0.437, 0.066, 0.045, 0.0157, 0.063, 0.048, 0.062, 0.044,  # Heart
        0.731, 0.071, 0.083, 0.114, 0.071, 0.019, 0.045, 0.048,  # Hepatitis
        0.765, 0.096, 0.072, 0.068, 0.006, 0.139, 0.08, 0.209,  # Ionosphere
        0.338, 0.109, 0.004, 0.002, 0.003, 0.003, 0.069, 0.075,  # Anemia
        11.744, 5.602, 0.004, 0.004, 5.601, 0.392, 5.602, 5.602,  # Syn1
        3.456, 0.785, 0.004, 0.004, 0.785, 0.076, 0.701, 0.701,  # Syn2
    ],
}

df = pd.DataFrame(data)

# Cohen's d of S/DB against SS and against DBI, per technique, taken across
# the datasets:
#     d = (mean(S/DB) - mean(ref)) / sqrt((var(S/DB) + var(ref)) / 2)
# with sample variances (ddof=1). A positive d means the S/DB mean is the
# larger of the two.
# NOTE(review): this is the independent-samples form of d, yet both columns
# are measured on the same datasets and are on different scales -- confirm
# this is the intended effect-size definition.
per_technique = df.groupby("Technique", sort=False)[["S/DB", "SS", "DBI"]]
means = per_technique.mean()
variances = per_technique.var(ddof=1)

# One vectorized pass per reference metric instead of a copy-pasted formula.
effect_sizes = {
    ref: (means["S/DB"] - means[ref]) / np.sqrt((variances["S/DB"] + variances[ref]) / 2)
    for ref in ("SS", "DBI")
}

# (technique, d vs SS, d vs DBI) tuples, in order of first appearance.
results = list(zip(means.index, effect_sizes["SS"], effect_sizes["DBI"]))

# Column labels use "S/DB" (the DataFrame column name) so the table matches
# the recorded output; the notebook heading calls the same quantity DBNS.
effect_size_df = pd.DataFrame(
    results,
    columns=["Technique", "Cohen's d (S/DB vs SS)", "Cohen's d (S/DB vs DBI)"],
)
effect_size_df


Unnamed: 0,Technique,Cohen's d (S/DB vs SS),Cohen's d (S/DB vs DBI)
0,Proposed,0.648138,0.620672
1,ODEM,0.398461,-0.525876
2,DGOF,-1.191033,-2.334077
3,EBOD,-0.73912,-1.854162
4,MFRFN,0.420103,-0.782392
5,ISBFK,-0.479745,-3.574551
6,ROMD,0.407384,-0.537319
7,ODEC,0.411125,-0.41044


In [3]:
# Re-import necessary modules since environment was reset
import pandas as pd
import numpy as np

# Full benchmark table (8 datasets x 10 techniques) in long format: one row
# per (dataset, technique) pair, with techniques cycling fastest inside each
# dataset.
dataset_names = [
    "Pima India", "Parkinsons", "Heart", "Hepatitis",
    "Ionosphere", "Anemia", "Syn1", "Syn2",
]
technique_names = [
    "Proposed", "ODEM", "DGOF", "EBOD", "MFRFN",
    "ISBFK", "ROMD", "ODEC", "UDEC", "DLCA",
]

data = {
    # Repeat counts come from the name lists rather than hard-coded 10 / 8.
    "Dataset": [name for name in dataset_names for _ in technique_names],
    "Technique": technique_names * len(dataset_names),
    # Each row of a score list is one dataset, in `technique_names` order.
    # NOTE(review): Heart/EBOD SS is 0.094 here but 0.0942 in the 8-technique
    # cell earlier in the notebook -- confirm which value is intended.
    "SS": [
        0.372, 0.150, 0.264, 0.224, 0.032, 0.027, 0.078, 0.181, 0.196, 0.225,  # Pima India
        0.662, 0.228, 0.379, 0.418, 0.228, 0.168, 0.272, 0.276, 0.578, 0.469,  # Parkinsons
        0.414, 0.153, 0.176, 0.094, 0.133, 0.125, 0.138, 0.112, 0.237, 0.227,  # Heart
        0.493, 0.161, 0.251, 0.260, 0.161, 0.050, 0.100, 0.106, 0.204, 0.235,  # Hepatitis
        0.645, 0.242, 0.314, 0.305, 0.019, 0.259, 0.177, 0.271, 0.467, 0.440,  # Ionosphere
        0.346, 0.211, 0.040, 0.030, 0.014, 0.014, 0.142, 0.150, 0.221, 0.205,  # Anemia
        0.944, 0.877, 0.039, 0.036, 0.877, 0.398, 0.877, 0.877, 0.690, 0.668,  # Syn1
        0.829, 0.565, 0.037, 0.033, 0.565, 0.225, 0.531, 0.531, 0.443, 0.420,  # Syn2
    ],
    "DBI": [
        0.927, 1.910, 4.429, 2.167, 3.173, 3.164, 2.351, 1.678, 1.906, 1.833,  # Pima India
        0.544, 1.395, 1.828, 1.293, 1.395, 2.097, 1.226, 1.213, 0.835, 1.109,  # Parkinsons
        0.947, 2.308, 3.922, 5.987, 2.118, 2.609, 2.218, 2.544, 1.712, 1.892,  # Heart
        0.675, 2.265, 3.016, 2.282, 2.265, 2.568, 2.242, 2.177, 2.024, 1.795,  # Hepatitis
        0.844, 2.526, 4.367, 4.490, 3.107, 1.865, 2.200, 1.298, 1.274, 1.555,  # Ionosphere
        1.025, 1.938, 11.431, 14.847, 4.289, 4.289, 2.057, 2.010, 1.855, 1.961,  # Anemia
        0.080, 0.157, 10.077, 9.329, 0.157, 1.016, 0.157, 0.157, 0.470, 0.508,  # Syn1
        0.240, 0.719, 9.016, 8.798, 0.719, 2.973, 0.757, 0.757, 1.360, 0.951,  # Syn2
    ],
}

# Create DataFrame
df = pd.DataFrame(data)

# S/DB is the element-wise ratio of the SS column to the DBI column.
df["S/DB"] = df["SS"] / df["DBI"]

# Cohen's d of S/DB against SS and against DBI, per technique, taken across
# the datasets:
#     d = (mean(S/DB) - mean(ref)) / sqrt((var(S/DB) + var(ref)) / 2)
# with sample variances (ddof=1). A positive d means the S/DB mean is the
# larger of the two.
per_technique = df.groupby("Technique", sort=False)[["S/DB", "SS", "DBI"]]
means = per_technique.mean()
variances = per_technique.var(ddof=1)

# One vectorized pass per reference metric instead of a copy-pasted formula.
effect_sizes = {
    ref: (means["S/DB"] - means[ref]) / np.sqrt((variances["S/DB"] + variances[ref]) / 2)
    for ref in ("SS", "DBI")
}

# (technique, d vs SS, d vs DBI) tuples, in order of first appearance.
results = list(zip(means.index, effect_sizes["SS"], effect_sizes["DBI"]))

# Build the result table sorted alphabetically by technique with a fresh
# 0..n-1 index; chained calls replace the in-place mutations.
effect_size_df = (
    pd.DataFrame(
        results,
        columns=["Technique", "Cohen's d (S/DB vs SS)", "Cohen's d (S/DB vs DBI)"],
    )
    .sort_values(by="Technique")
    .reset_index(drop=True)
)

# Display output
effect_size_df


Unnamed: 0,Technique,Cohen's d (S/DB vs SS),Cohen's d (S/DB vs DBI)
0,DGOF,-1.189934,-2.334055
1,DLCA,0.020775,-2.284607
2,EBOD,-0.738414,-1.854155
3,ISBFK,-0.480341,-3.574678
4,MFRFN,0.419913,-0.784878
5,ODEC,0.410842,-0.412839
6,ODEM,0.398326,-0.528379
7,Proposed,0.647412,0.620087
8,ROMD,0.407172,-0.539929
9,UDEC,0.096474,-1.974705
