Group Size are Equal

In [6]:
import numpy as np
import pandas as pd
from scipy.stats import f, studentized_range

In [52]:
class Duncan:
    
    def __init__(self):
        self.f_stats = None
        self.p_value = None
        self.group_means = []
        self.group_sizes = []
        self.msw = None
        self.msb = None
        self.dcr = None
        self.se = None
        self.df_within = None
        self.duncan_results = []  # Store results here

    # Fit the ANOVA model to the data
    def fit(self, data, value_col, group_col):
        self.data = data
        self.value_col = value_col
        self.group_col = group_col

        # Get unique groups and their respective means and sizes
        groups = data[group_col].unique()
        self.group_means = []
        self.group_sizes = []
        grand_mean = data[value_col].mean()

        # Compute the Sum of Squares Within (SSW)
        ssw = 0
        for g in groups:
            group_data = data[data[group_col] == g][value_col]
            m = group_data.mean()
            n = len(group_data)
            ssw += ((group_data - m) ** 2).sum()

            self.group_means.append(m)
            self.group_sizes.append(n)

        # Compute the Total Sum of Squares (SST)
        group_data = data[value_col]
        sst = ((group_data - grand_mean) ** 2).sum()

        # Compute the Between-group Sum of Squares (SSB)
        ssb = sst - ssw

        # Degrees of freedom
        dfb = len(groups) - 1
        self.df_within = len(data) - len(groups)

        # Mean Square Between and Mean Square Within
        self.msb = ssb / dfb
        self.msw = ssw / self.df_within

        # F-statistic
        self.f_stats = self.msb / self.msw
        self.p_value = 1 - f.cdf(self.f_stats, dfb, self.df_within)

        print(f'F-Value: {self.f_stats}')
        print(f'P-Value: {self.p_value}')

    # Make predictions based on the ANOVA result
    def predict(self):
        if self.p_value < 0.05:
            print("Reject the null hypothesis: At least one group mean is different.")
        else:
            print("Fail to reject the null hypothesis: No significant difference between group means.")

    # Function to get the q-value for Duncan's Test from a Studentized Range distribution
    def get_q_value(self, dfw, num_groups, alpha=0.05):
        """
        This function returns the q-value for Duncan's test based on the given dfw
        (degrees of freedom for error) and number of groups.
        """
        # Calculate the critical value for the studentized range distribution
        q_value = studentized_range.ppf(1 - alpha, num_groups, dfw)
        return q_value

    # Perform Duncan's Test for pairwise comparisons
    def duncan_test(self, alpha=0.05):
        # Check if ANOVA is significant
        if self.p_value >= alpha:
            print("ANOVA is not significant, skipping Duncan's Test.")
            return
        
        # Compute the Standard Error (SE)
        n = self.group_sizes[0]  # Assuming equal sample sizes for simplicity
        self.se = np.sqrt(self.msw / n)

        # Look up the q-value for the given degrees of freedom and number of groups
        num_groups = len(self.group_means)
        q_value = self.get_q_value(self.df_within, num_groups)

        if q_value:
            self.dcr = q_value * self.se
            print(f"Duncan Critical Range (DCR): {self.dcr}")
        else:
            return

        # Perform pairwise comparisons
        comparisons = []
        for i in range(num_groups):
            for j in range(i + 1, num_groups):
                diff = np.abs(self.group_means[i] - self.group_means[j])
                comparisons.append((f'Group {i+1} vs Group {j+1}', diff))

        # Store all the results in self.duncan_results
        self.duncan_results = []
        for comparison in comparisons:
            groups_pair, diff = comparison
            result = {
                'pair': groups_pair,
                'difference': diff,
                'significant': diff >= self.dcr
            }
            if result['significant']:  # Only store significant results
                self.duncan_results.append(result)

        # Display only significant comparisons
        if self.duncan_results:
            print("\nDuncan's Test Significant Results:")
            for result in self.duncan_results:
                print(f"Comparing {result['pair']}: Difference = {result['difference']} -> Significant Difference")
        else:
            print("No significant differences found in Duncan's test.")

    # Access all Duncan test results
    def get_all_duncan_results(self):
        return self.duncan_results

In [76]:
df = pd.read_excel(r"C:\Users\saqli\Desktop\FULL DataSet.xlsx")
df_new = pd.read_excel(r"C:\Users\saqli\Desktop\df_replicate_3.xlsx")
df_replicate = pd.concat([df], ignore_index=True)

In [77]:
model = Duncan()
model.fit(df_replicate, 'delta_E', 'Days')

F-Value: 124.9318332148811
P-Value: 2.886579864025407e-15


In [78]:
model.predict()

Reject the null hypothesis: At least one group mean is different.


In [79]:
model.duncan_test()

Duncan Critical Range (DCR): 2.0204072675260694

Duncan's Test Significant Results:
Comparing Group 1 vs Group 2: Difference = 4.880521279286297 -> Significant Difference
Comparing Group 1 vs Group 3: Difference = 12.832692816838376 -> Significant Difference
Comparing Group 2 vs Group 3: Difference = 7.952171537552079 -> Significant Difference


In [74]:
model.get_all_duncan_results()

[{'pair': 'Group 1 vs Group 3',
  'difference': 1.7335569079358595,
  'significant': True},
 {'pair': 'Group 2 vs Group 3',
  'difference': 1.6318589470793374,
  'significant': True}]