Group Size are Equal

In [6]:
import numpy as np
import pandas as pd
from scipy.stats import f, studentized_range

In [127]:
class Duncan:
    
    def __init__(self):
        self.f_stats = None
        self.p_value = None
        self.group_means = []
        self.group_sizes = []
        self.msw = None
        self.msb = None
        self.dcr = None
        self.se = None
        self.df_within = None
        self.duncan_results = [] 


    def fit(self, data, value_col, group_col):
        self.data = data
        self.value_col = value_col
        self.group_col = group_col


        groups = data[group_col].unique()
        self.group_means = []
        self.group_sizes = []
        grand_mean = data[value_col].mean()


        ssw = 0
        for g in groups:
            group_data = data[data[group_col] == g][value_col]
            m = group_data.mean()
            n = len(group_data)
            ssw += ((group_data - m) ** 2).sum()

            self.group_means.append(m)
            self.group_sizes.append(n)

  
        group_data = data[value_col]
        sst = ((group_data - grand_mean) ** 2).sum()

  
        ssb = sst - ssw

 
        dfb = len(groups) - 1
        self.df_within = len(data) - len(groups)

     
        self.msb = ssb / dfb
        self.msw = ssw / self.df_within

  
        self.f_stats = self.msb / self.msw
        self.p_value = 1 - f.cdf(self.f_stats, dfb, self.df_within)

        print(f'F-Value: {self.f_stats}')
        print(f'P-Value: {self.p_value}')


    def predict(self):
        if self.p_value < 0.05:
            print("Reject the null hypothesis: At least one group mean is different.")
        else:
            print("Fail to reject the null hypothesis: No significant difference between group means.")


    def get_q_value(self, dfw, num_groups, alpha=0.05):
        """
        This function returns the q-value for Duncan's test based on the given dfw
        (degrees of freedom for error) and number of groups.
        """
  
        q_value = studentized_range.ppf(1 - alpha, num_groups, dfw)
        return q_value

    
    def duncan_test(self, alpha=0.05):
       
        if self.p_value >= alpha:
            print("ANOVA is not significant, skipping Duncan's Test.")
            return
        
       
        n = self.group_sizes[0] 
        self.se = np.sqrt(self.msw / n)

    
        num_groups = len(self.group_means)
        q_value = self.get_q_value(self.df_within, num_groups)

        if q_value:
            self.dcr = q_value * self.se
            print(f"Duncan Critical Range (DCR): {self.dcr}")
        else:
            return

        
        comparisons = []
        for i in range(num_groups):
            for j in range(i + 1, num_groups):
                diff = np.abs(self.group_means[i] - self.group_means[j])
                comparisons.append((f'Group {i+1} vs Group {j+1}', diff))

        
        self.duncan_results = []
        for comparison in comparisons:
            groups_pair, diff = comparison
            result = {
                'pair': groups_pair,
                'difference': diff,
                'significant': diff >= self.dcr
            }
            if result['significant']:
                self.duncan_results.append(result)

       
        if self.duncan_results:
            print("\nDuncan's Test Significant Results:")
            for result in self.duncan_results:
                print(f"Comparing {result['pair']}: Difference = {result['difference']} -> Significant Difference")
        else:
            print("No significant differences found in Duncan's test.")

 
    def get_all_duncan_results(self):
        return self.duncan_results

In [122]:
df = pd.read_excel(r"C:\Users\saqli\Desktop\FULL DataSet.xlsx")
df_new = pd.read_excel(r"C:\Users\saqli\Desktop\TSS_.xlsx")
df_replicate = pd.concat([df_new], ignore_index=True)

In [123]:
model = Duncan()
model.fit(df_replicate, 'TSS 2', 'Treatment_code')

F-Value: 9.71493854725289
P-Value: 5.569393612958606e-09


In [124]:
model.predict()

Reject the null hypothesis: At least one group mean is different.


In [125]:
model.duncan_test()

Duncan Critical Range (DCR): 1.6781901565228823

Duncan's Test Significant Results:
Comparing Group 1 vs Group 4: Difference = 2.500000000000001 -> Significant Difference
Comparing Group 1 vs Group 5: Difference = 2.000000000000001 -> Significant Difference
Comparing Group 1 vs Group 8: Difference = 2.041666666666666 -> Significant Difference
Comparing Group 2 vs Group 4: Difference = 1.8000000000000003 -> Significant Difference
Comparing Group 3 vs Group 4: Difference = 2.2333333333333343 -> Significant Difference
Comparing Group 3 vs Group 5: Difference = 1.7333333333333343 -> Significant Difference
Comparing Group 3 vs Group 8: Difference = 1.7749999999999995 -> Significant Difference
Comparing Group 4 vs Group 6: Difference = 2.166666666666667 -> Significant Difference
Comparing Group 4 vs Group 7: Difference = 2.533333333333334 -> Significant Difference
Comparing Group 5 vs Group 7: Difference = 2.033333333333334 -> Significant Difference
Comparing Group 6 vs Group 8: Difference =

In [117]:
model.get_all_duncan_results()

[]

In [97]:
# Since your data has Weight_loss, Firmness, TSS, pH, L, a, b*, delta_E, TPC, and DPPH columns for each day
def tukey_test_by_day(df):
	# List of parameters to analyze
	parameters = ['Weight_loss', 'Firmness (N)', 'TSS', 'pH', 'L', 'a', 'b*', 'delta_E', 'TPC (mg/100g)', 'DPPH']
	
	# For each day
	for day in [2, 4, 6]:
		print(f'\n=== Tukey HSD Test Results for Day {day} ===')
		
		# Filter data for current day
		day_data = df[df['Days'] == day]
		
		# For each parameter
		for param in parameters:
			print(f'\n{param}:')
			
			# Perform Tukey HSD
			tukey = pairwise_tukeyhsd(day_data[param], 
									day_data['Treatment_code'],
									alpha=0.05)
			
			# Print results
			print(tukey.summary())

# Run the analysis
tukey_test_by_day(df_replicate)



=== Tukey HSD Test Results for Day 2 ===

Weight_loss:
Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower  upper  reject
----------------------------------------------------
Control     x1     -2.7 0.2448 -7.2912 1.8912  False
Control  x1*x2     -3.6 0.0695 -7.5761 0.3761  False
Control  x1*x3    -3.44 0.1256 -8.0312 1.1512  False
Control x1x2x3    -3.56 0.0534 -7.1897 0.0697  False
Control     x2    -1.45 0.7342 -6.0412 3.1412  False
Control  x2*x3     -1.1 0.8943 -5.6912 3.4912  False
Control x2*x3     -1.45 0.7342 -6.0412 3.1412  False
Control     x3    -2.37 0.3334 -6.9612 2.2212  False
     x1  x1*x2     -0.9  0.916 -4.8761 3.0761  False
     x1  x1*x3    -0.74 0.9838 -5.3312 3.8512  False
     x1 x1x2x3    -0.86 0.8988 -4.4897 2.7697  False
     x1     x2     1.25 0.8315 -3.3412 5.8412  False
     x1  x2*x3      1.6 0.6578 -2.9912 6.1912  False
     x1 x2*x3      1.25 0.8315 -3.3412 5.8412  False
     x1     x3     0.33 0.9999 -4.2612 4.92