In [11]:
import pandas as pd
import numpy as np
from scipy.stats import studentized_range

In [21]:
from scipy.stats import studentized_range
import numpy as np

class TukeyHSD:
	def __init__(self):
		self.result = []

	def fit(self, data, value_col, group_col, alpha = 0.05):
		self.data = data
		self.value_col = value_col
		self.group_col = group_col
		self.alpha = alpha

		groups = data[group_col].unique()
		group_means = data.groupby(group_col)[value_col].mean()
		group_sizes = data.groupby(group_col)[value_col].count()

		dfw = len(data) - len(groups)

		# Compute MSW (Mean Square Within)
		ssw = 0
		for g in groups:
			group_data = data[data[group_col] == g][value_col]
			m = group_data.mean()
			ssw += ((group_data - m) ** 2).sum()

		msw = ssw / dfw

		# Pairwise comparisons
		for i in range(len(groups)):
			for j in range(i + 1, len(groups)):
				g1 = groups[i]
				g2 = groups[j]

				mean1 = group_means[g1]
				mean2 = group_means[g2]
				n1 = group_sizes[g1]
				n2 = group_sizes[g2]

				mean_diff = abs(mean1 - mean2)
				se = np.sqrt(msw * (1/n1 + 1/n2))  # supports unequal sizes
				q_stat = mean_diff / se

				q_crit = studentized_range.ppf(1 - alpha, len(groups), dfw)
				significant = q_stat > q_crit

				result = {
					'Group 1': g1,
					'Group 2': g2,
					'Mean1': round(mean1, 2),
					'Mean2': round(mean2, 2),
					'Mean Diff': round(mean_diff, 2),
					'q stat': round(q_stat, 3),
					'q crit': round(q_crit, 3),
					'Significant': significant
				}

				self.result.append(result)

	def summary(self):
		for r in self.result:
			print(f"Group 1: {r['Group 1']}, Group 2: {r['Group 2']}, "
				  f"Mean1: {r['Mean1']}, Mean2: {r['Mean2']}, "
				  f"Mean Diff: {r['Mean Diff']}, "
				  f"Q Stat: {r['q stat']}, Q Crit: {r['q crit']}, "
				  f"Significant: {'Yes' if r['Significant'] else 'No'}")


In [40]:
df = pd.read_excel(r"C:\Users\saqli\Desktop\FULL DataSet.xlsx")

In [28]:
df.head()

Unnamed: 0,Alginate,Guargum,Pectin,Days,Weight_loss,Firmness,TSS,pH,L*,a*,b*,delta_E,TPC,DPPH
0,0.0,0.0,0.0,0,0.0,721.0,7.9,4.12,19.62,9.51,21.46,0.0,510.83,67.76
1,0.0,0.0,0.0,2,18.19776,468.69,4.76,4.11,15.48,6.85,18.87,6.508748,716.88,123.2
2,3.0,0.0,0.0,2,18.43008,500.96,4.85,4.07,16.47,7.51,19.52,4.172649,498.13,120.1
3,0.0,3.0,0.0,2,18.33792,513.9,4.8,4.08,15.93,6.98,19.46,5.33911,436.88,161.5
4,0.0,0.0,3.0,2,18.47232,591.24,5.04,4.11,17.41,8.42,19.81,2.965586,413.13,162.3


In [41]:
df_1 = pd.read_excel(r"C:\Users\saqli\Desktop\FULL DATASET_ORIGINAL.xlsx")

In [42]:
df_new = pd.concat([df, df_1], ignore_index=True)


In [43]:
df_new

Unnamed: 0,Alginate,Guargum,Pectin,Days,Weight_loss,Firmness,TSS,pH,L*,a*,b*,delta_E,TPC,DPPH
0,0.0,0.0,0.0,0,0.00000,721.00,7.90,4.12,19.62,9.51,21.46,0.000000,510.83,67.76
1,0.0,0.0,0.0,2,18.19776,468.69,4.76,4.11,15.48,6.85,18.87,6.508748,716.88,123.20
2,3.0,0.0,0.0,2,18.43008,500.96,4.85,4.07,16.47,7.51,19.52,4.172649,498.13,120.10
3,0.0,3.0,0.0,2,18.33792,513.90,4.80,4.08,15.93,6.98,19.46,5.339110,436.88,161.50
4,0.0,0.0,3.0,2,18.47232,591.24,5.04,4.11,17.41,8.42,19.81,2.965586,413.13,162.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,0.0,1.5,1.5,6,15.33504,139.00,1.30,4.42,9.40,3.82,8.88,19.008148,734.38,140.71
64,2.0,0.5,0.5,6,17.61216,266.00,4.48,4.38,11.94,6.05,11.71,12.884739,1110.63,160.67
65,0.5,2.0,0.5,6,16.12608,193.00,2.88,4.36,9.97,4.39,9.74,16.768283,852.37,152.46
66,0.5,0.5,2.0,6,16.79808,180.00,3.18,4.42,10.49,4.64,10.11,15.559573,997.00,152.76


In [44]:
tukey.fit(df_new, value_col='TSS', group_col='Days')

In [30]:
tukey.summary()

Group 1: A, Group 2: B, Mean1: 85.0, Mean2: 79.0, Mean Diff: 6.0, Q Stat: 3.795, Q Crit: 4.529, Significant: No
Group 1: A, Group 2: C, Mean1: 85.0, Mean2: 92.0, Mean Diff: 7.0, Q Stat: 4.427, Q Crit: 4.529, Significant: No
Group 1: A, Group 2: D, Mean1: 85.0, Mean2: 85.0, Mean Diff: 0.0, Q Stat: 0.0, Q Crit: 4.529, Significant: No
Group 1: B, Group 2: C, Mean1: 79.0, Mean2: 92.0, Mean Diff: 13.0, Q Stat: 8.222, Q Crit: 4.529, Significant: Yes
Group 1: B, Group 2: D, Mean1: 79.0, Mean2: 85.0, Mean Diff: 6.0, Q Stat: 3.795, Q Crit: 4.529, Significant: No
Group 1: C, Group 2: D, Mean1: 92.0, Mean2: 85.0, Mean Diff: 7.0, Q Stat: 4.427, Q Crit: 4.529, Significant: No
Group 1: 0, Group 2: 2, Mean1: 7.9, Mean2: 4.96, Mean Diff: 2.94, Q Stat: 3.948, Q Crit: 3.845, Significant: Yes
Group 1: 0, Group 2: 4, Mean1: 7.9, Mean2: 4.75, Mean Diff: 3.15, Q Stat: 4.224, Q Crit: 3.845, Significant: Yes
Group 1: 0, Group 2: 6, Mean1: 7.9, Mean2: 2.99, Mean Diff: 4.91, Q Stat: 6.586, Q Crit: 3.845, Signif