In [11]:
import pandas as pd
import numpy as np
from scipy.stats import studentized_range

In [21]:
from scipy.stats import studentized_range
import numpy as np

class TukeyHSD:
	def __init__(self):
		self.result = []

	def fit(self, data, value_col, group_col, alpha = 0.05):
		self.data = data
		self.value_col = value_col
		self.group_col = group_col
		self.alpha = alpha

		groups = data[group_col].unique()
		group_means = data.groupby(group_col)[value_col].mean()
		group_sizes = data.groupby(group_col)[value_col].count()

		dfw = len(data) - len(groups)

		# Compute MSW (Mean Square Within)
		ssw = 0
		for g in groups:
			group_data = data[data[group_col] == g][value_col]
			m = group_data.mean()
			ssw += ((group_data - m) ** 2).sum()

		msw = ssw / dfw

		# Pairwise comparisons
		for i in range(len(groups)):
			for j in range(i + 1, len(groups)):
				g1 = groups[i]
				g2 = groups[j]

				mean1 = group_means[g1]
				mean2 = group_means[g2]
				n1 = group_sizes[g1]
				n2 = group_sizes[g2]

				mean_diff = abs(mean1 - mean2)
				se = np.sqrt(msw * (1/n1 + 1/n2))  # supports unequal sizes
				q_stat = mean_diff / se

				q_crit = studentized_range.ppf(1 - alpha, len(groups), dfw)
				significant = q_stat > q_crit

				result = {
					'Group 1': g1,
					'Group 2': g2,
					'Mean1': round(mean1, 2),
					'Mean2': round(mean2, 2),
					'Mean Diff': round(mean_diff, 2),
					'q stat': round(q_stat, 3),
					'q crit': round(q_crit, 3),
					'Significant': significant
				}

				self.result.append(result)

	def summary(self):
		for r in self.result:
			print(f"Group 1: {r['Group 1']}, Group 2: {r['Group 2']}, "
				  f"Mean1: {r['Mean1']}, Mean2: {r['Mean2']}, "
				  f"Mean Diff: {r['Mean Diff']}, "
				  f"Q Stat: {r['q stat']}, Q Crit: {r['q crit']}, "
				  f"Significant: {'Yes' if r['Significant'] else 'No'}")


In [17]:
data = pd.DataFrame({
    'Method': ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'D', 'D', 'D'],
    'Score':  [82, 85, 88, 78, 79, 80, 90, 92, 94, 84, 86, 85]
})

In [20]:
tukey = TukeyHSD()
tukey.fit(data, value_col='Score', group_col='Method')
tukey.summary()

Group 1: A, Group 2: B, Mean1: 85.0, Mean2: 79.0, Mean Diff: 6.0, Q Stat: 3.795, Q Crit: 4.529, Significant: No
Group 1: A, Group 2: C, Mean1: 85.0, Mean2: 92.0, Mean Diff: 7.0, Q Stat: 4.427, Q Crit: 4.529, Significant: No
Group 1: A, Group 2: D, Mean1: 85.0, Mean2: 85.0, Mean Diff: 0.0, Q Stat: 0.0, Q Crit: 4.529, Significant: No
Group 1: B, Group 2: C, Mean1: 79.0, Mean2: 92.0, Mean Diff: 13.0, Q Stat: 8.222, Q Crit: 4.529, Significant: Yes
Group 1: B, Group 2: D, Mean1: 79.0, Mean2: 85.0, Mean Diff: 6.0, Q Stat: 3.795, Q Crit: 4.529, Significant: No
Group 1: C, Group 2: D, Mean1: 92.0, Mean2: 85.0, Mean Diff: 7.0, Q Stat: 4.427, Q Crit: 4.529, Significant: No
