In [1]:
import pandas as pd
import numpy as np
from scipy.stats import studentized_range

In [None]:
from scipy.stats import studentized_range
import numpy as np

class TukeyHSD:
	def __init__(self):
		self.result = []

	def fit(self, data, value_col, group_col, alpha = 0.05):
		self.data = data
		self.value_col = value_col
		self.group_col = group_col
		self.alpha = alpha

		groups = data[group_col].unique()
		group_means = data.groupby(group_col)[value_col].mean()
		group_sizes = data.groupby(group_col)[value_col].count()

		dfw = len(data) - len(groups)

		
		ssw = 0
		for g in groups:
			group_data = data[data[group_col] == g][value_col]
			m = group_data.mean()
			ssw += ((group_data - m) ** 2).sum()

		msw = ssw / dfw

		
		for i in range(len(groups)):
			for j in range(i + 1, len(groups)):
				g1 = groups[i]
				g2 = groups[j]

				mean1 = group_means[g1]
				mean2 = group_means[g2]
				n1 = group_sizes[g1]
				n2 = group_sizes[g2]

				mean_diff = abs(mean1 - mean2)
				se = np.sqrt(msw * (1/n1 + 1/n2))  
				q_stat = mean_diff / se

				q_crit = studentized_range.ppf(1 - alpha, len(groups), dfw)
				significant = q_stat > q_crit

				result = {
					'Group 1': g1,
					'Group 2': g2,
					'Mean1': round(mean1, 2),
					'Mean2': round(mean2, 2),
					'Mean Diff': round(mean_diff, 2),
					'q stat': round(q_stat, 3),
					'q crit': round(q_crit, 3),
					'Significant': significant
				}

				self.result.append(result)

	def summary(self):
		for r in self.result:
			print(f"Group 1: {r['Group 1']}, Group 2: {r['Group 2']}, "
				  f"Mean1: {r['Mean1']}, Mean2: {r['Mean2']}, "
				  f"Mean Diff: {r['Mean Diff']}, "
				  f"Q Stat: {r['q stat']}, Q Crit: {r['q crit']}, "
				  f"Significant: {'Yes' if r['Significant'] else 'No'}")


In [11]:
df = pd.read_excel(r"C:\Users\saqli\Desktop\FULL DataSet.xlsx")

In [4]:
df.head()

Unnamed: 0,Treatment,Alginate,Guargum,Pectin,Days,Weight_loss,Firmness,TSS,pH,L*,a*,b*,delta_E,TPC,DPPH
0,0,0.0,0.0,0.0,0,0.0,721.0,7.9,4.12,19.62,9.51,21.46,0.0,510.83,67.76
1,0,0.0,0.0,0.0,2,18.19776,468.69,4.76,4.11,15.48,6.85,18.87,6.508748,716.88,123.2
2,1,3.0,0.0,0.0,2,18.43008,500.96,4.85,4.07,16.47,7.51,19.52,4.172649,498.13,120.1
3,2,0.0,3.0,0.0,2,18.33792,513.9,4.8,4.08,15.93,6.98,19.46,5.33911,436.88,161.5
4,3,0.0,0.0,3.0,2,18.47232,591.24,5.04,4.11,17.41,8.42,19.81,2.965586,413.13,162.3


In [15]:
df_1 = pd.read_excel(r"C:\Users\saqli\Desktop\FULL DATASET_ORIGINAL.xlsx")

In [34]:
df = pd.read_excel(r"C:\Users\saqli\Desktop\FULL DataSet.xlsx")
df_replicate_2 = pd.read_excel(r"C:\Users\saqli\Downloads\Replicate_1_Data.xlsx")
df_replicate_3 = pd.read_excel(r"C:\Users\saqli\Desktop\df_replicate_3.xlsx")
df_replicate = pd.concat([df, df_replicate_3], ignore_index=True)

In [39]:
df_replicate = pd.read_excel(r"C:\Users\saqli\Downloads\combined_data_with_replicates.xlsx")

In [42]:
df_new = pd.concat([df, df_1], ignore_index=True)


In [42]:
tukey = TukeyHSD()

In [44]:
tukey.fit(df_replicate, value_col='L', group_col='Treatment_code')

In [45]:
tukey.summary()

Group 1: Control, Group 2: x1, Mean1: 12.25, Mean2: 12.75, Mean Diff: 0.5, Q Stat: 0.187, Q Crit: 5.34, Significant: No
Group 1: Control, Group 2: x2, Mean1: 12.25, Mean2: 12.43, Mean Diff: 0.18, Q Stat: 0.067, Q Crit: 5.34, Significant: No
Group 1: Control, Group 2: x3, Mean1: 12.25, Mean2: 13.4, Mean Diff: 1.15, Q Stat: 0.434, Q Crit: 5.34, Significant: No
Group 1: Control, Group 2: x1*x2, Mean1: 12.25, Mean2: 14.6, Mean Diff: 2.35, Q Stat: 0.886, Q Crit: 5.34, Significant: No
Group 1: Control, Group 2: x1*x3, Mean1: 12.25, Mean2: 13.79, Mean Diff: 1.54, Q Stat: 0.582, Q Crit: 5.34, Significant: No
Group 1: Control, Group 2: x2*x3, Mean1: 12.25, Mean2: 13.13, Mean Diff: 0.88, Q Stat: 0.331, Q Crit: 5.34, Significant: No
Group 1: Control, Group 2: x1*x2*x3, Mean1: 12.25, Mean2: 14.8, Mean Diff: 2.55, Q Stat: 1.215, Q Crit: 5.34, Significant: No
Group 1: Control, Group 2: Control_Rep1, Mean1: 12.25, Mean2: 7.0, Mean Diff: 5.25, Q Stat: 1.98, Q Crit: 5.34, Significant: No
Group 1: Contr

In [41]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib.pyplot as plt


# Step 1: Perform ANOVA to check if there's a significant difference
model = ols('L ~ C(Treatment_code)', data=df_replicate).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

# Step 2: If ANOVA is significant, perform Tukey's HSD
if anova_table['PR(>F)'][0] < 0.05:
    print("ANOVA is significant, performing Tukey's HSD test...")
    
    tukey = pairwise_tukeyhsd(endog=df_replicate['L'], groups=df_replicate['Treatment_code'], alpha=0.05)
    print(tukey.summary())
    
    # Optional: Plot the results
    tukey.plot_simultaneous()
    plt.show()
else:
    print("ANOVA is not significant. No need for Tukey's test.")


ANOVA is not significant. No need for Tukey's test.


  if anova_table['PR(>F)'][0] < 0.05:
