# Significant difference analysis between two datasets

Compendium page 83: two samples, unknown true variances, two-tailed test.
Test statistic: $t = \dfrac{\bar{X}_1 - \bar{X}_2}{\sqrt{S_1^2/n_1 + S_2^2/n_2}}$
H0 is rejected if: $|t| > \dfrac{W_1 t_1 + W_2 t_2}{W_1 + W_2}$
$W_1 = S_1^2 / n_1$
$W_2 = S_2^2 / n_2$
$t_1 = t(1-\alpha/2,\ n_1-1)$
$t_2 = t(1-\alpha/2,\ n_2-1)$

H0 = There is no significant difference between the 2 areas
Ha = There is a significant difference between the 2 areas (H0 is rejected)

alpha = 5%

In [None]:
import pylab as pl
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
import seaborn as sns



In [None]:
# Two-sample t-test with unknown, unequal variances: compute the test
# statistic and the weighted critical value used as the rejection criterion.
from scipy.stats import t


# NOTE(review): the data expressions are missing from the calls below
# (`len()`, `len(])`, `.std()`, `.mean()`), so this cell does not parse as
# written. The west/east samples have to be filled in before it can run.
n_west= len()
n_east= len(])

# NOTE(review): the test needs the sample standard deviation (ddof=1). That is
# the default for pandas objects but not for NumPy arrays (ddof=0) -- confirm
# which type the samples are.
std_west = .std()
x_west = .mean()
st_error_west = std_west**2 / (n_west) # S^2/n: the squared standard error of the mean (weight W in the rejection criterion), not the standard error itself
df_west = n_west-1


std_east = .std()
x_east = .mean()
st_error_east = std_east**2 / (n_east) # S^2/n for the east sample (squared standard error of the mean)
df_east = n_east-1

alpha = 1 - (0.05/2) # quantile level 1 - alpha/2 = 0.975 for a two-tailed test at the 5% level (not alpha itself)

t_west = t.ppf(alpha, df_west) # critical value dependent on confidence level and degrees of freedom
t_east = t.ppf(alpha, df_east)

# difference of the sample means divided by sqrt(S1^2/n1 + S2^2/n2)
t_statistic = (x_west - x_east)/math.sqrt(st_error_west + st_error_east)

# NOTE(review): the "Standard Error" lines print S^2/n (the squared standard
# error), and the west and east lines carry identical labels.
print(f"Standard Error: {st_error_west:.2f}")
print(f"Standard Error: {st_error_east:.2f}")
print(f"Degrees of Freedom: {df_west}")
print(f"Degrees of Freedom: {df_east}")
print(f"t-critical value west area: {t_west:.2f}")
print(f"t-critical value east area: {t_east:.2f}")
print('The statistic test is', round(t_statistic, 2))

# weighted average of the two critical values with weights W = S^2/n;
# H0 is rejected when |t_statistic| exceeds this value
t_final = ((st_error_west*t_west)+(st_error_east*t_east))/(st_error_west+st_error_east)
print('rejection criterion', round(t_final,2))

In [None]:
#boxplot
# Box plot of 'value' per 'group', with the individual observations drawn by
# swarmplot before the figure is shown.
# NOTE(review): the `data=` argument is empty in both calls, so this cell does
# not parse as written. Presumably a DataFrame with 'group' and 'value'
# columns is expected -- supply it before running.

sns.boxplot(x='group', y='value', data=)
sns.swarmplot(x='group', y='value', data=, color=".25")  # optional for individual points
plt.show()


In [None]:
#Overlapping graphs
# Plot, for each area, the estimated sampling distribution of the mean (a
# t-distribution centred on the sample mean) together with its 95% confidence
# interval, so the overlap between the two areas can be inspected visually.

from scipy.stats import t

# st_error_west / st_error_east hold S^2/n, the squared standard error (it is
# the weight W in the test). The confidence interval and the scale of the
# t-distribution need the standard error itself, S/sqrt(n), so take the root.
se_west = math.sqrt(st_error_west)
se_east = math.sqrt(st_error_east)

# margin of error = critical t value * standard error (half-width of the CI)
margin_error_west = t_west * se_west
margin_error_east = t_east * se_east

#confidence intervals
lower_west = x_west - margin_error_west
upper_west = x_west + margin_error_west
lower_east = x_east - margin_error_east
upper_east = x_east + margin_error_east

# 500 evenly spaced x values covering the mean +- 4 standard errors
line_west = np.linspace(x_west - 4 * se_west, x_west + 4 * se_west, 500)
# density of the t-distribution centred on the mean and scaled by the standard error
pdf_west = t.pdf(line_west, df_west, loc=x_west, scale=se_west)

line_east = np.linspace(x_east - 4 * se_east, x_east + 4 * se_east, 500)
pdf_east = t.pdf(line_east, df_east, loc=x_east, scale=se_east)


def _plot_mean_ci(area, mean, lower, upper, xs, density, fill_color):
    """Draw one area's density curve, sample mean, CI bounds and shaded CI."""
    plt.plot(xs, density, label=f'Estimated t-distribution of the sample mean {area} area')
    plt.axvline(mean, color='red', linestyle='--', label=f'Sample Mean {area} area: {mean:.2f}')
    plt.axvline(lower, color='green', linestyle=':', label=f'Lower Bound (95% CI) {area} area: {lower:.2f}')
    plt.axvline(upper, color='green', linestyle=':', label=f'Upper Bound (95% CI) {area} area: {upper:.2f}')
    # shade only the part of the curve that lies inside the confidence interval
    plt.fill_between(xs, 0, density, where=(xs >= lower) & (xs <= upper),
                     color=fill_color, alpha=0.5, label=f'95% Confidence Interval {area} area')


plt.figure(figsize=(10, 6))
_plot_mean_ci('west', x_west, lower_west, upper_west, line_west, pdf_west, 'lightgreen')
_plot_mean_ci('east', x_east, lower_east, upper_east, line_east, pdf_east, 'skyblue')

plt.title('Estimated t-Distribution of Sample Mean and 95% Confidence Interval')
plt.xlabel('Mean Value')
plt.ylabel('Probability Density')
plt.legend()
plt.grid(True)
plt.show()