# Biostatistics Analysis Notebook

This notebook takes two paired series, X and Y — the input and the measured output of an instrumental system — and performs the following steps:

1. A detailed calculations table is generated with all necessary values
2. Basic statistics are calculated: mean, standard deviation, and variance
3. Pearson correlation coefficient is calculated with interpretation
4. Calibration curve is calculated (y = mx + b)

In [None]:
import numpy as np
import pandas as pd
from IPython.display import display

## 1. Input Data

In [None]:
# Paired readings: X is the instrument input, Y the corresponding output.
X = np.array([
    0, 16, 32, 48, 64, 80, 90, 112,
    138, 154, 170, 186, 202, 218, 250,
])
Y = np.array([
    0.1, 0.14, 0.35, 0.42, 0.5, 0.82, 1.05, 1.12,
    1.4, 1.48, 1.72, 1.8, 2.1, 2.22, 2.56,
])

# Lay the data out as two rows (X and Y) with 1-based sample numbers as the
# column labels, then render it.
input_df = pd.DataFrame({'X': X, 'Y': Y}).transpose()
input_df.columns = [str(position) for position in range(1, len(X) + 1)]
display(input_df)

## 2. Calculations Table

In [None]:
# Build the per-sample calculations table together with the column totals
# that the following cells (statistics, correlation, calibration) reuse.
N = len(X)
if N == 0 or len(Y) != N:
    raise ValueError("X and Y must be non-empty and have the same length")

# Totals of the raw values; the means are derived from them so that each
# total is computed only once.
sum_Xi = np.sum(X)
sum_Yi = np.sum(Y)
x_mean = sum_Xi / N
y_mean = sum_Yi / N

# Per-sample columns
Xi_squared = X ** 2
Yi_squared = Y ** 2
Xi_Yi = X * Y
Xi_minus_mean_squared = (X - x_mean) ** 2
Yi_minus_mean_squared = (Y - y_mean) ** 2

# Column sums
sum_Xi_squared = np.sum(Xi_squared)
sum_Yi_squared = np.sum(Yi_squared)
sum_Xi_Yi = np.sum(Xi_Yi)
sum_Xi_minus_mean_squared = np.sum(Xi_minus_mean_squared)
sum_Yi_minus_mean_squared = np.sum(Yi_minus_mean_squared)

# Squares of the totals, (ΣXi)² and (ΣYi)² — not to be confused with ΣXi², ΣYi²
sum_Xi_sq = sum_Xi ** 2
sum_Yi_sq = sum_Yi ** 2

# Table: one row per sample, then a Σ row and a (Σ)² row
calc_df = pd.DataFrame({
    'Xi': X, 'Yi': Y, 'Xi²': Xi_squared, 'Yi²': Yi_squared, 'Xi·Yi': Xi_Yi,
    '(Xi-X̄)²': Xi_minus_mean_squared, '(Yi-Ȳ)²': Yi_minus_mean_squared
})

sum_row = pd.DataFrame({
    'Xi': [sum_Xi], 'Yi': [sum_Yi], 'Xi²': [sum_Xi_squared], 'Yi²': [sum_Yi_squared],
    'Xi·Yi': [sum_Xi_Yi], '(Xi-X̄)²': [sum_Xi_minus_mean_squared], '(Yi-Ȳ)²': [sum_Yi_minus_mean_squared]
}, index=['Σ'])

# (Σ)² is only meaningful for the Xi and Yi columns; the other cells are
# left as empty strings so they render blank.
squared_sum_row = pd.DataFrame({
    'Xi': [sum_Xi_sq], 'Yi': [sum_Yi_sq], 'Xi²': [''], 'Yi²': [''],
    'Xi·Yi': [''], '(Xi-X̄)²': [''], '(Yi-Ȳ)²': ['']
}, index=['(Σ)²'])

display(pd.concat([calc_df, sum_row, squared_sum_row]))

# Every float is printed with a fixed precision so binary floating-point
# noise (e.g. 17.779999999999998) never leaks into the summary.
print(
    f"\nN={N}  ΣXi={sum_Xi}  ΣYi={sum_Yi:.4f}  ΣXi²={sum_Xi_squared}  "
    f"ΣYi²={sum_Yi_squared:.4f}  ΣXi·Yi={sum_Xi_Yi:.4f}  "
    f"Σ(Xi-X̄)²={sum_Xi_minus_mean_squared:.4f}  Σ(Yi-Ȳ)²={sum_Yi_minus_mean_squared:.4f}"
)
print(f"(ΣXi)²={sum_Xi_sq}  (ΣYi)²={sum_Yi_sq:.4f}")

## 3. Mean, Variance, Standard Deviation

In [None]:
# Descriptive statistics for X and Y, each printed with its substituted values.
# The sample variance divides by N - 1, so it needs at least two data points.
if N < 2:
    raise ValueError("sample variance requires at least two data points")

# Mean: X̄ = ΣXi / N
print("MEAN")
print(f"X̄ = {sum_Xi}/{N} = {x_mean:.6f}")
print(f"Ȳ = {sum_Yi:.4f}/{N} = {y_mean:.6f}")

# Variance: S² = Σ(Xi - X̄)² / (N-1)
x_var = sum_Xi_minus_mean_squared / (N - 1)
y_var = sum_Yi_minus_mean_squared / (N - 1)
print("\nVARIANCE")
print(f"Sx² = {sum_Xi_minus_mean_squared:.4f}/{N-1} = {x_var:.6f}")
print(f"Sy² = {sum_Yi_minus_mean_squared:.4f}/{N-1} = {y_var:.6f}")

# Standard Deviation: S = √S²
x_std = np.sqrt(x_var)
y_std = np.sqrt(y_var)
print("\nSTANDARD DEVIATION")
print(f"Sx = √{x_var:.6f} = {x_std:.6f}")
print(f"Sy = √{y_var:.6f} = {y_std:.6f}")

## 4. Pearson Correlation Coefficient

In [None]:
# Pearson correlation coefficient computed from the raw sums:
# r = [N·ΣXiYi - (ΣXi)(ΣYi)] / (√[N·ΣXi² - (ΣXi)²] · √[N·ΣYi² - (ΣYi)²])

num = N * sum_Xi_Yi - sum_Xi * sum_Yi
den_left = N * sum_Xi_squared - sum_Xi_sq
den_right = N * sum_Yi_squared - sum_Yi_sq

# Step-by-step substitution. The denominator is wrapped in parentheses so the
# printed expression cannot be misread as (num / √A) · √B.
print(f"r = [{N}·{sum_Xi_Yi:.4f} - ({sum_Xi})({sum_Yi:.4f})] / (√[{N}·{sum_Xi_squared} - ({sum_Xi})²] · √[{N}·{sum_Yi_squared:.4f} - ({sum_Yi:.4f})²])")
print(f"r = [{N*sum_Xi_Yi:.4f} - {sum_Xi*sum_Yi:.4f}] / (√[{N*sum_Xi_squared} - {sum_Xi_sq}] · √[{N*sum_Yi_squared:.4f} - {sum_Yi_sq:.4f}])")
print(f"r = {num:.4f} / (√{den_left} · √{den_right:.4f})")

if den_left > 0 and den_right > 0:
    root_left = np.sqrt(den_left)
    root_right = np.sqrt(den_right)
    print(f"r = {num:.4f} / ({root_left:.4f} · {root_right:.4f})")
    print(f"r = {num:.4f} / {root_left * root_right:.4f}")

    pearson_r = num / (root_left * root_right)
    print(f"\nr = {pearson_r:.6f}")
else:
    # A non-positive term means X or Y has no spread, so r is not defined.
    # Report that instead of dividing by zero.
    pearson_r = float("nan")
    print("\nr is undefined: X or Y has zero variance")

if np.isnan(pearson_r):
    strength = "undefined"
    print("Interpretation: correlation is undefined")
else:
    magnitude = abs(pearson_r)
    if magnitude >= 0.9:
        strength = "very strong"
    elif magnitude >= 0.7:
        strength = "strong"
    elif magnitude >= 0.5:
        strength = "moderate"
    elif magnitude >= 0.3:
        strength = "weak"
    else:
        strength = "very weak"

    # r == 0 has no direction; it must not be reported as "negative".
    if pearson_r > 0:
        print(f"Interpretation: {strength} positive correlation")
    elif pearson_r < 0:
        print(f"Interpretation: {strength} negative correlation")
    else:
        print("Interpretation: no linear correlation (r = 0)")

## 5. Calibration Curve (y = mx + b)

In [None]:
# Least-squares calibration line y = m·x + b:
# m = [N·ΣXi·Yi - (ΣXi)(ΣYi)] / [N·ΣXi² - (ΣXi)²]
# b = Ȳ - m·X̄

m_num = N * sum_Xi_Yi - sum_Xi * sum_Yi
m_den = N * sum_Xi_squared - sum_Xi_sq

print("SLOPE (m)")
print(f"m = [{N}·{sum_Xi_Yi:.4f} - ({sum_Xi})({sum_Yi:.4f})] / [{N}·{sum_Xi_squared} - ({sum_Xi})²]")
print(f"m = [{N*sum_Xi_Yi:.4f} - {sum_Xi*sum_Yi:.4f}] / [{N*sum_Xi_squared} - {sum_Xi_sq}]")
print(f"m = {m_num:.4f} / {m_den}")

# A zero denominator means every X value is identical, so no slope exists.
if m_den == 0:
    raise ValueError("slope is undefined: all X values are identical")

m = m_num / m_den
print(f"m = {m:.6f}")

print("\nINTERCEPT (b)")
print("b = Ȳ - m·X̄")
print(f"b = {y_mean:.6f} - {m:.6f}·{x_mean:.6f}")
print(f"b = {y_mean:.6f} - {m*x_mean:.6f}")

b = y_mean - m * x_mean
print(f"b = {b:.6f}")

# R² is the square of the Pearson coefficient computed in the previous cell.
r_squared = pearson_r ** 2
print("\n" + "=" * 40)
# Print the intercept with an explicit sign so a negative b reads
# "x - 0.123456" rather than "x + -0.123456".
b_sign = "+" if b >= 0 else "-"
print(f"CALIBRATION CURVE: y = {m:.6f}x {b_sign} {abs(b):.6f}")
print(f"R² = {r_squared:.6f} ({r_squared*100:.2f}% variance explained)")