# Biostatistics Analysis Notebook

In [1]:
import numpy as np
import pandas as pd
from IPython.display import display

## 1. Input Data Table (X and Y values)

In [None]:
# Paired readings, one (X, Y) pair per observation.
X = np.array([
    0, 16, 32, 48, 64, 80, 90, 112, 138, 154, 170, 186, 202, 218, 250,
])  # blood pressure in mmHg
Y = np.array([
    0.1, 0.14, 0.35, 0.42, 0.5, 0.82, 1.05, 1.12,
    1.4, 1.48, 1.72, 1.8, 2.1, 2.22, 2.56,
])  # output in volts

# Wide layout for display: one row per variable, one labelled column per observation.
input_df = pd.DataFrame({'X': X, 'Y': Y}).T
input_df.columns = [f'Value {position}' for position, _ in enumerate(X, start=1)]
display(input_df)

Unnamed: 0,Value 1,Value 2,Value 3,Value 4,Value 5,Value 6,Value 7,Value 8,Value 9,Value 10,Value 11,Value 12,Value 13,Value 14,Value 15
X,0.0,16.0,32.0,48.0,64.0,80.0,90.0,112.0,138.0,154.0,170.0,186.0,202.0,218.0,250.0
Y,0.1,0.14,0.35,0.42,0.5,0.82,1.05,1.12,1.4,1.48,1.72,1.8,2.1,2.22,2.56


## 2. Basic Statistics: Mean, Standard Deviation, and Variance

In [None]:
# Mean, standard deviation and variance of each variable.
# ddof=1 makes numpy divide by N - 1 instead of N.
x_mean, x_std, x_var = X.mean(), X.std(ddof=1), X.var(ddof=1)
y_mean, y_std, y_var = Y.mean(), Y.std(ddof=1), Y.var(ddof=1)

# One row per statistic, one column per variable.
stats_df = pd.DataFrame(dict(
    Statistic=['Mean (Media)', 'Standard Deviation', 'Variance'],
    X=[x_mean, x_std, x_var],
    Y=[y_mean, y_std, y_var],
))
display(stats_df)

Unnamed: 0,Statistic,X,Y
0,Mean (Media),117.333333,1.185333
1,Standard Deviation,78.057366,0.791625
2,Variance,6092.952381,0.62667


## 3. Detailed Calculations Table

In [None]:
# Builds the per-observation working table and the sums reused by the
# Pearson (Section 4) and calibration-curve (Section 5) cells.
# Reads X, Y, x_mean and y_mean, which must already exist in the kernel.

# Element-wise terms, one value per observation.
Xi_squared = X ** 2
Yi_squared = Y ** 2
Xi_Yi = X * Y
Xi_minus_mean_squared = (X - x_mean) ** 2
Yi_minus_mean_squared = (Y - y_mean) ** 2

calc_df = pd.DataFrame({
    'Xi': X,
    'Yi': Y,
    'Xi²': Xi_squared,
    'Yi²': Yi_squared,
    'Xi·Yi': Xi_Yi,
    '(Xi - X̄)²': Xi_minus_mean_squared,
    '(Yi - Ȳ)²': Yi_minus_mean_squared
})

# Scalars used by the computational formulas in the following sections.
N = len(X)
sum_Xi = np.sum(X)
sum_Yi = np.sum(Y)
sum_Xi_squared = np.sum(Xi_squared)
sum_Yi_squared = np.sum(Yi_squared)
sum_Xi_Yi = np.sum(Xi_Yi)
# Careful with the similar names: *_squared is the sum of squares (ΣXi²),
# while *_sq is the square of the sum ((ΣXi)²).
sum_Xi_sq = sum_Xi ** 2  # (ΣXi)²
sum_Yi_sq = sum_Yi ** 2  # (ΣYi)²

# Column totals, appended below the per-observation rows.
sum_row = pd.DataFrame({
    'Xi': [sum_Xi],
    'Yi': [sum_Yi],
    'Xi²': [sum_Xi_squared],
    'Yi²': [sum_Yi_squared],
    'Xi·Yi': [sum_Xi_Yi],
    '(Xi - X̄)²': [np.sum(Xi_minus_mean_squared)],
    '(Yi - Ȳ)²': [np.sum(Yi_minus_mean_squared)]
}, index=['Σ (Sum)'])

# (Σ)² is only meaningful for the Xi and Yi columns; the other cells are
# intentionally left blank with empty strings.
squared_sum_row = pd.DataFrame({
    'Xi': [sum_Xi_sq],
    'Yi': [sum_Yi_sq],
    'Xi²': [''],
    'Yi²': [''],
    'Xi·Yi': [''],
    '(Xi - X̄)²': [''],
    '(Yi - Ȳ)²': ['']
}, index=['(Σ)²'])

final_table = pd.concat([calc_df, sum_row, squared_sum_row])
display(final_table)

# Float sums are printed with ".10g" (10 significant digits, trailing zeros
# dropped) so binary rounding noise such as 29.848599999999998 is not shown.
print("\n" + "="*60)
print("EXTRACTED VARIABLES FOR REUSE IN FORMULAS")
print("="*60)
print(f"N (number of values)    = {N}")
print(f"ΣXi (sum of X)          = {sum_Xi}")
print(f"ΣYi (sum of Y)          = {sum_Yi:.10g}")
print(f"ΣXi² (sum of X squared) = {sum_Xi_squared}")
print(f"ΣYi² (sum of Y squared) = {sum_Yi_squared:.10g}")
print(f"ΣXi·Yi (sum of X*Y)     = {sum_Xi_Yi:.10g}")
print(f"(ΣXi)² (sum of X)²      = {sum_Xi_sq}")
print(f"(ΣYi)² (sum of Y)²      = {sum_Yi_sq:.4f}")

Unnamed: 0,Xi,Yi,Xi²,Yi²,Xi·Yi,(Xi - X̄)²,(Yi - Ȳ)²
0,0,0.1,0.0,0.01,0.0,13767.111111,1.177948
1,16,0.14,256.0,0.0196,2.24,10268.444444,1.092722
2,32,0.35,1024.0,0.1225,11.2,7281.777778,0.697782
3,48,0.42,2304.0,0.1764,20.16,4807.111111,0.585735
4,64,0.5,4096.0,0.25,32.0,2844.444444,0.469682
5,80,0.82,6400.0,0.6724,65.6,1393.777778,0.133468
6,90,1.05,8100.0,1.1025,94.5,747.111111,0.018315
7,112,1.12,12544.0,1.2544,125.44,28.444444,0.004268
8,138,1.4,19044.0,1.96,193.2,427.111111,0.046082
9,154,1.48,23716.0,2.1904,227.92,1344.444444,0.086828



EXTRACTED VARIABLES FOR REUSE IN FORMULAS
N (number of values)    = 15
ΣXi (sum of X)          = 1760
ΣYi (sum of Y)          = 17.78
ΣXi² (sum of X squared) = 291808
ΣYi² (sum of Y squared) = 29.848599999999998
ΣXi·Yi (sum of X*Y)     = 2947.62
(ΣXi)² (sum of X)²      = 3097600
(ΣYi)² (sum of Y)²      = 316.1284


## 4. Pearson Correlation Coefficient (Step by Step)

In [None]:
# ============================================================
# PEARSON CORRELATION COEFFICIENT - STEP BY STEP
# ============================================================
# Computational formula (as given in class by Prof. Francisco Parra):
# r = [N·ΣXiYi - (ΣXi)(ΣYi)] / (√[N·ΣXi² - (ΣXi)²] · √[N·ΣYi² - (ΣYi)²])
#
# Reads N, sum_Xi, sum_Yi, sum_Xi_squared, sum_Yi_squared, sum_Xi_Yi,
# sum_Xi_sq and sum_Yi_sq, which must already exist in the kernel
# (they are produced by the Section 3 cell).
#
# Display notes:
#   * The whole denominator is wrapped in parentheses in every printed step so
#     the text matches what is computed: numerator / (sqrt_left * sqrt_right).
#   * Floats are printed with ".10g" (10 significant digits, trailing zeros
#     dropped) so binary rounding noise like 44214.299999999996 is not shown.

print("="*70)
print("PEARSON CORRELATION COEFFICIENT")
print("="*70)

# STEP 1: Show the formula with values substituted
print("\n" + "-"*70)
print("STEP 1: Substitute values from the table into the formula")
print("-"*70)
print(f"""
r = [N·ΣXi·Yi - (ΣXi)(ΣYi)] / (√[N·ΣXi² - (ΣXi)²] · √[N·ΣYi² - (ΣYi)²])

r = [{N}·{sum_Xi_Yi:.10g} - ({sum_Xi})({sum_Yi:.10g})] / (√[{N}·{sum_Xi_squared} - ({sum_Xi})²] · √[{N}·{sum_Yi_squared:.10g} - ({sum_Yi:.10g})²])
""")

# STEP 2: Resolve intermediate calculations
print("-"*70)
print("STEP 2: Resolve multiplications and squares")
print("-"*70)

# Numerator components
N_times_sum_XiYi = N * sum_Xi_Yi
sum_Xi_times_sum_Yi = sum_Xi * sum_Yi

# Denominator components
N_times_sum_Xi_squared = N * sum_Xi_squared
N_times_sum_Yi_squared = N * sum_Yi_squared

print(f"""
Numerator:
  N·ΣXi·Yi      = {N} × {sum_Xi_Yi:.10g} = {N_times_sum_XiYi:.10g}
  (ΣXi)(ΣYi)    = {sum_Xi} × {sum_Yi:.10g} = {sum_Xi_times_sum_Yi:.10g}

Denominator (left part):
  N·ΣXi²        = {N} × {sum_Xi_squared} = {N_times_sum_Xi_squared}
  (ΣXi)²        = {sum_Xi}² = {sum_Xi_sq}

Denominator (right part):
  N·ΣYi²        = {N} × {sum_Yi_squared:.10g} = {N_times_sum_Yi_squared:.10g}
  (ΣYi)²        = {sum_Yi:.10g}² = {sum_Yi_sq:.4f}
""")

# STEP 3: Simplify
print("-"*70)
print("STEP 3: Simplify (perform subtractions)")
print("-"*70)

numerator = N_times_sum_XiYi - sum_Xi_times_sum_Yi
denom_left = N_times_sum_Xi_squared - sum_Xi_sq
denom_right = N_times_sum_Yi_squared - sum_Yi_sq

print(f"""
r = [{N_times_sum_XiYi:.10g} - {sum_Xi_times_sum_Yi:.10g}] / (√[{N_times_sum_Xi_squared} - {sum_Xi_sq}] · √[{N_times_sum_Yi_squared:.10g} - {sum_Yi_sq:.4f}])

r = [{numerator:.10g}] / (√[{denom_left}] · √[{denom_right:.4f}])
""")

# STEP 4: Calculate square roots and final result
print("-"*70)
print("STEP 4: Calculate square roots and final result")
print("-"*70)

sqrt_left = np.sqrt(denom_left)
sqrt_right = np.sqrt(denom_right)
denominator = sqrt_left * sqrt_right
# If X or Y is constant the denominator is 0 and r is undefined. Store NaN
# explicitly instead of dividing by zero; a NaN denominator also fails the
# "> 0" check and lands here.
pearson_r = numerator / denominator if denominator > 0 else np.nan

print(f"""
r = {numerator:.10g} / (√{denom_left} · √{denom_right:.4f})

r = {numerator:.10g} / ({sqrt_left:.4f} · {sqrt_right:.4f})

r = {numerator:.10g} / {denominator:.4f}

r = {pearson_r:.6f}
""")

# Final result with interpretation
print("="*70)
print(f"FINAL RESULT: r = {pearson_r:.6f}")
print("="*70)

if not np.isfinite(pearson_r):
    # Comparisons with NaN are always False, which would otherwise be
    # reported as a "very weak negative" correlation.
    strength = direction = "undefined"
    print("Interpretation: r is undefined because X or Y has no variability.")
elif pearson_r == 0:
    # Zero has no sign, so do not label it positive or negative.
    strength, direction = "no", "none"
    print("Interpretation: There is no linear correlation between X and Y.")
else:
    # Strength bands are based on |r|; the sign only gives the direction.
    if abs(pearson_r) >= 0.9:
        strength = "very strong"
    elif abs(pearson_r) >= 0.7:
        strength = "strong"
    elif abs(pearson_r) >= 0.5:
        strength = "moderate"
    elif abs(pearson_r) >= 0.3:
        strength = "weak"
    else:
        strength = "very weak"
    direction = "positive" if pearson_r > 0 else "negative"
    print(f"Interpretation: There is a {strength} {direction} correlation between X and Y.")

PEARSON CORRELATION COEFFICIENT

----------------------------------------------------------------------
STEP 1: Substitute values from the table into the formula
----------------------------------------------------------------------

r = [N·ΣXi·Yi - (ΣXi)(ΣYi)] / √[N·ΣXi² - (ΣXi)²] · √[N·ΣYi² - (ΣYi)²]

r = [15·2947.62 - (1760)(17.78)] / √[15·291808 - (1760)²] · √[15·29.848599999999998 - (17.78)²]

----------------------------------------------------------------------
STEP 2: Resolve multiplications and squares
----------------------------------------------------------------------

Numerator:
  N·ΣXi·Yi      = 15 × 2947.62 = 44214.299999999996
  (ΣXi)(ΣYi)    = 1760 × 17.78 = 31292.800000000003

Denominator (left part):
  N·ΣXi²        = 15 × 291808 = 4377120
  (ΣXi)²        = 1760² = 3097600

Denominator (right part):
  N·ΣYi²        = 15 × 29.848599999999998 = 447.729
  (ΣYi)²        = 17.78² = 316.1284

----------------------------------------------------------------------
STEP 3: S

## 5. Calibration Curve y = mx + b (Step by Step)

In [6]:
# ============================================================
# CALIBRATION CURVE y = mx + b - STEP BY STEP
# ============================================================
# Formulas (using computational form):
# m = [N·ΣXi·Yi - (ΣXi)(ΣYi)] / [N·ΣXi² - (ΣXi)²]
# b = Ȳ - m·X̄
#
# Reads N, sum_Xi, sum_Yi, sum_Xi_squared, sum_Xi_Yi and sum_Xi_sq (Section 3),
# x_mean and y_mean (Section 2) and pearson_r (Section 4); all of them must
# already exist in the kernel.
#
# Floats are printed with ".10g" (10 significant digits, trailing zeros
# dropped) so binary rounding noise like 44214.299999999996 is not shown.

print("="*70)
print("CALIBRATION CURVE: y = mx + b")
print("="*70)

# Recomputed here from the Section 3 sums so this cell does not depend on
# intermediate products that are otherwise only defined in the Pearson cell.
N_times_sum_XiYi = N * sum_Xi_Yi
sum_Xi_times_sum_Yi = sum_Xi * sum_Yi
N_times_sum_Xi_squared = N * sum_Xi_squared

# ============================================================
# CALCULATING SLOPE (m)
# ============================================================
print("\n" + "="*70)
print("CALCULATING SLOPE (m)")
print("="*70)

# STEP 1: Show the formula with values substituted
print("\n" + "-"*70)
print("STEP 1: Substitute values from the table into the formula")
print("-"*70)
print(f"""
m = [N·ΣXi·Yi - (ΣXi)(ΣYi)] / [N·ΣXi² - (ΣXi)²]

m = [{N}·{sum_Xi_Yi:.10g} - ({sum_Xi})({sum_Yi:.10g})] / [{N}·{sum_Xi_squared} - ({sum_Xi})²]
""")

# STEP 2: Resolve intermediate calculations
print("-"*70)
print("STEP 2: Resolve multiplications and squares")
print("-"*70)
print(f"""
Numerator:
  N·ΣXi·Yi      = {N} × {sum_Xi_Yi:.10g} = {N_times_sum_XiYi:.10g}
  (ΣXi)(ΣYi)    = {sum_Xi} × {sum_Yi:.10g} = {sum_Xi_times_sum_Yi:.10g}

Denominator:
  N·ΣXi²        = {N} × {sum_Xi_squared} = {N_times_sum_Xi_squared}
  (ΣXi)²        = {sum_Xi}² = {sum_Xi_sq}
""")

# STEP 3: Simplify
print("-"*70)
print("STEP 3: Simplify (perform subtractions)")
print("-"*70)

m_numerator = N_times_sum_XiYi - sum_Xi_times_sum_Yi
m_denominator = N_times_sum_Xi_squared - sum_Xi_sq

print(f"""
m = [{N_times_sum_XiYi:.10g} - {sum_Xi_times_sum_Yi:.10g}] / [{N_times_sum_Xi_squared} - {sum_Xi_sq}]

m = {m_numerator:.10g} / {m_denominator}
""")

# STEP 4: Calculate final result for m
print("-"*70)
print("STEP 4: Calculate final result")
print("-"*70)

# N·ΣXi² - (ΣXi)² is zero when every X value is the same; the slope is then
# undefined, so stop with a clear message instead of printing a NaN/inf line.
if m_denominator == 0:
    raise ValueError(
        "Cannot fit y = mx + b: all X values are identical, "
        "so N·ΣXi² - (ΣXi)² is 0 and the slope is undefined."
    )

m = m_numerator / m_denominator

print(f"""
m = {m_numerator:.10g} / {m_denominator}

m = {m:.6f}
""")

# ============================================================
# CALCULATING INTERCEPT (b)
# ============================================================
print("\n" + "="*70)
print("CALCULATING INTERCEPT (b)")
print("="*70)

# STEP 1: Show the formula with values substituted
print("\n" + "-"*70)
print("STEP 1: Substitute values into the formula")
print("-"*70)
print(f"""
b = Ȳ - m·X̄

b = {y_mean:.6f} - {m:.6f}·{x_mean:.6f}
""")

# STEP 2: Resolve multiplication
print("-"*70)
print("STEP 2: Resolve multiplication")
print("-"*70)

m_times_x_mean = m * x_mean

print(f"""
m·X̄ = {m:.6f} × {x_mean:.6f} = {m_times_x_mean:.6f}
""")

# STEP 3: Calculate final result for b
print("-"*70)
print("STEP 3: Calculate final result")
print("-"*70)

b = y_mean - m_times_x_mean

print(f"""
b = {y_mean:.6f} - {m_times_x_mean:.6f}

b = {b:.6f}
""")

# ============================================================
# FINAL CALIBRATION CURVE EQUATION
# ============================================================
# Print the intercept as "+ value" or "- value" so a negative b does not
# render as "+ -value" in the equation.
intercept_sign = '+' if b >= 0 else '-'

print("="*70)
print("FINAL RESULTS")
print("="*70)
print(f"""
Slope (m)     = {m:.6f}
Intercept (b) = {b:.6f}

CALIBRATION CURVE EQUATION:

    y = {m:.6f}x {intercept_sign} {abs(b):.6f}
""")

# R² coefficient: for a straight-line fit this is the square of Pearson's r.
r_squared = pearson_r ** 2
print(f"R² (Coefficient of Determination) = {r_squared:.6f}")
print(f"\nThis means {r_squared*100:.2f}% of the variance in Y is explained by X.")

CALIBRATION CURVE: y = mx + b

CALCULATING SLOPE (m)

----------------------------------------------------------------------
STEP 1: Substitute values from the table into the formula
----------------------------------------------------------------------

m = [N·ΣXi·Yi - (ΣXi)(ΣYi)] / [N·ΣXi² - (ΣXi)²]

m = [15·2947.62 - (1760)(17.78)] / [15·291808 - (1760)²]

----------------------------------------------------------------------
STEP 2: Resolve multiplications and squares
----------------------------------------------------------------------

Numerator:
  N·ΣXi·Yi      = 15 × 2947.62 = 44214.299999999996
  (ΣXi)(ΣYi)    = 1760 × 17.78 = 31292.800000000003

Denominator:
  N·ΣXi²        = 15 × 291808 = 4377120
  (ΣXi)²        = 1760² = 3097600

----------------------------------------------------------------------
STEP 3: Simplify (perform subtractions)
----------------------------------------------------------------------

m = [44214.299999999996 - 31292.800000000003] / [4377120 - 30976