<a href="https://colab.research.google.com/github/taylor33189-beep/Taylor_Hoskins_Repository/blob/main/Untitled12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from scipy.stats import pearsonr, shapiro, spearmanr
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import numpy as np

# Load the data
df = pd.read_csv('Monica3.csv')

# Restrict the analysis to rows where BOTH variables are observed.
# SciPy's correlation/normality tests do not drop missing values by default,
# and the hand-computed test statistics below must be based on the number of
# pairs actually analysed rather than on the raw row count of the file.
paired = df[['hdl', 'alcohol']].dropna()
hdl = paired['hdl']
alcohol = paired['alcohol']
n = len(paired)
alpha = 0.05  # conventional 5% level; not referenced further in this cell

# Shapiro-Wilk needs at least 3 observations and the t statistic uses n - 2,
# so fail early with a clear message instead of a cryptic downstream error.
if n < 3:
    raise ValueError(
        f"Need at least 3 complete (hdl, alcohol) pairs, found {n}"
    )

# 1. Pearson Correlation Test (Assumes normality)
# The p-value comes from pearsonr itself; the t statistic is recomputed here
# for reporting only: t = r * sqrt((n - 2) / (1 - r^2)).
r_pearson, p_pearson = pearsonr(hdl, alcohol)
t_statistic = r_pearson * np.sqrt((n - 2) / (1 - r_pearson**2))
print(f"Pearson r (Linear Correlation): {r_pearson:.4f}")
print(f"Pearson P-value: {p_pearson:.4g} (Test Statistic: t = {t_statistic:.4f})")
print("-" * 30)

# 2. Normality Check for HDL Cholesterol (The 'hdl_normality_plots.png' image)
shapiro_w, shapiro_p = shapiro(hdl)
print(f"Shapiro-Wilk Normality Test P-value for HDL: {shapiro_p:.4g}")

# Visual Normality Check: histogram with KDE on the left, Q-Q plot on the right.
# Explicit figure/axes handles are used so each plot is drawn on a known axis
# and exactly this figure is saved and closed.
fig, (ax_hist, ax_qq) = plt.subplots(1, 2, figsize=(12, 5))
sns.histplot(hdl, kde=True, ax=ax_hist)
ax_hist.set_title('Histogram of HDL Cholesterol')
sm.qqplot(hdl, line='s', ax=ax_qq)
ax_qq.set_title('Q-Q Plot of HDL Cholesterol')
fig.tight_layout()
fig.savefig('hdl_normality_plots.png')
plt.close(fig)  # Close plot for non-interactive environment
print("Normality plots saved to 'hdl_normality_plots.png'")
print("-" * 30)

# 3. Spearman Rank Correlation Test (Non-parametric alternative)
# The p-value comes from spearmanr itself; Z = rho * sqrt(n - 1) is only an
# approximate large-sample statistic reported alongside it.
r_spearman, p_spearman = spearmanr(hdl, alcohol)
z_statistic = r_spearman * np.sqrt(n - 1)
print(f"Spearman rho (Rank Correlation): {r_spearman:.4f}")
print(f"Spearman P-value: {p_spearman:.4g} (Approximate Test Statistic: Z = {z_statistic:.4f})")

Pearson r (Linear Correlation): 0.3281
Pearson P-value: 3.135e-123 (Test Statistic: t = 24.2983)
------------------------------
Shapiro-Wilk Normality Test P-value for HDL: 1.541e-32
Normality plots saved to 'hdl_normality_plots.png'
------------------------------
Spearman rho (Rank Correlation): 0.3366
Spearman P-value: 4.806e-130 (Approximate Test Statistic: Z = 23.5551)
