In [5]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from scipy.stats import shapiro, t
import numpy as np

**Model Evaluation**

In [6]:
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
model = RandomForestClassifier()

# Cross-validation scores
scores = cross_val_score(model, X, y, cv=10)

# 95% confidence interval
mean_score = np.mean(scores)
std_score = np.std(scores)
n = len(scores)

t_critical = t.ppf(0.975, df=n-1)
margin = t_critical * (std_score / np.sqrt(n))

print(f"Mean accuracy: {mean_score:.4f}")
print(f"95% CI: [{mean_score - margin:.4f}, {mean_score + margin:.4f}]")

Mean accuracy: 0.8920
95% CI: [0.8761, 0.9079]


**Feature Analysis**

In [7]:
# Check if feature is normally distributed
feature = np.random.normal(0, 1, 100)

stat, p_value = shapiro(feature)
if p_value > 0.05:
    print("Feature appears normally distributed")
else:
    print("Feature does NOT appear normally distributed")

Feature appears normally distributed
