In [None]:
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

In [3]:
# Synthetic binary-classification problem: 1000 samples with 20 features each.
# The fixed random_state keeps the generated data identical across re-runs.
x, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the samples as a test set; the remaining 80% is what the
# model-comparison cell cross-validates on.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Candidate classifiers, keyed by the label used in the printed report.
# Every estimator that accepts a seed gets random_state=42 for reproducibility.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(random_state=42),
    'KNN': KNeighborsClassifier(),
}

# results[label] -> {'mean': float, 'std': float, 'scores': per-fold accuracies}
results = {}
for name, model in models.items():
    # 5-fold cross-validated accuracy, computed on the training split only so
    # the held-out test set stays untouched during model selection.
    scores = cross_val_score(
        model,
        x_train,
        y_train,
        cv=5,
        scoring='accuracy',
        n_jobs=-1,
    )
    results[name] = dict(mean=scores.mean(), std=scores.std(), scores=scores)
    print(f"{name:20s}: {scores.mean():.3f} (+/- {scores.std():.3f})")

# Pick the label with the highest mean CV accuracy (first one wins on a tie).
best_model_name = max(results.items(), key=lambda entry: entry[1]['mean'])[0]
print(f"\nBest model: {best_model_name}")

Logistic Regression : 0.867 (+/- 0.010)
Decision Tree       : 0.870 (+/- 0.016)
Random Forest       : 0.891 (+/- 0.029)
SVM                 : 0.861 (+/- 0.010)
KNN                 : 0.814 (+/- 0.012)

Best model: Random Forest


**Statistical Significance Testing**

In [7]:
from scipy import stats

# Paired t-test on the per-fold CV accuracies of two models stored in `results`.
# The names are kept in variables so the lookup keys and the printed verdict
# can never disagree (the previous version hard-coded "Decision Tree" in the
# losing branch even though the second model is KNN).
model1_name = 'Random Forest'
model2_name = 'KNN'
alpha = 0.05  # significance level for the two-sided test

model1_scores = results[model1_name]['scores']
model2_scores = results[model2_name]['scores']

# ttest_rel pairs the scores fold-by-fold, so both arrays must come from the
# same CV splits, in the same order.
# NOTE(review): CV folds share training data, so the fold scores are not
# independent and this test tends to be over-confident; treat the p-value as
# indicative only (a corrected resampled t-test is the stricter alternative).
t_stat, p_value = stats.ttest_rel(model1_scores, model2_scores)

print(f"T-statistic: {t_stat:.3f}")
print(f"P-value: {p_value:.3f}")

if p_value < alpha:
    print("Difference is statistically significant")
    # Report whichever of the two compared models has the higher mean accuracy.
    if model1_scores.mean() > model2_scores.mean():
        print(f"{model1_name} is significantly better")
    else:
        print(f"{model2_name} is significantly better")
else:
    print("No significant difference")

T-statistic: 6.666
P-value: 0.003
Difference is statistically significant
Random Forest is significantly better
