In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification


# Build a small synthetic binary-classification dataset (seeded so runs are repeatable)
X, y = make_classification(
    n_samples=100,
    n_features=20,
    n_classes=2,
    random_state=42,
)

# Random-forest model with a fixed seed; the evaluation function below fits this object
clf = RandomForestClassifier(random_state=42)

# Repeated random hold-out evaluation; returns the scores (printing is left to the caller)
def manual_cross_validation(X, y, test_size, n_splits=5, estimator=None, random_state=42):
    """Score a classifier on several independent random train/test splits.

    Despite the name this is not k-fold cross-validation: every repetition
    draws a fresh random split via ``train_test_split``, so the test sets of
    different repetitions may overlap.

    Args:
        X: Feature matrix, forwarded to ``train_test_split``.
        y: Target labels, forwarded to ``train_test_split``.
        test_size: Size of the held-out portion, forwarded unchanged to
            ``train_test_split``.
        n_splits: Number of random splits to evaluate; must be at least 1.
        estimator: Object exposing ``fit`` and ``predict``. When ``None`` the
            module-level ``clf`` is used (the original behaviour).
        random_state: Integer base seed; repetition ``i`` is split with seed
            ``random_state + i``.

    Returns:
        A ``(scores, mean_score)`` tuple: the list of per-split accuracies and
        their mean.

    Raises:
        ValueError: If ``n_splits`` is smaller than 1 (the mean of zero
            scores would otherwise silently be NaN).
    """
    if n_splits < 1:
        raise ValueError(f"n_splits must be at least 1, got {n_splits}")

    # Resolve the fallback lazily so an explicit estimator never touches the global.
    model = clf if estimator is None else estimator

    scores = []
    for i in range(n_splits):
        # A different seed per repetition yields a different random partition.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state + i
        )
        # The same model object is refit on every repetition's training portion.
        model.fit(X_train, y_train)
        scores.append(accuracy_score(y_test, model.predict(X_test)))
    return scores, np.mean(scores)

# Evaluate repeated random splits for several train/test ratios
ratios = [0.2, 0.3, 0.4]  # Corresponding to 80-20, 70-30, and 60-40 splits
for ratio in ratios:
    scores, mean_score = manual_cross_validation(X, y, test_size=ratio)
    # round() rather than int(): float products such as 0.29 * 100 land just
    # below the intended integer and would be truncated to the wrong label.
    test_pct = round(ratio * 100)
    # Derive the train share as the complement so the label always sums to 100.
    train_pct = 100 - test_pct
    print(f"Cross-Validation Scores for {train_pct}-{test_pct} Split: {scores}")
    print(f"Mean Score: {mean_score}\n")


Cross-Validation Scores for 80-20 Split: [0.9, 0.9, 0.95, 0.9, 1.0]
Mean Score: 0.93

Cross-Validation Scores for 70-30 Split: [0.9, 0.9666666666666667, 0.9333333333333333, 0.9333333333333333, 1.0]
Mean Score: 0.9466666666666667

Cross-Validation Scores for 60-40 Split: [0.95, 0.975, 0.975, 0.925, 1.0]
Mean Score: 0.9650000000000001

