In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import RFE

# Synthetic binary-classification dataset: 100 samples with 20 features each.
# The fixed seed makes the generated data reproducible between runs.
X, y = make_classification(
    n_samples=100,
    n_features=20,
    n_classes=2,
    random_state=42,
)

# Module-level random-forest estimator shared by the helper functions below;
# seeded so repeated fits on the same data are reproducible.
clf = RandomForestClassifier(random_state=42)

# Feature selection using RFE
def feature_selection_rfe(X, y, n_features_to_select=10):
    """Run recursive feature elimination and report which columns were kept.

    Args:
        X: Feature matrix to fit the selector on.
        y: Target labels aligned with the rows of ``X``.
        n_features_to_select: Number of features RFE should retain.

    Returns:
        The fitted selector's ``support_`` attribute, which the caller uses
        as a column mask over ``X``.
    """
    # The module-level ``clf`` is the estimator RFE ranks features with;
    # ``step=1`` is passed through unchanged from the original configuration.
    rfe = RFE(clf, n_features_to_select=n_features_to_select, step=1)
    # ``fit`` returns the selector itself, so the attribute can be read directly.
    return rfe.fit(X, y).support_

# Function to perform manual cross-validation with feature selection and return the per-split scores and their mean
def manual_cross_validation_with_fs(X, y, test_size, n_splits=5, n_features_to_select=10):
    """Evaluate the classifier over repeated random hold-out splits with RFE.

    Each repetition draws a fresh train/test partition via
    ``train_test_split`` (seeded ``42 + i``), so this is repeated random
    sub-sampling rather than k-fold cross-validation: test sets from
    different repetitions may overlap.

    Args:
        X: Full feature matrix.
        y: Labels aligned with the rows of ``X``.
        test_size: Passed straight to ``train_test_split`` as the held-out share.
        n_splits: Number of random partitions to evaluate.
        n_features_to_select: Number of features RFE keeps per partition.

    Returns:
        A tuple ``(scores, mean_score)``: the list of per-split accuracies and
        their ``np.mean``.

    Note:
        The module-level ``clf`` is refit in place on every repetition, so
        after this call it holds the model from the last split.
    """
    fold_accuracies = []
    for split_idx in range(n_splits):
        # A distinct seed per repetition yields a different yet reproducible partition.
        split_seed = 42 + split_idx
        X_tr, X_te, y_tr, y_te = train_test_split(
            X, y, test_size=test_size, random_state=split_seed
        )

        # Choose features from the training portion only, so the held-out
        # rows never influence which columns are kept.
        keep = feature_selection_rfe(X_tr, y_tr, n_features_to_select)

        # Fit and score on the reduced column set.
        clf.fit(X_tr[:, keep], y_tr)
        predictions = clf.predict(X_te[:, keep])
        fold_accuracies.append(accuracy_score(y_te, predictions))

    return fold_accuracies, np.mean(fold_accuracies)

# Evaluate several hold-out ratios (test share 20%, 30%, 40%) with feature selection
# and print the per-split accuracies plus their mean for each ratio.
ratios = [0.2, 0.3, 0.4]  # Corresponding to 80-20, 70-30, and 60-40 splits
n_features_to_select = 10  # Number of features to select
for ratio in ratios:
    scores, mean_score = manual_cross_validation_with_fs(
        X, y, test_size=ratio, n_features_to_select=n_features_to_select
    )
    # Use round() instead of int(): products such as 0.29 * 100 evaluate to
    # 28.999999999999996 in binary floating point and would be truncated to 28.
    # Deriving the train share as the complement keeps the label summing to 100.
    test_pct = round(ratio * 100)
    train_pct = 100 - test_pct
    print(f"Cross-Validation Scores for {train_pct}-{test_pct} Split with Feature Selection: {scores}")
    print(f"Mean Score: {mean_score}\n")


Cross-Validation Scores for 80-20 Split with Feature Selection: [0.9, 0.95, 0.95, 0.9, 1.0]
Mean Score: 0.9399999999999998

Cross-Validation Scores for 70-30 Split with Feature Selection: [0.9, 0.9333333333333333, 0.9666666666666667, 0.9, 1.0]
Mean Score: 0.9400000000000001

Cross-Validation Scores for 60-40 Split with Feature Selection: [0.95, 0.95, 0.975, 0.925, 1.0]
Mean Score: 0.96

