In [2]:
# Import necessary libraries
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
import numpy as np

def test_features(X, y, test_size=0.2, validation_size=0.2):
    """Tune several classifiers on ``X``/``y`` and plot their test accuracy.

    The data is split into training, validation and test parts.  Training
    and validation are merged again and handed to ``RandomizedSearchCV``
    (5-fold cross-validation, 10 sampled candidates per model); the test
    part is only used for the final accuracy figure.  Features are
    standardized with a ``StandardScaler`` fitted on the merged
    training/validation data before any model sees them.

    Args:
        X: Feature matrix accepted by ``train_test_split`` and
            ``np.concatenate``.
        y: Target labels, aligned with ``X``.
        test_size: Share (or count) of samples held out for the final test.
        validation_size: Share (or count) of samples set aside as the
            validation part.

    Returns:
        A list with one ``{'Model': name, 'Accuracy': score}`` dict per
        model that was tuned, in the order the models were evaluated.

    Raises:
        ValueError: If ``test_size`` or ``validation_size`` is not positive.
    """
    if test_size <= 0 or validation_size <= 0:
        # Fail early with a clear message instead of a ZeroDivisionError or
        # an opaque error from the second split below.
        raise ValueError("test_size and validation_size must both be positive")

    # Split the dataset into training, validation, and testing sets
    holdout_size = test_size + validation_size
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=holdout_size, random_state=42
    )
    X_validation, X_test, y_validation, y_test = train_test_split(
        X_temp, y_temp, test_size=test_size / holdout_size, random_state=42
    )

    # Define a list of models to test
    models = [
        ("Linear Regression", LinearRegression()),
        ("Random Forest", RandomForestClassifier()),
        ("Support Vector Machine", SVC()),
        ("K-Nearest Neighbors", KNeighborsClassifier()),
        ("Gaussian Naive Bayes", GaussianNB()),
        ("Multi-layer Perceptron", MLPClassifier(max_iter=1000)),
    ]

    # Search space per model name.  Models without an entry here are
    # reported as unsupported and skipped.
    param_grids = {
        "Random Forest": {
            'n_estimators': [10, 50, 100, 200],
            'max_depth': [None, 10, 20, 30],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'bootstrap': [True, False],
        },
        "Support Vector Machine": {
            'C': [0.1, 1, 10, 100],
            'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
            'gamma': ['scale', 'auto'],
        },
        "K-Nearest Neighbors": {
            'n_neighbors': [3, 5, 7, 10],
            'weights': ['uniform', 'distance'],
            'p': [1, 2],
        },
        "Multi-layer Perceptron": {
            'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
            'activation': ['relu', 'tanh'],
            'alpha': [0.0001, 0.001, 0.01],
        },
    }

    # The search is fitted on training + validation data; build that set
    # once instead of re-concatenating it for every model.
    X_fit = np.concatenate((X_train, X_validation))
    y_fit = np.concatenate((y_train, y_validation))

    # Standardize the features.  The scaler is fitted only on the data the
    # models are trained on, and the same transform is applied to the test
    # set, so no test-set statistics leak into training.
    scaler = StandardScaler()
    X_fit_scaled = scaler.fit_transform(X_fit)
    X_test_scaled = scaler.transform(X_test)

    # Results storage
    results = []

    # Hyperparameter search for each model
    for model_name, model in models:
        param_grid = param_grids.get(model_name)
        if param_grid is None:
            print(f"Unsupported model: {model_name}")
            continue

        # Randomized search for hyperparameter tuning
        search = RandomizedSearchCV(
            model, param_grid, n_iter=10, cv=5, random_state=42, n_jobs=-1
        )
        search.fit(X_fit_scaled, y_fit)

        # Evaluate the refitted best estimator on the untouched test set
        y_pred = search.best_estimator_.predict(X_test_scaled)
        accuracy = accuracy_score(y_test, y_pred)

        # Print the results
        print(f"\n{model_name} Best Parameters: {search.best_params_}")
        print(f"{model_name} Accuracy: {accuracy:.4f}")

        # Store results
        results.append({
            'Model': model_name,
            'Accuracy': accuracy,
        })

    # Create a bar plot for accuracy
    models_names = [result['Model'] for result in results]
    accuracies = [result['Accuracy'] for result in results]
    plt.figure(figsize=(10, 6))
    plt.bar(models_names, accuracies, color='blue', alpha=0.7)
    plt.xlabel('Model')
    plt.ylabel('Accuracy')
    plt.title('Accuracy for Different Models')
    plt.show()

    return results

# Example usage:
# test_features(X, y)


ModuleNotFoundError: No module named 'sklearn'

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3129096e-2078-4742-bd9d-c05dc1a0bf39' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>