### Predictions for McNemar's Test

In [None]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def run_poly_logistic_regression_fixed_datasets(
    n: int,
    e: int,
    *,
    data_dir: str = 'Datasets_Train_Test_Split',
    C: float = 0.85,
    max_iter: int = 100,
    random_state=0,
) -> None:
    """
    Fit polynomial logistic regression on a fixed train/test split and save test predictions.

    Parameters:
    ----------
    n : int
        Identifier for the dataset (e.g., 9, 12, 15, 18), which specifies the corresponding dataset files.
    e : int
        The degree of the polynomial features to be created from the input data.
    data_dir : str or path-like, keyword-only
        Directory that holds the input `.npy` files and receives the output `.npy` files.
        Defaults to `Datasets_Train_Test_Split`.
    C : float, keyword-only
        Inverse regularization strength passed to `LogisticRegression`. Defaults to 0.85.
    max_iter : int, keyword-only
        Maximum number of solver iterations passed to `LogisticRegression`. Defaults to 100.
    random_state : int, RandomState instance or None, keyword-only
        Seed passed to `LogisticRegression`. The 'sag' solver is stochastic, so a fixed
        seed (default 0) makes the saved predictions reproducible between runs. Pass
        `None` to get an unseeded fit.

    Returns:
    -------
    None
        Saves predictions (`y_test_pred`) and prediction probabilities (`y_test_pred_proba`)
        to .npy files for further evaluation (e.g., McNemar's test). Outputs test accuracy to the console.

    Raises:
    ------
    FileNotFoundError
        Raised by `np.load` if any of the four input files is missing from `data_dir`.

    Notes:
    -----
    - Input files use hyphens:
        `{data_dir}/kryptonite-{n}-X_train.npy`, `{data_dir}/kryptonite-{n}-y_train.npy`,
        `{data_dir}/kryptonite-{n}-X_test.npy`, `{data_dir}/kryptonite-{n}-y_test.npy`.
    - Output files use underscores:
        `{data_dir}/kryptonite_{n}_pred_logreg.npy` and
        `{data_dir}/kryptonite_{n}_pred_proba_logreg.npy`.
    - Polynomial feature transformations are fitted on the training data only and then
      applied to the test data.
    - Logistic regression uses the 'sag' solver.
    - NOTE(review): the polynomial features are not rescaled before fitting; scikit-learn
      documents that 'sag' converges quickly only on similarly scaled features, so
      confirm whether a scaler should be added upstream.
    """
    # Imported locally so this function does not rely on a file-level pathlib import.
    from pathlib import Path

    data_dir = Path(data_dir)

    def _load(part):
        # Input naming convention: kryptonite-<n>-<part>.npy (hyphen separated).
        return np.load(data_dir / f'kryptonite-{n}-{part}.npy')

    def _save(suffix, values):
        # Output naming convention: kryptonite_<n>_<suffix>.npy (underscore separated).
        np.save(data_dir / f'kryptonite_{n}_{suffix}.npy', values)

    X_train = _load('X_train')
    y_train = _load('y_train')
    X_test = _load('X_test')
    y_test = _load('y_test')

    # Expand the raw features into polynomial terms; fit on train only so that
    # nothing is learned from the test split.
    poly = PolynomialFeatures(e)
    X_train_poly = poly.fit_transform(X_train)
    X_test_poly = poly.transform(X_test)

    # Initialize and fit logistic regression. The seed pins the sample order used
    # by the stochastic 'sag' solver so that reruns give the same predictions.
    logreg = LogisticRegression(
        max_iter=max_iter,
        solver='sag',
        C=C,
        random_state=random_state,
    )
    logreg.fit(X_train_poly, y_train)
    print("Fit Model")

    # Evaluate on the test set
    y_test_pred = logreg.predict(X_test_poly)
    y_test_pred_proba = logreg.predict_proba(X_test_poly)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Save hard predictions and class probabilities for McNemar's test
    _save('pred_logreg', y_test_pred)
    _save('pred_proba_logreg', y_test_pred_proba)

# Dataset identifiers to process; each one selects a set of kryptonite-<n> files.
possible_n_vals = [9, 12, 15, 18]

# Fit a degree-5 polynomial logistic regression for every dataset in turn,
# printing the identifier first so the console output can be matched to it.
for n in possible_n_vals:
    print(n)
    run_poly_logistic_regression_fixed_datasets(n, e=5)

9




Fit Model
Test Accuracy: 0.5097
12




Fit Model
Test Accuracy: 0.4890
15




Fit Model
Test Accuracy: 0.5073
18




Fit Model
Test Accuracy: 0.5050
