<a href="https://colab.research.google.com/github/rodrigokazu/alsp_classifier/blob/main/Logistic_Regression_and_Random_Forest_Classifiers_(GridSearch_and_Backward_Elimination)_Continuous_Performance_Task_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Loading libraries

In [298]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Loading data

In [299]:
dataset = pd.read_csv("READYCPTfourthrun.csv")

# Separating matrix of features from the dependent variable

In [300]:
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Splitting data into training and test sets

In [301]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Building Logistic Regression instance

In [302]:
classifier = LogisticRegression()

# Perform Grid Search to find the best hyperparameters (tuning)


In [303]:
param_grid = {
    "C": [0.001, 0.01, 0.1, 1, 10, 100],
    "solver": ["liblinear", "newton-cg", "saga", "sag", "lbfgs"],
    "max_iter": [100, 500, 1000, 2000, 10000]
}

# Get the best parameters and estimator from the Grid Search




In [304]:
grid_search = GridSearchCV(classifier, param_grid, cv=5, scoring="accuracy")

grid_search.fit(x_train, y_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Score:", best_score)

best_model = grid_search.best_estimator_

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Best Parameters: {'C': 0.001, 'max_iter': 100, 'solver': 'newton-cg'}
Best Score: 0.5222222222222223


# Implementation of custom function to select accuracy-weighed features

In [305]:
selected_features = list(range(x_train.shape[1]))
initial_accuracy = accuracy_score(y_test, best_model.predict(x_test))

for i in selected_features:
    features_to_use = [feature for feature in selected_features if feature != i]

    if len(features_to_use) > 0:
        x_subset = x_train[:, features_to_use]
        classifier.fit(x_subset, y_train)
        y_pred_subset = classifier.predict(x_test[:, features_to_use])
        accuracy_subset = accuracy_score(y_test, y_pred_subset)

        if accuracy_subset > initial_accuracy:
            print(f"Removing feature in position {i} - Accuracy improved to {accuracy_subset:.4f}")
            initial_accuracy = accuracy_subset
            selected_features = features_to_use
        else:
            print(f"Keeping feature in position {i} - Accuracy: {accuracy_subset:.4f}")
    else:
        print(f"All features removed - Terminating Process")
        break

print("Selected Features:")
for feature_index in selected_features:
    if feature_index < len(dataset.columns) - 1:
        print(dataset.columns[feature_index])
    else:
        print("Invalid Index")

Removing feature in position 0 - Accuracy improved to 0.6667
Keeping feature in position 1 - Accuracy: 0.6667
Keeping feature in position 2 - Accuracy: 0.6667
Keeping feature in position 3 - Accuracy: 0.6667
Keeping feature in position 4 - Accuracy: 0.6667
Keeping feature in position 5 - Accuracy: 0.5833
Keeping feature in position 6 - Accuracy: 0.6667
Removing feature in position 7 - Accuracy improved to 0.7500
Keeping feature in position 8 - Accuracy: 0.5833
Keeping feature in position 9 - Accuracy: 0.6667
Keeping feature in position 10 - Accuracy: 0.7500
Keeping feature in position 11 - Accuracy: 0.7500
Keeping feature in position 12 - Accuracy: 0.7500
Keeping feature in position 13 - Accuracy: 0.7500
Keeping feature in position 14 - Accuracy: 0.7500
Keeping feature in position 15 - Accuracy: 0.7500
Keeping feature in position 16 - Accuracy: 0.7500
Selected Features:
HR2
HR1
HR0.5
HR0.2
FAR2
FAR1
FAR0.2
DP2
DP1
DP0.5
DP0.2
CB2
CB1
CB0.5
CB0.2


# Fit the best regressor on the training data



In [306]:
best_model.fit(x_train[:, selected_features], y_train)

# Make predictions on the test set using selected features



In [307]:
y_pred = best_model.predict(x_test[:, selected_features])

# Evaluate performance on test set

In [308]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

[[4 0]
 [8 0]]
Accuracy: 0.3333333333333333


# Building Random Forest classifier instance

In [309]:
rf_classifier = RandomForestClassifier()

# Perform Grid Search to find the best hyperparameters (tuning)


In [310]:
param_grid = {
    'n_estimators': [100, 300, 500],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Get the best parameters and estimator from the Grid Search


In [311]:
grid_search = GridSearchCV(rf_classifier, param_grid, cv=5, scoring='accuracy')

grid_search.fit(x_train, y_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

best_rf_model = grid_search.best_estimator_

Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 300}
Best Score: 0.4138888888888889


# Implementation of custom function to select accuracy-weighed features

In [312]:
selected_features = list(range(x_train.shape[1]))
initial_accuracy = accuracy_score(y_test, best_rf_model.predict(x_test))

for i in selected_features:
    features_to_use = [feature for feature in selected_features if feature != i]

    if len(features_to_use) > 0:
        x_subset = x_train[:, features_to_use]
        rf_classifier.fit(x_subset, y_train)
        y_pred_subset = rf_classifier.predict(x_test[:, features_to_use])
        accuracy_subset = accuracy_score(y_test, y_pred_subset)

        if accuracy_subset > initial_accuracy:
            print(f"Removing feature in position {i} - Accuracy improved to {accuracy_subset:.4f}")
            initial_accuracy = accuracy_subset
            selected_features = features_to_use
        else:
            print(f"Keeping feature in position {i} - Accuracy: {accuracy_subset:.4f}")
    else:
        print(f"All features removed - Terminating Process")
        break

print("Selected Features:")
for feature_index in selected_features:
    # Assuming 'dataset' has column names
    if feature_index < len(dataset.columns) - 1:
        print(dataset.columns[feature_index])
    else:
        print("Invalid Index")

Removing feature in position 0 - Accuracy improved to 0.5833
Removing feature in position 1 - Accuracy improved to 0.7500
Keeping feature in position 2 - Accuracy: 0.6667
Keeping feature in position 3 - Accuracy: 0.4167
Keeping feature in position 4 - Accuracy: 0.5000
Keeping feature in position 5 - Accuracy: 0.6667
Keeping feature in position 6 - Accuracy: 0.6667
Keeping feature in position 7 - Accuracy: 0.6667
Removing feature in position 8 - Accuracy improved to 0.8333
Keeping feature in position 9 - Accuracy: 0.6667
Keeping feature in position 10 - Accuracy: 0.7500
Keeping feature in position 11 - Accuracy: 0.6667
Keeping feature in position 12 - Accuracy: 0.5833
Keeping feature in position 13 - Accuracy: 0.6667
Keeping feature in position 14 - Accuracy: 0.6667
Keeping feature in position 15 - Accuracy: 0.6667
Keeping feature in position 16 - Accuracy: 0.6667
Selected Features:
HR1
HR0.5
HR0.2
FAR2
FAR1
FAR0.5
DP2
DP1
DP0.5
DP0.2
CB2
CB1
CB0.5
CB0.2


# Fit the best regressor on the training data

In [313]:
best_rf_model.fit(x_train[:, selected_features], y_train)

# Make predictions on the test set using selected features

In [314]:
y_pred = best_rf_model.predict(x_test[:, selected_features])

# Evaluate performance on test set

In [315]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

[[3 1]
 [3 5]]
Accuracy: 0.6666666666666666
