In [179]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif

reading the data

In [180]:
data, labels = load_digits(return_X_y=True)
(n_samples, n_features), n_digits = data.shape, np.unique(labels).size

print(f"# digits: {n_digits}; # samples: {n_samples}; # features {n_features}")
print(labels)
print(data)
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# digits: 10; # samples: 1797; # features 64
[0 1 2 ... 8 9 8]
[[ 0.  0.  5. ...  0.  0.  0.]
 [ 0.  0.  0. ... 10.  0.  0.]
 [ 0.  0.  0. ... 16.  9.  0.]
 ...
 [ 0.  0.  1. ...  6.  0.  0.]
 [ 0.  0.  2. ... 12.  0.  0.]
 [ 0.  0. 10. ... 12.  1.  0.]]


In [181]:
def best_C_accuracy(features):
    X_train_New = X_train[:, features]
    X_test_New = X_test[:, features]
    best_accuracy = 0
    best_C = 0
    Cs = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]

    for c in Cs:
        ova = LogisticRegression(multi_class='ovr', max_iter=1000, C=c, penalty='l2')
        ova.fit(X=X_train_New, y=y_train)
        prediction_accuracy = ova.score(X_test_New, y_test)
        if prediction_accuracy > best_accuracy:
            best_accuracy = prediction_accuracy
            best_C = c

    return (best_accuracy, best_C)

In [182]:
def trainModel(features):
    X_train_New = X_train[:, features]
    X_test_New = X_test[:, features]
    best_accuracy = 0
    ova = LogisticRegression(multi_class='ovr', max_iter=1000)
    ova.fit(X=X_train_New, y=y_train)
    prediction_accuracy = ova.score(X_test_New, y_test)
    if prediction_accuracy > best_accuracy:
        best_accuracy = prediction_accuracy
    
    return best_accuracy

In [183]:
def greedy():
    selected_features = []
    remaining_features = list(range(data.shape[1]))
    for _ in range(5):
        best_accuracy = 0
        best_feature  = -1

        for feature in remaining_features:
            accuracy = trainModel(selected_features+[feature])

            if accuracy > best_accuracy:
                best_feature = feature
                best_accuracy = accuracy

        selected_features.append(best_feature)
        remaining_features.remove(best_feature)

    return selected_features

In [184]:
greedy_features = greedy()
greedy_accuracy, greedy_C = best_C_accuracy(greedy_features)
print(f"featuers is {greedy_features}")
print(f"best accuracy is {greedy_accuracy}")
print(f"best C is {greedy_C}")

featuers is [36, 33, 42, 21, 26]
best accuracy is 0.7555555555555555
best C is 1


In [185]:
def mutual_information():
    dataNew = data.copy()
    for row in range(dataNew.shape[0]):
        for col in range(dataNew.shape[1]): 
            if dataNew[row][col] >= 0 and dataNew[row][col] <= 4:
                dataNew[row][col] = 0
            elif dataNew[row][col] >= 5 and dataNew[row][col] <= 10:
                dataNew[row][col] = 1
            else:
                dataNew[row][col] = 2

    mi = mutual_info_classif(dataNew, labels)
    sorted_arguments = mi.argsort()
    return sorted_arguments[-5:]
    

In [187]:
MI_features = mutual_information()
MI_accuracy, MI_C = best_C_accuracy(MI_features)
print(f"featuers is {MI_features}")
print(f"best accuracy is {MI_accuracy}")
print(f"best C is {MI_C}")

featuers is [21 28 43 26 42]
best accuracy is 0.6805555555555556
best C is 0.1


we see that:

1 - the greedy has more run time as we always knew 

2 - they have the same best C

3 - the accuracy of greedy is better (0.7555)

4 - the mi features doesnt stay the same if we run the alghorithm another time so with this the accuracy changes everytime and its field is between (0.6 and 0.725)

5 - so the greedy has better accuracy with worse run time