# In [None]:

import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics import accuracy_score, classification_report
from torchvision import datasets, transforms
import torch
from torch.utils.data import DataLoader, Subset

# Every image is converted to a tensor when it is read from the dataset.
transform = transforms.ToTensor()

# Build the train and test splits of EMNIST "letters" with identical settings;
# only the `train` flag differs (train split first, then test split).
train_dataset, test_dataset = (
    datasets.EMNIST(root='./data', split='letters', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Shift every label down by one, in place. The class loop further down uses
# range(26), so labels are presumably 1..26 as shipped -- TODO confirm.
train_dataset.targets -= 1
test_dataset.targets -= 1

def get_balanced_subset(dataset, samples_per_class, num_classes=26):
    """Return a Subset holding at most `samples_per_class` items per label.

    Args:
        dataset: Object exposing a tensor attribute `targets` with one integer
            label per sample.
        samples_per_class: Maximum number of samples kept for each label.
        num_classes: Labels 0 .. num_classes - 1 are scanned. Defaults to 26,
            the value that used to be hard-coded.

    Returns:
        A `Subset` of `dataset` whose indices are grouped by label in
        ascending label order.

    Note:
        Selection is deterministic: the first `samples_per_class` occurrences
        of each label (in dataset order) are taken. A label with fewer
        occurrences contributes only what it has, so the result is balanced
        only when every label is sufficiently represented.
    """
    indices = []
    for label in range(num_classes):
        # Positions of every sample carrying this label, in dataset order.
        matches = (dataset.targets == label).nonzero(as_tuple=True)[0]
        indices.extend(matches[:samples_per_class].tolist())
    return Subset(dataset, indices)

# Keep the experiment small: up to 50 training and 40 test samples per class.
train_subset = get_balanced_subset(train_dataset, samples_per_class=50)
test_subset = get_balanced_subset(test_dataset, samples_per_class=40)

def convert_to_numpy(subset):
    """Materialize a whole dataset as flat NumPy feature and label arrays.

    Args:
        subset: A sized, indexable dataset yielding `(image, label)` pairs
            that `DataLoader` can collate into tensors.

    Returns:
        A tuple `(X, y)` where `X` has one row per sample with all remaining
        image dimensions flattened, and `y` holds the matching labels.
    """
    # One batch the size of the dataset pulls every sample in a single step.
    loader = DataLoader(subset, batch_size=len(subset))
    images, labels = next(iter(loader))
    # Flatten everything after the batch axis instead of assuming 28x28
    # images, so other image sizes still give exactly one row per sample.
    flat_images = images.reshape(images.shape[0], -1)
    return flat_images.numpy(), labels.numpy()

# Flatten both splits into (features, labels) NumPy pairs, train first.
(X_train, y_train), (X_test, y_test) = (
    convert_to_numpy(train_subset),
    convert_to_numpy(test_subset),
)

# Rescale each sample (row) to unit L2 norm; 'l2' is normalize()'s default norm.
X_train_norm = normalize(X_train, norm='l2', axis=1)
X_test_norm = normalize(X_test, norm='l2', axis=1)

# Sparse voting approach using Orthogonal Matching Pursuit (OMP)

from sklearn.linear_model import OrthogonalMatchingPursuit

def _vote_on_support(coeffs, y_train):
    """Pick a label by majority vote over the atoms with nonzero coefficient.

    Ties on the vote count are broken by the larger summed absolute
    coefficient; if that is tied as well, the smallest label wins. Returns -1
    when no coefficient is nonzero.
    """
    support = np.flatnonzero(coeffs)
    if support.size == 0:
        return -1
    votes = y_train[support]
    classes, counts = np.unique(votes, return_counts=True)
    top_classes = classes[counts == counts.max()]
    if top_classes.size == 1:
        return top_classes[0]
    # Several classes share the top vote count: weigh them by coefficient mass.
    magnitudes = np.abs(coeffs[support])
    weights = [magnitudes[votes == c].sum() for c in top_classes]
    return top_classes[np.argmax(weights)]


def predict_with_sparse_voting_omp(X_train, y_train, X_test, n_nonzero_coefs=30):
    """Classify test samples by voting over their OMP-selected training samples.

    Each test sample is regressed on the training samples with Orthogonal
    Matching Pursuit, where every training sample acts as one dictionary atom.
    The labels of the selected atoms then vote for the prediction.

    Args:
        X_train: Training features, one sample per row (assumed 2-D NumPy
            array; it is transposed so samples become columns).
        y_train: Labels aligned with the rows of `X_train`; must support
            NumPy integer-array indexing.
        X_test: Test features, one sample per row.
        n_nonzero_coefs: Sparsity target passed to `OrthogonalMatchingPursuit`.

    Returns:
        NumPy array with one predicted label per test sample; -1 marks
        samples for which no coefficient was nonzero.
    """
    # Both the dictionary and the estimator are loop-invariant; every fit()
    # call re-estimates the coefficients from scratch, so reuse is safe.
    dictionary = X_train.T
    model = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    # NOTE(review): the estimator keeps its default settings (including the
    # intercept) -- confirm that is intended for sparse-representation voting.
    predictions = []
    for x in X_test:
        model.fit(dictionary, x)
        predictions.append(_vote_on_support(model.coef_, y_train))
    return np.array(predictions)

y_pred_omp = predict_with_sparse_voting_omp(X_train_norm, y_train, X_test_norm)

# -1 is the predictor's "no atom selected" marker; score only the other samples.
valid_omp = y_pred_omp != -1
if not valid_omp.any():
    print("OMP: No valid predictions.")
else:
    print("OMP Accuracy:", accuracy_score(y_test[valid_omp], y_pred_omp[valid_omp]))
    print(classification_report(y_test[valid_omp], y_pred_omp[valid_omp], digits=2))

# kNN approach: nearest neighbours on sparse codes from a learned dictionary

from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import DictionaryLearning

# Learn a 300-atom dictionary on the normalized training samples, then encode
# both splits against it (OMP is the configured transform algorithm).
dict_learner = DictionaryLearning(n_components=300, transform_algorithm='omp', random_state=42)
X_train_sparse = dict_learner.fit_transform(X_train_norm)
X_test_sparse = dict_learner.transform(X_test_norm)

# 3-nearest-neighbour classifier with cosine distance on the sparse codes.
knn = KNeighborsClassifier(n_neighbors=3, metric='cosine')
knn.fit(X_train_sparse, y_train)
# Store the predictions under the name the report below reads; the previous
# name (`y_pred`) made the prints fail with a NameError.
y_pred_knn = knn.predict(X_test_sparse)

print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print(classification_report(y_test, y_pred_knn, digits=2))

# Sparse features + SVM approach

from sklearn.decomposition import DictionaryLearning
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.svm import SVC

# Mini-batch dictionary learner; rebinding `dict_learner` and the sparse
# feature arrays below replaces any values these names held earlier.
dict_learner = MiniBatchDictionaryLearning(
    n_components=300,
    alpha=1,
    max_iter=200,
    batch_size=64,
    random_state=42,
)

X_train_sparse = dict_learner.fit_transform(X_train_norm)
X_test_sparse = dict_learner.transform(X_test_norm)

# RBF-kernel SVM trained on the sparse codes; fit() returns the estimator itself.
clf = SVC(kernel='rbf', C=10, gamma=0.01).fit(X_train_sparse, y_train)
y_pred_svm = clf.predict(X_test_sparse)

print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm, digits=2))
