In [None]:

import numpy as np
import torch
from torchvision import datasets, transforms
from sklearn.decomposition import DictionaryLearning
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

# Transform attached to both dataset objects.
transform = transforms.ToTensor()

# Arguments common to both splits; only the `train` flag differs between them.
emnist_kwargs = dict(
    root='./data',
    split='letters',
    download=True,
    transform=transform,
)

# EMNIST "letters" split: training portion and held-out test portion,
# both stored under ./data.
train_dataset = datasets.EMNIST(train=True, **emnist_kwargs)
test_dataset = datasets.EMNIST(train=False, **emnist_kwargs)

def get_balanced_subset(dataset, samples_per_class=100, class_labels=None):
    """Select up to ``samples_per_class`` examples for each requested class.

    For every label, the first ``samples_per_class`` matching entries (in
    dataset order) are taken; no shuffling is performed.  A class with fewer
    matching entries contributes all of them, so the result is only balanced
    when every class has enough samples.

    Args:
        dataset: Object exposing ``data`` and ``targets`` tensors aligned
            along their first dimension.
        samples_per_class: Maximum number of examples kept per class; must be
            non-negative.
        class_labels: Iterable of integer labels to sample from.  Defaults to
            ``range(1, 27)``, i.e. labels 1 through 26.

    Returns:
        A ``(data, targets)`` tuple holding the selected rows, grouped by
        class in the order given by ``class_labels``.

    Raises:
        ValueError: If ``samples_per_class`` is negative.
    """
    if samples_per_class < 0:
        # A negative slice bound would silently mean "all but the last k".
        raise ValueError(
            f"samples_per_class must be non-negative, got {samples_per_class}"
        )
    if class_labels is None:
        class_labels = range(1, 27)

    data = dataset.data
    targets = dataset.targets

    selected_indices = []
    for class_label in class_labels:
        # Positions of every sample carrying this label, in dataset order.
        class_indices = (targets == class_label).nonzero(as_tuple=True)[0]
        selected_indices.extend(class_indices[:samples_per_class].tolist())

    # Index with an explicit integer tensor so that an empty selection still
    # yields well-formed zero-length outputs.
    index = torch.tensor(selected_indices, dtype=torch.long)
    return data[index], targets[index]

# Draw the per-class subsets: 100 images per class for training, 40 for testing.
X_train_tensor, y_train = get_balanced_subset(train_dataset, samples_per_class=100)
X_test_tensor, y_test = get_balanced_subset(test_dataset, samples_per_class=40)

# Flatten every 28x28 image into a 784-long row and divide by 255
# (presumably raw 8-bit pixel values, giving a [0, 1] range — confirm).
X_train, X_test = (
    images.float().view(-1, 28 * 28).numpy() / 255.0
    for images in (X_train_tensor, X_test_tensor)
)

# Labels as NumPy arrays for scikit-learn.
y_train, y_test = y_train.numpy(), y_test.numpy()

print("Training dictionary learning model (this may take a few minutes)...")

# Hyper-parameters gathered in one place so they are easy to scan and tweak.
dict_learning_params = {
    'n_components': 500,                   # number of dictionary atoms
    'alpha': 1,                            # sparsity controlling parameter
    'max_iter': 500,
    'transform_algorithm': 'lasso_lars',
    'random_state': 42,                    # fixed seed
    'n_jobs': -1,
}
dict_learner = DictionaryLearning(**dict_learning_params)

# Fit the dictionary on the training images only and keep their codes, then
# encode the test images with that same fitted dictionary.
X_train_sparse = dict_learner.fit_transform(X_train)
X_test_sparse = dict_learner.transform(X_test)

# Fit the scaler on the training codes only; the test codes are transformed
# with the statistics learned from the training set.
scaler = StandardScaler().fit(X_train_sparse)
X_train_scaled = scaler.transform(X_train_sparse)
X_test_scaled = scaler.transform(X_test_sparse)

print("Training logistic regression with SGD...")

# Linear classifier with logistic loss and L2 penalty, optimised by SGD.
# `fit` returns the estimator itself, so `clf` is the fitted model.
clf = SGDClassifier(
    loss='log_loss',
    penalty='l2',
    max_iter=1000,
    tol=1e-3,
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict labels for the held-out test codes.
y_pred = clf.predict(X_test_scaled)

# Overall accuracy first, then the per-class report.
test_accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", test_accuracy)
print("\nClassification Report:\n")
report = classification_report(y_test, y_pred, zero_division=0)
print(report)
