In [1]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import numpy as np
import os

def read_mnist_txt(file_path):
    """Parse a whitespace-separated text file of 16x16 digit images.

    Every non-blank line is expected to hold, in order: an image number
    (possibly wrapped in double quotes, e.g. "1"), an integer label, and
    256 pixel values. The image number is not used.

    Args:
        file_path: Path of the text file to read.

    Returns:
        tuple: ``(images, labels)``. ``images`` is a list of 16x16 float
        NumPy arrays, each rescaled with ``(pixel + 1) / 2``; ``labels`` is
        a list of ints. Both follow the order of the lines in the file.

    Raises:
        ValueError: If a line does not carry exactly 256 pixel values, or
            if its label or a pixel value cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than two fields.
    """
    side = 16
    expected_pixels = side * side

    images = []
    labels = []

    with open(file_path, 'r') as file:
        for line_no, line in enumerate(file, start=1):
            parts = line.split()

            # Blank (or whitespace-only) lines carry no record.
            if not parts:
                continue

            # parts[0] is the image number, which is not needed here.
            label = int(parts[1])
            pixels = [float(value) for value in parts[2:]]

            # Fail with the offending line number instead of an anonymous
            # reshape error.
            if len(pixels) != expected_pixels:
                raise ValueError(
                    f"{file_path}: line {line_no}: expected {expected_pixels} "
                    f"pixel values, got {len(pixels)}"
                )

            pixel_array = np.array(pixels).reshape(side, side)

            # Source values are assumed to lie in [-1, 1]; map them to [0, 1]
            # so the images can be displayed directly.
            images.append((pixel_array + 1) / 2)
            labels.append(label)

    return images, labels


           

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

def cross_validate(random_state, images, labels, train_fn, predict_fn, k_folds=5, metric_fn=accuracy_score):
    """
    Generic k-fold cross-validation.

    The samples are split with a shuffled ``KFold``; for every fold a model
    is trained on the training part, evaluated on the held-out part, and the
    fold score is printed.

    Args:
        random_state: Passed to ``KFold`` as ``random_state`` to control the
            shuffling of the samples.
        images (np.ndarray | list | tuple): Image data, one sample per entry.
            Lists and tuples are converted to NumPy arrays.
        labels (np.ndarray | list | tuple): Corresponding labels, same length
            as ``images``. Lists and tuples are converted to NumPy arrays.
        train_fn (callable): Function to train a model. Signature: (train_images, train_labels) -> model
        predict_fn (callable): Function to predict. Signature: (model, test_images) -> predictions
        k_folds (int): Number of folds (default 5).
        metric_fn (callable): Evaluation metric function. Signature: (true_labels, predicted_labels) -> float

    Returns:
        List of scores for each fold.

    Raises:
        ValueError: If ``images`` and ``labels`` have different lengths.
    """
    # The folds are selected with integer index arrays, which plain Python
    # sequences do not support. Only lists/tuples are converted so that any
    # other array type a caller already passes is handed on unchanged.
    if isinstance(images, (list, tuple)):
        images = np.asarray(images)
    if isinstance(labels, (list, tuple)):
        labels = np.asarray(labels)

    if len(images) != len(labels):
        raise ValueError(
            f"images and labels must have the same length, "
            f"got {len(images)} and {len(labels)}"
        )

    all_scores = []
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=random_state)

    for fold, (train_idx, val_idx) in enumerate(kf.split(images)):
        train_images, val_images = images[train_idx], images[val_idx]
        train_labels, val_labels = labels[train_idx], labels[val_idx]

        # Train and predict
        model = train_fn(train_images, train_labels)
        predictions = predict_fn(model, val_images)

        # Score
        score = metric_fn(val_labels, predictions)
        print(f"Fold {fold + 1} Score: {score:.4f}")
        all_scores.append(score)

    return all_scores


In [2]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

def train_lda(train_images, train_labels):
    """Fit a ``LinearDiscriminantAnalysis`` classifier on flattened images.

    Args:
        train_images: Array-like of training images, one image per entry
            along the first axis (an array or a list of equally shaped
            arrays).
        train_labels: Labels for ``train_images``, passed to ``fit`` as-is.

    Returns:
        The fitted ``LinearDiscriminantAnalysis`` instance.
    """
    # Accept plain lists of images too; np.asarray leaves an existing
    # ndarray untouched.
    train_images = np.asarray(train_images)

    # One row per image, all remaining axes collapsed into features.
    flat_train = train_images.reshape(len(train_images), -1)

    lda = LinearDiscriminantAnalysis()
    lda.fit(flat_train, train_labels)
    return lda

def predict_with_lda(model, test_images):
    """Predict labels for a batch of images with a fitted model.

    Args:
        model: Fitted object exposing ``predict(rows)``, e.g. the value
            returned by ``train_lda``.
        test_images: Array-like of images, one image per entry along the
            first axis (an array or a list of equally shaped arrays).

    Returns:
        Whatever ``model.predict`` returns for the flattened images.
    """
    # Accept plain lists of images too; np.asarray leaves an existing
    # ndarray untouched.
    test_images = np.asarray(test_images)

    # One row per image, all remaining axes collapsed into features.
    flat_test = test_images.reshape(len(test_images), -1)
    return model.predict(flat_test)
