In [10]:
import numpy as np
import torch
import deeplay as dl
from torchvision import transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
import numpy as np
import torch
import deeplay as dl
from torchvision import transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Helpers for data loading, the dataset wrapper, the CNN model, training and prediction
def read_mnist_txt(file_path, image_shape=(16, 16)):
    """Read MNIST-like digit data from a whitespace-separated text file.

    Every non-blank line must contain, in order: a record identifier
    (ignored; it may be wrapped in double quotes), an integer label, and one
    float per pixel.

    Args:
        file_path: Path of the text file to read.
        image_shape: Shape every image is reshaped to. The number of pixel
            values on each line must equal the product of its entries.
            Defaults to ``(16, 16)``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays holding one entry per
        non-blank line, in file order. Each image has shape ``image_shape``.
        Both arrays are empty when the file has no data lines.

    Raises:
        ValueError: If a line has fewer than two fields, if a label or pixel
            cannot be parsed as a number, if a label lies outside [-9, 9],
            or if a line does not carry the expected number of pixel values.
    """
    expected_pixels = int(np.prod(image_shape))
    images = []
    labels = []

    with open(file_path, 'r') as file:
        for line_no, line in enumerate(file, start=1):
            parts = line.split()
            if not parts:
                # Blank (or whitespace-only) line.
                continue
            if len(parts) < 2:
                raise ValueError(
                    f"Line {line_no}: expected an id followed by a label, "
                    f"got {len(parts)} field(s)."
                )

            # parts[0] is the record identifier; it is not needed.
            label = int(parts[1])

            # Validate label range
            if label < -9 or label > 9:
                raise ValueError(f"Invalid label {label} found in the dataset.")

            pixel_fields = parts[2:]
            if len(pixel_fields) != expected_pixels:
                # Fail here with the line number instead of letting reshape
                # raise an error that does not say which record is broken.
                raise ValueError(
                    f"Line {line_no}: expected {expected_pixels} pixel values, "
                    f"got {len(pixel_fields)}."
                )

            pixels = [float(value) for value in pixel_fields]
            images.append(np.array(pixels).reshape(image_shape))
            labels.append(label)

    return np.array(images), np.array(labels)

class MNISTDataset(Dataset):
    """Torch ``Dataset`` over in-memory digit images and their labels.

    Items are returned as ``(image, label)`` pairs. The image is converted to
    a float32 PIL image and, if a transform was supplied, passed through it.
    The label is returned as a ``torch.long`` tensor holding ``label + 9``.
    """

    def __init__(self, images, labels, transform=None):
        """Store the samples, their labels and an optional transform."""
        self.transform = transform
        self.labels = labels
        self.images = images

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the transformed image and shifted label at position ``idx``."""
        raw_image, raw_label = self.images[idx], self.labels[idx]

        # torchvision transforms expect a PIL image, so wrap the array first.
        sample = Image.fromarray(raw_image.astype('float32'))
        if self.transform:
            sample = self.transform(sample)

        # Adding 9 makes the smallest allowed label (-9) map to class index 0.
        target = torch.tensor(raw_label + 9, dtype=torch.long)

        return sample, target

def create_cnn_model():
    """Assemble the deeplay CNN used to classify the digit images.

    The network is a single-channel convolutional stack (hidden channels
    16, 16, 32; 32 output channels), followed by adaptive average pooling to
    a 1x1 map and a dense head with one hidden layer of 64 units. The head
    emits 19 log-probabilities, one per label from -9 to 9.

    Returns:
        A ``dl.Sequential`` chaining the three parts in that order.
    """
    feature_extractor = dl.ConvolutionalNeuralNetwork(
        in_channels=1,
        hidden_channels=[16, 16, 32],
        out_channels=32,
    )
    # Configure the pooling slot of the block at index 2 as a 2x2 max-pool.
    feature_extractor.blocks[2].pool.configure(torch.nn.MaxPool2d, kernel_size=2)

    # Pool each feature map down to a single value before the dense head.
    global_pool = dl.Layer(torch.nn.AdaptiveAvgPool2d, output_size=1)

    # 19 outputs cover the labels -9..9; LogSoftmax yields log-probabilities.
    classification_head = dl.MultiLayerPerceptron(
        in_features=32,
        hidden_features=[64],
        out_features=19,
        out_activation=torch.nn.LogSoftmax(dim=1),
    )

    return dl.Sequential(feature_extractor, global_pool, classification_head)

def train_model(train_images, train_labels):
    """Fit the CNN on the given samples and return the trained classifier.

    Args:
        train_images: Training images, indexable by sample.
        train_labels: Labels matching ``train_images``.

    Returns:
        The built deeplay classifier after 20 epochs of training with
        RMSprop (learning rate 0.001) on shuffled batches of 32.
    """

    class MNISTClassifier(dl.Classifier):
        """Classifier whose training step uses a negative log-likelihood loss."""

        def __init__(self, model, optimizer):
            super().__init__(model=model, optimizer=optimizer)
            self.loss_fn = torch.nn.NLLLoss()

        def training_step(self, batch, batch_idx):
            inputs, targets = batch
            log_probs = self.model(inputs)
            return self.loss_fn(log_probs, targets)

    # Convert to tensors, then normalise with mean 0.5 and std 0.5.
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    dataset = MNISTDataset(train_images, train_labels, transform=preprocessing)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)

    network = create_cnn_model()

    # .create() builds the configured deeplay module before it is trained.
    classifier = MNISTClassifier(
        model=network,
        optimizer=dl.RMSprop(lr=0.001),
    ).create()

    dl.Trainer(max_epochs=20, accelerator="auto").fit(classifier, loader)

    return classifier


# Prediction function
def predict_with_model(classifier, test_images):
    """Predict a label for every image in ``test_images``.

    Args:
        classifier: Trained classifier exposing ``eval()`` and ``model``.
        test_images: Images to classify, indexable by sample.

    Returns:
        A plain list with one integer label per image, in input order.
    """
    # Same preprocessing as used for training.
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    # The dataset requires labels; zeros act as placeholders and are ignored.
    placeholder_labels = np.zeros(len(test_images))
    dataset = MNISTDataset(test_images, placeholder_labels, transform=preprocessing)
    loader = DataLoader(dataset, batch_size=32, shuffle=False)

    classifier.eval()
    predicted_labels = []
    with torch.no_grad():
        for batch, _ in loader:
            class_indices = torch.argmax(classifier.model(batch), dim=1)
            # Undo the +9 shift applied to the labels by the dataset.
            original_labels = class_indices - 9
            predicted_labels.extend(original_labels.cpu().numpy().tolist())

    return predicted_labels


# Main execution
if __name__ == "__main__":
    # Load every sample from "Numbers.txt" (path relative to the working directory).
    images, labels = read_mnist_txt("Numbers.txt")

    # Hold out 20% of the samples with a fixed seed. The held-out labels are
    # discarded (`_`), so this run only prints predictions and does not score them.
    train_images, test_images, train_labels, _ = train_test_split(
        images, labels, test_size=0.2, random_state=42
    )

    # Train the CNN on the training split, then predict the held-out images.
    classifier = train_model(train_images, train_labels)
    predictions = predict_with_model(classifier, test_images)

    print("First 10 test predictions:", predictions[:10])

C:\Users\Nils\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | loss          | CrossEntropyLoss | 0      | eval 
1 | train_metrics | MetricCollection | 0      | train
2 | val_metrics   | MetricCollection | 0      | train
3 | test_metrics  | MetricCollection | 0      | train
4 | model         | Sequential       | 19.7 K | train
5 | loss_fn       | NLLLoss          | 0      | train
6 | optimizer     | RMSprop          | 0      | train
-----------------------------------------------------------
19.7 K    Trainable params
0         Non-trainable params
19.7 K    Total params
0.079     Total estimated model params size (MB)
30        Modules in train mode
1        

Epoch 19: 100%|██████████| 50/50 [00:00<00:00, 67.29it/s, v_num=14]
First 10 test predictions: [0, 0, 6, -9, 0, -2, 0, 8, 6, -9]


In [6]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

def cross_validate(images, labels, train_fn, predict_fn, k_folds=5, metric_fn=accuracy_score):
    """
    Run shuffled k-fold cross-validation for an arbitrary train/predict pair.

    Args:
        images (np.ndarray): Image data, indexed by sample along axis 0.
        labels (np.ndarray): Labels aligned with ``images``.
        train_fn (callable): Trains a model. Signature: (train_images, train_labels) -> model
        predict_fn (callable): Predicts labels. Signature: (model, test_images) -> predictions
        k_folds (int): Number of folds (default 5).
        metric_fn (callable): Scores predictions. Signature: (true_labels, predicted_labels) -> float

    Returns:
        List with one score per fold, in fold order.
    """
    # Fixed seed: every call splits the data into the same folds.
    splitter = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    fold_scores = []

    for fold_no, (fit_idx, holdout_idx) in enumerate(splitter.split(images), start=1):
        print(f"\nFold {fold_no}/{k_folds}")

        fit_images, fit_labels = images[fit_idx], labels[fit_idx]
        holdout_images, holdout_labels = images[holdout_idx], labels[holdout_idx]

        # Fit on the training part, then score on the held-out part.
        fitted_model = train_fn(fit_images, fit_labels)
        fold_score = metric_fn(holdout_labels, predict_fn(fitted_model, holdout_images))

        print(f"Fold {fold_no} Score: {fold_score:.4f}")
        fold_scores.append(fold_score)

    print("\nCross-validation results:")
    print(f"Mean score: {np.mean(fold_scores):.4f}")
    print(f"Std deviation: {np.std(fold_scores):.4f}")

    return fold_scores


In [8]:
# "Training" function for k-NN (just returns training data and labels)
def train_knn_model(train_images, train_labels, k=3, norm=2):
    """Bundle the training data and k-NN settings into a model dictionary.

    No fitting takes place; the inputs are stored as given.

    Args:
        train_images: Training samples.
        train_labels: Labels matching ``train_images``.
        k: Number of neighbours to consult (default 3).
        norm: Norm order used for distances (default 2).

    Returns:
        Dict with the keys 'train_images', 'train_labels', 'k' and 'norm'.
    """
    field_names = ('train_images', 'train_labels', 'k', 'norm')
    field_values = (train_images, train_labels, k, norm)
    return dict(zip(field_names, field_values))

# Prediction function for k-NN
def predict_knn(model, test_images):
    """Classify each test image by majority vote among its nearest neighbours.

    Every sample is flattened to a vector before distances are computed, so
    ``norm`` is always applied as a vector norm over the pixels. Without
    flattening, NumPy would treat a 2-D image difference as a matrix and
    compute a matrix norm for the same ``ord`` value.

    Args:
        model: Dict as returned by ``train_knn_model`` with the keys
            'train_images', 'train_labels', 'k' and 'norm'.
        test_images: Iterable of images to classify. Each must have as many
            elements as one training image.

    Returns:
        List with one predicted label per test image, in input order. When
        several labels share the top vote count, the label of the closest of
        those neighbours is returned.

    Raises:
        ValueError: If the numbers of training images and labels differ, or
            if there is no neighbour to vote (empty training set or ``k`` of
            zero) while a test image has to be classified.
    """
    train_images = np.asarray(model['train_images'])
    train_labels = list(model['train_labels'])
    k = model['k']
    norm = model['norm']

    n_train = len(train_images)
    if len(train_labels) != n_train:
        raise ValueError(
            f"Got {n_train} training images but {len(train_labels)} labels."
        )

    # One row per training sample. The explicit column count keeps the
    # reshape valid even when the training set is empty.
    n_features = int(np.prod(train_images.shape[1:]))
    train_flat = train_images.reshape(n_train, n_features)

    predictions = []
    for image in test_images:
        query = np.asarray(image).reshape(-1)
        distances = np.linalg.norm(train_flat - query, ord=norm, axis=1)

        # A stable sort keeps equally distant neighbours in training order.
        nearest = np.argsort(distances, kind="stable")[:k]
        votes = [train_labels[i] for i in nearest]
        if not votes:
            raise ValueError(
                "No neighbours to vote: the training set is empty or k is 0."
            )

        # max() returns the first label reaching the top count, i.e. the
        # label of the nearest neighbour among tied candidates.
        predictions.append(max(votes, key=votes.count))

    return predictions


In [13]:
if __name__ == "__main__":
    # Load every sample from "Numbers.txt" (path relative to the working directory).
    images, labels = read_mnist_txt("Numbers.txt")

    # Flatten each image to one feature vector and standardise every feature.
    # NOTE(review): the scaler is fitted on all samples before the folds are
    # split, so held-out samples influence the scaling - confirm this is intended.
    flat_images = images.reshape(len(images), -1)
    scaler = StandardScaler()
    flat_images = scaler.fit_transform(flat_images)

    # cross_validate calls train_fn(images, labels) only, so k and norm are
    # bound in advance with functools.partial.
    from functools import partial
    k = 5
    norm = 2
    train_fn = partial(train_knn_model, k=k, norm=norm)

    # 5-fold cross-validation of the k-NN classifier, scored by accuracy.
    scores = cross_validate(
        images=flat_images,
        labels=labels,
        train_fn=train_fn,
        predict_fn=predict_knn,
        k_folds=5,
        metric_fn=accuracy_score
    )



Fold 1/5
Fold 1 Score: 0.8950

Fold 2/5
Fold 2 Score: 0.9300

Fold 3/5
Fold 3 Score: 0.9225

Fold 4/5
Fold 4 Score: 0.9250

Fold 5/5
Fold 5 Score: 0.9175

Cross-validation results:
Mean score: 0.9180
Std deviation: 0.0122


In [14]:
if __name__ == "__main__":
    # Load every sample from "Numbers.txt" (path relative to the working directory).
    images, labels = read_mnist_txt("Numbers.txt")

    # 5-fold cross-validation of the CNN: train_model trains a fresh
    # classifier on each fold's training part.
    scores = cross_validate(
        images=images,
        labels=labels,
        train_fn=train_model,
        predict_fn=predict_with_model,
        k_folds=5,
        metric_fn=accuracy_score  # same as the default; passed explicitly for clarity
    )


C:\Users\Nils\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | loss          | CrossEntropyLoss | 0      | eval 
1 | train_metrics | MetricCollection | 0      | train
2 | val_metrics   | MetricCollection | 0      | train
3 | test_metrics  | MetricCollection | 0      | train
4 | model         | Sequential       | 19.7 K | train
5 | loss_fn       | NLLLoss          | 0      | train
6 | optimizer     | RMSprop          | 0      | train
-----------------------------------------------------------
19.7 K    Trainable params
0         Non-trainable params
19.7 K    Total params
0.079     Total estimated model params size (MB)
30        Modules in train mode
1        


Fold 1/5
Epoch 19: 100%|██████████| 50/50 [00:00<00:00, 61.44it/s, v_num=22]
Fold 1 Score: 0.8450

Fold 2/5


C:\Users\Nils\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | loss          | CrossEntropyLoss | 0      | eval 
1 | train_metrics | MetricCollection | 0      | train
2 | val_metrics   | MetricCollection | 0      | train
3 | test_metrics  | MetricCollection | 0      | train
4 | model         | Sequential       | 19.7 K | train
5 | loss_fn       | NLLLoss          | 0      | train
6 | optimizer     | RMSprop          | 0      | train
-----------------------------------------------------------
19.7 K    Trainable params
0         Non-trainable params
19.7 K    Total params
0.079     Total estimated model params size (MB)
30        Modules in train mode
1        

Epoch 19: 100%|██████████| 50/50 [00:00<00:00, 73.41it/s, v_num=23]
Fold 2 Score: 0.8775

Fold 3/5


C:\Users\Nils\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | loss          | CrossEntropyLoss | 0      | eval 
1 | train_metrics | MetricCollection | 0      | train
2 | val_metrics   | MetricCollection | 0      | train
3 | test_metrics  | MetricCollection | 0      | train
4 | model         | Sequential       | 19.7 K | train
5 | loss_fn       | NLLLoss          | 0      | train
6 | optimizer     | RMSprop          | 0      | train
-----------------------------------------------------------
19.7 K    Trainable params
0         Non-trainable params
19.7 K    Total params
0.079     Total estimated model params size (MB)
30        Modules in train mode
1        

Epoch 19: 100%|██████████| 50/50 [00:00<00:00, 69.34it/s, v_num=24]
Fold 3 Score: 0.8550

Fold 4/5


C:\Users\Nils\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | loss          | CrossEntropyLoss | 0      | eval 
1 | train_metrics | MetricCollection | 0      | train
2 | val_metrics   | MetricCollection | 0      | train
3 | test_metrics  | MetricCollection | 0      | train
4 | model         | Sequential       | 19.7 K | train
5 | loss_fn       | NLLLoss          | 0      | train
6 | optimizer     | RMSprop          | 0      | train
-----------------------------------------------------------
19.7 K    Trainable params
0         Non-trainable params
19.7 K    Total params
0.079     Total estimated model params size (MB)
30        Modules in train mode
1        

Epoch 19: 100%|██████████| 50/50 [00:00<00:00, 60.30it/s, v_num=25]
Fold 4 Score: 0.9275

Fold 5/5


C:\Users\Nils\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | loss          | CrossEntropyLoss | 0      | eval 
1 | train_metrics | MetricCollection | 0      | train
2 | val_metrics   | MetricCollection | 0      | train
3 | test_metrics  | MetricCollection | 0      | train
4 | model         | Sequential       | 19.7 K | train
5 | loss_fn       | NLLLoss          | 0      | train
6 | optimizer     | RMSprop          | 0      | train
-----------------------------------------------------------
19.7 K    Trainable params
0         Non-trainable params
19.7 K    Total params
0.079     Total estimated model params size (MB)
30        Modules in train mode
1        

Epoch 19: 100%|██████████| 50/50 [00:00<00:00, 70.81it/s, v_num=26]
Fold 5 Score: 0.8925

Cross-validation results:
Mean score: 0.8795
Std deviation: 0.0292
