Train an MLP on the MNIST dataset. The original training set contains 60,000 images and the test set contains 10,000 images. If you are short on compute, train on a smaller stratified subset of the training images, but keep the test set as the same 10,000 images. Compare against Random Forest (RF) and Logistic Regression models. The metrics can be: F1-score, confusion matrix. What do you observe? Which digits are commonly confused?


MLP

In [None]:
import torch
import torch.nn.functional as F
from torch import nn
import pandas as pd
import matplotlib.pyplot as plt # for making figures
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from pprint import pprint
import numpy as np
import os
import struct
from sklearn.metrics import rand_score
from scipy.spatial.distance import cdist
import time

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def load_mnist_images(filename):
    """Read an IDX3-ubyte image file into a 2-D array of flattened images.

    Args:
        filename: Path to an uncompressed IDX3-ubyte file.

    Returns:
        A ``uint8`` NumPy array of shape ``(num, rows * cols)``; each row is
        one image flattened in the order it is stored in the file.

    Raises:
        ValueError: If the header's magic number is not 2051 (the IDX code
            for unsigned-byte, 3-dimensional data) or the file holds fewer
            pixels than the header announces.
    """
    with open(filename, 'rb') as f:
        # Header: four big-endian uint32 values.
        magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
        if magic != 2051:
            raise ValueError(
                f"{filename}: unexpected magic number {magic}; "
                "expected 2051 for an IDX3-ubyte image file"
            )
        expected = num * rows * cols
        # Read exactly the announced number of pixels so trailing bytes
        # cannot silently change the result.
        images = np.fromfile(f, dtype=np.uint8, count=expected)
    if images.size != expected:
        raise ValueError(
            f"{filename}: header announces {expected} pixels "
            f"but only {images.size} were read"
        )
    return images.reshape(num, rows * cols)

def load_mnist_labels(filename):
    """Read an IDX1-ubyte label file into a 1-D array.

    Args:
        filename: Path to an uncompressed IDX1-ubyte file.

    Returns:
        A ``uint8`` NumPy array of shape ``(num,)`` with one label per entry.

    Raises:
        ValueError: If the header's magic number is not 2049 (the IDX code
            for unsigned-byte, 1-dimensional data) or the file holds fewer
            labels than the header announces.
    """
    with open(filename, 'rb') as f:
        # Header: two big-endian uint32 values.
        magic, num = struct.unpack(">II", f.read(8))
        if magic != 2049:
            raise ValueError(
                f"{filename}: unexpected magic number {magic}; "
                "expected 2049 for an IDX1-ubyte label file"
            )
        # Read exactly the announced number of labels.
        labels = np.fromfile(f, dtype=np.uint8, count=num)
    if labels.size != num:
        raise ValueError(
            f"{filename}: header announces {num} labels "
            f"but only {labels.size} were read"
        )
    return labels

# Absolute paths of the four raw IDX files.
train_images_path = "/content/train-images.idx3-ubyte"
train_labels_path = "/content/train-labels.idx1-ubyte"
test_images_path = "/content/t10k-images.idx3-ubyte"
test_labels_path = "/content/t10k-labels.idx1-ubyte"

# Parse images and labels for the training split, then for the test split.
train_images = load_mnist_images(train_images_path)
train_labels = load_mnist_labels(train_labels_path)
test_images = load_mnist_images(test_images_path)
test_labels = load_mnist_labels(test_labels_path)

# Seed NumPy's global random generator.
np.random.seed(42)

# Report how many images each split contains (label line, then the count).
print("train_images: ", len(train_images), sep="\n")
print("test_images: ", len(test_images), sep="\n")

train_images: 
60000
test_images: 
10000


In [None]:
image_size = train_images[0].shape
image_size

(784,)

In [None]:
train_labels

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [None]:
X_img = torch.tensor(train_images).to(device)

In [None]:
# X_img comes from load_mnist_images, which already returns one flattened
# image per row, so a single reshape yields the (num_images, 784) matrix.
# The previous per-image Python loop followed by torch.stack produced the
# same values with tens of thousands of tiny tensor operations.
X = X_img.reshape(-1, 784)
print(X.shape)

torch.Size([60000, 784])


In [None]:
X.device

device(type='cuda', index=0)

In [None]:
train_labels.shape

(60000,)

In [None]:
Y = torch.tensor(train_labels).to(device)



In [None]:
X = X.float()

In [None]:
input_size = 784
class predict_digit(nn.Module):
    """Fully connected network with two ReLU hidden layers and a linear output.

    Args:
        l1: Number of units in the first hidden layer.
        l2: Number of units in the second hidden layer.
        l3: Number of output units (one raw score per class).
        input_size: Number of features in each input row.
    """

    def __init__(self, l1, l2, l3, input_size):
        super().__init__()
        # Layers are created in input-to-output order and keep the attribute
        # names lin1/lin2/lin3, which are also the keys of the state dict.
        self.lin1 = nn.Linear(in_features=input_size, out_features=l1)
        self.lin2 = nn.Linear(in_features=l1, out_features=l2)
        self.lin3 = nn.Linear(in_features=l2, out_features=l3)

    def forward(self, x):
        """Return the unnormalised output scores for the rows of ``x``."""
        hidden1 = F.relu(self.lin1(x))
        hidden2 = F.relu(self.lin2(hidden1))
        # No activation on the last layer: the caller receives raw scores.
        return self.lin3(hidden2)

In [None]:
model = predict_digit(l1=30, l2=20, l3=10, input_size=784)

In [None]:
Y = Y.long()

In [None]:
# Mini-batch training of the MLP: cross-entropy loss optimised with AdamW.
# NOTE(review): X is built from uint8 pixel data converted with .float() and
# no rescaling is visible in this notebook -- consider normalising the inputs
# (here and at prediction time); confirm before changing.
model.to(device)
loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.AdamW(model.parameters(), lr=0.01)

num_epochs = 10000  # single source of truth for the loop bound and the log line
batch_size = 6000
print_every = 1000
elapsed_time = []     # wall-clock seconds spent in each epoch
avg_loss_values = []  # per-sample average loss of each epoch

# Device placement and dtype conversion are identical for every batch, so they
# are done once here instead of inside the inner loop.
X_dev = X.to(device).float()
Y_dev = Y.to(device).long()
num_samples = X_dev.shape[0]

for epoch in range(num_epochs):
    start_time = time.time()
    total_loss = 0.0
    for i in range(0, num_samples, batch_size):
        x_batch = X_dev[i:i + batch_size]
        y_batch = Y_dev[i:i + batch_size]

        # Forward pass
        y_pred = model(x_batch)
        loss = loss_fn(y_pred, y_batch)

        # Backward pass and parameter update
        opt.zero_grad()
        loss.backward()
        opt.step()

        # loss is a batch mean; weight it by the batch size so the epoch
        # figure below is an average over samples, not over batches.
        total_loss += loss.item() * x_batch.size(0)

    avg_loss = total_loss / num_samples
    avg_loss_values.append(avg_loss)

    end_time = time.time()
    elapsed_time.append(end_time - start_time)

    if epoch % print_every == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Time: {elapsed_time[-1]:.2f}s")

# Persist only the learned parameters (state dict), not the module object.
torch.save(model.state_dict(), 'model_path.pth')
print("Model saved successfully!")

Epoch [1/10000], Loss: 10.4484, Time: 0.02s
Epoch [1001/10000], Loss: 0.2662, Time: 0.02s
Epoch [2001/10000], Loss: 0.2408, Time: 0.02s
Epoch [3001/10000], Loss: 0.2345, Time: 0.02s
Epoch [4001/10000], Loss: 0.2389, Time: 0.02s
Epoch [5001/10000], Loss: 0.2310, Time: 0.02s
Epoch [6001/10000], Loss: 0.2217, Time: 0.02s
Epoch [7001/10000], Loss: 0.2285, Time: 0.02s
Epoch [8001/10000], Loss: 0.2342, Time: 0.02s
Epoch [9001/10000], Loss: 0.2176, Time: 0.02s
Model saved successfully!


To show the predicted digit

In [None]:
# Rebuild the network with the layer sizes used for training, then restore
# the saved parameters.
model = predict_digit(l1=30, l2=20, l3=10, input_size=784).to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written from a CUDA session also loads on a CPU-only machine.
model.load_state_dict(torch.load('model_path.pth', map_location=device))
model.eval()  # Set the model to evaluation mode

predict_digit(
  (lin1): Linear(in_features=784, out_features=30, bias=True)
  (lin2): Linear(in_features=30, out_features=20, bias=True)
  (lin3): Linear(in_features=20, out_features=10, bias=True)
)

In [None]:
# Predict a class for every row of the training matrix X with the reloaded MLP.
# torch.max over dim 1 returns (max values, indices); the indices are kept as the predicted classes.

with torch.no_grad():  # No need to track gradients
    outputs = model(X)
    _, predicted_mlp_train = torch.max(outputs.data, 1)
predicted_mlp_train

tensor([0, 0, 4,  ..., 5, 6, 5], device='cuda:0')

In [None]:
# Build the test tensors from the loaded NumPy arrays and predict a class
# for every test image with the MLP.
X_test = torch.tensor(test_images).float().to(device)
X_test = X_test.view(-1, 784)  # one 784-value row per image
# Test labels as a tensor on the same device as the inputs
Y_test = torch.tensor(test_labels).to(device)

with torch.no_grad():  # No need to track gradients
    outputs = model(X_test)
    # `_` receives the per-row maximum output value; the second result is the index of that maximum
    _, predicted_mlp_test = torch.max(outputs.data, 1)
predicted_mlp_test

tensor([7, 2, 1,  ..., 4, 5, 6], device='cuda:0')

In [None]:
print(test_labels)

[7 2 1 ... 4 5 6]


In [None]:
# `_` was bound by a torch.max call in an earlier cell; it holds per-row maximum output values, not labels.
print(_)

tensor([7.5700, 9.2421, 4.4415,  ..., 5.2707, 2.0860, 7.3614], device='cuda:0')


Using RF

In [None]:
!pip install latexify

ERROR: Could not find a version that satisfies the requirement latexify (from versions: none)
ERROR: No matching distribution found for latexify

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.ensemble import RandomForestClassifier
from IPython.display import Image

# To plot trees in forest via graphviz
from sklearn.tree import export_graphviz
import graphviz

# latexify is an optional plotting-style helper; the notebook works without it.
# Catch Exception rather than using a bare `except`, so KeyboardInterrupt and
# SystemExit still propagate, and report why the styling was skipped instead
# of failing silently.
try:
    from latexify import latexify, format_axes
    latexify(columns=2)
except Exception as exc:
    print(f"latexify styling skipped ({type(exc).__name__}: {exc})")

%matplotlib inline
%config InlineBackend.figure_format='retina'

In [None]:
X = X.cpu()
Y = Y.cpu()

In [None]:
# Random-forest baseline fitted on the same training matrix X and labels Y
# used for the MLP. The tensors are converted to NumPy arrays explicitly
# before being handed to scikit-learn.
rf = RandomForestClassifier(
    n_estimators=10,
    random_state=0,
    criterion='entropy',
    bootstrap=True,
)
rf.fit(X.cpu().numpy(), Y.cpu().numpy())

In [None]:
# Test features for the random forest: one 784-pixel row per test image.
# test_images is already a NumPy array with that layout, so there is no need
# to move it to the GPU, flatten it image by image in Python, and copy it
# back to the CPU.
new_X = torch.tensor(test_images).view(-1, 784)
new_X_np = new_X.numpy()

# Predict a label for every test image
predicted_labels_rf = rf.predict(new_X_np)

# Display the predicted test labels
predicted_labels_rf

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

In [None]:
# Predict labels for the training matrix X (the data passed to rf.fit above)
predicted_labels_rf_train = rf.predict(X.cpu())

# Display the predicted training labels
predicted_labels_rf_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

Using Logistic Regression

In [None]:
!pip install latexify

ERROR: Could not find a version that satisfies the requirement latexify (from versions: none)
ERROR: No matching distribution found for latexify

In [None]:
import numpy as np
import sklearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
# from latexify import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
class MultiClassLogisticRegression(nn.Module):
    """Multinomial logistic regression expressed as a single linear layer.

    Args:
        input_dim: Number of features in each input row.
        num_classes: Number of output scores produced per row.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=num_classes)

    def forward(self, x):
        """Return the raw class scores (logits); no softmax is applied here."""
        return self.linear(x)

In [None]:
log_reg = MultiClassLogisticRegression(784,10)

In [None]:
X = X.float()

In [None]:
log_reg(X).shape

torch.Size([60000, 10])

In [None]:
log_reg(X)

tensor([[ 17.2993, -87.9405, -27.1262,  ...,  15.0434,  59.2862, -52.1852],
        [ 21.8101, -28.3861,  -9.1615,  ..., -31.0196,  45.9597, -28.0943],
        [-17.7398, -33.0451, -35.7391,  ...,  23.3972,  74.3131, -23.6910],
        ...,
        [ 45.6391,  -8.3765, -84.7892,  ..., -47.1744,  26.6075,  17.2581],
        [ -6.5541,   3.4983, -46.9361,  ...,   9.8314,  13.4657,  -0.2126],
        [-13.3707, -40.9609, -64.4137,  ...,   4.2377,  89.3492,  14.8161]],
       grad_fn=<AddmmBackward0>)

In [None]:
# Full-batch training of the logistic-regression model with Adam. Training
# stops when the loss changes by less than `tol` between consecutive steps.
opt = torch.optim.Adam(log_reg.parameters(), lr=0.01)

tol = 1e-5
max_iters = 100_000  # safety cap: the loop previously had no upper bound
converged = False
prev_loss = 1e8

i = 0
while not converged and i < max_iters:
    opt.zero_grad()
    logits = log_reg(X)
    loss = F.cross_entropy(logits, Y)
    loss.backward()
    opt.step()

    # Read the scalar once per step instead of calling .item() up to three times.
    loss_value = loss.item()
    if i % 10000 == 0:
        print(i, loss_value)
    if abs(prev_loss - loss_value) < tol:
        converged = True
    prev_loss = loss_value
    i = i + 1

if not converged:
    print(f"Stopped after {max_iters} iterations without meeting tol={tol}")

0 14.337325096130371


To show predicted values

In [None]:
# Class probabilities and predicted labels of the logistic-regression model
# on the training matrix X. Gradients are not needed for inference.
with torch.no_grad():
    pred = F.softmax(log_reg(X.float().cpu()), dim=-1).numpy()
# The predicted label is the class with the highest probability in each row.
y_pred_train_logreg = pred.argmax(axis=-1)

In [None]:

# Test features as a float tensor with one 784-value row per image.
X_test = torch.tensor(test_images).float().view(-1, 784)

# Raw class scores from the logistic-regression model
logits = log_reg(X_test)

# Softmax turns the scores into per-class probabilities
probabilities = logits.softmax(dim=1)

# Predicted class = index of the highest probability in each row
predicted_classes_logreg = probabilities.argmax(dim=1)

print("Predicted classes:", predicted_classes_logreg)


Predicted classes: tensor([7, 2, 1,  ..., 4, 5, 6])


In [None]:
# Display the logistic-regression predictions for the training set.
# The previous name, predicted_labels_logreg_train, is not defined in any
# visible cell; the predictions are stored in y_pred_train_logreg.
y_pred_train_logreg


In [None]:
from sklearn.metrics import f1_score, confusion_matrix

F1 Score and Confusion Matrix of MLP

In [None]:
# MLP on the training split: weighted F1-score and confusion matrix.
true_labels = Y.cpu().numpy()
predicted_labels = predicted_mlp_train.cpu().numpy()

# average='weighted' weights each class's F1 by its number of true samples.
f1_mlp_train = f1_score(y_true=true_labels, y_pred=predicted_labels, average='weighted')

# Rows are true classes, columns are predicted classes.
cm_mlp_train = confusion_matrix(y_true=true_labels, y_pred=predicted_labels)

print(f"F1-score: {f1_mlp_train:.4f}", "Confusion Matrix:", cm_mlp_train, sep="\n")

F1-score: 0.8386
Confusion Matrix:
[[5225    2   89  109   20  180  248    9   30   11]
 [   9 6366  138    8    9   12   24   13  125   38]
 [  99  109 4742  433   70   17   82  141  222   43]
 [  76   43  948 4193    8  268   37  159  351   48]
 [  16   12    8    8 5257   70   75  195   40  161]
 [ 133    8   75  360   79 4206  151   16  294   99]
 [  47    9   37   10   68  213 5441    5   86    2]
 [  10   61   86    8  142    2    8 5646   10  292]
 [  19  384  147  159   68  300   46   30 4645   53]
 [  31  104    7   61  717   70    2  225   78 4654]]


In [None]:
# MLP on the test split: weighted F1-score and confusion matrix.
true_labels = Y_test.cpu().numpy()
predicted_labels = predicted_mlp_test.cpu().numpy()

# average='weighted' weights each class's F1 by its number of true samples.
f1_mlp_test = f1_score(y_true=true_labels, y_pred=predicted_labels, average='weighted')

# Rows are true classes, columns are predicted classes.
cm_mlp_test = confusion_matrix(y_true=true_labels, y_pred=predicted_labels)

print(f"F1-score: {f1_mlp_test:.4f}", "Confusion Matrix:", cm_mlp_test, sep="\n")

F1-score: 0.8281
Confusion Matrix:
[[ 875    0   11   13    3   18   53    3    2    2]
 [   3 1091   13    2    1    1    3    2   15    4]
 [  20    9  830   77    9    2   10   24   38   13]
 [  18    4  151  697    1   46    4   23   58    8]
 [   2    2    3    1  880   11   13   34    9   27]
 [  23    3   13   85   22  646   17    7   57   19]
 [   8    3    2    3    9   38  880    3   11    1]
 [   2   15   31    6   23    0    2  898    1   50]
 [   7   53   23   32   28   58    8   13  744    8]
 [   6   32    1   13  148   14    1   31    9  754]]


F1 Score and Confusion Matrix of RF

In [None]:
# Random forest on the training split: weighted F1-score and confusion matrix.
true_labels = Y.cpu().numpy()
predicted_labels = predicted_labels_rf_train

# average='weighted' weights each class's F1 by its number of true samples.
f1_rf_train = f1_score(y_true=true_labels, y_pred=predicted_labels, average='weighted')

# Rows are true classes, columns are predicted classes.
cm_rf_train = confusion_matrix(y_true=true_labels, y_pred=predicted_labels)

print(f"F1-score: {f1_rf_train:.4f}", "Confusion Matrix:", cm_rf_train, sep="\n")

F1-score: 0.9992
Confusion Matrix:
[[5923    0    0    0    0    0    0    0    0    0]
 [   0 6741    0    0    0    0    0    1    0    0]
 [   0    0 5956    1    1    0    0    0    0    0]
 [   0    0    2 6122    0    1    0    3    2    1]
 [   0    0    0    0 5840    0    0    0    0    2]
 [   1    1    0    4    0 5414    1    0    0    0]
 [   3    1    0    0    0    1 5913    0    0    0]
 [   0    0    3    0    0    0    0 6261    0    1]
 [   0    1    0    1    0    1    2    1 5845    0]
 [   0    0    1    3    3    3    0    1    3 5935]]


In [None]:
# Random forest on the test split: weighted F1-score and confusion matrix.
true_labels = Y_test.cpu().numpy()
predicted_labels = predicted_labels_rf

# average='weighted' weights each class's F1 by its number of true samples.
f1_rf_test = f1_score(y_true=true_labels, y_pred=predicted_labels, average='weighted')

# Rows are true classes, columns are predicted classes.
cm_rf_test = confusion_matrix(y_true=true_labels, y_pred=predicted_labels)

print(f"F1-score: {f1_rf_test:.4f}", "Confusion Matrix:", cm_rf_test, sep="\n")

F1-score: 0.9522
Confusion Matrix:
[[ 968    1    1    0    0    2    5    1    1    1]
 [   0 1123    1    6    0    2    1    2    0    0]
 [   8    1  987    5    4    0    6   11   10    0]
 [   1    1   15  950    0   22    0   13    6    2]
 [   2    1    5    1  937    0    6    3    8   19]
 [   9    0    6   24    2  829    7    2   10    3]
 [  13    4    2    0    7    7  924    0    0    1]
 [   2    9   19    4    7    0    1  965    4   17]
 [   7    2    7   15    5   17    6    4  900   11]
 [   8    6    4   13   21    5    3    3    6  940]]


F1 Score and Confusion Matrix of Logistic Regression

In [None]:
# Logistic regression on the training split: weighted F1-score and confusion matrix.
true_labels = Y.cpu().numpy()
predicted_labels = y_pred_train_logreg

# average='weighted' weights each class's F1 by its number of true samples.
f1_logreg_train = f1_score(y_true=true_labels, y_pred=predicted_labels, average='weighted')

# Rows are true classes, columns are predicted classes.
cm_logreg_train = confusion_matrix(y_true=true_labels, y_pred=predicted_labels)

print(f"F1-score: {f1_logreg_train:.4f}", "Confusion Matrix:", cm_logreg_train, sep="\n")

F1-score: 0.9288
Confusion Matrix:
[[5764    0   30    2    5   43   21    4   48    6]
 [   1 6576   51   14    8   16    4    7   56    9]
 [  26   36 5564   30   47   34   46   37  116   22]
 [  31   32  238 4998   10  388   14   63  285   72]
 [  13   16   40    6 5485   17   36   23   41  165]
 [  35   10   47   54   42 4965   45    9  174   40]
 [  30    9   60    4   34   65 5675    0   40    1]
 [   8   13   59   11   53    9    2 5859   27  224]
 [  31   60  108   48   18  158   30   15 5325   58]
 [  13   13   17   19  141   49    1  111   66 5519]]


In [None]:
# Logistic regression on the test split: weighted F1-score and confusion matrix.
true_labels = Y_test.cpu().numpy()
# Convert the prediction tensor to a NumPy array, as the other metric cells
# do, instead of handing a torch tensor to scikit-learn.
predicted_labels = predicted_classes_logreg.cpu().numpy()

# average='weighted' weights each class's F1 by its number of true samples.
f1_logreg_test = f1_score(true_labels, predicted_labels, average='weighted')

# Rows are true classes, columns are predicted classes.
cm_logreg_test = confusion_matrix(true_labels, predicted_labels)

print(f"F1-score: {f1_logreg_test:.4f}")
print("Confusion Matrix:")
print(cm_logreg_test)

F1-score: 0.8986
Confusion Matrix:
[[ 943    0    6    1    3   11    7    2    7    0]
 [   0 1095   11    3    1    2    3    3   16    1]
 [   7   13  903   17   10   10   19    8   39    6]
 [   7    5   43  817    5   62    3   13   43   12]
 [   1    4   12    4  900    2    7   14    9   29]
 [  11    3    8   17    7  770   19    5   39   13]
 [  10    3   15    4    6   21  894    0    5    0]
 [   1    6   27    9   11    2    0  922    7   43]
 [  10   17   13   16    9   31   14   10  841   13]
 [   3    7    3    6   27   10    1   31   19  902]]


Let us assume your MLP has 30 neurons in first layer, 20 in second layer and then 10 finally for the output layer (corresponding to 10 classes). On the trained MLP, plot the t-SNE for the output from the layer containing 20 neurons for the 10 digits. Contrast this with the t-SNE for the same layer but for an untrained model. What do you conclude?

Now, use the trained MLP to predict on the Fashion-MNIST dataset. What do you observe? How do the embeddings (t-SNE visualization of the second-layer output) compare for MNIST and Fashion-MNIST images?

In [264]:
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

# Read the Fashion-MNIST CSV exports; each file has a 'label' column plus
# the feature columns.
train_df = pd.read_csv('/content/fashion-mnist_train.csv')
test_df = pd.read_csv('/content/fashion-mnist_test.csv')

# Split every frame into a feature matrix and a label vector.
X_train = train_df.drop(columns='label').values
y_train = train_df['label'].values
X_test = test_df.drop(columns='label').values
y_test = test_df['label'].values

# Features become float32 tensors, labels become int64 tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Pair features with labels and wrap them in single-sample loaders; only the
# training loader shuffles.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)


In [265]:
X_train_tensor.shape

torch.Size([60000, 784])

In [266]:
X_test_tensor.shape

torch.Size([10000, 784])

In [267]:
X_train_f = X_train_tensor.to(device)

In [268]:
# X_train_f is already a 2-D matrix with one 784-value row per image, so a
# single reshape replaces the per-row Python loop and torch.stack.
# NOTE: this rebinds the notebook-level name X, as the original cell did.
X = X_train_f.reshape(-1, 784)
X_train_f = X
print(X_train_f.shape)

torch.Size([60000, 784])


In [269]:
y_train_f = y_train_tensor.to(device)

In [270]:
y_train_f.shape

torch.Size([60000])

In [271]:
X_train_f = X_train_f.float()
y_train_f = y_train_f.long()

In [272]:
# Run the MLP (trained on MNIST above) on the Fashion-MNIST training inputs.
# torch.max over dim 1 returns (max values, indices); the indices are kept as the predicted classes.

with torch.no_grad():  # No need to track gradients
    outputs = model(X_train_f)
    _, predicted_mlp_train_f = torch.max(outputs.data, 1)
predicted_mlp_train_f

tensor([1, 2, 6,  ..., 9, 1, 1], device='cuda:0')

In [273]:
y_train_f

tensor([2, 9, 6,  ..., 8, 8, 7], device='cuda:0')

In [275]:
X_test_tensor.shape

torch.Size([10000, 784])

In [276]:
X_test_f = X_test_tensor.to(device)

In [277]:
X_test_f.shape

torch.Size([10000, 784])

In [278]:
# X_test_f is already a 2-D matrix with one 784-value row per image
# (10000 rows for the test split), so a single reshape replaces the per-row
# Python loop and torch.stack.
# NOTE: this rebinds the notebook-level name X, as the original cell did.
X = X_test_f.reshape(-1, 784)
X_test_f = X
print(X_test_f.shape)


torch.Size([10000, 784])


In [279]:
# Run the MLP on the Fashion-MNIST test inputs and keep the index of the
# largest output per row as the predicted class.
with torch.no_grad():  # No need to track gradients
    outputs = model(X_test_f.float())
    _, predicted_mlp_test_f = torch.max(outputs.data, 1)
predicted_mlp_test_f

tensor([6, 6, 6,  ..., 2, 6, 6], device='cuda:0')

In [280]:
predicted_mlp_test_f.shape

torch.Size([10000])

In [284]:
y_test_f = y_test_tensor
y_test_f = y_test_f.to(device)

In [285]:
y_test_f.shape

torch.Size([10000])

In [286]:
y_test_f

tensor([0, 1, 2,  ..., 8, 8, 1], device='cuda:0')

Confusion matrix for Fashion MNIST

In [255]:
# MLP on the Fashion-MNIST training split: weighted F1-score and confusion matrix.
true_labels = y_train_f.cpu().numpy()
# Convert to NumPy like the true labels, instead of handing a torch tensor
# to scikit-learn.
predicted_labels = predicted_mlp_train_f.cpu().numpy()

# average='weighted' weights each class's F1 by its number of true samples.
f1_mlp_train_f = f1_score(true_labels, predicted_labels, average='weighted')

# Rows are true classes, columns are predicted classes.
cm_mlp_train_f = confusion_matrix(true_labels, predicted_labels)

print(f"F1-score: {f1_mlp_train_f:.4f}")
print("Confusion Matrix:")
print(cm_mlp_train_f)

F1-score: 0.0987
Confusion Matrix:
[[602 704 617 557 541 517 626 616 612 608]
 [585 711 622 574 564 488 601 655 588 612]
 [595 691 620 604 562 515 588 621 590 614]
 [584 734 600 562 555 546 618 655 547 599]
 [565 734 639 567 580 474 613 656 559 613]
 [602 736 595 548 605 477 591 660 551 635]
 [591 687 627 562 553 464 637 662 567 650]
 [646 736 588 606 569 480 578 607 571 619]
 [551 723 628 614 581 457 585 700 536 625]
 [629 748 601 574 566 493 597 612 574 606]]


In [287]:
# MLP on the Fashion-MNIST test split: weighted F1-score and confusion matrix.
true_labels = y_test_f.cpu().numpy()
# Convert to NumPy like the true labels, instead of handing a torch tensor
# to scikit-learn.
predicted_labels = predicted_mlp_test_f.cpu().numpy()

# average='weighted' weights each class's F1 by its number of true samples.
f1_mlp_test_f = f1_score(true_labels, predicted_labels, average='weighted')

# Rows are true classes, columns are predicted classes.
cm_mlp_test_f = confusion_matrix(true_labels, predicted_labels)

print(f"F1-score: {f1_mlp_test_f:.4f}")
print("Confusion Matrix:")
print(cm_mlp_test_f)

F1-score: 0.0391
Confusion Matrix:
[[  1  38  73   1   0   0 867   3   1  16]
 [  0  68  85   0   0   0 847   0   0   0]
 [  2  22  17   0   0   8 949   1   0   1]
 [  0  48 176   0   0   1 760   4   0  11]
 [  0  26  46   5   0  29 894   0   0   0]
 [  2 314 491  40   2   0  89  32  27   3]
 [  1  62  53   3   0   6 868   0   1   6]
 [  0 681 309   1   0   0   0   8   1   0]
 [  3 359 206  87   2  17 220  11  27  68]
 [  0 216 602  92   0   0  85   0   5   0]]
