In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Import Libraries

In [None]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import h5py
import random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as ge

import tqdm
import io

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import layers, models, optimizers, losses
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import roc_curve, auc, classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Load Data

In [None]:
# Load the train/test arrays from the HDF5 file. Slicing with [:] copies each
# dataset into memory, so the arrays stay usable after the file is closed.
with h5py.File("/kaggle/input/3d-mnist/full_dataset_vectors.h5", "r") as df:
    X_train = df["X_train"][:]
    y_train = df["y_train"][:]
    X_test = df["X_test"][:]
    y_test = df["y_test"][:]

In [None]:
# Sanity-check the loaded arrays: features and labels of each split, with a
# blank line between the train and test groups.
for caption, array in (("X_train shape:", X_train), ("y_train shape:", y_train)):
    print(caption, array.shape)
print()
for caption, array in (("X_test shape:", X_test), ("y_test shape:", y_test)):
    print(caption, array.shape)

# Visualization

In [None]:
# Collect the groups keyed "0".."24" of the point-cloud file as
# (img, points, label) tuples; the arrays are copied out with [:] so they
# outlive the open file handle.
with h5py.File("/kaggle/input/3d-mnist/train_point_clouds.h5", "r") as points:
    digits = [
        (group["img"][:], group["points"][:], group.attrs["label"])
        for group in (points[str(i)] for i in range(25))
    ]

In [None]:
# Pick one of the loaded samples at random. The previous random.randint(0, 26)
# includes BOTH endpoints and could return 25 or 26, which are past the end of
# `digits`; randrange(len(digits)) always yields a valid index.
plot_idx = random.randrange(len(digits))

# Each entry of `digits` is (img, points, label); split the chosen sample's
# points into per-axis coordinate lists for the scatter plot.
cloud = digits[plot_idx][1]
x = [point[0] for point in cloud]
y = [point[1] for point in cloud]
z = [point[2] for point in cloud]

# 3D scatter of the point cloud, shaded by the z coordinate, titled with the label.
fig = go.Figure(
    data=go.Scatter3d(
        x=x, y=y, z=z,
        mode='markers',
        marker=dict(size=20, color=z, colorscale='Gray'),
    ),
    layout=go.Layout(
        height=800,
        width=800,
        title="Digit: " + str(digits[plot_idx][2]),
    ),
)

fig.show()

# Preprocess

In [None]:
# Colour mapper that converts scalar values to RGBA using the "Oranges" colormap.
# NOTE(review): no explicit norm is supplied (None is the default), so matplotlib
# presumably autoscales from the first array passed to to_rgba -- confirm that all
# samples share the same value range, or pass an explicit Normalize.
scalar_map = plt.cm.ScalarMappable(norm=None, cmap="Oranges")

In [None]:
# Colour-map every training sample: to_rgba returns RGBA per value, and the
# trailing [:, :-1] slice drops the alpha column, leaving 3 channels.
# The buffer is uninitialised on purpose; each row is overwritten in the loop.
X_train_new = np.empty((X_train.shape[0], 4096, 3))

for sample_idx, sample in enumerate(X_train):
    X_train_new[sample_idx] = scalar_map.to_rgba(sample)[:, :-1]

In [None]:
# Colour-map every test sample: to_rgba returns RGBA per value, and the
# trailing [:, :-1] slice drops the alpha column, leaving 3 channels.
# The buffer is uninitialised on purpose; each row is overwritten in the loop.
X_test_new = np.empty((X_test.shape[0], 4096, 3))

for sample_idx, sample in enumerate(X_test):
    X_test_new[sample_idx] = scalar_map.to_rgba(sample)[:, :-1]

In [None]:
# Unflatten the 4096 entries of each sample into a 16x16x16 grid with 3 channels.
volume_shape = (16, 16, 16, 3)
X_train_new = X_train_new.reshape((X_train.shape[0], *volume_shape))
X_test_new = X_test_new.reshape((X_test.shape[0], *volume_shape))

In [None]:
# One-hot encode both label vectors over 10 classes.
y_train_new, y_test_new = (to_categorical(labels, 10) for labels in (y_train, y_test))

# Model

In [None]:
# Seed Python, NumPy and TensorFlow before any layer is created so that weight
# initialisation, dropout masks and batch shuffling repeat after a fresh
# "Restart & Run All". (Bit-exact repeatability on GPU is not guaranteed.)
tf.keras.utils.set_random_seed(42)

# 3D CNN classifier: two conv/conv/pool/batch-norm stages followed by a dense head.
# Shape comments assume Keras' default "valid" padding and stride 1 for Conv3D.
input_layer = layers.Input((16, 16, 16, 3))

# Stage 1: 16^3 -> 14^3 -> 12^3, then pooled to 6^3.
conv1 = layers.Conv3D(filters=32, kernel_size=3, activation="relu")(input_layer)
conv2 = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(conv1)
maxp1 = layers.MaxPool3D(pool_size=2)(conv2)
batchnorm1 = layers.BatchNormalization()(maxp1)

# Stage 2: 6^3 -> 4^3 -> 2^3, then pooled to 1^3 with 256 channels.
conv3 = layers.Conv3D(filters=128, kernel_size=3, activation="relu")(batchnorm1)
conv4 = layers.Conv3D(filters=256, kernel_size=3, activation="relu")(conv3)
maxp2 = layers.MaxPool3D(pool_size=2)(conv4)
batchnorm2 = layers.BatchNormalization()(maxp2)

# Dense head with light dropout after each hidden layer.
flat1 = layers.Flatten()(batchnorm2)
dense1 = layers.Dense(units=512, activation="relu")(flat1)
drop1 = layers.Dropout(0.2)(dense1)
dense2 = layers.Dense(units=256, activation="relu")(drop1)
drop2 = layers.Dropout(0.2)(dense2)

# Softmax over the 10 classes.
output_layer = layers.Dense(units=10, activation="softmax")(drop2)

model = models.Model(input_layer, output_layer)

In [None]:
# Categorical cross-entropy matches the one-hot targets; Adam uses a small
# fixed learning rate and accuracy is tracked during training.
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.0001),
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Render the architecture diagram annotated with layer names and tensor shapes.
plot_model(model, show_shapes=True, show_layer_names=True)

# Train

In [None]:
# Train for 50 epochs in batches of 128, holding out 10% of the training
# arrays for validation; the returned history feeds the curves below.
history = model.fit(
    x=X_train_new,
    y=y_train_new,
    batch_size=128,
    epochs=50,
    validation_split=0.1,
)

In [None]:
# Score the trained model on the test arrays (verbose=0 suppresses the progress
# bar); evaluate returns the loss followed by the compiled accuracy metric.
test_metrics = model.evaluate(X_test_new, y_test_new, verbose=0)
test_loss, test_acc = test_metrics
print(f"Test Loss: {test_loss:.5f}", f"Test Accuracy: {test_acc:.5f}", sep="\n")

# Results

In [None]:
# Per-epoch training metrics as a table; the bare last expression displays
# the first five rows.
history_df = pd.DataFrame(data=history.history)
history_df.head(5)

In [None]:
# Training vs. validation loss per epoch.
plt.figure()
for series_key, series_label in (("loss", "train"), ("val_loss", "valid")):
    plt.plot(history.history[series_key], label=series_label)
plt.title("Loss Curve")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
plt.figure()
for series_key, series_label in (("accuracy", "train"), ("val_accuracy", "valid")):
    plt.plot(history.history[series_key], label=series_label)
plt.title("Accuracy Curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

# Test

In [None]:
# Class probabilities for every test sample, reduced to the index of the
# highest-scoring class; the bare last line displays the predictions.
preds = model.predict(X_test_new, verbose=0)
model_predictions = preds.argmax(axis=1)
model_predictions

In [None]:
# Recover integer class labels from the one-hot test targets.
actual_labels = y_test_new.argmax(axis=1)
actual_labels

In [None]:
# Headline test metrics: precision, F1 and recall use the "weighted" average
# across classes; accuracy needs no averaging.
weighted_avg = {"average": "weighted"}
model_accuracy_score = accuracy_score(actual_labels, model_predictions)
model_precision_score = precision_score(actual_labels, model_predictions, **weighted_avg)
model_recall_score = recall_score(actual_labels, model_predictions, **weighted_avg)
model_f1_score = f1_score(actual_labels, model_predictions, **weighted_avg)

# Report each score as a percentage, one per line.
print(
    f"Precision Score = {model_precision_score * 100:.2f}%",
    f"F1 Score = {model_f1_score * 100:.2f}%",
    f"Recall Score = {model_recall_score * 100:.2f}%",
    f"Accuracy Score = {model_accuracy_score * 100:.2f}%",
    sep="\n",
)

In [None]:
# Per-class precision / recall / F1 breakdown; printed because the report is
# a pre-formatted text table.
report_text = classification_report(y_true=actual_labels, y_pred=model_predictions)
print(report_text)

In [None]:
# Confusion matrix of true vs. predicted classes, drawn with both absolute
# counts and normalised values, labelled with the class indices 0-9.
cm = confusion_matrix(y_true=actual_labels, y_pred=model_predictions)
fig, ax = plot_confusion_matrix(
    conf_mat=cm,
    class_names=np.arange(10),
    show_absolute=True,
    show_normed=True,
    colorbar=True,
    figsize=(10, 10),
)
plt.title('Confusion Matrix')
plt.show()