In [95]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   #if like me you do not have a lot of memory in your GPU
os.environ["CUDA_VISIBLE_DEVICES"] = "" #then these two lines force keras to use your CPU
import tensorflow as tf
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from glob import glob 
from tqdm import tqdm
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import h5py
import pickle

In [2]:
all_df = pd.read_csv('./data/train.csv')
all_points = h5py.File('./data/train.h5', 'r')

### Get voxel data

In [3]:
# id_list   train_df['ID'].values
# label_list   train_df['label'].values
# point_list   all_points

In [4]:
tmp = all_points['0']

In [5]:
def get_vector(points, x_y_z=[16, 16, 16]):
    # 3D Points -> [16,16,16]
    xyzmin = np.min(points, axis=0) - 0.001
    xyzmax = np.max(points, axis=0) + 0.001

    diff = max(xyzmax-xyzmin) - (xyzmax-xyzmin)
    xyzmin = xyzmin - diff / 2
    xyzmax = xyzmax + diff / 2

    segments = []
    shape = []

    for i in range(3):
        # note the +1 in num 
        if type(x_y_z[i]) is not int:
            raise TypeError("x_y_z[{}] must be int".format(i))
        s, step = np.linspace(xyzmin[i], xyzmax[i], num=(x_y_z[i] + 1), retstep=True)
        segments.append(s)
        shape.append(step)

    n_voxels = x_y_z[0] * x_y_z[1] * x_y_z[2]
    n_x = x_y_z[0]
    n_y = x_y_z[1]
    n_z = x_y_z[2]

    structure = np.zeros((len(points), 4), dtype=int)
    structure[:,0] = np.searchsorted(segments[0], points[:,0]) - 1
    structure[:,1] = np.searchsorted(segments[1], points[:,1]) - 1
    structure[:,2] = np.searchsorted(segments[2], points[:,2]) - 1

    # i = ((y * n_x) + x) + (z * (n_x * n_y))
    structure[:,3] = ((structure[:,1] * n_x) + structure[:,0]) + (structure[:,2] * (n_x * n_y)) 

    vector = np.zeros(n_voxels)
    count = np.bincount(structure[:,3])
    vector[:len(count)] = count

    vector = vector.reshape(n_z, n_y, n_x)
    return vector


In [6]:
train_data = []

for i in tqdm(range(len(all_points))):
    train_data.append(list(get_vector(all_points[str(i)])))

100%|██████████| 50000/50000 [21:53<00:00, 38.06it/s] 


In [18]:
train_data.reverse()

In [16]:
tmp = get_vector(all_points[str(1)])
type(tmp)

numpy.ndarray

In [97]:
# with open('data_pkl/train_data.pkl', 'wb') as f:
# 	pickle.dump(train_data, f)

### Get label

In [17]:
label_list = list(all_df['label'])

In [22]:
label_list_new = []

label_list.reverse()

for i in label_list:
    label_list_new.append(i)

In [23]:
BATCH_SIZE = 16
SHUFFLE_BUFFER_SIZE = 100
AUTOTUNE = tf.data.AUTOTUNE

train_tensorflow_data = tf.data.Dataset.from_tensor_slices((train_data, label_list))
train_tensorflow_data = train_tensorflow_data.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

### Define Model and Fit 

In [28]:
model = Sequential([
    layers.Conv3D(8, (3,3,3), activation='relu', padding='same', input_shape=(16, 16, 16, 1)),
    layers.Conv3D(16, (3,3,3), activation='relu', padding='same'),
    layers.MaxPooling3D((2,2,2), padding='same'),
    
    layers.Conv3D(32, (3,3,3), activation='relu', padding='same'),
    layers.Conv3D(64, (3,3,3), activation='relu', padding='same'),
    layers.MaxPooling3D((2,2,2), padding='same'),
    
    layers.Conv3D(16, (3,3,3), activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.GlobalAveragePooling3D(),
    layers.Flatten(),
    
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(10, activation='softmax'),
])

In [31]:
# Compile the model
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.Adam(lr=0.001),
              metrics=['accuracy'])
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_15 (Conv3D)          (None, 16, 16, 16, 8)     224       
                                                                 
 conv3d_16 (Conv3D)          (None, 16, 16, 16, 16)    3472      
                                                                 
 max_pooling3d_6 (MaxPooling  (None, 8, 8, 8, 16)      0         
 3D)                                                             
                                                                 
 conv3d_17 (Conv3D)          (None, 8, 8, 8, 32)       13856     
                                                                 
 conv3d_18 (Conv3D)          (None, 8, 8, 8, 64)       55360     
                                                                 
 max_pooling3d_7 (MaxPooling  (None, 4, 4, 4, 64)      0         
 3D)                                                  

In [32]:
model.fit(
    train_tensorflow_data, 
    batch_size=128, 
    epochs=1, 
)



<keras.callbacks.History at 0x1fba252b160>

### Submission

In [33]:
test_points = h5py.File('./data/test.h5', 'r')

In [42]:
submission_data = []

for i in tqdm(list(test_points.keys())):
    submission_data.append(list(get_vector(test_points[i])))

100%|██████████| 40000/40000 [30:20<00:00, 21.97it/s]


In [99]:
new_submission_data = []

for i in tqdm(submission_data):
    new_submission_data.append(tf.expand_dims(i, 0))

100%|██████████| 40000/40000 [03:02<00:00, 219.01it/s]


In [100]:
preds = model.predict(new_submission_data)

In [98]:
# with open('data_pkl/test_data.pkl', 'wb') as f:
# 	# pickle.dump(submission_data, f)

In [71]:
np.shape(submission_data[1])

(16, 16, 16)

In [77]:
test_data = tf.data.Dataset.from_tensor_slices((submission_data))

In [79]:
model.predict(tf.expand_dims(submission_data[1],0))



array([[0.00215394, 0.41577524, 0.01959763, 0.07384691, 0.03368898,
        0.02432891, 0.00125672, 0.21529493, 0.01028046, 0.20377629]],
      dtype=float32)

In [90]:
inf_result = []
class_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

for i in submission_data:
    preds = model.predict(tf.expand_dims(i, 0))
    score = tf.nn.softmax(preds[0])

    inf_result.append(class_names[np.argmax(score)])




In [93]:
submission = pd.read_csv('data\sample_submission.csv')
submission

Unnamed: 0,ID,label
0,50000,-1
1,50001,-1
2,50002,-1
3,50003,-1
4,50004,-1
...,...,...
39995,89995,-1
39996,89996,-1
39997,89997,-1
39998,89998,-1


In [94]:
submission = pd.read_csv('data\sample_submission.csv')
submission = submission.drop('label', 1)
submission['label'] = inf_result
submission.to_csv('subs\First_submission.csv', index=False)

  submission = submission.drop('label', 1)
