In [1]:
# build a simple CNN to classify geophysics dataset

import numpy as np
import os
import rasterio
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd

from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

import h5py
from datetime import datetime

In [4]:
# Load the dataset: open the HDF5 file in read-only mode
db = h5py.File('all_stamps_classes.hdf5', 'r')

In [5]:
# Show which datasets the HDF5 file contains
db.keys()

<KeysViewHDF5 ['features', 'label', 'stamp_id']>

In [6]:
# Pull the feature and label datasets out of the open HDF5 file.
X_ = db['features']
y = db['label']

In [7]:
# Factorize the label strings into integer codes.
# The codes are always derived from the on-disk labels rather than from the
# current value of `y`, so re-running this cell cannot factorize the integer
# codes a second time and overwrite `definitions` with integers.
factor = pd.factorize(db['label'])
y, definitions = factor  # y: integer code per stamp, definitions: code -> label
print(definitions)
y

['none' 'Cu' 'other' 'Au' 'PGE' 'Fe' 'Pb']


array([0, 0, 0, ..., 0, 0, 0])

In [8]:
# Preallocate an in-memory float64 array with the same shape as the stored features.
X = np.zeros(shape=X_.shape, dtype=np.float64)

In [9]:
# Copy the whole HDF5 dataset into the preallocated NumPy array in one call.
# read_direct lets HDF5 write straight into X (casting to X's dtype on the fly)
# instead of issuing one small read per stamp from a Python loop.
X_.read_direct(X)

In [10]:
# Confirm X is an in-memory NumPy array
type(X)

numpy.ndarray

In [12]:
# Rescale every stamp into the 0-1 range, one stamp at a time.
# NOTE(review): MinMaxScaler normalises each column of the 2-D array it is
# given independently, so each stamp is scaled column by column rather than by
# its overall min/max -- confirm this is intended. Any change here must also be
# made wherever data is prepared for the trained model.
min_max_scaler = MinMaxScaler()

for idx, stamp in enumerate(X):
    X[idx] = min_max_scaler.fit_transform(stamp)

In [13]:
# Hold out 25% of the stamps for evaluation; the fixed seed keeps the split
# reproducible.
# NOTE(review): the split is not stratified -- with labels dominated by one
# class, consider whether stratify=y is appropriate.
split = train_test_split(X, y, test_size=0.25, random_state=42)
trainX, testX, trainY, testY = split

In [14]:
# Reshape for CNN ingest (add a trailing single-channel axis) and change the
# datatype to float32.
trainX = trainX.reshape((len(trainX), 50, 50, 1))
trainX = trainX.astype('float32')
testX = testX.reshape((len(testX), 50, 50, 1))
testX = testX.astype('float32')

# One-hot encode the integer labels. num_classes is pinned to the number of
# factorized label values: without it, to_categorical sizes its output from the
# largest label present in the array, so a split that happens to lack the
# highest class code would get fewer columns than the model's output layer.
n_classes = len(definitions)

trainY = to_categorical(trainY, num_classes=n_classes)
trainY = trainY.astype('float32')

testY = to_categorical(testY, num_classes=n_classes)
testY = testY.astype('float32')

In [15]:
# Sanity-check the shape and dtype of every split before training.
print(trainX.shape, trainX.dtype)
print(trainY.shape, trainY.dtype)
print(testX.shape, testX.dtype)
print(testY.shape, testY.dtype)  # was testX.dtype, which reported the wrong array

(139725, 50, 50, 1) float32
(139725, 7) float32
(46575, 50, 50, 1) float32
(46575, 7) float32


In [20]:
# Three convolution stages (the first two each followed by 2x2 max-pooling and
# 50% dropout) feeding a small dense head with a 7-way softmax output.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.5),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.5),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(7, activation='softmax'),
])

In [21]:
# Print the layer-by-layer architecture with output shapes and parameter counts
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 48, 48, 32)        320       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 24, 24, 32)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 24, 24, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 22, 22, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 11, 11, 64)        0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 11, 11, 64)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 9, 9, 64)         

In [22]:
# Multi-class setup: categorical cross-entropy over the one-hot labels,
# optimised with RMSprop and tracked by accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [23]:
# Train for 3 passes over the training split in mini-batches of 64 stamps.
model.fit(x=trainX, y=trainY, batch_size=64, epochs=3)

Train on 139725 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1a45081f50>

In [24]:
# Score the trained model on the held-out split (loss and accuracy).
test_loss, test_acc = model.evaluate(x=testX, y=testY)



In [25]:
# Predicted class code for every held-out stamp.
# Sequential.predict_classes is deprecated and was removed from later Keras
# releases; for a softmax output the equivalent is the argmax over the
# predicted class probabilities.
test_predictions = np.argmax(model.predict(testX), axis=-1)

In [26]:
# Inspect the predicted class codes for the held-out split
test_predictions

array([0, 0, 0, ..., 0, 0, 0])

In [27]:
# Label strings, indexed by their integer class code
definitions

array(['none', 'Cu', 'other', 'Au', 'PGE', 'Fe', 'Pb'], dtype=object)

In [28]:
# Persist the trained model to disk.
model.save(filepath='saved_model/simple_cnn_small_stamps2')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: saved_model/simple_cnn_small_stamps2/assets


In [29]:
# Close the HDF5 file now that its data has been copied into memory
db.close()

In [30]:
# import test data

# load dataset: open the deposits HDF5 file in read-only mode
db_test = h5py.File('deposits_classes.hdf5', 'r')


In [31]:
# Show which datasets the deposits file contains
db_test.keys()

<KeysViewHDF5 ['features', 'label', 'stamp_id']>

In [32]:
# Get the feature dataset from the file (the labels are not read here)
X_test_ = db_test['features']

In [33]:
# Preallocate an in-memory float64 array with the same shape as the stored features.
X_test = np.zeros(shape=X_test_.shape, dtype=np.float64)

In [34]:
# Copy the whole HDF5 dataset into the preallocated NumPy array in one call.
# read_direct lets HDF5 write straight into X_test (casting to its dtype on the
# fly) instead of issuing one small read per stamp from a Python loop.
X_test_.read_direct(X_test)

In [35]:
# Rescale every stamp into the 0-1 range, one stamp at a time.
# NOTE(review): MinMaxScaler normalises each column of the 2-D array it is
# given independently, so each stamp is scaled column by column rather than by
# its overall min/max -- confirm this is intended. This must stay identical to
# the preprocessing applied to the data the model was trained on.
min_max_scaler = MinMaxScaler()

for idx, stamp in enumerate(X_test):
    X_test[idx] = min_max_scaler.fit_transform(stamp)

In [36]:
# Check the array dimensions before reshaping for the CNN
X_test.shape

(3121, 50, 50)

In [37]:
# Append a single-channel axis and cast to float32 to match the model's input.
X_test = X_test.reshape((len(X_test), 50, 50, 1)).astype('float32')

In [38]:
# Predicted class code for every deposits stamp.
# Sequential.predict_classes is deprecated and was removed from later Keras
# releases; for a softmax output the equivalent is the argmax over the
# predicted class probabilities.
submission_test_pred = np.argmax(model.predict(X_test), axis=-1)

In [39]:
# Inspect the predicted class codes for the deposits data
submission_test_pred

array([0, 0, 0, ..., 0, 0, 0])

In [40]:
# Map each integer class code back to its original label string.
label_map = dict(enumerate(definitions))


In [41]:
# Translate every predicted class code into its label string.
predicted_labels = []
for class_code in submission_test_pred:
    predicted_labels.append(label_map[class_code])

In [42]:
# Count how many stamps were assigned to each predicted label
pd.Series(predicted_labels).value_counts()

none    3121
dtype: int64

In [43]:
# Close the deposits HDF5 file
db_test.close()