In [1]:
# Build a simple CNN to classify a geophysics dataset

import numpy as np
import os
import rasterio
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd

from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

import h5py
from datetime import datetime

In [2]:
# load dataset: open the HDF5 file read-only (the handle stays open for later cells)
db = h5py.File('deposits_classes.hdf5', mode='r')

In [3]:
# list the names of the top-level entries stored in the HDF5 file
db.keys()

<KeysViewHDF5 ['features', 'label', 'stamp_id']>

In [4]:
# grab the feature and label datasets from the open HDF5 file
X_ = db['features']
y = db['label']

In [5]:
# Factorize the deposit labels into integer class codes.
# The labels are read from the HDF5 file into a NumPy array right here rather
# than taken from `y`: this cell rebinds `y` to the integer codes, so
# factorizing `y` itself would, on a re-run, factorize the codes and replace
# `definitions` with integers. Reading with [:] also hands pd.factorize a real
# ndarray instead of a lazy h5py dataset handle.
factor = pd.factorize(db['label'][:])
y = factor[0]            # integer class code for every sample
definitions = factor[1]  # unique label values; position == class code
print(definitions)
y

['Cu' 'other' 'Au' 'PGE' 'Fe' 'Pb']


array([0, 0, 1, ..., 0, 1, 0])

In [6]:
# preallocate an in-memory array (NumPy default float dtype) matching the HDF5 feature dataset's shape
X = np.zeros(shape=X_.shape)

In [7]:
# Copy the HDF5 feature dataset into the preallocated NumPy array.
# A single bulk read replaces the per-sample Python loop (one HDF5 read per
# stamp); values are cast to X's dtype on assignment, exactly as before.
X[...] = X_[...]

In [8]:
# sanity check: X should now be an in-memory NumPy array, not an HDF5 dataset handle
type(X)

numpy.ndarray

In [9]:
# scale image features 0-1, one image at a time (the scaler is re-fit on every image)
# NOTE(review): MinMaxScaler rescales each column of the 2-D array it is given
# independently, so every image column is stretched to [0, 1] on its own rather
# than the image as a whole -- confirm this is the intended normalisation.
min_max_scaler = MinMaxScaler()

for idx, stamp in enumerate(X):
    X[idx] = min_max_scaler.fit_transform(stamp)

In [10]:
# split train / test: hold out 25% of the samples, with a fixed seed so the split is repeatable
trainX, testX, trainY, testY = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [11]:
# reshape for CNN ingest (add a trailing channel axis) and change datatype to float32
trainX = trainX.reshape((len(trainX), 50, 50, 1)).astype('float32')
testX = testX.reshape((len(testX), 50, 50, 1)).astype('float32')

# One-hot encode the integer labels.
# num_classes is pinned to the number of label definitions: without it,
# to_categorical infers the width from the largest code present in each array,
# so a split that happens to miss the highest class would come out narrower
# than the model's output layer.
n_classes = len(definitions)

trainY = to_categorical(trainY, num_classes=n_classes).astype('float32')
testY = to_categorical(testY, num_classes=n_classes).astype('float32')

In [12]:
# report shape and dtype of every split; each line pairs an array's shape with
# that same array's dtype (the last line previously printed testX's dtype)
print(trainX.shape, trainX.dtype)
print(trainY.shape, trainY.dtype)
print(testX.shape, testX.dtype)
print(testY.shape, testY.dtype)

(2340, 50, 50, 1) float32
(2340, 6) float32
(781, 50, 50, 1) float32
(781, 6) float32


In [13]:
# Small CNN for 50x50 single-channel stamps: three 3x3 conv stages (the first
# two followed by 2x2 max pooling), then a dense head ending in a 6-way softmax.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(6, activation='softmax'),
])

In [14]:
# print the layer-by-layer output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 48, 48, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 24, 24, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 22, 22, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 11, 11, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 5184)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                3

In [15]:
# configure training: categorical cross-entropy on the one-hot labels,
# RMSprop optimizer, and accuracy reported as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [16]:
# train on the training split for 30 epochs in mini-batches of 64
model.fit(x=trainX, y=trainY, batch_size=64, epochs=30)

Train on 2340 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x1a40470c50>

In [17]:
# score the trained model on the held-out split; returns the loss followed by the compiled metric (accuracy)
test_loss, test_acc = model.evaluate(x=testX, y=testY)



In [18]:
# Predicted class index per test sample.
# Sequential.predict_classes was removed in TensorFlow 2.6; for a softmax
# output layer the equivalent is the argmax over the predicted probabilities.
test_predictions = np.argmax(model.predict(testX), axis=-1)

In [19]:
# display the predicted class indices for the test split
test_predictions

array([2, 2, 0, 0, 2, 2, 0, 2, 2, 4, 4, 2, 0, 2, 2, 1, 2, 0, 2, 0, 0, 2,
       0, 2, 2, 1, 4, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 3, 2,
       2, 1, 2, 0, 2, 2, 2, 0, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 2, 0, 2, 0,
       0, 2, 0, 0, 2, 0, 2, 0, 0, 1, 0, 2, 0, 3, 2, 2, 2, 2, 2, 0, 0, 0,
       2, 4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 4, 1, 2, 0, 0, 2, 0,
       2, 0, 0, 0, 4, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 4, 2, 0, 1,
       2, 4, 0, 2, 2, 0, 3, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 2, 0,
       2, 0, 1, 2, 0, 0, 2, 4, 0, 1, 4, 0, 2, 2, 0, 2, 4, 4, 0, 2, 2, 2,
       2, 0, 4, 0, 2, 2, 2, 1, 2, 0, 0, 2, 4, 2, 3, 1, 4, 0, 0, 0, 0, 4,
       0, 0, 0, 2, 2, 0, 4, 0, 4, 0, 2, 2, 2, 0, 3, 2, 0, 3, 0, 0, 2, 0,
       0, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 2, 0, 0, 0, 1,
       0, 0, 2, 0, 2, 0, 0, 0, 0, 4, 0, 2, 0, 2, 4, 1, 2, 0, 2, 0, 1, 0,
       0, 0, 2, 0, 2, 0, 0, 1, 4, 0, 0, 2, 4, 4, 0, 2, 0, 2, 0, 0, 0, 1,
       0, 3, 0, 2, 0, 0, 2, 0, 0, 0, 0, 1, 2, 0, 0,

In [20]:
# display the label definitions; a label's position is the integer code produced by the factorize step
definitions

array(['Cu', 'other', 'Au', 'PGE', 'Fe', 'Pb'], dtype=object)

In [None]:
# close the HDF5 file handle (the call parentheses are required; a bare
# `db.close` only references the method and leaves the file open)
db.close()