In [99]:
# build a simple CNN to classify geophysics dataset

import numpy as np
import os
import rasterio
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd

from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

In [115]:
# set path of the train dataset
trainpath = '/Users/pedrojunqueira/Desktop/train'

In [116]:
# create a list with the id of the stamps
trainids = os.listdir(trainpath)

In [118]:
# check len - should be 1863
len(trainids)

1863

In [119]:
# import stamp geodataframe path
stamps_data = '../data/stamp_locations.geo.json'

In [120]:
# create geodataframe objects
stamps = gpd.read_file(stamps_data)

In [121]:
# create a dictionary for each stamp id the label of the commodity
id_label = dict()

for i, label in zip(stamps['id'].tolist(),stamps['stratification_label'].tolist()):
    id_label[i] = label
    

In [122]:
# fixing stamp that does not have trainset
id_train = dict()

for item in trainids:
    label = id_label.get(item)
    if label == None:
        id_train[item] = 'none'
    else:
        id_train[item] = label

In [10]:
# create dataset

geophysics_image = 'geophysics/gravity/isostatic_residual_gravity_anomaly.tif'

data = []
labels = []

for i, stampid in enumerate(trainids):
    image_path = os.path.join(trainpath,stampid,geophysics_image)
    label = id_train[stampid]
    try:
        with rasterio.open(image_path, 'r') as src:
            image_data = src.read(1)
            data.append(image_data)
            labels.append(label)
    except Exception as err:
        print(err)
        continue

    if i%500 == 0:
            print(f'{i}/{len(trainids)} images processed')
print(f'{i+1}/{len(trainids)} images processed')

dataset = (np.array(data), np.array(labels))


0/1863 images processed
500/1863 images processed
1000/1863 images processed
1500/1863 images processed
1863/1863 images processed


In [123]:
# unpack X, y from the dataset
X, y = dataset

In [124]:
# factorizing labels to integers
factor = pd.factorize(y)
y = factor[0]
definitions = factor[1]
print(definitions)
y

['Cu' 'other' 'Au' 'none' 'PGE' 'Fe' 'Pb']


array([0, 1, 2, ..., 0, 3, 3])

In [125]:
# scale image features 0-1
min_max_scaler = MinMaxScaler()

for i in range(len(X)):
    X[i,:,:] = min_max_scaler.fit_transform(X[i,:,:]) 

In [126]:
# split train test
trainX, testX, trainY, testY = train_test_split(X, y,
                                                  test_size=0.25, random_state=42)

In [127]:
# reshape for cnn ingest and also chenge datatype to float
trainX = trainX.reshape((len(trainX), 500, 500, 1))
trainX = trainX.astype('float32')
testX = testX.reshape((len(testX), 500, 500, 1))
testX = testX.astype('float32')


trainY = to_categorical(trainY)
trainY = trainY.astype('float32')

testY = to_categorical(testY)
testY = testY.astype('float32')

In [129]:
print(trainX.shape,trainX.dtype)
print(trainY.shape,trainY.dtype)
print(testX.shape,testX.dtype)
print(testY.shape,testX.dtype)

(1397, 500, 500, 1) float32
(1397, 7) float32
(466, 500, 500, 1) float32
(466, 7) float32


In [130]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(500, 500, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(7, activation='softmax'))

In [131]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 498, 498, 32)      320       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 249, 249, 32)      0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 247, 247, 64)      18496     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 123, 123, 64)      0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 121, 121, 64)      36928     
_________________________________________________________________
flatten_3 (Flatten)          (None, 937024)            0         
_________________________________________________________________
dense_6 (Dense)              (None, 64)               

In [132]:
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [133]:
model.fit(trainX, trainY, epochs=5, batch_size=64)

Train on 1397 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1b1a6a9490>

In [134]:
test_loss, test_acc = model.evaluate(testX, testY)



In [135]:
test_predictions = model.predict_classes(testX)

In [136]:
test_predictions

array([3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2, 3, 0, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3,
       3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 0, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 2, 3, 3, 1, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 0, 3, 3, 3, 2, 2, 3, 3,
       3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 1, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3,
       3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3,

In [137]:
model.save('saved_model/simple_cnn') 

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: saved_model/simple_cnn/assets
