In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from spectral import *
import spectral.io.envi as envi

### Data Check

In [2]:
# Load the labelled pixel table (one row per pixel, one column per band plus labels).
# NOTE: unpickling can execute arbitrary code -- only load pickle files you trust.
imageUnsup = pd.read_pickle("./MinMapKmeans.pkl")
imageUnsup.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,222,223,224,225,226,227,228,229,minerals,mineralogy
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Illite HX
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Illite HX
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Illite HX
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Illite HX
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Illite HX


In [6]:
# NOTE(review): disabled preview of the label map -- every line below is commented
# out, so this cell executes nothing. The reshape assumes a 285 x 200 pixel scene
# with the label stored in the last (index 230) column -- TODO confirm before re-enabling.
# imageNpy=imageUnsup.to_numpy()
# groundTruth=imageNpy.reshape(285, 200, 231)
# gt=groundTruth[:, :, 230]
# plt.imshow(gt, cmap=plt.cm.get_cmap('viridis',4))
# plt.colorbar()



Spectral data shape: (57000, 230)


ValueError: cannot reshape array of size 13110000 into shape (336,170,230)

In [4]:
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import minmax_scale
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, confusion_matrix, classification_report
from tensorflow.keras.utils import to_categorical

In [None]:
# Features are the spectral band columns only. The previous `iloc[:, :-1]` removed
# just the trailing 'mineralogy' column and left the integer 'minerals' label
# inside X, leaking the target into the inputs and producing one column more than
# the 230 bands the CNN input is reshaped for.
X = imageUnsup.drop(columns=['minerals', 'mineralogy'])

# One-hot encode the integer class label for the softmax output layer.
y = to_categorical(imageUnsup.loc[:, 'minerals'])

# Rescale each band (column) independently.
X_scaled = minmax_scale(X, axis=0)

X_scaled.shape, y.shape

In [None]:
# Band columns only: keep the label columns out of the feature table.
X = imageUnsup.drop(columns=['minerals', 'mineralogy'])
y = to_categorical(imageUnsup.loc[:, 'minerals'])

# Split data: stratified 70/30 split of the scaled features. A fixed random_state
# makes the split (and every metric computed from it) reproducible across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    stratify=y,
    test_size=0.30,
    random_state=42,
)
print(f"X_train: {X_train.shape}\ny_train: {y_train.shape}\nX_test: {X_test.shape}\ny_test: {y_test.shape}")


In [None]:
# Give both splits the (samples, 230, 1) layout declared as the first Conv1D
# layer's input_shape.
X_train, X_test = (
    np.reshape(split, (-1, 230, 1)) for split in (X_train, X_test)
)

In [None]:
# 1-D CNN over the spectral axis: three stacked convolutions, one pooling step,
# dropout, then a small dense head ending in a softmax with one unit per class.
model = Sequential(
    [
        Conv1D(filters=64, kernel_size=3, activation='relu',
               input_shape=(230, 1), name='Layer1'),
        Conv1D(filters=64, kernel_size=3, activation='relu', name='Layer2'),
        Conv1D(filters=64, kernel_size=3, activation='relu', name='Layer3'),
        MaxPooling1D(pool_size=2, name='MaxPooling_Layer1'),
        Dropout(0.4, name='Dropout1'),
        Flatten(name='Flatten'),
        Dense(25, activation='relu', name='DenseLayer'),
        # Output width follows the one-hot label width.
        Dense(y_train.shape[1], activation='softmax', name='OutputLayer'),
    ],
    name='MinMap_CNN',
)

model.summary()

In [None]:
# Multi-class setup: categorical cross-entropy on one-hot targets, Adam optimizer.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Baseline training run without any validation monitoring.
model.fit(X_train, y_train, epochs=100, batch_size=256)


<tensorflow.python.keras.callbacks.History at 0x1caf301e788>

In [None]:
# Evaluate the trained network on the held-out split; `score` holds the loss
# followed by the compiled metrics.
score = model.evaluate(x=X_test, y=y_test, batch_size=256)

In this case, the result seems too optimistic. This is simply because the ground truth used here is a clustering classification result that was produced earlier in this process without further detailed validation. Our classes rely only on data-driven analysis, which leads to a very optimistic result.

In a real case, this kind of result might be caused by overfitting. Below, the early stopping technique is introduced to reduce overfitting.

In [None]:
# Introduce early stopping and best-model checkpointing, both driven by the
# validation loss.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
earlyStop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0,
    patience=10,
    restore_best_weights=True,
)

# Write the model to disk only when val_loss reaches a new minimum.
checkPoint = ModelCheckpoint(
    filepath='minmap.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)


In [None]:
# Train with early stopping and checkpointing.
# The validation loss that drives both callbacks is computed on a slice held out
# from the training data (validation_split), NOT on the test set. Passing
# (X_test, y_test) as validation_data would tune the stopping epoch and the saved
# checkpoint on the very data later reported as "test" performance, which makes
# that score optimistic.
hist = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=256,
    validation_split=0.2,
    callbacks=[earlyStop, checkPoint],
)

In [None]:
# The model was compiled with a single metric, so evaluate() yields (loss, accuracy).
loss, acc = model.evaluate(x=X_test, y=y_test)
print(f"Accuracy: {acc*100}\nLoss: {loss}")

In [None]:
from tensorflow.keras.models import load_model

# Swap the in-memory network for the checkpoint written by ModelCheckpoint.
model = load_model(filepath="minmap.h5")

In [None]:
# Class index with the highest softmax score for every test spectrum.
pred = model.predict(X_test).argmax(axis=1)

In [None]:
# Quick sanity check: one-hot label shape, then the predicted class indices.
print(y_test.shape, pred, sep="\n")

In [None]:
import seaborn as sns

# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the true
# classes and columns the predicted ones. The arguments were previously passed in
# the opposite order, which transposed the matrix relative to that convention.
y_true = np.argmax(y_test, axis=1)
class_ids = np.unique(np.concatenate([y_true, pred]))
confmat = confusion_matrix(y_true, pred, labels=class_ids)

plt.figure(figsize=(20, 10))
# fmt='d' prints the integer counts in full instead of scientific notation.
ax = sns.heatmap(confmat, annot=True, fmt='d',
                 xticklabels=class_ids, yticklabels=class_ids)
ax.set_xlabel('Predicted class')
ax.set_ylabel('True class')

In [None]:
# classification_report expects (y_true, y_pred); with the arguments swapped the
# reported precision and recall columns are exchanged.
print(classification_report(np.argmax(y_test, axis=1), pred))

In [None]:
# Open the ENVI header/data pair and load the whole image into a NumPy array.
a76_ref = envi.open(
    '76a_101012-114750_refl_ss.hdr',
    '76a_101012-114750_refl_ss.dat',
)
a76_data = np.array(a76_ref.load())
print(a76_data.shape)  # wavelength1043-2486 nm in 230 bands
# Quick-look display built from bands 0, 200 and 220.
imshow(a76_data, bands=(0, 200, 220))

In [None]:
# Flatten the image cube into one spectrum per row. The pixel and band counts are
# taken from the loaded array instead of being hard-coded, so the cell also works
# for scenes with different dimensions.
n_bands = a76_data.shape[-1]
a76_pixels = a76_data.reshape(-1, n_bands)

# NOTE(review): this rescales with the new image's own per-band min/max rather
# than the ranges seen during training -- confirm that is intended.
a76_scaled = minmax_scale(a76_pixels, axis=0)
print(a76_scaled.shape)

# Add the trailing channel axis expected by the CNN.
a76_scaled = a76_scaled.reshape(-1, n_bands, 1)

In [None]:
# Most probable class index for every pixel of the new image.
a76_pred = model.predict(a76_scaled).argmax(axis=1)

In [None]:
# Pixel table: one column per band plus the predicted class as the last column.
a76 = pd.DataFrame(a76_pixels).assign(minpred=a76_pred)
a76.describe()

In [None]:
# Fold the pixel table back into image layout. Rows and columns come from the
# loaded cube instead of hard-coded 255 x 246, and the column count is inferred.
a76npy = a76.to_numpy()
n_rows, n_cols = a76_data.shape[:2]
a76_classified = a76npy.reshape(n_rows, n_cols, -1)

# The predicted class was appended as the last column of the table.
MinMapCNN = a76_classified[:, :, -1]

In [None]:
# plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# plt.get_cmap takes the same (name, lut) arguments and works across versions.
plt.imshow(MinMapCNN, cmap=plt.get_cmap('viridis', 4))
plt.title('CNN mineral map')
plt.colorbar(label='Predicted class')

# XGBoost Model

In [9]:
from xgboost import XGBRegressor
def rmse(actual, pred):
    """Return the root-mean-squared error between `actual` and `pred`."""
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse)


# XGBoost's scikit-learn API needs a 2-D (samples, features) matrix, but by this
# point X_train/X_test carry the trailing channel axis added for the CNN and the
# targets are one-hot encoded. Build flat, integer-labelled copies locally so the
# CNN inputs stay untouched and the regressor predicts one class value per pixel.
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)
y_train_class = np.argmax(y_train, axis=1) if np.ndim(y_train) > 1 else y_train
y_test_class = np.argmax(y_test, axis=1) if np.ndim(y_test) > 1 else y_test

# NOTE: this rebinds `model`; from here on it refers to the XGBoost regressor.
model = XGBRegressor()
model.fit(X_train_flat, y_train_class)

# Train performance
pred_train = model.predict(X_train_flat)
print('RMSE train: ', rmse(y_train_class, pred_train))

# Test performance
pred_test = model.predict(X_test_flat)
print('RMSE test:', rmse(y_test_class, pred_test))

ModuleNotFoundError: No module named 'xgboost'

## Load New data

In [8]:
# Open the ENVI header/data pair and load the whole image into a NumPy array.
data_ref = envi.open(
    '76a_101012-114750_refl_ss.hdr',
    '76a_101012-114750_refl_ss.dat',
)
data = np.array(data_ref.load())
print(data.shape)  # wavelength1043-2486 nm in 230 bands

FileNotFoundError: Unable to locate file "76a_101012-114750_refl_ss.hdr". If the file exists, use its full path or place its directory in the SPECTRAL_DATA environment variable.

In [None]:
# Column names are the band indices left-justified to width 3 ('0  ', '10 ',
# '100', ...). They are generated instead of typed out by hand, and the band
# count is read from the loaded cube rather than hard-coded.
n_bands = data.shape[-1]
head = [f"{band:<3}" for band in range(n_bands)]

# One row per pixel, one column per band.
pixels = data.reshape(-1, n_bands)
imagenew = pd.DataFrame(pixels, columns=head)

In [7]:
imgMin = model.predict(imagenew)
# The regressor returns continuous values; round to the nearest class id.
# A bare astype(int) truncates toward zero, so e.g. 1.9 would be labelled 1.
imagenew['minerals'] = np.rint(imgMin).astype(int)

NameError: name 'model' is not defined

In [None]:
# Fold the pixel table back into image layout. Rows and columns come from the
# loaded cube instead of hard-coded 255 x 246, and the column count is inferred.
imagenpy = imagenew.to_numpy()
n_rows, n_cols = data.shape[:2]
classified = imagenpy.reshape(n_rows, n_cols, -1)

# The 'minerals' prediction was appended as the last column of the table.
MinMapXGBoost = classified[:, :, -1]

In [None]:
# plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# plt.get_cmap takes the same (name, lut) arguments and works across versions.
plt.imshow(MinMapXGBoost, cmap=plt.get_cmap('viridis', 5))
plt.title('XGBoost mineral map')
plt.colorbar(label='Predicted class')