In [1]:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
import os
import pandas as pd

In [None]:
'''
this code traines a neural network model with reflectance data from MISR xxx level.
We train a classifier using our developed training dataset and we write out the model in the end.

inputs: 9 TOA reflectance red bands

output: 2 classes/cloudy and clear
'''

In [2]:
home = '/Users/ehsanmos/MLP_dataset/cloudmask_lab/training_dataset'


# # July-2016
# model_name = 'July'
# training_ds = 'trainingDS_cloudmask_july_2016_k_zero_9cams_HLconfClasses_forML.csv'


# April-2016
# model_name = 'April'
# training_ds = 'trainingDS_cloudmask_april_2016_k_zero_9cams_HLconfClasses_forML.csv'


# both April & July-2016
model_name = 'AprilJuly'
training_ds = 'trainingDS_cloudmask_apriljuly_2016_k_zero_9cams_HLconfClasses_forML.csv'


training_ds_fp = os.path.join(home, training_ds)


In [3]:
'''open with pandas as dataframe'''

in_df = pd.read_csv(training_ds_fp)

# in_df

In [4]:
'''shuffle dataset that includes features/columns and labels'''

shuffled_df = in_df.sample(frac=1, random_state=0)  # frac=1 means to sample all rows

# shuffled_df

In [5]:
'''Split the data that includes features and labels into training and validation sets''' 

from sklearn.model_selection import train_test_split

# X = shuffled_df.iloc[:, :12] # select 9 columns/features
# y = shuffled_df.iloc[:, 12:] # select all the columns starting from 12 (last column==labels)

'''prefer this methid, because more flexible and accure based on column labels'''

feature_columns = ['anr','aa','af','ba','bf','ca','cf','da','df']
labels = ['vis_confirmed']

X = shuffled_df.loc[:, feature_columns] # select 9 columns/features
Y = shuffled_df.loc[:, labels] # select all the columns starting from 12 (last column==labels)

In [6]:
X

Unnamed: 0,anr,aa,af,ba,bf,ca,cf,da,df
3977,0.390521,0.462713,0.485588,0.472219,0.709889,0.527329,0.959590,0.605760,1.212560
6399,0.494389,0.589110,0.571340,0.616242,0.728257,0.640993,0.990524,0.650988,1.247239
3916,0.498073,0.606227,0.608000,0.632823,0.782643,0.678478,0.914291,0.702562,1.169754
8841,0.405360,0.497603,0.442284,0.507942,0.552520,0.532916,0.697531,0.558428,0.855835
8938,0.520943,0.520800,0.620549,0.530476,0.712756,0.524358,0.836980,0.545275,0.929614
...,...,...,...,...,...,...,...,...,...
9225,0.393576,0.415366,0.516143,0.629518,0.988367,0.648584,1.387050,0.724507,2.207909
4859,0.391411,0.471472,0.507714,0.639502,0.889570,0.699795,1.546863,0.788093,2.255883
3264,0.477886,0.514413,0.604207,0.599641,0.849238,0.738137,1.331690,0.935988,2.121572
9845,0.396013,0.403091,0.488403,0.507776,0.791281,0.528638,1.368722,0.665734,2.247922


In [7]:
Y

Unnamed: 0,vis_confirmed
3977,11
6399,11
3916,11
8841,11
8938,11
...,...
9225,11
4859,11
3264,44
9845,11


### note: class 0: cloudy (11) & class 1: clear (44)

In [8]:
'''
map 11 and 44 to 0 and 1 and then apply the function.
why we do this?
'''

# y_train[y_train['cloud']==11]=0 # map cloudy pixel to 0
# y_train[y_train['cloud']==44]=1 # map clear pixel to 1
# y_train

Y[Y[labels]==11]=1 # mapped cloudy pixel to 1
Y[Y[labels]==44]=0 # mapped clear pixel to 0
Y

Unnamed: 0,vis_confirmed
3977,1
6399,1
3916,1
8841,1
8938,1
...,...
9225,1
4859,1
3264,0
9845,1


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)


In [10]:
print(X_train.shape)
print(y_train.shape)

(8514, 9)
(8514, 1)


In [11]:
X_train.head(10)

Unnamed: 0,anr,aa,af,ba,bf,ca,cf,da,df
9689,0.159008,0.172918,0.156417,0.209056,0.229784,0.239876,0.302197,0.258286,0.40393
8465,0.355275,0.389076,0.457045,0.543751,0.736268,0.648092,1.377012,0.746555,2.234522
4550,0.449718,0.482877,0.480114,0.475416,0.508299,0.489509,0.591611,0.512029,0.651297
7512,0.498705,0.497983,0.529603,0.505924,0.569453,0.487587,0.623309,0.512999,0.649442
8006,0.490853,0.506138,0.546225,0.489843,0.66692,0.500947,0.805927,0.476721,0.883218
9000,0.296107,0.396712,0.386108,0.375503,0.549388,0.399703,0.683438,0.441169,0.849845
2418,0.653224,0.720937,0.840429,0.860344,1.374161,1.103312,2.648745,1.421958,3.79587
6419,0.439284,0.571355,0.516524,0.596625,0.683877,0.646688,0.946271,0.67164,1.195315
9065,0.297739,0.331952,0.36956,0.380693,0.451156,0.392369,0.579593,0.372683,0.764237
5832,0.133646,0.171618,0.267697,0.195131,0.355398,0.25486,0.359047,0.310129,0.44945


In [12]:
y_train.head(10)

Unnamed: 0,vis_confirmed
9689,0
8465,1
4550,0
7512,0
8006,1
9000,1
2418,1
6419,1
9065,1
5832,1


In [13]:
print('samples in training dataset: (%s, %s)' %(X_train.shape))
print('samples in test dataset: (%s, %s)' %(X_test.shape))

samples in training dataset: (8514, 9)
samples in test dataset: (2129, 9)


In [14]:
print(y_test.shape)

(2129, 1)


In [15]:
# y_train_arr = y_train.values
# y_train_arr[:11]

# we check the first 10 sample classes
y_test.head(10)

Unnamed: 0,vis_confirmed
8361,1
8441,1
8866,1
2788,0
9543,1
7239,1
9079,1
8133,0
9379,0
8753,1


In [16]:
'''
why should we use one-hot encoding to convert labels to binary arrays?

Convert class labels to binary matrix using one-hot encoding.
the length (or shape?) of the vector? or number of columns? is equal to the number of classes 
and each position in the vector?/columns? corresponds to a unique class.
so:
in binary array lables, size of feature space == columns shows number of classes, and 1 shows the presence of each class.
''' 

from keras.utils import to_categorical

num_classes = 2
y_train_1_hot = to_categorical(y_train, num_classes)
# y_train_1_hot

y_test_1_hot = to_categorical(y_test, num_classes)

# we check the first 10 sample classes
y_test_1_hot[:10,:]


array([[0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.]], dtype=float32)

In [17]:
y_test_list = y_test.values.tolist()
# y_test_list

## let's make the model

In [18]:
# define the model
model = Sequential()
model.add(Dense(64, input_dim=9, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(2, activation='sigmoid'))
# model.add(Dense(2, activation='linear'))


# compile the model
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.compile(loss='MSE', optimizer='adam', metrics=['accuracy']) # loss=MSE when target values are real values


2023-04-05 19:57:32.488513: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## train the model on the input data and labels

In [19]:
# train the model on the input data and labels
# history = model.fit(X_train, y_train_one_hot, validation_data=(X_test, y_test), epochs=10, batch_size=32)
history = model.fit(X_train, y_train_1_hot, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## evaluate the model on the test data

The test set is used to estimate the generalization performance of the model. The accuracy, precision, recall, and F1-score are commonly used metrics to evaluate the performance of a binary classification model.

In [20]:
# evaluate the model on the test? data and labels\
val_loss, val_accuracy = model.evaluate(X_test, y_test_1_hot, batch_size=32, verbose=0)
print("Loss: {:.4f}, Accuracy: {:.2f}%".format(val_loss, val_accuracy * 100))

# print('Validation loss:', val_loss)
# print('Validation accuracy:', val_accuracy)

Loss: 0.2969, Accuracy: 87.88%


In [21]:
'''performance metrics? 
such as tables, confusion matrix, f-score,  ...'''
from sklearn.metrics import confusion_matrix

y_pred_prob = model.predict(X_test) #  is a matrix of shape (n_samples, n_classes) containing the predicted probabilities for each class

# Convert predicted probabilities to predicted labels
y_pred = y_pred_prob.argmax(axis=-1)




In [22]:
print('y-predict: %s samples' %len(y_pred))

print('y-true: %s samples' %len(y_test_list))


y-predict: 2129 samples
y-true: 2129 samples


In [23]:
# Generate confusion matrix

conf_matrix = confusion_matrix(y_test_list, y_pred) # both should be an array-like or list of shape (n_samples,)
conf_matrix

array([[ 834,  194],
       [  64, 1037]])

In [24]:
# y_test_1_hot 
# type(y_pred)

y_test_arr = y_test.values
# y_test_arr

In [25]:
# Evaluate the performance of the model on the test data
from sklearn.metrics import classification_report

report = classification_report(y_test_arr, y_pred) # both should be an array-like or list of shape (n_samples,)
print(report)


              precision    recall  f1-score   support

           0       0.93      0.81      0.87      1028
           1       0.84      0.94      0.89      1101

    accuracy                           0.88      2129
   macro avg       0.89      0.88      0.88      2129
weighted avg       0.88      0.88      0.88      2129



In [26]:
'''# save model and architecture to single file'''

saved_name = 'model_'+model_name+'_loss'+str(round(val_loss, 2))+'_acc'+str(round(val_accuracy, 2))+'.h5'
# saved_model_fp = os.path.join(home, saved_name)
print(saved_name)
model.save(saved_name)

model_AprilJuly_loss0.3_acc0.88.h5


questions:

Q-1: how many samples are in group 0 and 1?

Q-2: what is group 0 and 1? 

Q-3: why does number of samples in T0 and T1 change in each model run?

Q4- which class we want to identify accurately? that we have greater interest in? The positive class is the class that is of interest or importance in the problem being solved. we assign it to class 1.


here class 1 is clear pixels.