# 1. Import

In [1]:
!pip install sklearn

Defaulting to user installation because normal site-packages is not writeable
Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.2 MB)
[K     |████████████████████████████████| 31.2 MB 3.7 MB/s eta 0:00:01
[?25hCollecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Collecting joblib>=1.0.0
  Downloading joblib-1.1.0-py2.py3-none-any.whl (306 kB)
[K     |████████████████████████████████| 306 kB 73.7 MB/s eta 0:00:01
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25ldone
[?25h  Created wheel for sklearn: filename=sklearn-0.0-py2.py3-none-any.whl size=1302 sha256=d193e2ebb9200b31c0ddfd2e30939ba87f1fe68c8a2bcb673aeb7742869210cf
  Stored in directory: /tmp/pip-ephem-wheel-cache-w3x4aouu/wheels/22/0b/40/fd3f795caaa1fb4c6cb738bc1f56100be1e57da95849bfc897
Successfully built sklearn

In [2]:
import os
from os.path import basename, dirname, join


import glob
import argparse
import datetime
import random
import json
from netCDF4 import Dataset
import numpy as np

import data_generator
from data_generator import HugoDataGenerator, DataGenerator_sod_f, HugoBinaryGenerator

import matplotlib.pyplot as plt
from matplotlib import colors

from sklearn.metrics import (confusion_matrix, 
                            mean_squared_error, 
                            accuracy_score,
                            precision_score,
                            recall_score)
from scipy import stats

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Dense,
                                     Flatten,
                                     Dropout,
                                     BatchNormalization, 
                                     Conv2D, 
                                     MaxPooling2D)
from tensorflow.keras.regularizers import l2

# Directory holding the pre-processed .npz files, and directory given to the
# data generators through the 'idir_json' parameter.
idir = '/Data/preprocessing4hugo/output/'
input_dir_json = '/Data/'

# Ask TensorFlow to grow GPU memory on demand.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

# Restrict the first visible GPU to a single virtual device of 5120 MB.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    first_gpu = gpus[0]
    try:
        memory_cap = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)
        tf.config.experimental.set_virtual_device_configuration(first_gpu, [memory_cap])
    except RuntimeError as err:
        # Report the problem and carry on with the default device configuration.
        print(err)


Matplotlib created a temporary config/cache directory at /tmp/matplotlib-nen3egyc because the default path (/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


ModuleNotFoundError: No module named 'sklearn'

# 2. Model definition

In [None]:
def create_model(nbr_classes=4, ws=50, cs=3, filters=(32, 32, 32),
                 neurons=(16, 16, 64), dropout_rate=0.1, l2_rate=0.001,
                 nbr_channels=2):
    """Create a sequential CNN with convolutional and dense layers.

    Called without arguments it builds exactly the network that used to be
    hard-coded here; every hyper-parameter can now be overridden.

    Parameters
    ----------
    nbr_classes : int
        Number of ice classes (size of the softmax output layer).
    ws : int
        Size of the (square) SAR sub-images.
    cs : int
        Size of the (square) convolutional filters.
    filters : sequence of 3 ints
        Number of filters of the three convolutional layers, in order.
    neurons : sequence of 3 ints
        Number of neurons of the three hidden dense layers, in order.
    dropout_rate : float
        Dropout rate applied before each dense layer.
    l2_rate : float
        L2 regularisation factor applied to the kernel of each dense layer.
    nbr_channels : int
        Number of channels of the input sub-images.

    Returns
    -------
    tensorflow.keras.models.Sequential
        The model, not compiled.

    Raises
    ------
    ValueError
        If `filters` or `neurons` does not contain exactly three values.
    """
    c1, c2, c3 = filters
    n1, n2, n3 = neurons

    model = Sequential()

    # Convolutional part
    model.add(BatchNormalization(input_shape=(ws, ws, nbr_channels)))
    model.add(Conv2D(c1, (cs, cs), activation='relu'))
    model.add(MaxPooling2D((2, 2), 2))
    model.add(Conv2D(c2, (cs, cs), activation='relu'))
    model.add(Conv2D(c3, (cs, cs), activation='relu'))
    model.add(MaxPooling2D((2, 2), 2))
    model.add(BatchNormalization())

    model.add(Flatten())

    # Neural network part (hidden layers), each one preceded by dropout
    for n_units in (n1, n2, n3):
        model.add(Dropout(dropout_rate))
        model.add(Dense(n_units, kernel_regularizer=l2(l2_rate), activation='relu'))
    model.add(Dropout(dropout_rate))

    # Last neural layer (not hidden): one softmax output per class
    model.add(Dense(nbr_classes, kernel_regularizer=l2(l2_rate), activation='softmax'))

    return model

# 3. Load Data

### 3.1 Parameters and file loading

In [None]:
# Fraction of the selected files used for training; the rest is used for validation.
train_ratio = 0.7
# Fixed seed so that the shuffle, and therefore the train/validation split,
# is the same on every run of the notebook.
split_seed = 42
# Maximum number of .npz files kept after shuffling.
max_files = 10000

with open(f'{idir}processed_files.json') as processed_json:
    all_nc = json.load(processed_json)

# Collect the .npz files of every processed entry.
npz_files = []
for nc in all_nc:
    # The first 15 characters of an entry are used as its sub-directory in `idir`.
    name = nc[:15]
    files = sorted(glob.glob(f'{idir}/{name}/*.npz'))
    npz_files += files

if not npz_files:
    # Fail early with a clear message instead of an IndexError further down.
    raise FileNotFoundError(f'No .npz file found under {idir}')

random.Random(split_seed).shuffle(npz_files)
npz_files = npz_files[:max_files]

print('Files number : '+ str (len(npz_files)))
train_files_number = int(len(npz_files) * train_ratio)
train_files = npz_files[:train_files_number]
valid_files = npz_files[train_files_number:]

# Names of the variables read from the .npz files.
input_var_names = ['nersc_sar_primary', 'nersc_sar_secondary']
amsr2_var_names = [ 'btemp_6_9h',
                    'btemp_6_9v',
                    'btemp_7_3h',
                    'btemp_7_3v',
                    'btemp_10_7h',
                    'btemp_10_7v',
                    'btemp_18_7h',
                    'btemp_18_7v',
                    'btemp_23_8h',
                    'btemp_23_8v',
                    'btemp_36_5h',
                    'btemp_36_5v',
                    'btemp_89_0h',
                    'btemp_89_0v'
                  ]

output_var_name = 'ice_type'
# Shape of the first AMSR2 variable in the first selected file.
dims_amsr2 = np.load(npz_files[0])[amsr2_var_names[0]].shape

# Keyword arguments shared by the training and validation generators.
params = {'dims_amsr2':      (*dims_amsr2, len(amsr2_var_names)),
          'idir_json':       input_dir_json,
          'output_var_name': output_var_name,
          'input_var_names': input_var_names,
          'amsr2_var_names': amsr2_var_names,
          'batch_size':      50,
          'shuffle_on_epoch_end': False,
           }

### 3.2 Data generators for the model

In [None]:
# Generator class used for both splits. HugoDataGenerator and
# DataGenerator_sod_f were previously called with the same arguments and can
# be tried here instead.
generator_class = HugoBinaryGenerator

training_generator = generator_class(train_files, **params)
validation_generator = generator_class(valid_files, **params)

# Shapes of the first training batch: input (SAR) first, output second.
sar_shape = training_generator[0][0].shape
output_shape = training_generator[0][1].shape
print(sar_shape, output_shape)

# 4. Create, train and evaluate the model

### 4.1 Create the model

In [None]:
# Build a new, untrained network with the architecture defined in create_model().
model = create_model()

### 4.2 Train the CNN on SAR data

In [None]:
# Callbacks
# Save the model to 'hugo_model_1000' each time the validation loss improves.
mc = tf.keras.callbacks.ModelCheckpoint(filepath='hugo_model_1000',
                                        monitor='val_loss',
                                        verbose=1,
                                        save_best_only=True,
                                        mode='min')

# Stop after 3 epochs without improvement of the validation loss and roll the
# model back to the weights of the best epoch, so that the evaluation cells
# below do not score the weights of the last (worse) epoch.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      restore_best_weights=True)

# Optimizer and compilation
opt = tf.keras.optimizers.Adam()
model.compile(optimizer=opt, loss='categorical_crossentropy')

# Show the architecture, then fit
model.summary()
history = model.fit(training_generator,
                    use_multiprocessing=True,
                    workers=4,
                    validation_data=validation_generator,
                    epochs=20,
                    callbacks=[mc, es])

### 4.3 History of loss

In [None]:
# Training and validation loss for each epoch, drawn on a cleared figure.
plt.clf()
for curve_name in ('loss', 'val_loss'):
    plt.plot(history.history[curve_name], label=curve_name)
plt.xlabel('epochs')
plt.legend()
plt.show()


### 4.4 Confusion matrix

In [None]:
# Minimum value of the highest predicted probability for a sample to be kept.
confidence_threshold = 0.5

# Predicted probabilities and reference outputs, batch after batch.
y_pred = model.predict(validation_generator)
y_val = np.vstack([vg[1] for vg in validation_generator])

# Keep only the samples predicted with enough confidence, and convert both
# the reference and the prediction to class indices.
confident = y_pred.max(axis=1) > confidence_threshold
y_val_index = y_val[confident].argmax(axis=1).tolist()
y_pred_index = y_pred[confident].argmax(axis=1).tolist()

# Pass every class explicitly: the matrix then always has one row and one
# column per class, in order, even when a class is absent after filtering.
class_labels = np.arange(y_pred.shape[1])
cm = confusion_matrix(y_val_index, y_pred_index, labels=class_labels)
print(cm)
plt.clf()
plt.imshow(cm)
# scikit-learn convention: rows are true classes, columns are predicted classes.
plt.xlabel('predicted class')
plt.ylabel('true class')
plt.show()

#### Analysis of the confusion matrix

In [None]:
# Overall accuracy on the retained validation samples.
AC = accuracy_score(y_val_index, y_pred_index)
print ('Accuracy : ' +str(AC))

# Precision, macro-averaged then micro-averaged.
P_macro = precision_score(y_val_index, y_pred_index, average='macro')
P_micro = precision_score(y_val_index, y_pred_index, average='micro')
print('Precision : ' + str(P_macro)+ " " +str(P_micro))

# Recall, macro-averaged then micro-averaged.
R_macro = recall_score(y_val_index, y_pred_index, average='macro')
R_micro = recall_score(y_val_index, y_pred_index, average='micro')
print('Recall : ' + str(R_macro)+ " " +str(R_micro))

# Pearson correlation between true and predicted class indices
# (element 0 of the result is the correlation coefficient).
pearson_correlation = stats.pearsonr(y_val_index, y_pred_index)
print(pearson_correlation[0])

# Root mean squared error: mean_squared_error() returns the MSE, so take its
# square root to get the value the variable name announces.
rmse = np.sqrt(mean_squared_error(y_val_index, y_pred_index))
print(rmse)

In [None]:
# One-vs-rest counts for every class of the confusion matrix.
# scikit-learn convention: cm[i, j] is the number of samples of true class i
# predicted as class j.
for class_ in range(cm.shape[0]):
    print('class : '+ str(class_))

    # True positives: diagonal element of the class.
    TP = cm[class_][class_]
    print ('TP : '+ str(TP))

    # True negatives: everything outside the row and the column of the class.
    M = np.delete(cm, (class_), axis=0)
    M = np.delete(M, (class_), axis=1)
    TN = np.sum(M)
    print ('TN : '+ str(TN))

    # False positives: column of the class (predicted as class_) without the diagonal.
    FP = np.sum(cm[:, class_]) - TP
    print ('FP : '+ str(FP))

    # False negatives: row of the class (truly class_) without the diagonal.
    FN = np.sum(cm[class_, :]) - TP
    print ('FN : '+ str(FN))

In [None]:
# First output column: reference values on the x-axis, predictions on the y-axis.
first_column_true = y_val[:, 0]
first_column_pred = y_pred[:, 0]
plt.plot(first_column_true, first_column_pred, '.')
plt.show()

In [None]:

# Print the whole confusion matrix, then its first row and its first column,
# both without the top-left element.
for cm_view in (cm, cm[0, 1:], cm[1:, 0]):
    print(cm_view)

In [None]:
# Reload a previously saved model from disk. `load_model` is already imported
# from tensorflow.keras.models in the import cell, so no extra import is
# needed here.
modelbis = load_model('/tf/sea_ice_type_cnn_training/asip_v2/hugo_model_test_all/')

In [None]:
# Predictions of the reloaded model on the validation generator.
# NOTE(review): this rebinds `y_pred`, replacing the predictions of `model`
# computed in section 4.4 — cells that read `y_pred` depend on execution order.
y_pred = modelbis.predict(validation_generator)