# **Classification de l'occupation du sol | Préparation du jeu de données**



In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# project files stored on Drive can be reached with ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# NOTE(review): `os` is not referenced in the cells visible here — confirm it is still needed.
import os
# Move into the project folder on the mounted Drive and list its contents.
%cd '/content/drive/My Drive/University/ProjetML/'
%ls

/content/drive/.shortcut-targets-by-id/1clDDLY0UADr5OF8gCO2UkoIIMu755s4h/ProjetML
[0m[01;34mData[0m/  [01;34mProjet[0m/


In [None]:
!pip install earthpy

In [None]:
import numpy as np
import tifffile
import matplotlib.pyplot as plt
%matplotlib inline
from skimage.exposure import rescale_intensity
import glob
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, confusion_matrix
import joblib
import pandas as pd
from matplotlib.colors import ListedColormap, BoundaryNorm
import random
import earthpy.plot as ep
import pickle

In [None]:
# Switch to the data directory: the relative paths used afterwards
# (Verite_terrain/, Images/, the *.pickle outputs) resolve against it.
%cd '/content/drive/My Drive/University/ProjetML/Data/Donnees_ENT/'

/content/drive/.shortcut-targets-by-id/1clDDLY0UADr5OF8gCO2UkoIIMu755s4h/ProjetML/Data/Donnees_ENT


In [None]:
# Load the ground-truth class raster; the cell then displays its shape and dtype.
gt_class = tifffile.imread('Verite_terrain/DORDOGNE_VT_CLASS.tif')
(gt_class.shape, gt_class.dtype)

((5412, 5592), dtype('uint8'))

In [None]:
# Identifiers raster of the ground-truth samples.
# Legend carried over from the original comment:
#   0 = background, 1 = urban area, 2 = water, 3 = forest, 4 = moorland, 5 = crops
# NOTE(review): this legend looks like it describes the class codes stored in
# DORDOGNE_VT_CLASS.tif rather than the identifiers read here — confirm.
gt_id = tifffile.imread('Verite_terrain/DORDOGNE_VT_ID.tif')
gt_id

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int16)

In [None]:
# Collect the image paths; file names start with the acquisition date
# (YYYYMMDD), so a lexicographic sort orders them chronologically.
lst_img = sorted(glob.glob('Images/*.tif'))
lst_img

['Images/20160322_S2A.tif',
 'Images/20160710_S2A.tif',
 'Images/20160730_S2A.tif',
 'Images/20160928_S2A.tif',
 'Images/20161018_S2A.tif',
 'Images/20161127_S2A.tif',
 'Images/20161217_S2A.tif',
 'Images/20161227_S2A.tif']

### **Lecture des bandes**

In [None]:
# Read the bands of every image and build one time series per band.
# Each file is decoded once (the original decoded it four times, once per
# band, and kept four full copies alive through the slice views).
red_ts = []
green_ts = []
blue_ts = []
nir_ts = []
for img in lst_img:
  bands = tifffile.imread(img)  # indexed below as (row, column, channel)
  red_ts.append(bands[:, :, 0])    # red
  green_ts.append(bands[:, :, 1])  # green
  blue_ts.append(bands[:, :, 2])   # blue
  nir_ts.append(bands[:, :, 3])    # near infrared
# Stack the per-date planes along a third axis: one slice per image, in lst_img order.
red_ts = np.dstack(red_ts)
green_ts = np.dstack(green_ts)
blue_ts = np.dstack(blue_ts)
nir_ts = np.dstack(nir_ts)
print('Red shape: ',red_ts.shape,'Green shape: ',green_ts.shape,'Blue shape: ',blue_ts.shape,'Nir shape: ',nir_ts.shape,)

Red shape:  (5412, 5592, 8) Green shape:  (5412, 5592, 8) Blue shape:  (5412, 5592, 8) Nir shape:  (5412, 5592, 8)


### **Normalisation des séries temporelles par bande**

In [None]:
def _normalize_band(band):
  """Min-max scale a band time series to [0, 1] and return it as float32.

  The minimum and maximum are taken over the whole stack (all pixels, all
  dates), exactly like the original inline expressions.

  Parameters
  ----------
  band : numpy.ndarray
      Raw band stack.

  Returns
  -------
  numpy.ndarray
      New float32 array of the same shape. A constant band (max == min)
      yields all zeros instead of NaN from a 0/0 division.
  """
  lo = band.min()
  hi = band.max()
  # Cast the data itself: the original only cast the scalar denominator, so
  # the float32 output relied on NumPy's type-promotion rules for the input dtype.
  scaled = band.astype(np.float32)
  if hi == lo:
    scaled.fill(0.0)
    return scaled
  # In-place arithmetic keeps a single float32 copy in memory.
  scaled -= np.float32(lo)
  scaled /= np.float32(hi) - np.float32(lo)
  return scaled

# Red
red_ts_norm = _normalize_band(red_ts)
# Green (the raw stack is released afterwards to save memory)
green_ts_norm = _normalize_band(green_ts)
green_ts = None
# Blue (the raw stack is released afterwards to save memory)
blue_ts_norm = _normalize_band(blue_ts)
blue_ts = None
# Near infrared
nir_ts_norm = _normalize_band(nir_ts)
# NOTE(review): red_ts and nir_ts are kept alive, as in the original; release
# them as well if no later cell needs the raw values. Because green_ts and
# blue_ts are set to None, this cell cannot be re-run without re-reading the bands.
nir_ts_norm.min(), nir_ts_norm.max(), nir_ts_norm.shape, nir_ts_norm.dtype

(0.0, 1.0, (5412, 5592, 8), dtype('float32'))

### **Préparation des données, création de jeux de training et validation**

In [None]:
# Locate the training and test samples.
# Both sets are restricted to pixels with a non-zero identifier; training
# pixels additionally have a non-zero class, test pixels have class 0.
in_object = gt_id != 0
idx_train = np.nonzero(in_object & (gt_class != 0))
idx_test = np.nonzero(in_object & (gt_class == 0))
del in_object  # temporary mask only, keep the namespace as before
idx_train

(array([  13,   13,   13, ..., 5402, 5402, 5402]),
 array([4845, 4846, 4847, ..., 3492, 3493, 3494]))

In [None]:
# Gather the training samples: class label and identifier of every training pixel.
train_y_ = gt_class[idx_train]
train_id_ = gt_id[idx_train]
# Summary shown as the cell output: pixel count and number of distinct identifiers.
f'échantillons d\'entrainement: {train_y_.shape[0]} pixels, {len(np.unique(train_id_))} objets'

"échantillons d'entrainement: 605431 pixels, 1859 objets"

In [None]:
# Read the identifiers of the test pixels and report how many pixels and
# how many distinct identifiers they cover.
test_id = gt_id[idx_test]
f'échantillons test: {test_id.shape[0]} pixels, {len(np.unique(test_id))} objets'

'échantillons test: 207485 pixels, 800 objets'

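Store the prepared data (sample indices, ground-truth rasters and normalised band time series) as pickle files for later use.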

In [None]:
# Data sets: sample indices plus the two ground-truth rasters, stored as one list
# in the order [idx_train, idx_test, gt_class, gt_id].
dataS = [idx_train, idx_test, gt_class, gt_id]
# The `with` block closes the file on exit, so no explicit f.close() is needed.
with open("Data_sets.pickle", "wb") as f:
  pickle.dump(dataS, f)

In [None]:
# Normalized red band time series.
# The `with` block closes the file on exit, so no explicit f.close() is needed.
with open("red_bandN.pickle", "wb") as f:
  pickle.dump(red_ts_norm, f)

In [None]:
# Normalized blue band time series.
# The `with` block closes the file on exit, so no explicit f.close() is needed.
with open("blue_bandN.pickle", "wb") as f:
  pickle.dump(blue_ts_norm, f)

In [None]:
# Normalized green band time series.
# The `with` block closes the file on exit, so no explicit f.close() is needed.
with open("green_bandN.pickle", "wb") as f:
  pickle.dump(green_ts_norm, f)

In [None]:
# Normalized near-infrared band time series.
# The `with` block closes the file on exit, so no explicit f.close() is needed.
with open("nir_bandN.pickle", "wb") as f:
  pickle.dump(nir_ts_norm, f)