In [0]:
# Mount Google Drive inside the Colab runtime at /content/drive/ so that the
# project folder can be reached from the following cells.
from google.colab import drive
drive.mount('/content/drive/')

In [0]:
cd /content/drive/My\ Drive/Cassiopé\ Perso

# Dataset Preprocessing

In [0]:
import csv
import os
import numpy as np
import skimage.io

In [0]:
import os, os.path

# Number of regular files stored directly inside ./cassiopee_data/.
# os.listdir() returns bare file names, so each name has to be joined with the
# directory before calling os.path.isfile(); testing the bare name would look
# it up in the current working directory and reject almost every entry.
# NOTE(review): m is used further down as the number of samples — confirm that
# this directory holds exactly one file per sample.
m = len([
    name for name in os.listdir('./cassiopee_data/')
    if os.path.isfile(os.path.join('./cassiopee_data/', name))
])

In [0]:
# Per-channel offsets subtracted from channels 0, 1 and 2 of every image by load_image().
# NOTE(review): the name suggests B, G, R order — confirm it matches the channel
# order of the arrays returned by the image reader.
mean_bgr = [103.334, 107.8797, 107.4072]

In [0]:
def load_image(path):
  """Read an image from disk and subtract the per-channel mean.

  Args:
    path: location of the image file, handed to skimage.io.imread.

  Returns:
    A float32 array in which mean_bgr[i] has been subtracted from channel i
    for i in 0, 1, 2. Any additional channel is left untouched.
  """
  # Work in floating point: subtracting a fractional mean in place on an
  # integer image would truncate the result and wrap negative values around.
  img = skimage.io.imread(path).astype(np.float32)
  # range() instead of xrange(): xrange only exists in Python 2.
  for i in range(3):
    img[:, :, i] -= mean_bgr[i]
  #img = img / 255.0
  return img

On peut essayer de diviser les images par 255, mais je ne crois pas qu'il faille le faire pour VGG.

Il peut aussi être intelligent de soustraire mean_bgr à toutes les images et de les sauvegarder sous forme de .npy, plutôt que de faire l'opération à chaque étape de chargement.

### Data preprocessing

In [0]:
# Convert every image listed in the CSV into a mean-subtracted .npy file
# named data_npy/<index>.npy.
os.makedirs('data_npy', exist_ok=True)  # np.save does not create missing folders
with open('cassiopee_data/images_donwloaded.csv', 'r', newline='') as csvFile:
    reader = csv.reader(csvFile)
    # The first CSV row is skipped so that CSV row n (n >= 1) is stored under
    # index n-1. Previously row 0 was written to '-1.npy'.
    # NOTE(review): this assumes the first row is a header — confirm.
    next(reader, None)
    for sample_id, row in enumerate(reader):
      if sample_id % 10 == 0:
        print(sample_id)  # progress indicator
      np.save('data_npy/' + str(sample_id) + '.npy', load_image(row[0]))
# No explicit close(): the with-statement already closed the file.

### Labels loading

In [0]:
# Read the label stored in the second CSV column of every sample into Y,
# a float array of shape (m, 1).
with open('cassiopee_data/images_donwloaded.csv', 'r', newline='') as csvFile:
    reader = csv.reader(csvFile)
    Y = np.zeros((m, 1))
    # The first CSV row is skipped so that CSV row n (n >= 1) lands in Y[n-1].
    # Previously row 0 was written to Y[-1], i.e. it overwrote the last label.
    # NOTE(review): this assumes the first row is a header — confirm.
    next(reader, None)
    for sample_id, row in enumerate(reader):
      if sample_id % 10 == 0:
        print(sample_id)  # progress indicator
      Y[sample_id] = int(row[1])
# No explicit close(): the with-statement already closed the file.

In [0]:
# Quick look at the first 20 labels.
print(Y[0:20])

### Classes restriction

In [0]:
# Collapse the 0-63 intensity values of column 0 into three classes, in place:
#   value <= 2        -> 0
#   2 < value <= 34   -> 1
#   anything else     -> 2
# np.select keeps the first matching condition, like the if/elif chain it replaces.
Y[:, 0] = np.select([Y[:, 0] <= 2, Y[:, 0] <= 34], [0, 1], default=2)

In [0]:
# Same 20 labels again, to inspect the result of the class restriction.
print(Y[0:20])

# VGG-f

## Download Weights

In [0]:
# Run this cell only if the weights have not been downloaded yet.
!wget http://www.vlfeat.org/matconvnet/models/imagenet-vgg-f.mat

## Model + Training

### Read .mat file and extract the weights of the conv layers

In [0]:
import scipy.io

# Load the MatConvNet file and keep, for every layer whose name starts with
# 'conv', its kernel and its bias (flattened to 1-D) in `dic`.
mat = scipy.io.loadmat('imagenet-vgg-f.mat')
dic = {}
print('Model Summary:')
for i in range(21):
  layer = mat['layers'][0][i][0][0]
  name = layer[0][0]
  print(name)
  if name[:4] == 'conv':
    weights = layer[2][0]
    dic[name + '_weight'] = weights[0]
    # The bias is reshaped to a flat vector of length shape[0].
    dic[name + '_biais'] = np.resize(weights[1], (weights[1].shape[0],))
# Drop the references to the loaded file; only `dic` is needed afterwards.
del layer
del mat
del weights
  

In [0]:
# List the '<layer>_weight' / '<layer>_biais' entries that were extracted.
print(dic.keys())

### Model definition

In [0]:
import tensorflow as tf
import numpy as np

In [0]:
# Sequential network for 400x400x3 inputs ending in a 3-way softmax.
# The layer order matters: positions 0, 3, 6, 7 and 8 in model.layers are the
# convolutions that later receive the pretrained conv1..conv5 weights.
model = tf.keras.models.Sequential()

# Convolution 1 + normalisation + pooling
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(11, 11), strides=4,
                                 activation='relu', input_shape=(400, 400, 3)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2))

# Convolution 2 + normalisation + pooling
model.add(tf.keras.layers.Conv2D(filters=256, kernel_size=(5, 5),
                                 activation='relu', padding='same'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2))

# Convolutions 3 to 5 + pooling
for _ in range(3):
    model.add(tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3),
                                     activation='relu', padding='same'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2))

# Convolutional head, each convolution followed by dropout
model.add(tf.keras.layers.Conv2D(filters=4096, kernel_size=(6, 6),
                                 activation='relu', strides=6))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Conv2D(filters=4096, kernel_size=(1, 1), activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Conv2D(filters=3, kernel_size=(1, 1), activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))

# Spatial averaging, flattening and final classifier
model.add(tf.keras.layers.AveragePooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(3, activation='softmax'))

In [0]:
# Copy the kernels and biases extracted from the .mat file into the first five
# convolution layers. Keys are positions in model.layers, values are the layer
# names used as prefixes in `dic`.
pretrained_layers = {0: 'conv1', 3: 'conv2', 6: 'conv3', 7: 'conv4', 8: 'conv5'}
for layer_index, layer_name in pretrained_layers.items():
  model.layers[layer_index].set_weights(
      [dic[layer_name + '_weight'], dic[layer_name + '_biais']])
# `dic` is deliberately kept: the sanity-check cell that follows still reads
# dic['conv1_weight'], and deleting it here made that cell raise NameError.

In [0]:
# Sanity check: the kernel held by the first layer must be exactly the
# 'conv1_weight' array stored in `dic`. Requires `dic` to still be defined
# when this cell runs.
assert(np.array_equal(model.layers[0].get_weights()[0],dic['conv1_weight']))

In [0]:
# Print the layer-by-layer summary of the network defined above.
model.summary()

### Data Generator

#### Creation of our DataGenerator

In [0]:
class DataGenerator(tf.keras.utils.Sequence):
    """Batch generator that feeds Keras with samples stored as .npy files.

    Each sample ID `k` is read from `<data_dir><k>.npy` and its class is taken
    from `labels[k]`. Only full batches are produced; the samples left over
    after the last full batch are not used during that epoch.

    Args:
        list_IDs: sequence of sample IDs handled by this generator.
        labels: container indexed by sample ID that yields the class of the
            sample (a scalar or a one-element array).
        batch_size: number of samples per batch.
        dim: shape of one sample.
        n_classes: number of classes used for the one-hot encoding.
        shuffle: when True (default, previous behaviour) the sample order is
            reshuffled at construction and after every epoch.
        data_dir: folder containing the .npy files.
    """

    def __init__(self, list_IDs, labels, batch_size=32, dim=(400,400,3), n_classes=3,
                 shuffle=True, data_dir='./data_npy/'):
        """Store the configuration and draw the first sample order."""
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.data_dir = data_dir
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number `index` as a tuple (X, one-hot y)."""
        # Positions (in the current order) covered by this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Matching sample IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Rebuild the sample order, reshuffling it when `shuffle` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given samples; returns X of shape (n_samples, *dim) and one-hot labels."""
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim))
        y = np.empty((n_samples), dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # str(ID): IDs are usually integers and cannot be concatenated to a
            # string directly.
            X[i,] = np.load(os.path.join(self.data_dir, str(ID) + '.npy'))

            # ravel()[0] accepts both scalar labels and one-element rows such
            # as those of an (m, 1) label array.
            y[i] = int(np.ravel(self.labels[ID])[0])

        # `keras` itself is never imported in this notebook; go through tf.keras.
        return X, tf.keras.utils.to_categorical(y, num_classes=self.n_classes)

#### Split the dataset into train set and dev set

In [0]:
# Hold out m_test randomly chosen samples for validation; the rest is used
# for training.
m_test = 1000
params = {'dim': (400,400,3), 'batch_size': 32, 'n_classes': 3}
# np.random.permutation returns a new shuffled array of 0..m-1.
# (np.random.shuffle works in place and returns None, which made list_IDs None.)
list_IDs = np.random.permutation(m)
list_IDs_train = list_IDs[m_test:]
list_IDs_test = list_IDs[0:m_test]
del list_IDs  # was misspelled `list_IDS`, which raised NameError

In [0]:
# Both generators read their classes from Y, the label array built earlier in
# the notebook (no variable called `labels` is ever defined).
training_generator = DataGenerator(list_IDs_train, Y, **params)
validation_generator = DataGenerator(list_IDs_test, Y, **params)

### Callbacks

In [0]:
"""
class myCallback(tf.keras.callbacks.Callback):
  def on_batch_end:
    i+=1
    (X, Y) = load_random_mini_batch(m,i, permutation)
    
  def on_epoch_end(self,epochs,logs={}):
    permutation = list(np.random.permutation(m))    
    i=0
    (X, Y) = load_random_mini_batch(m,i, permutation)
"""

In [0]:
# run for every training
from time import time
tensorboard=tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir='logs/{}'.format(time()), histogram_freq=1)

### Model compilation and training

In [0]:
# Adam with a learning rate of 1e-6. The rate is passed positionally because
# the keyword changed name between releases (`lr` vs `learning_rate`).
adam=tf.keras.optimizers.Adam(10**-6)
# The data generator yields one-hot targets (to_categorical), which requires
# 'categorical_crossentropy'; the sparse variant expects integer class ids.
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['acc'])
# maybe we can try sgd optimizer with momentum=0.9, weight decay=5*10**-4

In [0]:
# Train for 10 epochs on batches from training_generator, validating on
# validation_generator and logging through the TensorBoard callback.
# NOTE(review): fit_generator is deprecated in recent TensorFlow releases in
# favour of Model.fit — confirm against the installed version.
model.fit_generator(generator=training_generator, validation_data=validation_generator, epochs=10, callbacks=[tensorboard])
# Variant using 6 worker processes, kept for reference:
#model.fit_generator(generator=training_generator, validation_data=validation_generator, epochs=10, use_multiprocessing=True, workers=6, callbacks=[tensorboard])

**1. Run in your terminal: tensorboard --logdir=logs/**


**2. Open the link in your browser**

### Save and load weights

In [0]:
import h5py

In [0]:
# Write the current weights of the model to model.h5.
model.save_weights("model.h5")

In [0]:
# Restore the weights from model.h5; the model must already be defined.
model.load_weights("model.h5")

### Evaluation

In [0]:
# Evaluate the model on the batches produced by validation_generator.
# NOTE(review): evaluate_generator is deprecated in recent TensorFlow releases
# in favour of Model.evaluate — confirm against the installed version.
model.evaluate_generator(validation_generator)

### Model cleaning

In [0]:
# Reset the Keras backend state and drop the model reference before another
# model is built below.
tf.keras.backend.clear_session()
del model

# VGG 16

In [0]:
from keras.applications.vgg16 import VGG16

In [0]:
# ImageNet-pretrained VGG16 without its fully connected top.
# It is taken from tf.keras so that it is compatible with the tf.keras layers
# stacked on top of it further down; objects from the standalone `keras`
# package and from `tf.keras` should not be mixed in one model.
model = tf.keras.applications.VGG16(weights='imagenet',include_top=False)


In [0]:
# Print the layer-by-layer summary of the VGG16 base.
model.summary()

In [0]:
# Stack the classification head on top of the VGG16 base.
# VGG16(...) returns a functional Model, which has no .add() method, so the
# base is wrapped inside a new Sequential model instead.
vgg16_base = model
model = tf.keras.models.Sequential([
    vgg16_base,
    tf.keras.layers.Conv2D(4096, (7,7), activation='relu'),
    tf.keras.layers.Conv2D(4096, (1,1), activation='relu'),
    tf.keras.layers.Conv2D(3, (1,1), activation='relu'),
    # Collapse the spatial dimensions so that the final Dense layer outputs one
    # 3-way prediction per image rather than one per spatial position.
    # NOTE(review): pooling choice is an assumption — confirm the intended head.
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(3, activation='softmax'),
])

# Bash commands

In [0]:
ls

In [0]:
!ls cassiopee_data/

In [0]:
ls -l cassiopee_data/image-nightlight_dataset | wc -l

In [0]:
!find cassiopee_data/image-nightlight_dataset/3674.0.png