# Projet computer vision utilisant l'apprentissage contrastif

Ce projet s'appuie sur le cours, ainsi que sur un article et le dépôt Git associé.
La partie 2.1 de l'article, en particulier, en donne une bonne vue d'ensemble.

In [24]:
import os
import PIL
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import optimizers
from tensorflow.keras import backend as K

from contrastive_loss import contrastive_loss


In [2]:
# Fetch the project repository (shell escape) and keep its local root in `path`,
# which the data-loading code uses as a prefix for every file it opens.
!git clone https://github.com/axelcarlier/projsemisup.git
path = "./projsemisup/"

Cloning into 'projsemisup'...
remote: Enumerating objects: 10798, done.[K
remote: Counting objects: 100% (10798/10798), done.[K
remote: Compressing objects: 100% (10798/10798), done.[K
remote: Total 10798 (delta 0), reused 10798 (delta 0), pack-reused 0
Receiving objects: 100% (10798/10798), 328.98 MiB | 2.46 MiB/s, done.
Updating files: 100% (11563/11563), done.


In [3]:
import csv
# Build the list of class names from the first column of selected_classes.csv.
# csv.reader yields an empty list for a blank line, so empty rows are skipped
# instead of raising IndexError on row[0].
with open(path + 'selected_classes.csv', newline='') as csvfile:
  filereader = csv.reader(csvfile, delimiter=',', quotechar='"')
  CLASSES = [row[0] for row in filereader if row]

print(CLASSES)

['red fox', 'gray wolf', 'snail', 'lizard', 'spider', 'turtle', 'hippopotamus', 'mosquito', 'crocodile', 'oyster', 'ant', 'american black bear', 'grizzly bear', 'chicken', 'beluga whale', 'humpback whale', 'tiger shark', 'dolphin', 'eagle', 'deer', 'parrot', 'falcon', 'snake', 'king penguin', 'giant panda', 'giraffe', 'donkey', 'sheep', 'salamander', 'wombat', 'bee', 'bat', 'rabbit', 'rooster', 'guinea pig', 'pig', 'goats', 'koala', 'gorilla', 'lion', 'tiger', 'leopard', 'jaguar', 'lynx', 'sea lion', 'great white shark', 'dog', 'elephant', 'cat', 'owl', 'squirrel', 'duck', 'frog', 'ibis', 'dairy cow', 'bull', 'horse', 'caterpillar', 'butterfly', 'chimpanzee', 'orangutan', 'kangaroo', 'camel', 'mouse', 'turkey', 'rhinoceros', 'baboon', 'moose', 'komodo dragon', 'okapi', 'antelope', 'ostrich', 'bison', 'water buffalo', 'beaver', 'hyena', 'swan', 'flamingo', 'hedgehog', 'raccoon', 'heron', 'lama', 'sea otter', 'macaque', 'peafowl', 'pelican', 'warthog', 'polecat', 'killer whale', 'wallaby

In [4]:
# Side length, in pixels, of the square images produced by load_semisup_data.
IMAGE_SIZE = 64

In [5]:
def _load_rgb_image(img_path, image_size):
  """Read one image file and return it as an (image_size, image_size, 3) array."""
  # The context manager closes the underlying file once the pixels are copied.
  with Image.open(img_path) as img:
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    resized = img.resize((image_size, image_size), Image.LANCZOS)
    # Force three channels so grayscale / RGBA / palette files fit the array.
    return np.asarray(resized.convert('RGB'))


def _load_labelled_split(split_dir, classes, image_size):
  """Load a directory laid out as <split_dir>/<class name>/<image files>."""
  # First pass: list every (file, label) pair so the arrays can be sized exactly.
  samples = []
  for class_name in os.listdir(split_dir):
    class_label = classes.index(class_name)
    class_dir = os.path.join(split_dir, class_name)
    for img_name in os.listdir(class_dir):
      samples.append((os.path.join(class_dir, img_name), class_label))

  x = np.zeros((len(samples), image_size, image_size, 3))
  y = np.zeros((len(samples), 1))
  # Second pass: decode the images into the preallocated arrays.
  for i, (img_path, class_label) in enumerate(samples):
    x[i] = _load_rgb_image(img_path, image_size)
    y[i] = class_label
  return x, y


def load_semisup_data(path, classes, image_size=64):
  """Load the labelled, unlabelled and test image sets found under `path`.

  Expected layout: `Lab/<class>/<image>`, `Test/<class>/<image>` and
  `Unlab/<image>`. Array sizes follow the number of files actually present.

  Args:
    path: root directory of the dataset (with or without a trailing separator).
    classes: list of class names; a label is the index of the class
      sub-directory name in this list.
    image_size: side length in pixels of the square output images.

  Returns:
    Tuple `(x_lab, y_lab, x_unlab, x_test, y_test)` of float arrays. The `x_*`
    arrays have shape `(n, image_size, image_size, 3)` and the `y_*` arrays
    have shape `(n, 1)`.

  Raises:
    ValueError: if a class sub-directory name is not in `classes`.
  """
  x_lab, y_lab = _load_labelled_split(os.path.join(path, 'Lab'), classes, image_size)
  x_test, y_test = _load_labelled_split(os.path.join(path, 'Test'), classes, image_size)

  # Unlabelled images sit directly in Unlab/, without class sub-directories.
  unlab_dir = os.path.join(path, 'Unlab')
  unlab_names = os.listdir(unlab_dir)
  x_unlab = np.zeros((len(unlab_names), image_size, image_size, 3))
  for i, img_name in enumerate(unlab_names):
    x_unlab[i] = _load_rgb_image(os.path.join(unlab_dir, img_name), image_size)

  return x_lab, y_lab, x_unlab, x_test, y_test


x_lab, y_lab, x_unlab, x_test, y_test = load_semisup_data(path, CLASSES, image_size=IMAGE_SIZE)

In [6]:
# Sanity check: print the array shapes of the labelled, test and unlabelled sets.
print(x_lab.shape, y_lab.shape)
print(x_test.shape, y_test.shape)
print(x_unlab.shape)

(475, 64, 64, 3) (475, 1)
(2850, 64, 64, 3) (2850, 1)
(8237, 64, 64, 3)


In [6]:

def plot_training_analysis(hist=None):
  """Plot training vs. validation accuracy and loss, one figure each.

  Args:
    hist: object whose `history` attribute is a dict holding the per-epoch
      series 'sparse_categorical_accuracy', 'val_sparse_categorical_accuracy',
      'loss' and 'val_loss'. When omitted, the global `history` variable is
      used, which keeps existing zero-argument calls working.
  """
  if hist is None:
    # Backward-compatible fallback on the notebook-level training result.
    hist = history

  records = hist.history
  acc = records['sparse_categorical_accuracy']
  val_acc = records['val_sparse_categorical_accuracy']
  loss = records['loss']
  val_loss = records['val_loss']

  epochs = range(len(acc))

  plt.plot(epochs, acc, 'b', linestyle="--",label='Training acc')
  plt.plot(epochs, val_acc, 'g', label='Validation acc')
  plt.title('Training and validation accuracy')
  plt.legend()

  # Start a second figure so the loss curves do not overlay the accuracy ones.
  plt.figure()

  plt.plot(epochs, loss, 'b', linestyle="--",label='Training loss')
  plt.plot(epochs, val_loss,'g', label='Validation loss')
  plt.title('Training and validation loss')
  plt.legend()

  plt.show()

La fonction de perte contrastive (contrastive loss) se trouve sur le dépôt GitHub, dans le fichier `objective.py`.

In [17]:
# Convolutional encoder: four stages of 3x3 'same' ReLU convolution followed by
# 2x2 max-pooling, with 64, 128, 256 and 256 filters respectively.
inputs = Input(shape=x_unlab.shape[1:], name='encoder_input')

x = inputs
for n_filters in (64, 128, 256):
  x = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
  x = MaxPooling2D(pool_size=(2, 2))(x)
# The last stage is written out so that `h` explicitly names the encoder output.
x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
h = MaxPooling2D(pool_size=(2, 2))(x)

encoder = Model(inputs=inputs, outputs=h, name='encoder')
encoder.summary()

Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   [(None, 64, 64, 3)]       0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 64, 64, 64)        1792      
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 32, 32, 128)       73856     
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 16, 16, 128)       0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 16, 16, 256)       295168    
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 8, 8, 256)         0   

In [21]:
# Projection head: global average pooling of the encoder feature map, then a
# 512-unit ReLU layer and a 1024-unit linear output layer.
# The input shape is read from the encoder so the two models stay compatible if
# the image size or the encoder changes.
latent_inputs = Input(shape=encoder.output_shape[1:], name='projecteur_input')
latent = GlobalAveragePooling2D()(latent_inputs)
latent = Dense(512, activation="relu")(latent)
z = Dense(1024, activation="linear")(latent)

projecteur = Model(latent_inputs, z, name='projecteur')
projecteur.summary()

Model: "projecteur"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
projecteur_input (InputLayer [(None, 4, 4, 256)]       0         
_________________________________________________________________
global_average_pooling2d_4 ( (None, 256)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 512)               131584    
_________________________________________________________________
dense_9 (Dense)              (None, 1024)              525312    
Total params: 656,896
Trainable params: 656,896
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Chain the encoder and the projection head into one model that maps an input
# image to the projection head's output vector.
outputs = projecteur(encoder(inputs))
model = Model(inputs, outputs, name='SimCLR')
model.summary()

Model: "SimCLR"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   [(None, 64, 64, 3)]       0         
_________________________________________________________________
encoder (Functional)         (None, 4, 4, 256)         960896    
_________________________________________________________________
projecteur (Functional)      (None, 1024)              656896    
Total params: 1,617,792
Trainable params: 1,617,792
Non-trainable params: 0
_________________________________________________________________


In [None]:
# NOTE(review): Keras documents `add_loss` as taking loss tensors (or
# zero-argument callables), while `contrastive_loss` is passed here as a bare
# function. Its signature is not visible from this notebook - confirm how it
# must be called before training `model`.
model.add_loss(contrastive_loss)

# Supervised training on the labelled set. The encoder alone outputs a 4-D
# feature map, which cannot be scored with sparse categorical cross-entropy
# against integer class labels, so a pooling + softmax head with one unit per
# class is stacked on top of it.
classifier = Sequential([
    encoder,
    GlobalAveragePooling2D(),
    Dense(len(CLASSES), activation='softmax'),
], name='classifier')

classifier.compile(loss='sparse_categorical_crossentropy',
                   optimizer=optimizers.Adam(learning_rate=3e-4),
                   metrics=['sparse_categorical_accuracy'])
history = classifier.fit(x_lab, y_lab, validation_data=(x_test, y_test), epochs=30, batch_size=10)

In [None]:
# Plot the accuracy and loss curves stored in the global `history`.
plot_training_analysis()