# **ASIRRA TensorFlow**

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Deyht/AI_astro_ED_AAIF/blob/main/codes/CNN/classification/ASIRRA_TensorFlow.ipynb)

---


### **ASIRRA**

ASIRRA (Animal Species Image Recognition for Restricting Access) is a dataset that was originally used for CAPTCHA and HIP (Human Interactive Proof) challenges.

The dataset comprises 25000 images of variable resolution (averaging around 350x500), evenly split between the two classes "Cat" and "Dog". For this course, we provide a version in which every image has been made square and resized to 128x128, so that the dataset fits more easily into the limited amount of Colab RAM.

#### Downloading and visualizing the data


In [None]:
%%shell

cd /content/

# Fetch the course repository. The clone is skipped when the directory is
# already present (previous run of this cell, or a copy uploaded manually),
# because git clone fails on an existing non-empty destination and that
# failure would abort the cell.
if [ ! -d AI_astro_ED_AAIF ]; then
    git clone https://github.com/Deyht/AI_astro_ED_AAIF/
fi

In [None]:
%%shell

cd /content/AI_astro_ED_AAIF/codes/CNN/classification/ASIRRA

# Run the short Python script given in the here-document below, with the
# ASIRRA directory as working directory (aux_fct is presumably located there).
python3 - <<EOF

#Will download the dataset at the first call
from aux_fct import *

# NOTE(review): the meaning of the 0 argument is defined in aux_fct - confirm there
init_data_gen(0)

print("\nOrdered validation examples")
create_val_batch()

print("Create visualization of the validation dataset")
# NOTE(review): presumably writes the val_mosaic.jpg file displayed in the
# next cell, with 8,4 as the mosaic layout - confirm in aux_fct
visual_val(8,4)

EOF

In [None]:
%cd /content/AI_astro_ED_AAIF/codes/CNN/classification/ASIRRA
from PIL import Image
import matplotlib.pyplot as plt

# Display the validation mosaic image on a single, tick-free axis.
# NOTE(review): val_mosaic.jpg is presumably produced by the previous cell.
im = Image.open("val_mosaic.jpg")
fig, ax = plt.subplots(figsize=(8, 4), dpi=200)
ax.imshow(im)
ax.axis('off')
plt.show()

#### Training a network



In [None]:
%cd /content/AI_astro_ED_AAIF/codes/CNN/classification/ASIRRA

import numpy as np
from threading import Thread
#from aux_fct import *
import gc, os, sys, glob


from tensorflow import keras
from tensorflow.keras import layers
from sklearn import metrics
import tensorflow as tf

# Dataset layout: nb_class classes holding class_count images each.
nb_class = 2
class_count = 12_500

# Number of images per class kept out of the training set for validation.
nb_keep_val = 1024

# image_size_raw is the resolution of the images as stored on disk;
# image_size is the working resolution fed to the network and can be lowered
# to increase computation speed.
image_size_raw = 128
image_size = 128

# Mini-batch size, and the tf.data setting that lets TensorFlow tune the
# parallelism and prefetch depth of the input pipeline on its own.
batch_size = 16
AUTOTUNE = tf.data.AUTOTUNE

In [None]:
# Number of training images available per class once the validation images
# have been set aside.
train_per_class = class_count - nb_keep_val

# The binary file is a flat uint8 dump, viewed here as
# (2 * class_count, height, width, 3): the first class_count images are
# treated as class 0 and the remaining ones as class 1.
raw_data_array = np.fromfile("asirra_bin_128.dat", dtype="uint8").reshape(
    (class_count*2, image_size_raw, image_size_raw, 3))

# For each class, the last nb_keep_val images are held out for validation and
# everything before them is used for training.
train_examples = np.concatenate(
    (raw_data_array[:train_per_class],
     raw_data_array[class_count:-nb_keep_val]),
    axis=0)
test_examples = np.concatenate(
    (raw_data_array[train_per_class:class_count],
     raw_data_array[-nb_keep_val:]),
    axis=0)

# The full array is no longer needed: release it before allocating more.
del raw_data_array
gc.collect()

# One-hot targets: column 0 for the first class block, column 1 for the second.
train_labels = np.zeros((train_examples.shape[0], nb_class))
test_labels = np.zeros((test_examples.shape[0], nb_class))

train_labels[:train_per_class, 0] = 1.0
train_labels[train_per_class:, 1] = 1.0

test_labels[:nb_keep_val, 0] = 1.0
test_labels[nb_keep_val:, 1] = 1.0

# Alternate classes for better shuffle starting point: even positions receive
# the first class block, odd positions the second one.
buf_train_examples = np.empty_like(train_examples)
buf_train_labels = np.empty_like(train_labels)

for parity, block in enumerate((slice(None, train_per_class),
                                slice(train_per_class, None))):
    buf_train_examples[parity::2] = train_examples[block]
    buf_train_labels[parity::2] = train_labels[block]

train_examples, train_labels = buf_train_examples, buf_train_labels

# Wrap the in-memory arrays as (image, label) tf.data datasets.
train_dataset = tf.data.Dataset.from_tensor_slices((train_examples, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_examples, test_labels))

gc.collect()

# Deterministic preprocessing applied to every image: resize to the working
# resolution, then scale the uint8 pixel values by 1/255.
resize_and_rescale = keras.Sequential(layers=[
    layers.Resizing(height=image_size, width=image_size),
    layers.Rescaling(scale=1./255),
])

# Random augmentation, meant for training images only. It expects images that
# were already rescaled (see the value_range given to RandomBrightness).
data_augmentation = keras.Sequential(layers=[
    layers.RandomFlip(mode='horizontal',
                      input_shape=(image_size, image_size, 3)),
    layers.RandomRotation(factor=(-0.1, 0.1), fill_mode='constant'),
    layers.RandomZoom(height_factor=(-0.2, 0.2),
                      width_factor=(-0.2, 0.2),
                      fill_mode='constant'),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2, value_range=(0.0, 1.0)),
])


def prepare(ds, shuffle=False, augment=False):
    """Build the batched, prefetched input pipeline for a dataset.

    Each (image, label) element first goes through ``resize_and_rescale``.
    The elements are then optionally shuffled, grouped into batches of
    ``batch_size``, and optionally passed through ``data_augmentation``.

    Args:
        ds: dataset yielding (image, label) pairs.
        shuffle: when true, shuffle the elements with a 1000-element buffer
            before batching.
        augment: when true, apply the random augmentation layers to every
            batch (the layers are called with ``training=True``).

    Returns:
        The transformed dataset, with prefetching enabled.
    """
    def _normalize(image, label):
        return resize_and_rescale(image), label

    def _augment(images, labels):
        return data_augmentation(images, training=True), labels

    pipeline = ds.map(_normalize, num_parallel_calls=AUTOTUNE)

    if shuffle:
        pipeline = pipeline.shuffle(1000)

    pipeline = pipeline.batch(batch_size)

    # Augmentation runs after batching, so it processes whole batches at once.
    if augment:
        pipeline = pipeline.map(_augment, num_parallel_calls=AUTOTUNE)

    return pipeline.prefetch(buffer_size=AUTOTUNE)

gc.collect()

# Training stream: shuffled and augmented. Validation stream: neither, so its
# element order stays the same as the order of test_labels.
train_dataset, test_dataset = (
    prepare(train_dataset, shuffle=True, augment=True),
    prepare(test_dataset),
)

gc.collect()


In [None]:

# total_iter: epoch count to reach in total.
# load_iter: epoch checkpoint to resume from; 0 builds and trains a new network.
total_iter = 10

load_iter = 0

if load_iter > 0:
    # Resume from the checkpoint saved at the end of epoch `load_iter`.
    # load_model restores the architecture, the weights and the compiled
    # state (loss, metrics, optimizer state) stored in the .keras file, so the
    # model must not be compiled again here: that would reset the optimizer.
    model = keras.models.load_model('%04d.keras' % (load_iter))
else:
    model = keras.Sequential()

    # Each stage ends with a MaxPooling2D layer (default 2x2 pooling),
    # followed by group normalization and a ReLU activation.
    model.add(layers.Conv2D(filters=16, kernel_size=(3, 3), padding='same'))
    model.add(layers.MaxPooling2D())
    model.add(layers.GroupNormalization(groups=8))
    model.add(layers.Activation('relu'))

    model.add(layers.Conv2D(filters=32, kernel_size=(3, 3), padding='same'))
    model.add(layers.MaxPooling2D())
    model.add(layers.GroupNormalization(groups=16))
    model.add(layers.Activation('relu'))

    model.add(layers.Conv2D(filters=64, kernel_size=(3, 3), padding='same'))
    model.add(layers.MaxPooling2D())
    model.add(layers.GroupNormalization(groups=16))
    model.add(layers.Activation('relu'))

    # The last two stages insert a 1x1 convolution with fewer filters between
    # two 3x3 convolutions.
    model.add(layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(layers.Conv2D(filters=64, kernel_size=(1, 1), activation='relu', padding='same'))
    model.add(layers.Conv2D(filters=128, kernel_size=(3, 3), padding='same'))
    model.add(layers.MaxPooling2D())
    model.add(layers.GroupNormalization(groups=32))
    model.add(layers.Activation('relu'))

    model.add(layers.Conv2D(filters=192, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(layers.Conv2D(filters=128, kernel_size=(1, 1), activation='relu', padding='same'))
    model.add(layers.Conv2D(filters=192, kernel_size=(3, 3), padding='same'))
    model.add(layers.MaxPooling2D())
    model.add(layers.GroupNormalization(groups=32))
    model.add(layers.Activation('relu'))

    # Classification head: one 1x1-convolution map per class, averaged over
    # the spatial dimensions and turned into class probabilities by softmax.
    model.add(layers.Conv2D(filters=nb_class, kernel_size=(1, 1), padding='same'))
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Activation('softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


for run_iter in range(load_iter, total_iter):

    # One epoch per call so that a checkpoint and a confusion matrix are
    # produced after every epoch. train_dataset is a tf.data pipeline that
    # already batches and shuffles, so batch_size/shuffle are not passed.
    model.fit(train_dataset, epochs=1, validation_data=test_dataset)

    model.save('%04d.keras' % (run_iter + 1))

    # test_dataset is not shuffled, so predictions line up with test_labels.
    pred = model.predict(test_dataset)

    # Rows are the true classes, columns the predicted classes.
    matrix = metrics.confusion_matrix(test_labels.argmax(axis=1), pred.argmax(axis=1))
    print(matrix)

#print(model.summary())

