## CT5135 Research Topics in AI

## Assignment 2

* Student ID(s): 22229358, 22230186, 20230220
* Student name(s): KOSTADIN GEORGIEV, YAMINI GIRKAR, SHUBHAM MANGLAM

### Imports

In [72]:
import numpy as np
import pandas as pd
import os
from PIL import Image
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras import layers, activations, models

### Transform and Load Data

In [2]:
def walk_directory(dir, callback, label=None):
    for dirname, _, filenames in os.walk(dir):
        for filename in filenames:
            if filename.endswith(".jpg"):
                id = filename[:-4]
                pathname = os.path.join(dirname, filename)
                im = Image.open(pathname)
                imnp = np.array(im, dtype=float)

                if len(imnp.shape) != 3:
                    print("This is 1 channel, so we omit it",
                          imnp.shape, filename)
                    continue

                callback(id, imnp, label)

In [3]:
df = pd.DataFrame(columns=['height', 'width', 'channels', 'label'])

def get_img_shape(id, imnp, label):
    height, width, channel = imnp.shape
    df.loc[id] = [height, width, channel, label]

walk_directory('dataset/alpaca', get_img_shape, 'alpaca')
walk_directory('dataset/not_alpaca', get_img_shape, 'not_alpaca')

df

This is 1 channel, so we omit it (683, 1024) 4b9ef3ce2685ee32.jpg
This is 1 channel, so we omit it (914, 1024) 5a71db307230880e.jpg


Unnamed: 0,height,width,channels,label
01ad3ff1d94eb557,670,1024,3,alpaca
0346463867a297f4,768,1024,3,alpaca
038fae9e70c4c3f1,768,1024,3,alpaca
053608552d63f724,768,1024,3,alpaca
053dab62fbb47736,682,1024,3,alpaca
...,...,...,...,...
eb4d2e0c0252fdc2,682,1024,3,not_alpaca
eb4f5968c7866a3e,699,1024,3,not_alpaca
f4495f8511553631,680,1024,3,not_alpaca
f8d3c3d8be68c4fd,768,1024,3,not_alpaca


In [4]:
IMG_HEIGHT = df['height'].min()
IMG_WIDTH = df['width'].min()

print('Images will be resized to', IMG_HEIGHT, 'x', IMG_WIDTH)

Images will be resized to 481 x 583


In [5]:
def resize_images(label):
    source_dir = 'dataset/' + label
    target_dir = 'dataset_resized/'+ label
    subset = df.loc[df['label'] == label]

    for id, image in subset.iterrows():
        id = str(id)
        filename = id + ".jpg"
        
        source_path = os.path.join(source_dir, filename)
        target_path = os.path.join(target_dir, filename)
        
        image = Image.open(source_path)
        image = image.resize((IMG_WIDTH, IMG_HEIGHT), Image.NEAREST)
        image.save(target_path)

In [6]:
resize_images('alpaca')
resize_images('not_alpaca')

In [7]:
def load_images(dir):
    img_array = []
    
    walk_directory(dir, lambda id, imnp, label: img_array.append(imnp))

    return np.array(img_array)

In [8]:
img_alpaca = load_images('dataset_resized/alpaca')
img_not_alpaca = load_images('dataset_resized/not_alpaca')

In [9]:
print('alpaca:', img_alpaca.shape)
print('not_alpaca:', img_not_alpaca.shape)

alpaca: (142, 481, 583, 3)
not_alpaca: (183, 481, 583, 3)


### Implement Proposed Layer

#### Diagram of the proposed convolution operation:

![Proposed Layer](img/proposed_layer.png)

#### Pseudo code for signle training iteration of the proposed layer:


```
forward_pass(X):
    n_samples, height, width, channels = X.shape
    z = []

    for x in X:
        z_ = []
        for w, b in zip(self.W, self.B):
            for i in range(height - 2):
                for j in range(width - 2):
                    xrf = x[i:i+3, j:j+3, :]
                    wrf = w[i:i+3, j:j+3, :]
                    z_[i, j] = sum(xrf * wrf + b)
        z.append(z_)
    
    a = activation(z)

    return a
```

In [258]:
class ProposedLayer(layers.Layer):
    def __init__(self, filters, receptive_field_size=(3, 3), activation=None, **kwargs):
        super(ProposedLayer, self).__init__(**kwargs)
        self.filters = filters
        self.receptive_field_size = receptive_field_size
        self.activation = activations.get(activation)
    
    def build(self, input_shape):
        n_samples, height, width, channels = input_shape

        self.W = self.add_weight(name='kernel',
                                 shape=(height, width, channels, self.filters),
                                 initializer='glorot_uniform',
                                 trainable=True)
        self.B = self.add_weight(name='bias',
                                 shape=(self.filters,),
                                 initializer='zeros',
                                 trainable=True)
        
        super(ProposedLayer, self).build(input_shape)

    def call(self, X):
        n_samples, height, width, channels = X.shape
        z = []

        for x in X:
            z_ = np.zeros((height-2, width-2, self.filters))

            for i in range(self.filters):
                w = self.W[:, :, :, i]
                b = self.B[i]
                z__ = z_[:, :, i]

                for i in range(height - 2):
                    for j in range(width - 2):
                        xrf = x[i:i+3, j:j+3, :]
                        wrf = w[i:i+3, j:j+3, :]
                        z__[i, j] = tf.math.reduce_sum(tf.matmul(xrf, wrf) + b)
                
                z_[:, :, i] = z__

            #def conv(w, b):
            #    for i in range(height - 2):
            #        for j in range(width - 2):
            #            xrf = x[i:i+3, j:j+3, :]
            #            wrf = w[i:i+3, j:j+3, :]
            #            z_[i, j] = tf.math.reduce_sum(tf.matmul(xrf, wrf) + b)
            #
            #weights = tf.stack([self.W.T, self.B], axis=1)
            #tf.map_fn(conv, weights)
            z.append(z_)
        
        return self.activation(z)
    
    def compute_output_shape(self, input_shape):
        rf = self.receptive_field_size
        output_shape = list(input_shape)
        output_shape[1] = (input_shape[1] - rf[0]) // 1 + 1
        output_shape[2] = (input_shape[2] - rf[1]) // 1 + 1
        output_shape[3] = self.filters

        return tuple(output_shape)

### Create Model

In [104]:
input_shape = (225, IMG_HEIGHT, IMG_WIDTH, 3)
n_classes = 2

# Input
inputs = layers.Input(input_shape)

# Block 1
x0 = layers.Conv2D(16, kernel_size=(3, 3), activation='relu')(inputs)
x1 = layers.MaxPooling2D(pool_size=(2, 2))(x0)
# Block 2
x2 = layers.Conv2D(12, kernel_size=(3, 3), activation='relu')(x1)
x3 = layers.MaxPooling2D(pool_size=(2, 2))(x2)
# Block 3
x4 = layers.Conv2D(8, kernel_size=(3, 3), activation='relu')(x3)
x5 = layers.MaxPooling2D(pool_size=(2, 2))(x4)

# Flatten feature map - embedding size will become 34560
x6 = layers.Flatten()(x5)

# Dense layer for classification
# Start with units in dense layer = embedding_size / 64
x7 = layers.Dense(540, activation='relu')(x6)
# Output
outputs = layers.Dense(n_classes, activation='softmax')(x7)

model = keras.Model(inputs, outputs)
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_25 (InputLayer)       [(None, 481, 583, 3)]     0         
                                                                 
 conv2d_8 (Conv2D)           (None, 479, 581, 16)      448       
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 239, 290, 16)     0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 237, 288, 12)      1740      
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 118, 144, 12)     0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 116, 142, 8)       872 

In [259]:
input_shape = (IMG_HEIGHT, IMG_WIDTH, 3)
n_classes = 2

# Input
inputs = layers.Input(input_shape)

# Block 1
x0 = ProposedLayer(16, receptive_field_size=(3, 3), activation='relu')(inputs)
x1 = layers.MaxPooling2D(pool_size=(2, 2))(x0)
# Block 2
x2 = ProposedLayer(12, receptive_field_size=(3, 3), activation='relu')(x1)
x3 = layers.MaxPooling2D(pool_size=(2, 2))(x2)
# Block 3
x4 = ProposedLayer(8, receptive_field_size=(3, 3), activation='relu')(x3)
x5 = layers.MaxPooling2D(pool_size=(2, 2))(x4)

# Flatten feature map - embedding size will become 34560
x6 = layers.Flatten()(x5)

# Dense layer for classification
# Start with units in dense layer = embedding_size / 64
x7 = layers.Dense(540, activation='relu')(x6)
# Output
outputs = layers.Dense(n_classes, activation='softmax')(x7)

model = keras.Model(inputs, outputs)
model.summary()

ValueError: Exception encountered when calling layer "proposed_layer_95" (type ProposedLayer).

in user code:

    File "C:\Users\kosta\AppData\Local\Temp\ipykernel_12684\2618982689.py", line 38, in call  *
        z__[i, j] = tf.math.reduce_sum(tf.matmul(xrf, wrf) + b)

    ValueError: setting an array element with a sequence.


Call arguments received by layer "proposed_layer_95" (type ProposedLayer):
  • X=tf.Tensor(shape=(None, 481, 583, 3), dtype=float32)

### Compile

In [None]:
#TODO

### Pre-Process Data

In [None]:
#TODO

### Create Train/Test Splits

In [None]:
#TODO

### Fit

In [None]:
#TODO

### Evaluate

In [None]:
#TODO

### Results and Discussion

In [None]:
#TODO