In [1]:
import os

import matplotlib.pyplot as plt
from keras.datasets import cifar10
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.models import Sequential
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils

# CIFAR-10: 60k colour images, each 32x32 pixels with 3 channels.
IMG_ROWS, IMG_COLS, IMG_CHANNELS = 32, 32, 3

# Training configuration shared by the cells below.
BATCH_SIZE = 128
NB_EPOCH = 20
NB_CLASSES = 10
VERBOSE = 1
VALIDATION_SPLIT = 0.2
OPTIM = RMSprop()

# Load the train / test splits and report their sizes.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# One-hot encode the integer class labels (NB_CLASSES columns).
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# Cast pixels to float32 and divide by 255.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255


Using TensorFlow backend.


X_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [2]:
# network: one convolution + pooling block feeding a dense classifier head
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# train: a VALIDATION_SPLIT fraction of the training data is held out
# for validation during fitting
model.compile(loss='categorical_crossentropy', optimizer=OPTIM,
              metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=BATCH_SIZE,
          epochs=NB_EPOCH, validation_split=VALIDATION_SPLIT,
          verbose=VERBOSE)

# evaluate on the test split; score is [loss, accuracy] as configured
# by the compile() call above
score = model.evaluate(X_test, Y_test,
                       batch_size=BATCH_SIZE, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

# save model architecture as JSON; the context manager guarantees the
# file is flushed and closed (the bare open().write() left it dangling)
model_json = model.to_json()
with open('cifar10_architecture.json', 'w') as json_file:
    json_file.write(model_json)
# and the weights learned by the network during training
model.save_weights('cifar10_weights.h5', overwrite=True)


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 16, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               4194816   
_________________________________________________________________
activation_2 (Activation)    (None, 512)              

In [3]:
# Deeper network: two (conv, conv, pool, dropout) stages followed by the
# same dense classifier head as the simple model.
model = Sequential()

# Stage 1: two 32-filter 3x3 convolutions.
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Stage 2: two 64-filter 3x3 convolutions.
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
# Kernel size must be passed as a tuple: with the Keras 2 signature
# Conv2D(filters, kernel_size, strides, ...), the old `Conv2D(64, 3, 3)`
# form puts the second 3 in the `strides` slot and only works through the
# deprecated Keras 1 compatibility shim (which emits a warning).
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classifier head.
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))

  # This is added back by InteractiveShellApp.init_path()


In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.datasets import cifar10
import numpy as np
NUM_TO_AUGMENT = 5
PREVIEW_DIR = 'preview'
# Number of training images to run through the preview pass below.
# None keeps the original behaviour (every training image); set a small
# integer for a quick run, since each source image writes NUM_TO_AUGMENT
# JPEG files to PREVIEW_DIR.
NUM_PREVIEW_SOURCES = None

# The dataset is intentionally NOT re-loaded here: X_train / X_test were
# already cast to float32 and scaled by 1/255 in the first cell, and
# Y_train / Y_test are the matching one-hot labels. Calling
# cifar10.load_data() again would replace the inputs with raw pixel values
# for both training and evaluation.

#augmenting
print("Augmenting training set images...")

datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# save_to_dir is used as-is by the generator, so make sure it exists.
os.makedirs(PREVIEW_DIR, exist_ok=True)

num_sources = X_train.shape[0] if NUM_PREVIEW_SOURCES is None \
    else min(NUM_PREVIEW_SOURCES, X_train.shape[0])

# xtas collects the augmented samples; ytas is kept (unfilled, as in the
# original cell) so any later code referring to it still finds the name.
xtas, ytas = [], []
for i in range(num_sources):
    x = X_train[i:i + 1]  # single-image batch: (1, 32, 32, 3)
    preview_flow = datagen.flow(x, batch_size=1,
                                save_to_dir=PREVIEW_DIR,
                                save_prefix='cifar', save_format='jpeg')
    # Pull exactly NUM_TO_AUGMENT samples. The previous for/break form
    # generated (and saved) one extra sample before the break was reached.
    for _ in range(NUM_TO_AUGMENT):
        xtas.append(next(preview_flow)[0])

#fit the dataset
# NOTE(review): per the Keras docs this is only required when featurewise
# centering / std-normalisation / ZCA whitening is enabled, none of which
# is set above; kept for parity with the original cell.
datagen.fit(X_train)

# train
# The model built in the previous cell has not been compiled yet, and
# fit_generator refuses to run on an uncompiled model.
model.compile(loss='categorical_crossentropy', optimizer=OPTIM,
              metrics=['accuracy'])

# Keras 2 counts an epoch in batches (steps), not samples: one pass over
# the training set is ceil(n_samples / BATCH_SIZE) steps.
steps_per_epoch = (X_train.shape[0] + BATCH_SIZE - 1) // BATCH_SIZE
history = model.fit_generator(datagen.flow(X_train, Y_train,
                                           batch_size=BATCH_SIZE),
                              steps_per_epoch=steps_per_epoch,
                              epochs=NB_EPOCH, verbose=VERBOSE)

# score is [loss, accuracy], matching the compile() call above
score = model.evaluate(X_test, Y_test,
                       batch_size=BATCH_SIZE, verbose=VERBOSE)

print("Test score:", score[0])
print('Test accuracy:', score[1])



Augmenting training set images...


Markdown Cell


This algorithm is trained on the CIFAR-10 dataset, which, according to the textbook, allows a model to distinguish between images of animals and vehicles. I believe that this is harmless, and that training a model to identify things such as these does not violate anyone's privacy or otherwise cause issues. However, these networks can also be trained on sensitive and personal data; for instance, they can be trained for facial recognition.

First, there is the issue of privacy violations. In January 2020, Detroit police wrongfully arrested a man named Robert Williams because of a false positive from a facial recognition system (ACLU, 2024). He was arrested in front of his entire family and spent roughly 30 hours in jail. Robert Williams is a Black man, and facial recognition systems are reported to be much less accurate on people who are not white. The New York Times reported that one popular facial recognition data set is 75% male and 80% white (Lohr, 2018). In testing the accuracy of facial recognition software on diverse faces, Joy Buolamwini found that Microsoft's software had an error rate of 21% for darker-skinned women, and IBM's had an error rate of about 35%. She also found that all of the systems tested had error rates below 1% for 'light-skinned males' (Buolamwini & Gebru, 2018). From the above findings, it can be concluded that there is more at stake than privacy issues such as your face being used in a law enforcement database: models can be biased and act in discriminatory ways. This bias can lead to innocent people like Robert Williams, who was fortunately released, being arrested and subjected to further discrimination and public humiliation. A wrongful arrest could also have the even worse result of injury or loss of life if, for instance, a faulty facial recognition system falsely identifies someone as a suspect in a violent crime. In conclusion, image classification algorithms such as the one used for this assignment should be evaluated for both their strengths and their weaknesses. If you are training a model on human faces, detecting and removing bias should be at the forefront of the entire development process.

REFERENCES:

Lohr, Steve. (February 9, 2018). Facial Recognition Is Accurate, if You're a White Guy. NY TIMES. https://www.nytimes.com/2018/02/09/technology/facial-recognition-race-artificial-intelligence.html

Buolamwini, Joy. Gebru, Timnit. (2018). Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification. MIT. https://proceedings.mlr.press/v81/buolamwini18a/buolamwini18a.pdf

ACLU. (January 29, 2024). Williams v. City of Detroit. ACLU. https://www.aclu.org/cases/williams-v-city-of-detroit-face-recognition-false-arrest
