In [9]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import scipy
# Augmentation
NUM_TO_AUGMENT = 5

# CIFAR_10 is a set of 60k images 32x32 pixels on 3 channels
IMG_CHANNELS = 3
IMG_ROWS = 32
IMG_COLS = 32

#constant
BATCH_SIZE = 128
NB_EPOCH = 20
NB_CLASSES = 10
VERBOSE = 1
VALIDATION_SPLIT = 0.2
OPTIM = tf.keras.optimizers.RMSprop()

#load dataset
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

# augmenting
print("Augmenting training set images...")
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

xtas, ytas = [], []
for i in range(X_train.shape[0]):
    num_aug = 0
    x = X_train[i] # (3, 32, 32)
    x = x.reshape((1,) + x.shape) # (1, 3, 32)
    for x_aug in datagen.flow(x, batch_size=1,
                                save_to_dir='preview', save_prefix='cifar', save_format='jpeg'):
        if num_aug >= NUM_TO_AUGMENT:
            break
        xtas.append(x_aug[0])
        num_aug += 1



print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train_samples')
print(X_test.shape[0], 'test_samples')

# convert to categorical
Y_train = tf.keras.utils.to_categorical(y_train, NB_CLASSES)
Y_test = tf.keras.utils.to_categorical(y_test, NB_CLASSES)

# float and normalization
X_train = X_train.astype('float32')
X_test = X_train.astype('float32')
X_train /= 255
X_test /= 255



# network
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), padding='same',
                                 input_shape = (IMG_ROWS, IMG_COLS, IMG_CHANNELS)))
model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.Conv2D(32, (3, 3), padding='same'))
model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(tf.keras.layers.Dropout(0.25))

model.add(tf.keras.layers.Conv2D(32, (3, 3), padding='same'))
model.add(tf.keras.layers.Activation('relu'))

model.add(tf.keras.layers.Conv2D(64, 3, 3))
model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(tf.keras.layers.Dropout(0.25))

model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512))
model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(NB_CLASSES))
model.add(tf.keras.layers.Activation('softmax'))
model.summary()


# fit the dataset
datagen.fit(X_train)

# train
model.compile(loss='categorical_crossentropy', optimizer=OPTIM,
              metrics=['accuracy'])
history = model.fit(datagen.flow(X_train, Y_train,
                                           batch_size=BATCH_SIZE), steps_per_epoch=X_train.shape[0],
                                           epochs=NB_EPOCH, verbose=VERBOSE)
#model.fit(X_train, Y_train, batch_size=BATCH_SIZE,
#          epochs=NB_EPOCH, validation_split=VALIDATION_SPLIT,
#          verbose=VERBOSE)
score = model.evaluate(X_test, Y_test,
                       batch_size=BATCH_SIZE, verbose=VERBOSE)
print("Test score: ", score[0])
print("Test accuracy: ", score[1])

# save model
model_json = model.to_json()
open('cifar10_architecture.json', 'w').write(model_json)
model.save_weights('cifar10_weights.h5', overwrite=True)



Augmenting training set images...
X_train shape: (50000, 32, 32, 3)
50000 train_samples
10000 test_samples
Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_28 (Conv2D)          (None, 32, 32, 32)        896       
                                                                 
 activation_42 (Activation)  (None, 32, 32, 32)        0         
                                                                 
 conv2d_29 (Conv2D)          (None, 32, 32, 32)        9248      
                                                                 
 activation_43 (Activation)  (None, 32, 32, 32)        0         
                                                                 
 max_pooling2d_14 (MaxPooli  (None, 16, 16, 32)        0         
 ng2D)                                                           
                                                                 
 dropout_21 (

ValueError: Data cardinality is ambiguous:
  x sizes: 50000
  y sizes: 10000
Make sure all arrays contain the same number of samples.

Bias in Machine Learning

In categorizing these images it is important to try and root out the three main types of bias in AI.  These are Interaction Bias, Latent Bias, and Selection Bias. Interaction bias is a bias in the way people may react to a specific question.  The example given in the article was pictures of shoes.  If everyone draws shoes to look like sneakers because they are the most common, the AI will learn that shoes look like sneakers even though high-heels and sandals may also be shoes. (Xiang, 2019)
The second type of bias is Latent Bias.  Latent Bias is when the data set used for training is incomplete. If only images from the distant past are used then the data could be skewed in a particular direction even though that is no longer true. Data from the 1800s may show a lot of early deaths due to diseases that are now cured. If a machine learning model were trained on that data, it would think that cholera is a serious problem in todays age even though this is not true.
The third type of bias is Selection bias.  Only selecting items or representations for a particular thing to be categorized from a subsection of the total set will lead to skewing of the data.  For example if I were to train a model to determine what images are cars, but I only train it in a country with predominately small cars, it will not be accurate in a country with larger cars or trucks.
Introducing any of these types of bias could negatively impact the functioning on an application using the model because the biased data could cause negative outcomes for the population that it was not trained on. A recent example of this was the issue with Google facial recognition incorrectly categorizing photos of Black people into incorrect categories.  This could be hurtful or dangerous for people being categorized in this way. (Small, 2023)

Small, Z. (2023, July 4). Black artists say A.I. shows bias, with algorithms erasing their history. The New York Times. https://www.nytimes.com/2023/07/04/arts/design/black-artists-bias-ai.html 
Xiang, M. (2019, November 6). Bias: What it means in the Big Data World. Medium. https://towardsdatascience.com/bias-what-it-means-in-the-big-data-world-6e64893e92a1 