In [1]:
import os
#from keras.layers import Conv2D, Flatten, Dense, MaxPool2D, BatchNormalization, GlobalAveragePooling2D
#from keras.applications.resnet50 import preprocess_input, decode_predictions
#from keras.preprocessing.image import ImageDataGenerator, load_img
#from keras.applications.resnet50 import ResNet50
#from keras.preprocessing import image
#from keras.models import Sequential
#from keras.models import Model
#from keras.callbacks import ModelCheckpoint, EarlyStopping
#import matplotlib.pyplot as plt
#import numpy as np
import joblib


from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.applications.resnet import ResNet101
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import numpy as np

In [8]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from datetime import datetime

def preprocess(train_data_dir, valid_data_dir, test_data_dir):
    img_height, img_width = (227, 227)  # AlexNet input size
    batch_size = 32

    train_datagen = ImageDataGenerator(rescale=1./255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True,
                                       validation_split=0.4)

    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        subset='training')

    valid_generator = train_datagen.flow_from_directory(
        valid_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation')

    test_generator = train_datagen.flow_from_directory(
        test_data_dir,
        target_size=(img_height, img_width),
        batch_size=1,
        class_mode='categorical',
        subset='validation')

    return train_generator, test_generator, valid_generator

def alexnet_model(input_shape, num_classes):
    input_layer = Input(shape=input_shape)

    # Convolutional layers
    x = Conv2D(96, (11, 11), strides=(4, 4), activation='relu')(input_layer)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = BatchNormalization()(x)

    x = Conv2D(256, (5, 5), padding='same', activation='relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = BatchNormalization()(x)

    x = Conv2D(384, (3, 3), padding='same', activation='relu')(x)
    x = Conv2D(384, (3, 3), padding='same', activation='relu')(x)
    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = BatchNormalization()(x)

    # Fully connected layers
    x = Flatten()(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Output layer
    predictions = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=input_layer, outputs=predictions)

    return model

def model_trainer(epochs=10):
    train_data_dir = "/workspace/data/data_dir/model-data/train"
    test_data_dir = "/workspace/data/data_dir/model-data/test"
    valid_data_dir = "/workspace/data/data_dir/model-data/val"

    train_generator, test_generator, valid_generator = preprocess(
        train_data_dir=train_data_dir,
        test_data_dir=test_data_dir,
        valid_data_dir=valid_data_dir)

    num_classes = train_generator.num_classes
    input_shape = (227, 227, 3)  # Assuming RGB images
    model = alexnet_model(input_shape, num_classes)

    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    checkpoint_callback = ModelCheckpoint('/workspace/data/data_dir/saved/best/best_model.h5',
                                          monitor='val_loss', save_best_only=True, verbose=1)
    early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
    
    log_dir = "/workspace/data/data_dir/logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=log_dir)

    model.fit(
        train_generator,
        epochs=epochs,
        validation_data=valid_generator,
        callbacks=[checkpoint_callback, early_stopping_callback, tensorboard_callback]
    )

    return model

if __name__ == '__main__':
    alexnet_model = model_trainer(epochs=10)
    alexnet_model.save('/workspace/data/data_dir/saved/saved_model.h5')


Found 1051 images belonging to 3 classes.
Found 99 images belonging to 3 classes.
Found 200 images belonging to 3 classes.
Epoch 1/10
Epoch 1: val_loss improved from inf to 119.21871, saving model to /workspace/data/data_dir/saved/best/best_model.h5
Epoch 2/10
Epoch 2: val_loss improved from 119.21871 to 16.32347, saving model to /workspace/data/data_dir/saved/best/best_model.h5
Epoch 3/10
Epoch 3: val_loss improved from 16.32347 to 2.03184, saving model to /workspace/data/data_dir/saved/best/best_model.h5
Epoch 4/10
Epoch 4: val_loss improved from 2.03184 to 0.36670, saving model to /workspace/data/data_dir/saved/best/best_model.h5
Epoch 5/10
Epoch 5: val_loss did not improve from 0.36670
Epoch 6/10
Epoch 6: val_loss did not improve from 0.36670
Epoch 7/10
Epoch 7: val_loss improved from 0.36670 to 0.34433, saving model to /workspace/data/data_dir/saved/best/best_model.h5
Epoch 8/10
Epoch 8: val_loss did not improve from 0.34433
Epoch 9/10
Epoch 9: val_loss did not improve from 0.3443

In [9]:
%load_ext tensorboard

In [12]:
%tensorboard --logdir=/workspace/data/data_dir/logs/

Reusing TensorBoard on port 6008 (pid 153896), started 0:11:50 ago. (Use '!kill 153896' to kill it.)

In [1]:
!nvidia-smi

Thu Apr  4 07:57:14 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.103.01   Driver Version: 470.103.01   CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  On   | 00000000:90:00.0 Off |                   On |
| N/A   32C    P0    49W / 400W |                  N/A |     N/A      Default |
|                               |                      |              Enabled |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| MIG devices:                                                                |
+------