In [2]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from pathlib import Path
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, GlobalAveragePooling2D, Input
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.applications import efficientnet_v2
from sklearn.preprocessing import LabelEncoder

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [4]:
tf.version

<module 'tensorflow._api.v2.version' from '/Users/rudra/ml-env/env/lib/python3.9/site-packages/tensorflow/_api/v2/version/__init__.py'>

In [5]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [6]:
path  = Path('/Users/rudra/Tech/ML_Work/Datasets/archive')
path

PosixPath('/Users/rudra/Tech/ML_Work/Datasets/archive')

In [7]:
train_df = pd.read_csv(path/'train_cultivar_mapping.csv')

In [8]:
train_df.head()

Unnamed: 0,image,cultivar,file_path,is_exist
0,2017-06-16__12-24-20-930.jpeg,PI_257599,../input/sorghum-id-fgvc-9/train_images/2017-0...,True
1,2017-06-02__16-48-57-866.jpeg,PI_154987,../input/sorghum-id-fgvc-9/train_images/2017-0...,True
2,2017-06-12__13-18-07-707.jpeg,PI_92270,../input/sorghum-id-fgvc-9/train_images/2017-0...,True
3,2017-06-22__13-18-06-841.jpeg,PI_152651,../input/sorghum-id-fgvc-9/train_images/2017-0...,True
4,2017-06-26__12-56-48-642.jpeg,PI_176766,../input/sorghum-id-fgvc-9/train_images/2017-0...,True


In [9]:
train_df.is_exist.unique()

array([ True])

In [10]:
IMG_SIZE = 256
BATCH_SIZE = 32
EPOCHS = 30

In [10]:
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

In [11]:
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=path/'train',
    x_col='image',
    y_col='cultivar',
    class_mode='sparse',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    subset='training',
    shuffle=True
)

valid_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=path/'train',
    x_col='image',
    y_col='cultivar',
    class_mode='sparse',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    subset='validation',
    shuffle=True
)

Found 17755 validated image filenames belonging to 100 classes.
Found 4438 validated image filenames belonging to 100 classes.


In [12]:
early_stopping_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=3)

checkpoint_callback = ModelCheckpoint(filepath='sorghum_keras_model.h5', monitor='val_accuracy', verbose=1, save_freq='epoch', 
                                                    save_best_only=True, save_weights_only=True, period=1)

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, mode=min, patience=3, min_lr=1e-5)



In [16]:
base_model = efficientnet_v2.EfficientNetV2B3(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_model.trainable = True

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-b3_notop.h5


In [17]:
inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))
output = base_model(inputs)
output = GlobalAveragePooling2D()(output)
output = Dense(100, activation='softmax')(output)

model = keras.Model(inputs, output)

model.summary()

model.compile(loss='sparse_categorical_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 efficientnetv2-b3 (Function  (None, 8, 8, 1536)       12930622  
 al)                                                             
                                                                 
 global_average_pooling2d_1   (None, 1536)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_1 (Dense)             (None, 100)               153700    
                                                                 
Total params: 13,084,322
Trainable params: 12,975,106
Non-trainable params: 109,216
_________________________________________________________________


In [18]:

history = model.fit(train_generator, validation_data=valid_generator, epochs=EPOCHS, callbacks=[checkpoint_callback, early_stopping_callback, reduce_lr])

Epoch 1/30


2022-04-30 23:31:01.813181: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-05-01 07:04:07.054772: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.



Epoch 1: val_accuracy improved from -inf to 0.54912, saving model to sorghum_keras_model.h5
Epoch 2/30
Epoch 2: val_accuracy improved from 0.54912 to 0.72713, saving model to sorghum_keras_model.h5
Epoch 3/30
Epoch 3: val_accuracy improved from 0.72713 to 0.73006, saving model to sorghum_keras_model.h5
Epoch 4/30
Epoch 4: val_accuracy improved from 0.73006 to 0.84205, saving model to sorghum_keras_model.h5
Epoch 5/30
Epoch 5: val_accuracy improved from 0.84205 to 0.87494, saving model to sorghum_keras_model.h5
Epoch 6/30
Epoch 6: val_accuracy did not improve from 0.87494
Epoch 7/30
Epoch 7: val_accuracy improved from 0.87494 to 0.88846, saving model to sorghum_keras_model.h5
Epoch 8/30
Epoch 8: val_accuracy improved from 0.88846 to 0.88982, saving model to sorghum_keras_model.h5
Epoch 9/30
Epoch 9: val_accuracy improved from 0.88982 to 0.89950, saving model to sorghum_keras_model.h5
Epoch 10/30
Epoch 10: val_accuracy improved from 0.89950 to 0.91595, saving model to sorghum_keras_mode

In [19]:
model.save('Sorghum_keras_best_model.h5')

In [12]:
model = keras.models.load_model('/Users/rudra/Tech/ML_Work/Sorghum_keras_best_model.h5')

Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



2022-05-01 16:20:25.330170: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-01 16:20:25.330433: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [13]:
le = LabelEncoder()
le.fit(train_df.cultivar)
train_df['target'] = le.transform(train_df.cultivar)

In [14]:
train_df.head()

Unnamed: 0,image,cultivar,file_path,is_exist,target
0,2017-06-16__12-24-20-930.jpeg,PI_257599,../input/sorghum-id-fgvc-9/train_images/2017-0...,True,73
1,2017-06-02__16-48-57-866.jpeg,PI_154987,../input/sorghum-id-fgvc-9/train_images/2017-0...,True,29
2,2017-06-12__13-18-07-707.jpeg,PI_92270,../input/sorghum-id-fgvc-9/train_images/2017-0...,True,99
3,2017-06-22__13-18-06-841.jpeg,PI_152651,../input/sorghum-id-fgvc-9/train_images/2017-0...,True,6
4,2017-06-26__12-56-48-642.jpeg,PI_176766,../input/sorghum-id-fgvc-9/train_images/2017-0...,True,50


In [15]:
test_filenames = os.listdir(path/'test')
test_df = pd.DataFrame({'filename' : test_filenames})

test_datagen = ImageDataGenerator()

test_generator = test_datagen.flow_from_dataframe(
    dataframe = test_df,
    directory = path/'test',
    x_col = 'filename',
    y_col = None,
    class_mode = None,
    target_size = (IMG_SIZE, IMG_SIZE),
    batch_size = BATCH_SIZE,
    shuffle = False
)

Found 23639 validated image filenames.


In [16]:
classes = list(train_df.cultivar.unique())
len(classes)

100

In [17]:
preds = model.predict(test_generator)
pred_idx = np.argmax(preds, axis=1)
#predictions = [classes[i] for i in pred_idx]

2022-05-01 16:26:07.541950: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-05-01 16:26:08.258395: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [18]:
type(pred_idx)

numpy.ndarray

In [19]:
#test_df['cultivar'] = predictions
test_df['cultivar'] = le.inverse_transform(list(pred_idx))

In [20]:
test_df.filename = test_df.filename.map(lambda x: x.split('.jpeg')[0] + '.png')

In [21]:
test_df.head()

Unnamed: 0,filename,cultivar
0,1838798748.png,PI_218112
1,42096263.png,PI_152860
2,316490365.png,PI_329299
3,1091940264.png,PI_152828
4,470001726.png,PI_329301


In [27]:
test_df.to_csv('submission.csv', index=False)