## Create a experimental tracking system with W&B

Details about the set up [here](https://www.kaggle.com/code/ayuraj/experiment-tracking-with-weights-and-biases)

In [1]:
# get the key api from W&B for experimental tracking
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
wandb_api = user_secrets.get_secret("wandb_api") 

In [2]:
import wandb
from wandb.keras import WandbCallback
wandb.login(key=wandb_api)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [3]:
import glob
import re

import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow import keras

from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
import os

In [9]:
df_extra = pd.read_csv('../input/kitchenware-extra-images/data.csv')
df_extra['filename'] = '../input/kitchenware-extra-images/data/' + df_extra['Id'] + '.jpg'

In [11]:
df_org = pd.read_csv('../input/kitchenware-classification/train.csv', dtype={'Id': str})
df_org['filename'] = '../input/kitchenware-classification/images/' + df_org['Id'] + '.jpg'

In [12]:
df = pd.concat([df, df_extra])

In [14]:
# Since we want to make sure to have a good validation schema 
# we split the data into training, validation and testing
df_full_train, df_test = train_test_split(
    df, test_size=0.2, random_state=1,
    stratify=df['label']
)

df_train, df_val = train_test_split(
    df_full_train, test_size=0.25,
    random_state=1, stratify=df_full_train['label']
)

# train on df_train, validate in df_val
# if the results are good test on df_test
# if the results are good train on df_train and validate on df_test and make a sumbission

In [15]:
# set the model architecture

def make_model(learning_rate, droprate, input_shape, inner_layer):
    base_model = Xception(
        weights='imagenet',
        include_top=False,
        input_shape=(input_shape, input_shape, 3)
    )
    base_model.trainable = False

    inputs = keras.Input(shape=(input_shape, input_shape, 3))

    base = base_model(inputs, training=False)
    vectors = keras.layers.GlobalAveragePooling2D()(base)
    dense = keras.layers.Dense(inner_layer, activation='relu')(vectors)
    dropout = keras.layers.Dropout(droprate)(dense)
    outputs = keras.layers.Dense(6, activation="linear")(dropout)

    model = keras.Model(inputs, outputs)
    
    learning_rate = learning_rate
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    loss = keras.losses.CategoricalCrossentropy(from_logits=True)

    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    
    return model

## Configure the parameters for the W&B dashboard

In [25]:
# Initialize W&B and specify the project and entity name
run = wandb.init(
        project='kitchenware-classification',name='eight-run-test',
        config={
        "learning_rate":0.001,
        "droprate": 0.2,
        "input_shape":550,
        "inner_layer": 50,
        "epochs":100,
        "batch_size":32,
        "loss_function":"crossentropy",
        "architecture":"CNN",
        "dataset":"Kitchenware-plus-extra"
        })
    
config = wandb.config

VBox(children=(Label(value='80.438 MB of 80.438 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
accuracy,▁▆▆▇█▇██
epoch,▁▂▃▄▅▆▇█
loss,█▃▃▂▂▁▁▁
val_accuracy,▂▇▅▁▆▅█▄
val_loss,▇▃▁▃▃▅▇█

0,1
GFLOPS,5.84642
accuracy,0.9735
best_epoch,2.0
best_val_loss,0.18657
epoch,7.0
loss,0.07918
val_accuracy,0.93492
val_loss,0.21324


In [17]:
# Set up the early stopping callback with a patience of 5 epochs
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

In [26]:
# data loaders
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_dataframe(
    df_train,
    x_col='filename',
    y_col='label',
    target_size=(config.input_shape, config.input_shape),
    batch_size=config.batch_size,
)

# data loaders
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

val_generator = val_datagen.flow_from_dataframe(
    df_val,
    x_col='filename',
    y_col='label',
    target_size=(config.input_shape, config.input_shape),
    batch_size=config.batch_size,
)

train_full_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_full_generator = train_full_datagen.flow_from_dataframe(
    df_full_train,
    x_col='filename',
    y_col='label',
    target_size=(config.input_shape, config.input_shape),
    batch_size=config.batch_size,
)

Found 4792 validated image filenames belonging to 6 classes.
Found 1598 validated image filenames belonging to 6 classes.
Found 6390 validated image filenames belonging to 6 classes.


In [27]:
checkpoint = keras.callbacks.ModelCheckpoint(
    'kitchenware_v5_{epoch:02d}_{val_accuracy:.3f}.h5',
    save_best_only=True,
    monitor='val_accuracy',
    mode='max'   
)

## Add the function to track the results of your model

In [28]:
model = make_model(learning_rate=config.learning_rate,droprate=config.droprate,
                   input_shape=config.input_shape, inner_layer=config.inner_layer)

# add the wandbCallback
model.fit(
    train_full_generator,
    epochs=config.epochs,
    validation_data=val_generator,
    callbacks=[checkpoint, early_stopping, WandbCallback()]
)

2023-01-17 00:08:18.932214: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-17 00:08:18.932618: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2023-01-17 00:08:18.932810: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2023-01-17 00:08:18.933428: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-17 00:08:18.933973: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-17 00:08:18.934328: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] su

Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100


<keras.callbacks.History at 0x7f0772308510>

In [29]:
classes = np.array(list(train_full_generator.class_indices.keys()))

In [30]:
logits = model.predict(train_full_generator)
f_x = tf.nn.softmax(logits).numpy()

predictions = classes[f_x.argmax(axis=1)]
np.mean(df_train['label'].values == predictions)

  """


0.0

In [31]:
logits_val = model.predict(val_generator)

f_x_val = tf.nn.softmax(logits_val).numpy()

predictions_val = classes[f_x_val.argmax(axis=1)]
np.mean(df_test['label'].values == predictions_val)

0.17647058823529413

In [32]:
from sklearn.metrics import confusion_matrix

# Calculate confusion matrix
confusion_matrix = confusion_matrix(df_test['label'].values, predictions_val, labels=list(classes))

# Iterate over classes
for i, c in enumerate(classes):
    # Get number of correct and incorrect classifications for each class
    tp = confusion_matrix[i, i]
    fn = confusion_matrix[i, :].sum() - tp
    fp = confusion_matrix[:, i].sum() - tp
    tn = confusion_matrix.sum() - tp - fn - fp

    # Create string
    s = f"The number of correct classifications of {c} is {tp} and the number of incorrect classifications is {fn + fp}"
    print(s)

The number of correct classifications of cup is 77 and the number of incorrect classifications is 500
The number of correct classifications of fork is 11 and the number of incorrect classifications is 288
The number of correct classifications of glass is 27 and the number of incorrect classifications is 352
The number of correct classifications of knife is 42 and the number of incorrect classifications is 459
The number of correct classifications of plate is 67 and the number of incorrect classifications is 544
The number of correct classifications of spoon is 58 and the number of incorrect classifications is 489


# Create predictions based on your model

In [33]:
files = glob.glob("/kaggle/working/kitchenware_v5_*_*.h5")
#files = glob.glob("kitchenware_v4_*_*.h5")

In [34]:
predictions = [float(re.search("_([0-9]+\.[0-9]+)\.h5", file).group(1)) for file in files]

best_file = files[predictions.index(max(predictions))]

print(best_file)

/kaggle/working/kitchenware_v5_39_0.999.h5


In [None]:
model_test = keras.models.load_model(best_file)

In [None]:
df_test = pd.read_csv('../input/kitchenware-classification/test.csv', dtype={'Id': str})
df_test['filename'] = '../input/kitchenware-classification/images/' + df_test['Id'] + '.jpg'

In [None]:
# testing dataset

test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

test_generator = test_datagen.flow_from_dataframe(
    df_test,
    x_col='filename',
    class_mode='input',
    target_size=(250, 250),
    batch_size=32,
    shuffle=False
)

In [None]:
classes = np.array(list(train_full_generator.class_indices.keys()))

In [None]:
logits_test = model.predict(test_generator)

f_x_test = tf.nn.softmax(logits_test).numpy()

predictions_test = classes[f_x_test.argmax(axis=1)]


In [None]:
#y_pred = model_test.predict(test_generator)

#predictions = classes[y_pred.argmax(axis=1)]

df_submission = pd.DataFrame()
df_submission['filename'] = test_generator.filenames
df_submission['label'] = predictions_test

df_submission['Id'] = df_submission.filename.str[len('../input/kitchenware-classification/images/'):-4]
del df_submission['filename']
df_submission[['Id', 'label']].to_csv('submission.csv', index=False)

In [None]:
df_submission