https://www.tensorflow.org/tutorials/structured_data/imbalanced_data#class_weights

https://medium.com/@zergtant/use-weighted-loss-function-to-solve-imbalanced-data-classification-problems-749237f38b75

In [1]:
import os

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [2]:
# Print the installed TensorFlow version so the run environment is recorded with the notebook.
print(tf.__version__)

2.14.1


In [3]:
# Read the raw CSV (it has no header row) and attach the column names.
filepath = 'covtype.data.csv'
df = pd.read_csv(filepath, header=None)

target = 'Cover_Type'

# Column layout: ten named attributes, then the numbered Wilderness_Area and
# Soil_Type columns, and finally the label column.
named_cols = [
    'Elevation', 'Aspect', 'Slope',
    'Horizontal_Distance_To_Hydrology', 'Vertical_Distance_To_Hydrology',
    'Horizontal_Distance_To_Roadways',
    'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
    'Horizontal_Distance_To_Fire_Points',
]
wilderness_cols = [f'Wilderness_Area{i}' for i in range(1, 5)]
soil_cols = [f'Soil_Type{i}' for i in range(1, 41)]
columns = named_cols + wilderness_cols + soil_cols + [target]

df.columns = columns
df[target] -= 1  # shift the integer labels down by one
df = df.astype(float)  # every column (label included) becomes float
print(df.shape)

(581012, 55)


In [4]:
# This split is according to original work
# NOTE: the code below is disabled by wrapping it in a string literal, so none of it runs.
# It samples a fixed number of rows per class (n_train, n_vald) for the training and
# validation sets and assigns each class's remaining rows to the test set.
'''
df_list = []
for c in range(0,len(df[target].unique())):
    df_list.append(df.loc[df[target] == c]) # separate rows according to their classes

random_state = 1
n_train = 1620
n_vald = 540
#tmp = df_list[0].sample(n=n_train, random_state=0)
#print(tmp)
np_train = np.empty((0,len(columns)))
np_vald = np.empty((0,len(columns)))
np_test = np.empty((0,len(columns)))
for df_ in df_list:
    df_tr = df_.sample(n=n_train, random_state=random_state) # randomly get training instances
    np_train = np.vstack((np_train, df_tr.values))
    #np_train.append(df_tr.values)
    df_ = df_.drop(index=df_tr.index) # drop training instances
    
    df_va = df_.sample(n=n_vald, random_state=random_state) # randomly get validation instances
    np_vald = np.vstack((np_vald, df_va.values))
    #np_vald.append(df_va.values)
    df_ = df_.drop(index=df_va.index) # drop validation instances
    
    np_test = np.vstack((np_test, df_.values))
    # np_test.append(df_) # the remaining is test instances   

y_train = np_train[:,-1]
y_vald = np_vald[:,-1]
y_test = np_test[:,-1]
X_train = np_train[:,:-1]
X_vald = np_vald[:,:-1]
X_test = np_test[:,:-1]
'''
from sklearn.model_selection import train_test_split

# Separate the feature matrix from the label vector.
X = df.drop(columns=[target]).values
y = df[target].values.astype(np.float32)

# This split is according to Tab Survey (Borisov et al., 2022):
# first 70% / 30% into (train + validation) / test, then 90% / 10% of the
# former into train / validation. Both calls use a fixed seed.
X_tmp, X_test, y_tmp, y_test = train_test_split(
    X, y, train_size=0.7, random_state=0
)
X_train, X_vald, y_train, y_vald = train_test_split(
    X_tmp, y_tmp, train_size=0.9, random_state=0
)

print(f'Training set: {X_train.shape}')
print(f'Validation set: {X_vald.shape}')
print(f'Test set: {X_test.shape}')

Training set: (366037, 54)
Validation set: (40671, 54)
Test set: (174304, 54)


In [5]:
# Show the (labels, counts) pair of each split: train, validation, test.
for split_labels in (y_train, y_vald, y_test):
    print(np.unique(split_labels, return_counts=True))

(array([0., 1., 2., 3., 4., 5., 6.], dtype=float32), array([133505, 178299,  22654,   1745,   5973,  10941,  12920],
      dtype=int64))
(array([0., 1., 2., 3., 4., 5., 6.], dtype=float32), array([14837, 19804,  2519,   180,   670,  1197,  1464], dtype=int64))
(array([0., 1., 2., 3., 4., 5., 6.], dtype=float32), array([63498, 85198, 10581,   822,  2850,  5229,  6126], dtype=int64))


The dataset is imbalanced, so we calculate a weight for each class. These weights are meant to be used during training (e.g. via the `class_weight` argument of `model.fit`) to make the model pay more attention to under-represented classes.

Let $w_c$ denote the weight of class $c$. The weight is calculated as follows:

$w_c = \frac{1}{N_c} \times \frac{N}{C} = \frac{N}{N_c \times C}$

where $N$ is the total number of instances, $N_c$ is the number of instances for class $c$ and $C$ is the number of classes.

In [6]:
# Disabled (wrapped in a string literal, so nothing here runs): computes the per-class
# weight w_c = (1 / N_c) * (N / C) from the training label counts and stores it in the
# dict `class_weight`, keyed by class index.
# NOTE(review): with this cell disabled, `class_weight` is not defined anywhere in the
# visible cells.
'''
num_target_instances = np.bincount(y_train.astype(np.int64))
N = np.sum(num_target_instances)
C = len(num_target_instances)

class_weight = {}
for c,s in enumerate(num_target_instances):
    w = (1 / s) * (N / C)
    class_weight[c] = w
print(class_weight)
'''

'\nnum_target_instances = np.bincount(y_train.astype(np.int64))\nN = np.sum(num_target_instances)\nC = len(num_target_instances)\n\nclass_weight = {}\nfor c,s in enumerate(num_target_instances):\n    w = (1 / s) * (N / C)\n    class_weight[c] = w\nprint(class_weight)\n'

In [7]:
def array_to_dataset(data, target, shuffle=True, batch_size=128, buffer_size=None):
    """Wrap in-memory arrays in a batched, prefetched ``tf.data.Dataset``.

    Args:
        data: Array-like of features; sliced along its first axis.
        target: Array-like of labels aligned with ``data``.
        shuffle: If True, shuffle the examples before batching.
        batch_size: Number of examples per batch.
        buffer_size: Size of the shuffle buffer (only used when ``shuffle`` is
            True). Defaults to the number of examples in ``data`` so the
            shuffle covers the whole dataset; pass a smaller value to trade
            shuffle quality for memory.

    Returns:
        A ``tf.data.Dataset`` yielding ``(data_batch, target_batch)`` tuples.
    """
    ds = tf.data.Dataset.from_tensor_slices((data, target))
    if shuffle:
        if buffer_size is None:
            # A buffer smaller than the dataset only mixes neighbouring examples;
            # the data already lives in memory, so a full-size buffer is cheap.
            buffer_size = max(len(data), 1)
        ds = ds.shuffle(buffer_size)
    # prefetch() placed after batch() counts whole batches, so let tf.data pick
    # the buffer size instead of queueing up to `batch_size` batches.
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Batch all three splits with the same batch size. The training split relies on the
# helper's default shuffle=True; validation and test keep their original order.
batch_size = 2048
train_ds = array_to_dataset(X_train, y_train, batch_size=batch_size)
vald_ds, test_ds = (
    array_to_dataset(features, labels, shuffle=False, batch_size=batch_size)
    for features, labels in ((X_vald, y_vald), (X_test, y_test))
)

In [25]:
## Multi-head/batch Attention
# Disabled scratch cell: the whole body is a string literal, so nothing below executes.
# It traces tensor shapes for a multi-head attention sketch and a multi-head IFE sketch
# using random tensors (b = batch, d = n_features, c = classes, h = heads).
'''
b = 16
d = 32
inp = tf.random.uniform((b, d)) # (batch, n_features)
print(inp.shape)

c = 3
h = 10
w = tf.random.uniform((h, d, c)) # (head, n_features, class)
print(w.shape)

z = tf.matmul(inp, w) # (batch, n_features) dot (head, n_features, class) = (head, batch, class)
print(z.shape)

z = tf.nn.softmax(z, axis=-1)
print(z.shape)

r = 5.24
w = tf.math.exp(w * r) # amplify weights; (head, n_features, class)
w_ = tf.transpose(w, perm=(0, 2, 1))
print(w_.shape) # (head, class, n_features)

a = tf.matmul(z, w_) # (head, batch, class) dot (head, class, n_features) = (head, batch, n_features)
print(a.shape) # (head, batch, n_features)
#a = tf.reduce_mean(a, axis=1) # (head, n_features)
print(a.shape)



## Multi-head IFE
input_data = tf.random.normal((b, d))
print(f'input shape: {input_data.shape}')
input_scores = [tf.random.normal((h, b, d, 1)) for j in range(0,d)]
input_scores = tf.concat(input_scores, axis=-1) # shape = (head, batch, n_features, n_features)
print(f'score shape: {input_scores.shape}')
#input_scores = tf.reduce_mean(input_scores, axis=[-1, 0]) # shape = (batch, n_features)
#input_scores = tf.nn.softmax(input_scores, axis=-1) # shape = (batch, n_features)
#print(f'score shape: {input_scores.shape}')

# Instead of calculating mean across dimension -1 (n_features) and 0 (head), calculate mean across n_features only
input_scores = tf.reduce_mean(input_scores, axis=[-1]) # shape = (head, batch, n_features)
print(f'score shape: {input_scores.shape}')

# input_data = tf.expand_dims(input_data, axis=0)
input_data = tf.broadcast_to(input_data, [h, b, d])
print(f'input shape: {input_data.shape}')

weighted_inputs = input_data * input_scores
#weighted_inputs = tf.reduce_mean(weighted_inputs, axis=[0])
#print(f'weighted_inputs shape: {weighted_inputs.shape}')

x = tf.transpose(weighted_inputs, perm=(1,0,2))
print(x.shape)
x = tf.reshape(x, shape=(b, -1))
print(x.shape)
x = tf.keras.layers.Dense(units=32)(x)
print(x.shape)
'''

(16, 32)
(10, 32, 3)
(10, 16, 3)
(10, 16, 3)
(10, 3, 32)
(10, 16, 32)
(10, 16, 32)
input shape: (16, 32)
score shape: (10, 16, 32, 32)
score shape: (10, 16, 32)
input shape: (10, 16, 32)
(16, 10, 32)
(16, 320)
(16, 32)


In [9]:
import IterativeFeatureExclusion as IFE

# Problem dimensions are read from the training split.
n_features = X_train.shape[1]
_, counts = np.unique(y_train, return_counts=True)
n_classes = len(counts)

# Hyperparameters passed to the IFENet classifier.
ife_num_layers = 1
clf_hidden_size = 65
num_att = 8
r = 5.9675

print(f'n_classes: {n_classes}')
print(f'n_features: {n_features}')

ife_params = dict(
    n_features=n_features,
    n_classes=n_classes,
    num_att=num_att,
    r=r,
    ife_num_layers=ife_num_layers,
    clf_hidden_size=clf_hidden_size,
)
model = IFE.IFENetClassifier(**ife_params)

# Call the model once on a random batch (presumably to create its weights before
# compile/fit -- confirm against IFENetClassifier); the result is the cell output.
input_data = tf.random.normal((batch_size, n_features))
model(input_data)

n_classes: 7
n_features: 54


<tf.Tensor: shape=(2048, 7), dtype=float32, numpy=
array([[0.05578536, 0.06902854, 0.09311974, ..., 0.20309812, 0.23704173,
        0.24767171],
       [0.14953183, 0.11989626, 0.1103613 , ..., 0.17568758, 0.12103122,
        0.25354376],
       [0.114999  , 0.08090202, 0.09831442, ..., 0.18825532, 0.2860886 ,
        0.14139229],
       ...,
       [0.08044065, 0.10640021, 0.07542013, ..., 0.2559447 , 0.2230173 ,
        0.18521957],
       [0.07555072, 0.11363897, 0.11424036, ..., 0.14962895, 0.10252135,
        0.37869936],
       [0.17106344, 0.07528776, 0.13500437, ..., 0.1920265 , 0.10672691,
        0.25328627]], dtype=float32)>

In [10]:
# Disabled (wrapped in a string literal, so nothing here runs): an Optuna search over a
# small dense baseline model, tuning the hidden layer size and the learning rate.
# NOTE(review) before re-enabling:
#   - create_model() passes `drop_rate` to build_model(), but `drop_rate` is not defined
#     in this cell (the only suggest call mentioning it is commented out).
#   - objective() returns the accuracy from model_opt.evaluate(test_ds), i.e. the
#     search is scored on the test split rather than on vald_ds.
'''
import optuna
from tqdm.keras import TqdmCallback

def build_model(batch_size=None, hidden_units=None, drop_rate=None, input_shape=None, output_units=None):
#def build_model(ife_params=None):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(batch_size, input_shape=(input_shape,), activation='relu'))
    model.add(tf.keras.layers.Dense(hidden_units, activation='relu'))
    model.add(tf.keras.layers.Dropout(drop_rate))
    model.add(tf.keras.layers.Dense(output_units, activation='softmax'))
    # model = IFE.IFENetClassifier(**ife_params)
    return model

# utility function to create model trials
def create_model(trial):
    # We optimize the numbers of layers, their units and learning rates 
    #n_layers = trial.suggest_int("n_layers", 1, 5)
    hidden_units = trial.suggest_int("hidden_size", 64,68)
    #num_att = trial.suggest_int('num_att', 8, 24)
    #r = trial.suggest_float('drop_rate', 2.0, 7.0)
    #clf_hidden_size = trial.suggest_int('clf_hidden_size', 64, 92)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 0.02)

    ife_params = {'n_features': n_features,
                  'n_classes': n_classes,
                  'num_att': num_att,
                  'r': r,
                  'ife_num_layers': ife_num_layers, 
                  'clf_hidden_size': clf_hidden_size,              
                 }

    model = build_model(batch_size=batch_size, hidden_units=hidden_units, drop_rate=drop_rate, input_shape=n_features, output_units=n_classes)
    #model = build_model(ife_params=ife_params)
    
    # compile the model
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=['accuracy'])
    
    return model

# Objective function
def objective(trial):
    
    # instantiate model
    model_opt = create_model(trial)
    
    # fit the model
    model_opt.fit(train_ds, validation_data=vald_ds, epochs=epochs, verbose=0)
    
    # calculate accuracy score
    acc_score = model_opt.evaluate(test_ds, verbose=0)[1]
    
    return acc_score

epochs = 10
n_trials = 25
# perform the optimization
study = optuna.create_study(direction="maximize", study_name="baseline model optimization")
study.optimize(objective, n_trials=n_trials, n_jobs=6, show_progress_bar=True)
'''

'\nimport optuna\nfrom tqdm.keras import TqdmCallback\n\ndef build_model(batch_size=None, hidden_units=None, drop_rate=None, input_shape=None, output_units=None):\n#def build_model(ife_params=None):\n    model = tf.keras.models.Sequential()\n    model.add(tf.keras.layers.Dense(batch_size, input_shape=(input_shape,), activation=\'relu\'))\n    model.add(tf.keras.layers.Dense(hidden_units, activation=\'relu\'))\n    model.add(tf.keras.layers.Dropout(drop_rate))\n    model.add(tf.keras.layers.Dense(output_units, activation=\'softmax\'))\n    # model = IFE.IFENetClassifier(**ife_params)\n    return model\n\n# utility function to create model trials\ndef create_model(trial):\n    # We optimize the numbers of layers, their units and learning rates \n    #n_layers = trial.suggest_int("n_layers", 1, 5)\n    hidden_units = trial.suggest_int("hidden_size", 64,68)\n    #num_att = trial.suggest_int(\'num_att\', 8, 24)\n    #r = trial.suggest_float(\'drop_rate\', 2.0, 7.0)\n    #clf_hidden_size = t

In [11]:
#inputs = tf.keras.Input(shape=(n_features,))
#outputs = ifenet(inputs)
#model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
#model.summary()

In [12]:
# Loss for class-index (sparse) labels.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

# Adam with a staircase exponential decay: the learning rate is multiplied by
# 0.95 after every 2000 optimizer steps.
lr = 0.005
lr_scheduler = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=lr, 
                                                              decay_steps=2000,
                                                              decay_rate=0.95,
                                                              staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)

checkpoint_path = 'checkpoints/ifeNet_cover.h5'
patience = 100
callbacks = [
    # NOTE: with patience (100) larger than epochs (10) this callback cannot trigger;
    # it only becomes active if `epochs` is raised above `patience`.
    tf.keras.callbacks.EarlyStopping(patience=patience, monitor='val_loss'),
    # save_best_only=True is what makes `monitor` take effect. Without it the file is
    # overwritten after every epoch and ends up holding the last epoch's weights, so
    # loading the checkpoint afterwards would not restore the best model.
    tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                       save_weights_only=True,
                                       monitor='val_accuracy',
                                       mode='max',
                                       save_best_only=True),
]

epochs = 10
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [13]:
saved_model_path = 'saved_model/ifeNet_cover.h5'

# save_weights() to an .h5 path does not create missing parent directories, so make
# sure the output directory exists before the (long) training run starts.
saved_model_dir = os.path.dirname(saved_model_path)
if saved_model_dir:
    os.makedirs(saved_model_dir, exist_ok=True)

model.fit(train_ds, validation_data=vald_ds, epochs=epochs, callbacks=callbacks, verbose=2)

# Reload the weights written by the ModelCheckpoint callback, then export them to the
# final location.
model.load_weights(checkpoint_path)
model.save_weights(saved_model_path)

Epoch 1/10
179/179 - 184s - loss: 0.6886 - accuracy: 0.7134 - val_loss: 2.0565 - val_accuracy: 0.2546 - 184s/epoch - 1s/step
Epoch 2/10


KeyboardInterrupt: 

In [None]:
# Collect per-batch predictions and labels in lists and concatenate once at the end;
# growing an array with np.append inside the loop re-copies it for every batch.
pred_batches = []
label_batches = []

for data, label in test_ds:
    y_hat = model(data)
    y_hat = np.argmax(y_hat, axis=-1)  # predicted class index per example
    pred_batches.append(y_hat.ravel())

    label = label.numpy()
    label_batches.append(label.ravel())

# Both arrays are float64 so predictions and labels share one dtype for the metrics.
# NOTE: this rebinds `y_test`; the values are the labels stored in test_ds, which was
# built with shuffle=False.
y_pred = np.concatenate(pred_batches).astype(np.float64) if pred_batches else np.empty((0,))
y_test = np.concatenate(label_batches).astype(np.float64) if label_batches else np.empty((0,))

In [None]:
# Print accuracy, the confusion matrix and the per-class report, in that order.
for metric_fn in (accuracy_score, confusion_matrix, classification_report):
    print(metric_fn(y_test, y_pred))

In [None]:
# Print each column name next to the matching entry of the model's input scores.
# zip() stops at the shorter sequence; note that `columns` also ends with the
# target name.
feat_scores = model.input_scores
for name_and_score in zip(columns, feat_scores):
    col, score = name_and_score
    print(f'{col}: {score}')