In [None]:

import torch
# Report whether a CUDA GPU is visible to PyTorch. The compute capability is
# only queried when CUDA is available, because that query raises on a
# CPU-only runtime and would abort a Run All.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_capability())
else:
    print('No CUDA device available; skipping compute capability query.')

In [None]:
# Colab magic: select the TensorFlow 2.x runtime before TensorFlow is imported.
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [None]:
import tensorflow as tf
# Show the TensorFlow version actually loaded by this runtime.
tf_version = tf.__version__
print(tf_version)

2.1.0


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files under "My Drive" can be copied in later cells.
drive.mount('/content/drive')

In [None]:
# Copy the IDA.zip archive from the mounted Drive into /content/.
!cp "/content/drive/My Drive/koulu_honmia/kandi19/IDA.zip" /content/

In [None]:
# Copy the loadbm.py helper module into /content/ so it can be imported below.
!cp "/content/drive/My Drive/koulu_honmia/kandi19/benthic/loadbm.py" /content/

In [None]:
%%capture
# Extract the archive into the working directory; %%capture suppresses the cell's output.
!unzip IDA.zip

In [None]:
# Delete the local copy of the archive now that it has been extracted.
!rm IDA.zip

In [None]:
import numpy as np
import os
import ntpath
import platform
import matplotlib.pyplot as plt
from loadbm import create_df, create_tf_dataset, prepare_for_training


# Locations of the split definition files and of the image root inside the
# extracted IDA archive.
datapath = 'IDA/Separate lists with numbering/Machine learning splits'
img_path = 'IDA/Images/'

# Index of the numbered train/test/val split files to use.
split = 1

train_fname = 'train'+str(split)+'.txt'
test_fname = 'test'+str(split)+'.txt'
val_fname = 'val'+str(split)+'.txt'

# Forwarded to create_df as `partial_dataset`; presumably selects a subset of
# the data for quick experiments — TODO confirm against loadbm.create_df.
part_dat = False

# Single seed shared by create_df and the shuffles below so that a
# Restart & Run All reproduces the same row order.
SEED = 123


def _load_split(fname):
    """Build the (path, label) dataframe for one split file located in `datapath`."""
    return create_df(os.path.join(datapath, fname),
                     img_path,
                     partial_dataset=part_dat,
                     seed=SEED)


df_train = _load_split(train_fname)
df_test = _load_split(test_fname)
df_val = _load_split(val_fname)

from sklearn.utils import shuffle

# Shuffle the train and validation rows with a fixed random_state (previously
# unseeded, so every run produced a different order). df_test is deliberately
# left unshuffled: the evaluation cells compare predictions with
# df_test['label'] row by row.
df_train = shuffle(df_train, random_state=SEED)
df_val = shuffle(df_val, random_state=SEED)

df_train.head()

Unnamed: 0,path,label
313821,IDA/Images/Taeniopteryx_neb191/0-Taeniopteryx_...,39
37881,IDA/Images/Baetis_rho77/0-Baetis_rho77.1.2016-...,4
18349,IDA/Images/Amphinemura_bor211/1-Amphinemura_bo...,3
265095,IDA/Images/Polycentropus_fla82/1-Polycentropus...,31
184933,IDA/Images/Limnius_vol113/0-Limnius_vol113.1.2...,22


In [None]:
# Input pipeline configuration.
AUTOTUNE = tf.data.experimental.AUTOTUNE
IMSIZE = (224, 224, 3)
BATCH_SIZE = 32

# Options shared by the training and validation pipelines.
pipeline_kwargs = dict(shuffle_buffer_size=1000, batch_size=BATCH_SIZE)

# Build the raw datasets from the dataframes (labels one-hot encoded) ...
train_ds = create_tf_dataset(df_train, imsize=IMSIZE, onehot=True)
val_ds = create_tf_dataset(df_val, imsize=IMSIZE, onehot=True)

# ... then hand both to loadbm.prepare_for_training with the shared options.
train_ds = prepare_for_training(train_ds, **pipeline_kwargs)
val_ds = prepare_for_training(val_ds, **pipeline_kwargs)

# Sanity check: print the image and label batch shapes of the first five batches.
for images, labels in train_ds.take(5):
    print(images.shape)
    print(labels.shape)


(32, 224, 224, 3)
(32, 39)
(32, 224, 224, 3)
(32, 39)
(32, 224, 224, 3)
(32, 39)
(32, 224, 224, 3)
(32, 39)
(32, 224, 224, 3)
(32, 39)


In [None]:
import tensorflow.keras.backend as K

def weighted_crossentropy(alpha=1.0):
    """Create a class-weighted categorical cross-entropy loss.

    Args:
        alpha: Scalar or per-class weight vector multiplied into every
            class term of the cross-entropy before summing.

    Returns:
        A function ``loss(y, yhat)`` that returns one loss value per sample
        (the weighted terms are summed over axis 1).
    """

    def loss(y, yhat):
        # Bring targets and predictions to a common float32 dtype.
        y_true = K.cast(y, tf.float32)
        y_pred = tf.convert_to_tensor(yhat, dtype=y_true.dtype)

        # The small constant keeps log() finite when a predicted probability is 0.
        log_probs = K.log(y_pred + 1e-7)

        return -K.sum(y_true * log_probs * alpha, axis=1)

    return loss

# Per-class loss weights: 1 - (relative class frequency in the training
# split), so rarer classes get weights closer to 1.
# Labels look 1-based (predictions are mapped back with argmax + 1 in the
# evaluation cell), so they are shifted to 0-based before counting — TODO
# confirm against loadbm.create_df.
# bincount with minlength keeps exactly one slot per class even when a class
# is missing from df_train; np.histogram(bins=39) derives its bin edges from
# the observed min/max and would silently misalign the weights in that case.
n_classes = 39
class_counts = np.bincount(df_train['label'].values.astype(np.int64) - 1,
                           minlength=n_classes)
alpha = 1-(class_counts/np.sum(class_counts))

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D


def get_pretrained(imsize=(224, 224, 3), classes=39):
    """Build an ImageNet-pretrained InceptionV3 classifier with a new head.

    Args:
        imsize: Input image shape passed to InceptionV3 as ``input_shape``.
        classes: Number of units in the final softmax layer.

    Returns:
        A Keras ``Model`` mapping the InceptionV3 input to the softmax output.
        The backbone is left trainable, so all weights are fine-tuned.
    """
    backbone = InceptionV3(include_top=False,
                           weights='imagenet',
                           input_shape=imsize)
    backbone.trainable = True

    # New head: global average pooling -> 256-unit ReLU layer -> softmax.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(256, activation='relu')(pooled)
    class_probs = Dense(classes, activation='softmax')(hidden)

    return Model(inputs=backbone.input, outputs=class_probs)

# Instantiate the network with its default input shape and class count.
model = get_pretrained()

# Class-weighted cross-entropy using the per-class weights in `alpha`.
loss = weighted_crossentropy(alpha=alpha)

# First training stage uses the 'adam' optimizer with its default settings.
model.compile(
    loss=loss,
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
from tensorflow.keras.callbacks import CSVLogger
import datetime


# Number of full batches in the training and validation dataframes.
tr_steps, val_steps = (len(frame) // BATCH_SIZE for frame in (df_train, df_val))

# First training stage: 10 epochs, validating after each epoch.
model.fit(
    train_ds,
    epochs=10,
    steps_per_epoch=tr_steps,
    validation_data=val_ds,
    validation_steps=val_steps,
)


Train for 10043 steps, validate for 1434 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f106df59940>

In [None]:
from tensorflow.keras.optimizers import Adam
# Optimizer for the fine-tuning stage, with a learning rate of 1e-4.
adam = Adam(learning_rate=1e-4)  # original 0.001

from tensorflow.keras.callbacks import LearningRateScheduler
def scheduler(epoch):
    """Return the learning rate for `epoch`: 1e-4 decayed by exp(-0.1 * epoch).

    The result is a plain Python float. The previous version returned a
    TensorFlow tensor, which the Keras LearningRateScheduler callback does
    not accept in every version (its contract is a float).

    Args:
        epoch: Zero-based epoch index supplied by the callback.

    Returns:
        The learning rate to use for that epoch, as a float.
    """
    import math  # local import keeps this cell self-contained

    return 0.0001 * math.exp(-0.1 * epoch)

# Callback that applies scheduler() to obtain each epoch's learning rate.
lr_cb = LearningRateScheduler(scheduler)

# Re-compile with the lower-learning-rate Adam optimizer; same loss and metric.
model.compile(
    loss=loss,
    optimizer=adam,
    metrics=['accuracy'],
)

# Second (fine-tuning) stage: 5 more epochs with the decaying learning rate.
model.fit(
    train_ds,
    epochs=5,
    steps_per_epoch=tr_steps,
    validation_data=val_ds,
    validation_steps=val_steps,
    callbacks=[lr_cb],
)

Train for 10043 steps, validate for 1434 steps
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f106a577550>

In [None]:
# Per-epoch metrics (loss, accuracy, lr and their validation counterparts) of the latest fit.
model.history.history

{'accuracy': [0.9992843, 0.99980086, 0.9998662, 0.9999191, 0.9999191],
 'loss': [0.0022917723968919447,
  0.0006465482551303102,
  0.00042399666984836807,
  0.000312004303457951,
  0.000207307769795357],
 'lr': [1e-04, 9.0483736e-05, 8.187308e-05, 7.4081814e-05, 6.7032e-05],
 'val_accuracy': [0.9242068, 0.9254053, 0.9271487, 0.929742, 0.9319212],
 'val_loss': [0.43026168601172565,
  0.4725120326437556,
  0.4718519926186389,
  0.44853310993689793,
  0.4439209270419437]}

In [None]:
# Save the trained model to an .h5 file in the working directory.
model.save('34-xx-2020.h5')

In [None]:
# Copy the saved model file to the project folder on the mounted Drive.
!cp '34-xx-2020.h5' "/content/drive/My Drive/koulu_honmia/kandi19/"

Evaluation:

In [None]:
# Test pipeline: batched only, without prepare_for_training. Presumably this
# keeps the df_test row order, which the accuracy computation relies on —
# TODO confirm against loadbm.create_tf_dataset.
test_ds = create_tf_dataset(
    df_test,
    imsize=IMSIZE,
    onehot=True,
).batch(BATCH_SIZE)

In [None]:
# Copy the combine_insects.py helper module into /content/ so it can be imported below.
!cp "/content/drive/My Drive/koulu_honmia/kandi19/benthic/combine_insects.py" /content/

In [None]:
# Class probabilities for every test image.
preds = model.predict(test_ds, verbose=True)

# argmax yields 0-based class indices; +1 maps them onto the label values
# stored in df_test (presumably 1-based — TODO confirm).
yhat = preds.argmax(axis=1) + 1
y_test = df_test['label']

# Image-level accuracy: fraction of test images whose prediction equals the label.
n_correct = np.sum(yhat == y_test)
acc = n_correct / len(y_test)
print('Image accuracy: {:.4f}'.format(acc))

# Keep the raw probabilities for later analysis.
np.save('34-xx-2020.npy', preds)

Image accuracy: 0.9281


In [None]:
# Copy the saved prediction array to the project folder on the mounted Drive.
!cp '34-xx-2020.npy' "/content/drive/My Drive/koulu_honmia/kandi19/"

In [None]:
#%% Insect combine
from combine_insects import add_insect_class, add_yhat

def _most_frequent(values):
    """Return the value that occurs most often in `values` (first entry of value_counts)."""
    return values.value_counts().index[0]


# Attach an insect identifier and then the image-level predictions to the test
# dataframe (presumably as the 'insect' and 'pred' columns used below —
# TODO confirm against combine_insects).
df_test_preds = add_yhat(add_insect_class(df_test), yhat)

# One row per (label, insect) group, holding the group's most frequent prediction.
dfg = (
    df_test_preds
    .groupby(['label', 'insect'], as_index=False)['pred']
    .agg(_most_frequent)
)

# Insect-level accuracy: fraction of groups whose majority prediction equals the label.
n_groups_correct = np.sum(dfg['pred'] == dfg['label'])
acc_g = n_groups_correct / len(dfg)
print('Aggregate accuracy: {:.4f}'.format(acc_g))

Aggregate accuracy: 0.9601


In [None]:
# Training history displayed again; an identical cell appears before the model is saved.
model.history.history

{'accuracy': [0.9992843, 0.99980086, 0.9998662, 0.9999191, 0.9999191],
 'loss': [0.0022917723968919447,
  0.0006465482551303102,
  0.00042399666984836807,
  0.000312004303457951,
  0.000207307769795357],
 'lr': [1e-04, 9.0483736e-05, 8.187308e-05, 7.4081814e-05, 6.7032e-05],
 'val_accuracy': [0.9242068, 0.9254053, 0.9271487, 0.929742, 0.9319212],
 'val_loss': [0.43026168601172565,
  0.4725120326437556,
  0.4718519926186389,
  0.44853310993689793,
  0.4439209270419437]}