In [None]:
import pandas as pd
import tensorflow as tf
import tensorflow.compat.v1 as tfc
from sklearn.utils.class_weight import compute_class_weight
from tensorflow import keras
from modeling import finetune_classifier

from modeling import stacking_from_csv, predict_from_csv
import pickle
from src import InputPipeline, make_finetune_curves

%load_ext autoreload
%autoreload 2

## Check GPU availability for TensorFlow

In [None]:
# Some GPU setup
# for documentation about using gpus refer to: https://www.tensorflow.org/install/pip#windows-wsl2

# Reset the global Keras state left over from earlier runs of this notebook.
tf.keras.backend.clear_session()

# gpu_device_name() returns an empty string when no GPU is visible; fail fast.
device_name = tf.test.gpu_device_name()
if not device_name:
  raise SystemError('GPU device not found')

# Prevent a session created by a previous run of this cell from staying alive.
try:
  sess.close()
except NameError:
  # First run in this kernel: `sess` has not been created yet.
  pass
except Exception as close_error:
  # Still best effort, but report the problem instead of hiding it. Unlike a
  # bare `except:`, this no longer swallows KeyboardInterrupt / SystemExit.
  print(f"Could not close the previous TensorFlow session: {close_error!r}")

tfc.enable_eager_execution()
# Request at most 90% of the GPU memory for this process.
# NOTE(review): the GPU was already queried above, so this limit may be applied
# too late to take effect - confirm with nvidia-smi.
gpu_options = tfc.GPUOptions(per_process_gpu_memory_fraction=0.90)
sess = tfc.InteractiveSession(config=tfc.ConfigProto(gpu_options=gpu_options))

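### Class Weights
- Here we compute class weights since the data is very imbalanced. The resulting `class_weights` dictionary is passed to the species fine-tuning step below, so these cells must be run before it.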

In [None]:
# Training table used below to derive the class weights from its "label" column.
# NOTE(review): this is a different CSV from the one handed to the stratified
# input pipeline ("train_ds_images_stratified.csv") - confirm that is intended.
train_df = pd.read_csv(filepath_or_buffer="../data/train_images_stratified.csv")

In [None]:
# Series.unique() returns labels in order of first appearance, so enumerating
# the weights directly would tie each index to the row order of the CSV.
# Sorting the labels first makes index i always refer to the i-th label in
# sorted order, independent of how the file happens to be shuffled.
# NOTE(review): assumes the model's class indices follow sorted label order
# (true for integer labels 0..n-1) - confirm against the input pipeline.
label_classes = train_df["label"].drop_duplicates().sort_values().to_numpy()

# "balanced" weights are n_samples / (n_classes * count(class)), returned in
# the same order as `label_classes`.
balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=label_classes,
    y=train_df["label"],
)

# Mapping {class index: weight}, passed to finetune_classifier further down.
class_weights = dict(enumerate(balanced_weights))

## Configure Parameters

In [None]:
# Image shape as (height/width, height/width, channels); the input pipelines
# below use only the first two entries.
INPUT_SHAPE = (220, 220, 3)

# Hyper-parameters shared by every fine-tuning run in this notebook.
CONF = dict(
  learning_rate=1e-4,
  batch_size=18,
  epochs=5,
  loss_function="sparse_categorical_crossentropy",
  metric="sparse_categorical_accuracy",
)

## Make Input Pipelines

In [None]:
# Sub-species pipeline: train/validation data come from pre-computed
# stratified CSV files.
sub_species_input_pipeline = InputPipeline(
  splits=(0.8, 0.0, 0.2),
  channels=3,
  batch_size=CONF["batch_size"],
  size=INPUT_SHAPE[:2],
)
sub_species_input_pipeline.make_stratified_train_dataset(
  train_ds_path="../data/train_ds_images_stratified.csv",
  val_ds_path="../data/val_ds_images_stratified.csv",
)


# Species pipeline: datasets are built from an image directory, so this split
# is not stratified.
species_input_pipeline = InputPipeline(
  splits=(0.85, 0.0, 0.15),
  channels=3,
  batch_size=CONF["batch_size"],
  size=INPUT_SHAPE[:2],
)
species_input_pipeline.make_train_datasets(
  directory="../data/train_images/species_classify",
)


# Finetuning our models
## Species

In [None]:
# Fine-tune the species classifier on the stratified pipeline, using the class
# weights computed earlier to compensate for the label imbalance.
# NOTE(review): later cells read "../classifiers/species_effnet_tuned";
# presumably this call writes it - confirm in modeling.finetune_classifier.
finetune_classifier(
    model_path="../classifiers/species_effnet",
    configuration=CONF,
    train_dataset=sub_species_input_pipeline.train_dataset,
    validation_dataset=sub_species_input_pipeline.validation_dataset,
    class_weights=class_weights,
)

In [None]:
# Load the two pickled training histories of the species classifier.
# Security: pickle.load can execute arbitrary code, so only open history files
# produced by this project.
# NOTE(review): the "_tuned" history ends up in `first_hist` and the base
# history in `last_hist`; confirm this matches the argument order expected by
# make_finetune_curves.
with open("../classifiers/trainHistoryDict/species_effnet.pkl", "rb") as base_history_file:
    last_hist = pickle.load(base_history_file)
with open("../classifiers/trainHistoryDict/species_effnet_tuned.pkl", "rb") as tuned_history_file:
    first_hist = pickle.load(tuned_history_file)

make_finetune_curves(first_hist, last_hist)

In [None]:
# Run the tuned species classifier over the test images listed in the CSV.
# NOTE(review): `size` receives the full 3-tuple INPUT_SHAPE here, while the
# input pipelines use INPUT_SHAPE[:2] - confirm which form predict_from_csv expects.
predict_from_csv(
  classifier="../classifiers/species_effnet_tuned",
  path="../data/test_images",
  dataset="../data/test_images_path.csv",
  size=INPUT_SHAPE,
)

## Family Classifier

In [None]:
# Fine-tune the family classifier on the directory-based (non-stratified)
# pipeline; unlike the species run, no class weights are passed.
finetune_classifier(
    model_path="../classifiers/family_effnet",
    configuration=CONF,
    train_dataset=species_input_pipeline.train_dataset,
    validation_dataset=species_input_pipeline.validation_dataset,
)

In [None]:
# Load the two pickled training histories of the family classifier.
# Security: pickle.load can execute arbitrary code, so only open history files
# produced by this project.
# NOTE(review): the "_tuned" history ends up in `first_hist` and the base
# history in `last_hist`; confirm this matches the argument order expected by
# make_finetune_curves.
with open("../classifiers/trainHistoryDict/family_effnet.pkl", "rb") as base_history_file:
    last_hist = pickle.load(base_history_file)
with open("../classifiers/trainHistoryDict/family_effnet_tuned.pkl", "rb") as tuned_history_file:
    first_hist = pickle.load(tuned_history_file)

make_finetune_curves(first_hist, last_hist)

In [None]:
# Combine the tuned family (primary) and species (secondary) classifiers on
# the test images.
# NOTE(review): `weights` is presumably (primary, secondary) - confirm the
# order in modeling.stacking_from_csv.
stacking_from_csv(
    primary_classifier="../classifiers/family_effnet_tuned",
    secondary_classifier="../classifiers/species_effnet_tuned",
    weights=(0.6, 1.0),
    mapping="../mapping.pickle",
    dataset="../data/test_images_path.csv",
    path="../data/test_images",
    size=INPUT_SHAPE,
)