In [1]:
import pandas as pd
from models import base_res_net, small_res_net, efficient_net, pretrained_mobilenet
import tensorflow as tf
import tensorflow.compat.v1 as tfc
from sklearn.utils.class_weight import compute_class_weight

from src import InputPipeline

%load_ext autoreload
%autoreload 2

## Check GPU for tf

In [10]:
# GPU setup.
# For documentation about using GPUs refer to: https://www.tensorflow.org/install/pip#windows-wsl2

# Reset Keras state left over from earlier runs of this notebook.
tf.keras.backend.clear_session()

# Fail fast when TensorFlow reports no GPU device.
device_name = tf.test.gpu_device_name()
if not device_name:
  raise SystemError('GPU device not found')

# Close the session left behind by a previous run of this cell, if there is one.
try:
  sess.close()
except NameError:
  # First run in this kernel: `sess` does not exist yet, nothing to close.
  pass
except Exception as exc:
  # Best effort only: a stale session that fails to close should not block the
  # setup, but the failure is reported instead of being silently swallowed.
  print(f"Could not close previous session: {exc!r}")

tfc.enable_eager_execution()
# Limit this process to a 0.8 fraction of GPU memory.
gpu_options = tfc.GPUOptions(per_process_gpu_memory_fraction=0.8)
sess = tfc.InteractiveSession(config=tfc.ConfigProto(gpu_options=gpu_options))

### Class Weights (Optional)

In [11]:
# Training-image table; its "label" column is used to derive the class weights.
train_df = pd.read_csv(filepath_or_buffer="../data/train_images_stratified.csv")

In [12]:
# Balanced per-class weights for the training labels.
# `compute_class_weight` returns one weight per entry of `classes`, in the same
# order. `Series.unique()` on its own yields first-appearance order, so the
# labels are sorted first: the index produced by `enumerate` then refers to the
# i-th label in sorted order instead of whichever label appeared i-th in the CSV.
# NOTE(review): assumes the model's class indices follow sorted label order —
# confirm against how InputPipeline encodes labels.
label_classes = train_df["label"].sort_values().unique()
balanced_weights = compute_class_weight(class_weight='balanced', classes=label_classes, y=train_df["label"])
# Mapping {class index: weight}, passed as `class_weights` to the subspecies training call.
class_weights = dict(enumerate(balanced_weights))

## Configure Parameters

In [13]:
# Model input shape; the first two entries are reused as the pipeline image size.
INPUT_SHAPE = (150, 150, 3)

# Training settings handed to `train_classifier` as `configuration`.
CONF = dict(
  learning_rate=5e-5,
  batch_size=16,
  epochs=50,
  loss_function="sparse_categorical_crossentropy",
  metric="accuracy",
)

## Make Input Pipelines

In [24]:
# Both pipelines are constructed with the same split, channel count, batch size and image size.
_pipeline_kwargs = dict(
  splits=(0.85,0.0,0.15),
  channels=3,
  batch_size=CONF["batch_size"],
  size=INPUT_SHAPE[:2],
)

# Subspecies pipeline: populated from the pre-split stratified CSV files.
sub_species_input_pipeline = InputPipeline(**_pipeline_kwargs)
sub_species_input_pipeline.make_stratified_train_dataset(
  train_ds_path="../data/train_ds_images_stratified.csv",
  val_ds_path="../data/val_ds_images_stratified.csv",
)

# Species pipeline: populated from an image directory.
species_input_pipeline = InputPipeline(**_pipeline_kwargs)
species_input_pipeline.make_train_datasets(
  directory="../data/train_images/species_classify",
)

Found 7852 files belonging to 70 classes.
Using 6675 files for training.
Using 1177 files for validation.
Datasets populated!


In [25]:
# Unpack the (train, validation) pair returned by each pipeline.
(sub_species_cached_train,
 sub_species_cached_val) = sub_species_input_pipeline.get_cached_train_datasets()
(species_cached_train,
 species_cached_val) = species_input_pipeline.get_cached_train_datasets()

## Training a Model

In [30]:
from modeling import train_classifier

In [None]:
# Train the subspecies classifier (200 classes) on the subspecies datasets,
# weighting classes with `class_weights`.
# NOTE(review): model_name here says "efficientnet", while the predict call in
# this notebook passes "subspecies_pretrained_mobinet_classifier" — confirm
# which saved model prediction is meant to use.
train_classifier(
  # what to build
  model=efficient_net,
  input_shape=INPUT_SHAPE,
  classes_to_classify=200,
  # how to train it
  configuration=CONF,
  class_weights=class_weights,
  train_dataset=sub_species_cached_train,
  validation_dataset=sub_species_cached_val,
  # name passed through to train_classifier
  model_name="../classifiers/subspecies_pretrained_efficientnet_classifier",
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

In [None]:
# Train the species classifier (70 classes) on the species datasets.
# No class weights are passed for this model.
# NOTE(review): model_name has no "../" prefix, unlike the subspecies training
# call — confirm the intended working directory / output location.
train_classifier(
  # what to build
  model=pretrained_mobilenet,
  input_shape=INPUT_SHAPE,
  classes_to_classify=70,
  # how to train it
  configuration=CONF,
  train_dataset=species_cached_train,
  validation_dataset=species_cached_val,
  # name passed through to train_classifier
  model_name="classifiers/species_pretrained_mobinet_classifier",
)

## Run Predictions

In [14]:
from modeling import predict
import pickle

In [15]:
# Load the mapping that is later passed to `predict` as `species_subspecies_dict`.
# The file is only read, so it is opened read-only ("rb") rather than "rb+",
# which would also request write access.
# NOTE: `pickle.load` can execute arbitrary code — only load a mapping file
# that comes from a trusted source.
with open("mapping.pickle", "rb") as f:
  mapping = pickle.load(f)

In [16]:
# Populate the test dataset on the subspecies pipeline, then fetch it for prediction.
# NOTE(review): this path has no "../" prefix, unlike the training data paths —
# confirm the working directory this cell is expected to run from.
sub_species_input_pipeline.make_test_dataset(
  directory="data/test_images/test_images",
)
test_cached = sub_species_input_pipeline.get_cached_test_datasets()

Found 4000 files belonging to 1 classes.


In [18]:
# Run prediction on the test dataset with both classifiers and the loaded mapping.
predict(
  dataset=test_cached,
  species_subspecies_dict=mapping,
  species_classifier="species_pretrained_mobinet_classifier",
  subspecies_classifier="subspecies_pretrained_mobinet_classifier",
)



combining results: 100%|██████████| 4000/4000 [05:25<00:00, 12.30it/s]

Saving to: data/test_images_sample_1700690885.6570406.csv



