In [17]:
import pandas as pd
from models import base_res_net, small_res_net, efficient_net, pretrained_mobilenet, big_model
import tensorflow as tf
import tensorflow.compat.v1 as tfc
from sklearn.utils.class_weight import compute_class_weight

from modeling import predict
from src import InputPipeline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Check GPU for tf

In [2]:
# Some GPU setup
# for documentation about using gpus refer to: https://www.tensorflow.org/install/pip#windows-wsl2

tf.keras.backend.clear_session()

device_name = tf.test.gpu_device_name()
if not device_name:
  raise SystemError('GPU device not found')
try:  # prevent a previous session from being alive
  sess.close() 
except:
  pass

tfc.enable_eager_execution()
gpu_options= tfc.GPUOptions(per_process_gpu_memory_fraction = 0.8)
sess = tfc.InteractiveSession(config=tfc.ConfigProto(gpu_options=gpu_options))

SystemError: GPU device not found

### Optional Stuff
- Here we compute classweights since the data is very imbalanced

In [3]:
train_df = pd.read_csv("../data/train_images_stratified.csv")

In [4]:
class_weights = compute_class_weight(class_weight = 'balanced', classes = train_df["label"].unique(), y=train_df["label"])
class_weights = dict(enumerate(class_weights))

## Configure Parameters

In [11]:
INPUT_SHAPE = (224,224,3)

CONF = {
  "learning_rate": 0.00001,
  "batch_size": 16,
  "epochs": 2,
  "loss_function": "sparse_categorical_crossentropy",
  "metric": "categorical_accuracy",
}

## Make Input Pipelines

In [7]:
# Input pipeline for subspecies
sub_species_input_pipeline = InputPipeline(splits=(0.85,0.0,0.15), channels=3, batch_size=CONF["batch_size"], size=INPUT_SHAPE[:2])
# This has a stratified split
sub_species_input_pipeline.make_stratified_train_dataset(
  train_ds_path= "../data/train_ds_images_stratified.csv",
  val_ds_path="../data/val_ds_images_stratified.csv",
)
"""
# Input pipeline for species
species_input_pipeline = InputPipeline(splits=(0.85,0.0,0.15), channels=3, batch_size=CONF["batch_size"], size=INPUT_SHAPE[:2])
species_input_pipeline.make_train_datasets(directory="../data/train_images/species_classify")  # This doesnt have a stratified split
"""

Datasets populated!


'\n# Input pipeline for species\nspecies_input_pipeline = InputPipeline(splits=(0.85,0.0,0.15), channels=3, batch_size=CONF["batch_size"], size=INPUT_SHAPE[:2])\nspecies_input_pipeline.make_train_datasets(directory="../data/train_images/species_classify")  # This doesnt have a stratified split\n'

In [8]:
# Cache and prefetch datasets for faster access
sub_species_cached_train, sub_species_cached_val = sub_species_input_pipeline.get_cached_train_datasets()
#species_cached_train, species_cached_val = species_input_pipeline.get_cached_train_datasets()

## Training a Model

In [12]:
from modeling import train_classifier

In [13]:
# making the species classifier
train_classifier(
  model_name="../classifiers/subspecies_effnet_classifier",
  input_shape=INPUT_SHAPE,
  classes_to_classify=200,
  configuration=CONF,
  model=efficient_net, 
  train_dataset=sub_species_cached_train,
  validation_dataset=sub_species_cached_val,
)

Epoch 1/2
Epoch 2/2




INFO:tensorflow:Assets written to: ../classifiers/subspecies_effnet_classifier\assets


INFO:tensorflow:Assets written to: ../classifiers/subspecies_effnet_classifier\assets


Model saved successfully under: ../classifiers/subspecies_effnet_classifier


In [None]:
# making the species classifier
train_classifier(
  model_name="../classifiers/species_efficient_net_classifier",
  input_shape=INPUT_SHAPE,
  classes_to_classify=70,
  configuration=CONF,
  model=big_model, 
  train_dataset=species_cached_train,
  validation_dataset=species_cached_val,
)

## Predict Stuff

In [20]:
from modeling import predict
import pickle

In [22]:
# Load mapping for stacked classifiers
with open("mapping.pickle", "rb+") as f:
  mapping = pickle.load(f)

In [14]:
# Make Train Dataset
sub_species_input_pipeline.make_test_dataset(directory="../data/test_images/test_images")
test_cached = sub_species_input_pipeline.get_cached_test_datasets()

Found 4000 files belonging to 1 classes.
Datasets populated!


In [24]:
predict(
  species_classifier=None,
  subspecies_classifier="../classifiers/subspecies_effnet_classifier",
  dataset=test_cached,
  species_subspecies_dict=mapping
)



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,0.002896,0.001725,0.001646,0.005043,0.006721,0.001839,0.005976,0.006114,0.004897,0.002643,...,0.006666,0.006774,0.002534,0.004331,0.003049,0.002480,0.006581,0.007452,0.005033,0.011459
1,0.002560,0.005014,0.002973,0.004587,0.002739,0.003644,0.004132,0.002953,0.022674,0.000939,...,0.006132,0.009821,0.001731,0.001797,0.001024,0.001706,0.001251,0.004615,0.002908,0.003115
2,0.005217,0.001772,0.000972,0.003331,0.008229,0.002768,0.006206,0.001800,0.013020,0.006092,...,0.004286,0.001819,0.003178,0.000917,0.002992,0.002622,0.003233,0.007683,0.001933,0.007153
3,0.003193,0.000751,0.001080,0.002853,0.006323,0.003186,0.002718,0.001565,0.005973,0.000474,...,0.009828,0.006564,0.009180,0.003265,0.002168,0.001795,0.002610,0.005230,0.004331,0.006702
4,0.003500,0.005102,0.003022,0.013020,0.002236,0.001699,0.003377,0.001652,0.036056,0.000874,...,0.002431,0.017416,0.003082,0.003119,0.004669,0.005726,0.001684,0.001725,0.001460,0.002081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,0.008739,0.002381,0.000551,0.004477,0.005560,0.001382,0.006818,0.003654,0.025638,0.000803,...,0.001228,0.003919,0.004611,0.002136,0.002614,0.002539,0.002369,0.008829,0.003108,0.006769
3996,0.024270,0.008316,0.004346,0.004327,0.024498,0.000888,0.003104,0.003490,0.014565,0.001217,...,0.004585,0.003573,0.001733,0.003865,0.001526,0.004191,0.002437,0.000690,0.000938,0.001715
3997,0.003134,0.001473,0.001195,0.000869,0.000641,0.001476,0.001716,0.004564,0.018126,0.001961,...,0.003859,0.006803,0.001627,0.001599,0.002060,0.001694,0.004685,0.007818,0.004010,0.012787
3998,0.006810,0.006160,0.004645,0.004745,0.004541,0.004944,0.003430,0.003855,0.016416,0.002063,...,0.003911,0.008960,0.001482,0.001569,0.001972,0.001983,0.000809,0.009476,0.002103,0.006041


In [None]:
# TF predicts labels from 0-199, however we need 1-200: Therefore we need to adjust the predictions.
adj_df_src = "../data/test_images_sample_1701013215.0550277.csv"
pred_df = pd.read_csv(adj_df_src, index_col=0)
pred_df["label"] += 1
pred_df.to_csv(adj_df_src)