# Private Test Set

## Setup Environment

In [203]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier  
import xgboost
import lightgbm

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import seaborn as sns
import matplotlib.pyplot as plt

from functools import partial

import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation and runtime warnings
# raised by the libraries imported above; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Record the framework versions and GPU visibility alongside the results so the
# run environment can be reconstructed from the saved notebook.
print(f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {tf.keras.__version__}")
# True when TensorFlow lists at least one physical GPU device.
gpu = len(tf.config.list_physical_devices('GPU'))>0
print("GPU is", "available" if gpu else "NOT AVAILABLE")

Tensor Flow Version: 2.12.0
Keras Version: 2.12.0
GPU is available


In [205]:
# Notebook-wide settings.
# NOTE(review): the dataset cell further down passes batch_size=64 and
# image_size=512 as literals instead of reading batch_size / img_size from here,
# so changing these two values does not affect the private-test pipelines.
batch_size = 64
img_size = 112
# Selects the label granularity: False -> coarse labels, True -> fine labels.
fine_grain = False
# 50 fine classes vs. 8 coarse classes (matches the lengths of the name lists
# loaded from info.json and the Dense output sizes of the saved models).
num_classes = 50 if fine_grain else 8

# Dataset root; create_dataset_tf resolves "<split>.csv" and the image folders
# relative to this directory.
root = Path('public')

In [206]:
def load_target_names(path='public/info.json'):
    """Read the class-name lists from the dataset's JSON metadata file.

    Args:
        path: Location of the JSON file. It must contain the keys ``'fine'``
            and ``'coarse'``.

    Returns:
        A ``(fine, coarse)`` tuple holding the values stored under those two
        keys, in that order.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If either key is missing from the JSON document.
    """
    # JSON is UTF-8 by specification; pin the encoding so decoding does not
    # depend on the platform's locale default.
    with open(path, encoding='utf-8') as f:
        info = json.load(f)
    return info['fine'], info['coarse']

def load_image(file_path, image_size=112):
    """Load one JPEG from disk as a square float32 RGB tensor.

    Args:
        file_path: Path (string tensor or str) of the JPEG file to read.
        image_size: Side length, in pixels, of the square output image.

    Returns:
        A float32 tensor of shape ``(image_size, image_size, 3)``.
    """
    raw_bytes = tf.io.read_file(file_path)
    # Force three channels so every image comes out as RGB.
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Convert before resizing; per the TF docs, integer input is rescaled
    # into [0, 1] by convert_image_dtype.
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)
    return tf.image.resize(pixels, [image_size, image_size])

def create_dataset_tf(split, fine_grained, batch_size=32, image_size=112):
    """Build a batched ``tf.data`` pipeline of (image, label) pairs for one split.

    The split's manifest is read from ``root / f"{split}.csv"`` and must provide
    a ``filename`` column plus a ``fine`` or ``coarse`` label column. Images are
    loaded from ``root / split / filename`` with ``load_image``.

    Args:
        split: Name of the split; used both as the CSV stem and as the image
            sub-directory under ``root``.
        fine_grained: If true, use the ``fine`` column as labels, otherwise
            the ``coarse`` column.
        batch_size: Number of examples per batch.
        image_size: Side length passed through to ``load_image``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches. No
        shuffling is applied, so elements follow the CSV row order; callers
        that attach predictions back to the CSV by position rely on this.
    """
    df = pd.read_csv(root / f"{split}.csv")
    file_paths = df.filename.map(lambda fn: str(root / split / fn))
    labels = df.fine if fine_grained else df.coarse

    dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))
    # Decode/resize images in parallel instead of one at a time. map() is
    # deterministic by default, so the element order is still the CSV order.
    dataset = dataset.map(
        lambda x, y: (load_image(x, image_size=image_size), y),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    dataset = dataset.batch(batch_size)
    # Overlap input preparation with model execution.
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset


def create_dataset_sklearn(split, fine_grained, image_size=48, percent=0.1):
    """Materialise a fraction of a split as flat grayscale NumPy arrays.

    The split is read through ``create_dataset_tf`` in batches of 1000,
    converted to grayscale, and the first ``percent`` of the batches are
    flattened to one row per image.

    Args:
        split: Split name, forwarded to ``create_dataset_tf``.
        fine_grained: Label granularity, forwarded to ``create_dataset_tf``.
        image_size: Side length the images are resized to before flattening.
        percent: Fraction of the split's batches to materialise.

    Returns:
        ``(X, Y)`` where ``X`` is a 2-D array with one flattened image per row
        and ``Y`` is the matching 1-D label array. ``(None, None)`` is returned
        when no batch is taken (empty split or ``percent <= 0``).
    """
    dataset = create_dataset_tf(split=split, fine_grained=fine_grained, batch_size=1000, image_size=image_size)
    dataset = dataset.map(lambda x, y: (tf.image.rgb_to_grayscale(x), y))

    total_batches = len(dataset)
    n_batches = int(total_batches * percent)
    # int() truncates, so a small split (e.g. 2 batches at percent=0.1) would
    # otherwise yield zero batches and silently return (None, None). Take at
    # least one batch whenever a positive fraction of a non-empty split is
    # requested.
    if n_batches == 0 and total_batches > 0 and percent > 0:
        n_batches = 1

    # Collect per-batch arrays and stack once at the end; stacking inside the
    # loop re-copies everything gathered so far on every iteration.
    features, targets = [], []
    for x, y in dataset.take(n_batches):
        x, y = x.numpy(), y.numpy()
        features.append(x.reshape(x.shape[0], -1))
        targets.append(y)

    if not features:
        return None, None
    return np.vstack(features), np.hstack(targets)

## Import Data

In [208]:
# Load both class-name lists from the dataset metadata; only the coarse names
# are printed in this cell.
fine_names, coarse_names = load_target_names()
print(coarse_names)

['Aves', 'Reptilia', 'Mammalia', 'Arachnida', 'Magnoliopsida', 'Insecta', 'Liliopsida', 'Pinopsida']


In [209]:
# Show the fine-grained class names returned by load_target_names().
print(fine_names)

['Scolopacidae', 'Laridae', 'Accipitridae', 'Anatidae', 'Corvidae', 'Icteridae', 'Picidae', 'Colubridae', 'Sciuridae', 'Rosaceae', 'Cyperaceae', 'Brassicaceae', 'Sphingidae', 'Orchidaceae', 'Apidae', 'Papilionidae', 'Orobanchaceae', 'Liliaceae', 'Apocynaceae', 'Poaceae', 'Araneidae', 'Pinaceae', 'Asparagaceae', 'Acrididae', 'Hesperiidae', 'Rubiaceae', 'Onagraceae', 'Libellulidae', 'Fagaceae', 'Cactaceae', 'Lycaenidae', 'Coenagrionidae', 'Boraginaceae', 'Ranunculaceae', 'Polygonaceae', 'Pieridae', 'Caryophyllaceae', 'Lamiaceae', 'Noctuidae', 'Apiaceae', 'Malvaceae', 'Geometridae', 'Plantaginaceae', 'Euphorbiaceae', 'Crambidae', 'Parulidae', 'Erebidae', 'Ericaceae', 'Passerellidae', 'Tyrannidae']


In [243]:
# Build the two private-test input pipelines, one per label granularity.
# Images are resized to 512x512 here, overriding create_dataset_tf's default of 112.
# NOTE(review): presumably 512 is the resolution the saved models expect — confirm.
coarse_ds = create_dataset_tf('privtest_coarse', fine_grained=False, batch_size=64, image_size = 512)
fine_ds = create_dataset_tf('privtest_fine', fine_grained=True, batch_size=64, image_size = 512)

# Load Saved Model

## Model for Coarse-Grained

In [211]:
# Restore the saved coarse-grained classifier from the 'ENV2_ft1k_xl_model'
# path and print its layer summary as a check that the expected model loaded.
coarse_model = tf.keras.models.load_model('ENV2_ft1k_xl_model')
coarse_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 EfficientNetV2_xl (KerasLay  (None, 1280)             207615832 
 er)                                                             
                                                                 
 pred (Dense)                (None, 8)                 10248     
                                                                 
Total params: 207,626,080
Trainable params: 10,248
Non-trainable params: 207,615,832
_________________________________________________________________


In [225]:
# Run the coarse model over the private-test pipeline; each output row holds
# one score per coarse class (the model's final Dense layer has 8 units).
# NOTE(review): in the saved notebook this cell ran at In [225] while coarse_ds
# was last rebuilt at In [243]; re-run top-to-bottom so the predictions come
# from the dataset definition shown above.
coarse_predict = coarse_model.predict(coarse_ds)
coarse_predict



array([[5.25592838e-15, 3.29531477e-24, 2.10967649e-21, ...,
        1.22063137e-09, 3.76106687e-02, 4.24089809e-17],
       [6.36619313e-10, 1.48136118e-14, 1.31332419e-11, ...,
        9.99999404e-01, 2.35753230e-08, 8.79578600e-17],
       [1.09261870e-10, 2.46898418e-10, 3.66546585e-11, ...,
        9.99991894e-01, 1.46330050e-08, 1.92132171e-10],
       ...,
       [1.42996170e-13, 1.80206695e-25, 3.12250367e-19, ...,
        9.99999762e-01, 3.14347333e-11, 7.06800135e-22],
       [3.56657893e-14, 3.20969201e-24, 3.16289698e-24, ...,
        2.73581477e-08, 2.68149636e-09, 1.09448803e-14],
       [1.21152186e-11, 3.95023179e-17, 3.04162285e-17, ...,
        1.04675146e-09, 9.78994947e-08, 1.31227280e-19]], dtype=float32)

In [226]:
# Reduce every row of class scores to the index of its highest-scoring class.
coarse_predict_class = np.argmax(coarse_predict, axis=1)
coarse_predict_class

array([4, 5, 5, ..., 5, 4, 4])

In [228]:
# Sanity check: number of predicted labels (one per private-test image).
coarse_predict_class.shape[0]

1500

In [227]:
# Attach the coarse predictions to the private-test manifest and export it.
# The manifest is the same CSV create_dataset_tf read, and that pipeline is not
# shuffled, so predictions line up with rows by position.
test_csv = pd.read_csv(root / "privtest_coarse.csv")

# Fail loudly if the predictions were produced from a different manifest.
assert len(coarse_predict_class) == len(test_csv), (
    f"{len(coarse_predict_class)} predictions for {len(test_csv)} rows"
)

# Plain column assignment replaces the column (and its dtype) outright.
# `.loc[:, "coarse"] = ...` writes in place under pandas >= 2.0 and may coerce
# the integer class ids to the placeholder column's existing dtype.
test_csv["coarse"] = coarse_predict_class

pred_file = "47369833-conv-coarse-private.csv"
test_csv.to_csv(pred_file, index=False)

## Model for Fine-Grained

In [241]:
# Restore the saved fine-grained classifier from the 'ENV2_ft1k_xl_fine_model'
# path and print its layer summary as a check that the expected model loaded.
fine_model = tf.keras.models.load_model('ENV2_ft1k_xl_fine_model')
fine_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 EfficientNetV2_xl (KerasLay  (None, 1280)             207615832 
 er)                                                             
                                                                 
 pred (Dense)                (None, 50)                64050     
                                                                 
Total params: 207,679,882
Trainable params: 64,050
Non-trainable params: 207,615,832
_________________________________________________________________


In [244]:
# Run the fine model over the private-test pipeline; each output row holds one
# score per fine class (the model's final Dense layer has 50 units).
fine_predict = fine_model.predict(fine_ds)
fine_predict



array([[4.4605771e-05, 1.7733053e-05, 1.1633457e-05, ..., 1.8317978e-03,
        4.3995038e-05, 2.1270756e-05],
       [1.1173687e-04, 9.7528733e-05, 6.4011190e-05, ..., 9.2235990e-05,
        9.2499467e-05, 7.5193500e-05],
       [1.8620385e-04, 2.2833665e-05, 7.2284252e-05, ..., 4.5059442e-05,
        5.0743845e-05, 2.3424936e-05],
       ...,
       [1.7364324e-03, 4.4332223e-04, 6.3599124e-05, ..., 4.1045612e-04,
        4.6578439e-06, 2.3838494e-04],
       [8.0797481e-06, 2.5546358e-06, 5.1801703e-06, ..., 1.4274158e-03,
        2.6209886e-05, 1.3485802e-05],
       [4.0961304e-06, 2.2330655e-06, 4.0483169e-06, ..., 6.5934852e-05,
        2.9634011e-05, 1.4947743e-05]], dtype=float32)

In [245]:
# Reduce every row of class scores to the index of its highest-scoring class.
fine_predict_class = np.argmax(fine_predict, axis=1)
fine_predict_class

array([32, 14, 30, ..., 23, 25, 18])

In [246]:
# Sanity check: number of predicted labels (one per private-test image).
fine_predict_class.shape[0]

1500

In [234]:
# Attach the fine predictions to the private-test manifest and export it.
# The manifest is the same CSV create_dataset_tf read, and that pipeline is not
# shuffled, so predictions line up with rows by position.
# NOTE(review): in the saved notebook this cell ran at In [234], before the
# fine predictions were last recomputed (In [244]-[246]); re-run top-to-bottom
# before submitting so the CSV matches the predictions displayed above.
test_csv = pd.read_csv(root / "privtest_fine.csv")

# Fail loudly if the predictions were produced from a different manifest.
assert len(fine_predict_class) == len(test_csv), (
    f"{len(fine_predict_class)} predictions for {len(test_csv)} rows"
)

# Plain column assignment replaces the column (and its dtype) outright.
# `.loc[:, "fine"] = ...` writes in place under pandas >= 2.0 and may coerce
# the integer class ids to the placeholder column's existing dtype.
test_csv["fine"] = fine_predict_class

pred_file = "47369833-conv-fine-private.csv"
test_csv.to_csv(pred_file, index=False)