# Create model

In [None]:
from pathlib import Path
import tarfile
import zipfile

from transferit.preparation import create_class, split_data
from transferit.training import train
from transferit.wrapping import wrap_model

In [None]:
# Size every image is resized to during preparation; the same value is
# handed to training further down.
IMAGE_SIZE = (256, 256)

# --- Raw inputs -------------------------------------------------------------
# The downloaded / bundled archives live here and are extracted in place.
RAW_FOLDER = Path("../data/raw/")

CALTECH_TAR_FILE = RAW_FOLDER / "256_ObjectCategories.tar"
CALTECH_EXTRACTED_FOLDER = RAW_FOLDER / "256_ObjectCategories"

CARDS_ZIP_FILE = RAW_FOLDER / "cards.zip"
CARDS_EXTRACTED_FOLDER = RAW_FOLDER / "cards"

# --- Prepared data ----------------------------------------------------------
# "full" holds one sub folder per class; it is later split into train / dev.
PREPARED_FOLDER = Path("../data/prepared/")

FULL_FOLDER = PREPARED_FOLDER / "full"
FULL_FOLDER_NEGATIVE = FULL_FOLDER / "negative"
FULL_FOLDER_POSITIVE = FULL_FOLDER / "positive"

TRAIN_FOLDER = PREPARED_FOLDER / "train"
DEV_FOLDER = PREPARED_FOLDER / "dev"

# --- Models -----------------------------------------------------------------
# NOTE(review): "naked_best_loss.hdf5" is presumably the checkpoint written by
# train() into NAKED_MODEL_FOLDER -- confirm against transferit.training.
NAKED_MODEL_FOLDER = Path("../models/naked")
NAKED_MODEL_FILE = NAKED_MODEL_FOLDER / "naked_best_loss.hdf5"

# NOTE(review): "00000001" looks like a model-version directory for serving --
# confirm against transferit.wrapping.
WRAPPED_MODEL_FOLDER = Path("../models/wrapped/00000001")

## Extract Caltech 256 data

We use a subset of the Caltech 256 dataset for the negative class.

Before running this notebook, download the `256_ObjectCategories.tar` file from the [Caltech 256 website](http://www.vision.caltech.edu/Image_Datasets/Caltech256/) and place it in the `data/raw/` folder inside this repository.

The code below extracts the full Caltech 256 dataset to a sub folder of the `data/raw` folder. Later on, we will sample a subset of these images to use when training and evaluating the model.

In [None]:
# Unpack the downloaded Caltech 256 archive into RAW_FOLDER.
with tarfile.open(CALTECH_TAR_FILE) as tar:
    # The archive is downloaded by the user, so treat it as untrusted:
    # the "data" extraction filter refuses members that would land outside
    # the destination (absolute paths, "..", escaping links). Extraction
    # filters only exist on newer Python releases, so fall back to the
    # plain call where they are unavailable.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(RAW_FOLDER, filter="data")
    else:
        tar.extractall(RAW_FOLDER)

## Extract card photos

We use photos of random *Magic: The Gathering* cards for the positive class.

These photos are included in the repository in a zip file.

The code below extracts all the card images to a sub folder of the `data/raw` folder. We will use all of them when training the model.

In [None]:
# Unpack the bundled card photos into RAW_FOLDER.
with zipfile.ZipFile(CARDS_ZIP_FILE, mode="r") as cards_archive:
    cards_archive.extractall(path=RAW_FOLDER)

## Prepare images

We will now prepare the images for training. This consists of the following:

* Sample a number of images from the Caltech dataset, resize them and store them in a folder called `negative`
* Resize the card photos and store them in a folder called `positive`

In [None]:
# Caltech photos -> "negative" class.
# exist_ok=False makes this cell fail if the folder is already there, so a
# re-run cannot silently mix new output with images from a previous run.
FULL_FOLDER_NEGATIVE.mkdir(parents=True, exist_ok=False)
# NOTE(review): 3000 is presumably the number of images to sample and 42 the
# random seed -- confirm against transferit.preparation.create_class.
create_class(
    CALTECH_EXTRACTED_FOLDER,
    FULL_FOLDER_NEGATIVE,
    IMAGE_SIZE,
    3000,
    42,
)

In [None]:
# Card photos -> "positive" class.
# exist_ok=False makes this cell fail if the folder is already there, so a
# re-run cannot silently mix new output with images from a previous run.
FULL_FOLDER_POSITIVE.mkdir(parents=True, exist_ok=False)
# No sample size / seed is passed here: the notebook text says all card
# photos are used.
create_class(
    CARDS_EXTRACTED_FOLDER,
    FULL_FOLDER_POSITIVE,
    IMAGE_SIZE,
)

## Create train / dev split

In [None]:
# Split the prepared images in FULL_FOLDER into TRAIN_FOLDER and DEV_FOLDER.
# NOTE(review): 27 is presumably the random seed and 0.1 the fraction of
# images held out for dev -- confirm against transferit.preparation.split_data.
split_data(
    FULL_FOLDER,
    TRAIN_FOLDER,
    DEV_FOLDER,
    27,
    0.1,
)

## Train

In [None]:
# Train on the train split, using the dev split alongside it.
# NOTE(review): the trained model is presumably written into
# NAKED_MODEL_FOLDER -- confirm against transferit.training.train.
train(
    TRAIN_FOLDER,
    DEV_FOLDER,
    NAKED_MODEL_FOLDER,
    IMAGE_SIZE,
)

## Wrap model for serving

In [None]:
# Wrap the trained model file for serving and write it to
# WRAPPED_MODEL_FOLDER.
# NOTE(review): the order of class_names presumably has to match the label
# indices produced during training, and top_k=None presumably means "do not
# truncate the returned classes" -- confirm both against
# transferit.wrapping.wrap_model.
wrap_model(
    class_names=["negative", "positive"],
    top_k=None,
    model_file=NAKED_MODEL_FILE,
    output_folder=WRAPPED_MODEL_FOLDER,
)