# Recoinize

# 1. Setup

## 1.1. Pull code from GitHub

In [39]:
from src.helpers.cmd import git_update, print_gpu_name
from src.helpers.install import install_requirements
# Sync the working copy using the "live-edit" ref, without forcing and without echoing
# command output. NOTE(review): "live-edit" is presumably a branch name — confirm in
# src.helpers.cmd.git_update.
git_update("live-edit", force=False, show_output=False)
# Left disabled on purpose; presumably installs the project's dependencies, so
# re-enable it when running on a fresh environment (confirm in src.helpers.install).
# install_requirements()

In [40]:
# Sanity check: print the name of the GPU this runtime exposes before doing any training.
print_gpu_name()

b'NVIDIA RTX A4000\n'


## 1.4. Import dependencies

In [38]:
# Import standard dependencies
import cv2
import os
import random
import numpy as np
from matplotlib import pyplot as plt

# Import tensorflow dependencies - Functional API
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, Layer, Conv2D, Dense, MaxPooling2D, Input, Flatten
from keras.models import Sequential, load_model, Model
from datasets import load_dataset

from src.models import make_embedding, make_siamese_model
from src.helpers.setup import set_gpus_growth

# NOTE(review): presumably enables TensorFlow GPU memory growth so the process does not
# reserve all GPU memory up front — confirm against src.helpers.setup.set_gpus_growth.
set_gpus_growth()

## Get data from Hugging Face

In [35]:
# Load the euro-coin dataset from the Hugging Face Hub and switch its output
# format to TensorFlow in a single expression.
dataset = load_dataset('photonsquid/coins-euro').with_format('tf')

Using custom data configuration photonsquid--coins-euro-6a59027cbfa69f38
Found cached dataset parquet (/root/.cache/huggingface/datasets/photonsquid___parquet/photonsquid--coins-euro-6a59027cbfa69f38/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Pull the predefined 'train' and 'test' splits out of the loaded dataset
train_dataset, test_dataset = dataset['train'], dataset['test']

# we've got a Siamese model, so we need to create a dataset with pairs of images
# and a label that indicates whether each pair is a positive or a negative example

# we'll use the following function to create the pairs
def create_pairs(x, y):
    """Create positive and negative pairs from two arrays.

    For every sample (the "anchor") two pairs are emitted, in this order:

    1. a positive pair: the anchor and the next sample of the same class
       (wrapping around), labelled ``True``;
    2. a negative pair: the anchor and a random sample of a randomly chosen
       *different* class, labelled ``False``.

    Args:
        x: indexable collection of samples (``x[i]`` must work for an ``int`` i).
        y: class labels, one per sample; anything ``np.asarray`` accepts. The
            label values themselves are arbitrary (they need not be 0..n-1).

    Returns:
        ``(pairs, labels)`` as numpy arrays. ``pairs[k]`` holds the two samples
        of pair ``k``; ``labels[k]`` is a bool that is ``True`` when both
        samples share a class. If only one class is present, only positive
        pairs are produced, because no negative partner exists.
    """
    y = np.asarray(y)
    classes = np.unique(y)
    num_classes = len(classes)
    # Sample indices grouped per class, addressed by class *position* so that
    # picking "another class" never depends on the actual label values.
    class_indices = [np.where(y == c)[0] for c in classes]

    pairs = []
    labels = []
    for class_pos, idx in enumerate(class_indices):
        for i in range(len(idx)):
            # Plain ints keep indexing valid for lists, arrays and tensors alike.
            anchor = int(idx[i])

            # Positive pair: the anchor with its same-class neighbour. A class
            # with a single sample is paired with itself.
            same = int(idx[(i + 1) % len(idx)])
            pairs.append([x[anchor], x[same]])
            labels.append(True)

            # Negative pair: a non-zero offset guarantees a different class.
            # randrange(1, 1) would raise, so skip when there is only one class.
            if num_classes > 1:
                inc = random.randrange(1, num_classes)
                other_idx = class_indices[(class_pos + inc) % num_classes]
                other = int(other_idx[random.randrange(len(other_idx))])
                pairs.append([x[anchor], x[other]])
                labels.append(False)

    # convert the pairs and labels to numpy arrays
    return np.array(pairs), np.array(labels, dtype=bool)

# create the pairs and labels for the training dataset
train_pairs, train_labels = create_pairs(train_dataset['image'], train_dataset['label'])

# create the pairs and labels for the testing dataset
test_pairs, test_labels = create_pairs(test_dataset['image'], test_dataset['label'])


def _make_pair_dataset(pairs, labels, batch_size=32, shuffle_buffer=1000):
    """Wrap pair/label arrays in a shuffled, batched, prefetched tf.data pipeline.

    The pipeline has to be built from the pair arrays: a Hugging Face ``Dataset``
    is not a ``tf.data.Dataset`` (its ``shuffle`` takes a seed as first argument
    and it has no ``prefetch``), and it holds single images rather than pairs.

    Args:
        pairs: numpy array whose second axis holds the two samples of each pair.
        labels: numpy array with one label per pair.
        batch_size: number of pairs per batch.
        shuffle_buffer: size of the tf.data shuffle buffer.

    Returns:
        A ``tf.data.Dataset`` yielding ``((first, second), label)`` batches.
        NOTE(review): this element structure assumes the model takes the two
        images as separate inputs — confirm against ``make_siamese_model``.
    """
    pair_ds = tf.data.Dataset.from_tensor_slices(
        # Labels are cast to float32 so they can be used directly in loss arithmetic.
        ((pairs[:, 0], pairs[:, 1]), labels.astype('float32'))
    )
    pair_ds = pair_ds.shuffle(shuffle_buffer)
    pair_ds = pair_ds.batch(batch_size)
    return pair_ds.prefetch(tf.data.experimental.AUTOTUNE)


# prepare the dataset for training
train_dataset = _make_pair_dataset(train_pairs, train_labels)

# prepare the dataset for testing
test_dataset = _make_pair_dataset(test_pairs, test_labels)
