# Preparing to compute with TensorFlow

In [1]:
import Tensorflow.scripts.Paths as Paths
import os

# Set up the working paths.
# `paths` is bound to Paths.WorkingPaths itself (it is not called here), and
# setup_paths() is invoked on it; the recorded cell output shows one
# "Creating <directory>" line per workspace directory.
paths = Paths.WorkingPaths
paths.setup_paths()

Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\workspace\images
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\workspace\images\collected_images
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\workspace\images\collected_images_resized
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\workspace\images\trainset
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\workspace\images\testset
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\workspace\images\devset
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\workspace
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\scripts
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\models
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\workspace\annotations
Creating c:\dev\DHBW\Studienarbeit\Detection_training\Tensorflow\workspace\models
Creating c:\dev\DHBW\Studienarbeit\Detection_training

# Labeling own images with Label Studio

In [None]:
!pip install label-studio --user

Then start Label Studio:

In [None]:
# Launch Label Studio through its command line entry point.
# NOTE(review): presumably this keeps the cell busy until the server is stopped — confirm.
!label-studio start

Note: when you have finished labeling, export the annotations in the Pascal VOC format and manually copy them into the `collected_images` directory inside the `Tensorflow/workspace/images` directory.

# Downloading images from other sources

First, I downloaded the images and XML annotations from this [GitHub repository](https://github.com/datitran/raccoon_dataset).

You can simply copy them into the `collected_images` directory, which should be available under the `Tensorflow/workspace/images` directory. Now you should have about 200 images of raccoons — a pretty low number, isn't it?

Well, that's true, so we will increase the number later. But for a first evaluation to choose the right pretrained model it should be fine. This is especially true if you want to use Google Colab, since you will most likely only be able to train for a couple of hours before you run out of GPU time.

In [None]:
# Rename the downloaded raccoon images and their annotation files to random
# hex names so that they match the naming of the other collected images.
import uuid

# Maps each original file stem to its new stem, so that an image and its
# annotation (same stem, different extension) end up with the same new name.
renamed: dict[str, str] = {}
for entry in os.listdir(paths.COLLECTED_IMAGES_PATH):
    filename, ext = os.path.splitext(entry)
    if 'raccoon' not in filename:
        continue
    # os.listdir() returns entries in arbitrary order, so the .xml file may be
    # seen before its image (or have no image at all). Create the new stem on
    # first sight of a stem, whatever the extension, instead of only for
    # non-XML files; this avoids a KeyError and keeps one name per stem.
    # The new stem is the first 16 hex digits of a random UUID (64 bits, of
    # which 60 are random because the UUID version digit is fixed).
    new_stem = renamed.setdefault(filename, uuid.uuid4().hex[:16])
    # NOTE(review): only the files are renamed; any file name stored inside
    # the XML is left untouched — confirm the XML-to-CSV step does not rely on it.
    os.rename(
        os.path.join(paths.COLLECTED_IMAGES_PATH, entry),
        os.path.join(paths.COLLECTED_IMAGES_PATH, new_stem + ext),
    )

In [None]:
# Convert the XML annotation files in the collected-images directory to CSV,
# written to Paths.CSV_FILE.
# This also converts the annotations you exported from Label Studio.
!python {Paths.XML_TO_CSV} -i {paths.COLLECTED_IMAGES_PATH} -o {Paths.CSV_FILE}
# The labels from the GitHub source are named "raccoon". The script automatically turns them into "Raccoon".

## Download Open Images from Google

We will use [fiftyone](https://docs.voxel51.com/user_guide/dataset_zoo/datasets.html#open-images-v7) to do this.

In [None]:
# !pip install fiftyone

import fiftyone as fo

# Customize where zoo datasets are downloaded
fo.config.dataset_zoo_dir = paths.IMAGE_PATH

# This takes a very long time, because the .csv files covering all labels and
# images are downloaded (the one for train alone has 2.2 GB).
# Only detection labels are requested, restricted to the four listed classes.
# NOTE(review): no split or sample limit is passed, so presumably every split
# is fetched — confirm against the fiftyone dataset zoo documentation.
dataset = fo.zoo.load_zoo_dataset(
    "open-images-v7",
    label_types=["detections"],
    classes=["Cat", "Fox", "Squirrel", "Raccoon"]
)

Now we need to preprocess those huge CSV files and combine them.

In [None]:
labels = ""
for label in Paths.LABELS:
    labels +=" -l " + label["name"]

!cargo run --manifest-path={Paths.CSV_CONV} --release -- -i {Paths.OPEN_IMAGES_TRAIN} -o {paths.COLLECTED_IMAGES_PATH} {labels}
!cargo run --manifest-path={Paths.CSV_CONV} --release -- -i {Paths.OPEN_IMAGES_TEST} -o {paths.COLLECTED_IMAGES_PATH} {labels}
!cargo run --manifest-path={Paths.CSV_CONV} --release -- -i {Paths.OPEN_IMAGES_VALIDATION} -o {paths.COLLECTED_IMAGES_PATH} {labels}

# Preprocessing the images for better performance during training

Downscaling them now is better than letting TensorFlow do it at runtime.

In [None]:
from Tensorflow.scripts.preprocessing_data import resize_images
# %pip install imutils

!cargo run --manifest-path={Paths.CSV_RESIZE} --release -- -i {Paths.CSV_FILE} -o {Paths.CSV_FILE_RESIZED} -r 320

resize_images(paths.COLLECTED_IMAGES_PATH, paths.RESIZED_IMAGES_PATH, 320)

# Split up the resized images into dev, test and train sets

You can do this manually or use the following code snippet.

In [None]:
# Build (in release mode) and run the Rust tool referenced by Paths.SPLIT_DATASET,
# passing the resized-images directory as input (-i) and paths.IMAGE_PATH as output (-o).
# NOTE(review): presumably it distributes the files into the trainset/testset/devset
# directories — confirm in the tool's source.
!cargo run --manifest-path={Paths.SPLIT_DATASET} --release -- -i {paths.RESIZED_IMAGES_PATH} -o {paths.IMAGE_PATH}

# Compress Datasets to use on Google Colab and on other platforms

In [None]:
# Paths will be relative in tar files. The tar command would take absolute paths and by using the .tar file paths would be wrong.
trainset = os.path.join('Tensorflow', 'workspace', 'images', 'trainset')
dataset = os.path.join('Tensorflow', 'workspace', 'images', Paths.DATASET_NAME)
testset = os.path.join('Tensorflow', 'workspace', 'images', 'testset')
devset = os.path.join('Tensorflow', 'workspace', 'images', 'devset')

command = "{} {} {} {}".format(dataset, trainset, testset, devset)
!tar -czf {command}
# If you want to export the dataset you need to manually copy it

# Converting the datasets for training with YOLOv8

In [3]:
from Tensorflow.scripts.csv_to_txt import convert_folder_to_yolov8

# Convert each split (train, test, dev) from its source directory into the
# matching YOLO directory, in the same order as before.
for source_dir, yolo_dir in (
    (paths.TRAINSET_PATH, paths.YOLO_TRAIN_PATH),
    (paths.TESTSET_PATH, paths.YOLO_TEST_PATH),
    (paths.DEVSET_PATH, paths.YOLO_DEV_PATH),
):
    convert_folder_to_yolov8(source_dir, yolo_dir)

## Compress the YOLO datasets

In [None]:
# Paths will be relative in tar files. The tar command would take absolute paths and by using the .tar file paths would be wrong.
trainset = os.path.join('Tensorflow', 'workspace', 'images', 'yolodata', 'trainset')
dataset = os.path.join('Tensorflow', 'workspace', 'images', 'yolodata', 'yolo_' + Paths.DATASET_NAME)
testset = os.path.join('Tensorflow', 'workspace', 'images', 'yolodata', 'testset')
devset = os.path.join('Tensorflow', 'workspace', 'images', 'yolodata', 'devset')

command = "{} {} {} {}".format(dataset, trainset, testset, devset)
!tar -czf {command}
# If you want to export the dataset you need to manually copy it