In [49]:
# code modeled after https://github.com/madelinehayes/seabirdNET
# note: code is almost identical to Harbor_Seal_Annotations_Right_Format

In [1]:
import os
import argparse
import random
import numpy as np
import json
import csv
import sys
import pandas as pd

In [2]:
# folder holding the tiled images; only the .png tiles are kept
path = '../Images/Elephant_Seals_Beach_Only'
image_list = list(filter(lambda entry: entry.endswith('.png'), os.listdir(path)))

In [3]:
# Shuffle the image list and split it into testing, validation and training sets.
# os.listdir returns files in arbitrary order and an unseeded shuffle differs on
# every run, so sort first and shuffle with a fixed seed: the split is then the
# same on every run of this cell, including re-runs in the same kernel.
SPLIT_SEED = 42
image_list = sorted(image_list)
random.Random(SPLIT_SEED).shuffle(image_list)
total_count = len(image_list)

print(total_count)

# 10% of the images go to testing; validation takes 10% of the remaining 90%
# (i.e. 9% of the total), so the effective split is roughly 81-9-10
# (train-val-test) rather than an exact 80-10-10.
test_index = int(total_count*.1)
val_index = int(total_count*.9*.1) + test_index

test_data = image_list[:test_index]
val_data = image_list[test_index:val_index]
train_data = image_list[val_index:]

print(len(test_data), len(val_data), len(train_data))

1741
174 156 1411


In [5]:
# The annotations were saved intermittently, so they are spread over several
# CSV files; concatenate them into a single file here.
eseal_csv_path = '../Data/Elephant_Seal_Anno_csv/combined_csv.csv'
csvPath = '../Data/Elephant_Seal_Anno_csv'

# The combined file lives in the same folder as its inputs and also ends in
# '.csv', so it must be left out of the input list -- otherwise it would be read
# back while it is being written. Sorting keeps the concatenation order stable.
combined_name = os.path.basename(eseal_csv_path)
all_csv = sorted(
    csvPath + '/' + i
    for i in os.listdir(csvPath)
    if i.endswith('.csv') and i != combined_name
)

# 'with' closes (and therefore flushes) every handle before the next cell reads
# the combined file.
with open(eseal_csv_path, 'w') as out:
    for c in all_csv:
        with open(c) as f:
            text = f.read()
        out.write(text)
        # keep the last row of one file from fusing with the first row of the next
        if text and not text.endswith('\n'):
            out.write('\n')

In [6]:
# Convert the combined annotation CSV into one list of rows per data split.
# Each output row has the form
#     image_name, x1, y1, x2, y2, class_name
# and is appended to the list of the split (train / test / val) its image belongs to.

# path to the combined annotation file
via_path = eseal_csv_path = '../Data/Elephant_Seal_Anno_csv/combined_csv.csv'

# Boxes are clipped to this many pixels on each axis.
# NOTE(review): assumes 500 px square tiles -- adjust if the tiles differ.
TILE_SIZE = 500

image_annotations_train1 = []
image_annotations_test1 = []
image_annotations_val1 = []

# dictionary mapping the annotations to the correct labels.
# if you created your own annotations, be sure to verify this is the same as your
# annotations using the comments at the top of the CSV file and change order if not
anno_dict = {"0":"cow","1":"bull","2":"pup"}

# Sets make the per-row membership test cheap; each set is paired with the list
# that collects the rows of that split.
rows_by_split = (
    (set(train_data), image_annotations_train1),
    (set(test_data), image_annotations_test1),
    (set(val_data), image_annotations_val1),
)

# keep track of how many cows, bulls, and pups end up in the output rows
class_counts = {label: 0 for label in anno_dict.values()}
# annotations whose image is in none of the three splits
unassigned = 0

with open(via_path, "r", newline='') as f:
    reader = csv.reader(f, delimiter=",")
    for line in reader:
        # bypass blank rows and the comment lines in the csv
        if not line or line[0].startswith('#'):
            continue
        # the fields used below are columns 1 (file), 4 (shape) and 5 (label)
        if len(line) < 6:
            print('bad annotation')
            continue

        # drop the two wrapping characters on each side of the file field
        filename = line[1][2:-2]

        # strip brackets, split, drop the leading shape id, then convert the
        # remaining values to int (via float, so "12.0" is accepted)
        coords = [int(float(value)) for value in line[4].strip('][').split(',')[1:]]

        # make sure only bounding rectangles were used
        if len(coords) != 4:
            print('bad annotation')
            continue
        top_left_x, top_left_y, width, height = coords

        if width == 0 or height == 0:
            continue

        # Move from top-left + width/height to corner coordinates. The far corner
        # is taken from the un-clamped origin so that clamping a negative origin
        # does not push the opposite edge outwards; it is then clipped to the tile.
        x2 = min(top_left_x + width, TILE_SIZE)
        y2 = min(top_left_y + height, TILE_SIZE)
        # origins left of / above the tile are pulled back onto it
        x1 = top_left_x if top_left_x >= 0 else 1
        y1 = top_left_y if top_left_y >= 0 else 1

        # nothing of the box is left on the tile after clipping
        if x2 <= x1 or y2 <= y1:
            continue

        # determine class
        if len(line[5]) <= 2:
            print("seal not labeled" + filename)
            continue
        # anything after a '#' in the label field is discarded before decoding
        anno = json.loads(line[5].split('#')[0])
        name = anno_dict[anno['1']]

        # create the csv row and file it under the split that owns the image
        for split_files, split_rows in rows_by_split:
            if filename in split_files:
                split_rows.append([filename, x1, y1, x2, y2, name])
                class_counts[name] += 1
                break
        else:
            # the image is not part of any split, so the row is left out instead
            # of being silently added to the validation set
            unassigned += 1

cow = class_counts["cow"]
bull = class_counts["bull"]
pup = class_counts["pup"]

if unassigned:
    print('annotations skipped because their image is in no split:', unassigned)

In [63]:
# number of labeled cows, bulls and pups, in that order
print(' '.join(str(count) for count in (cow, bull, pup)))

3454 688 6602


In [55]:
# number of annotation rows per split (train, test, val); uses the list names
# defined by the annotation-parsing cell
print(len(image_annotations_train1), len(image_annotations_test1), len(image_annotations_val1))

8568 1128 1048


In [56]:
# Write one annotation CSV per split into the image folder.
# The target is built from `path` with os.path.join, so it works on every OS and
# avoids backslash sequences inside ordinary string literals.
annotation_files = (
    ('elephant_test_annotations.csv', image_annotations_test1),
    ('elephant_val_annotations.csv', image_annotations_val1),
    ('elephant_train_annotations.csv', image_annotations_train1),
)

for csv_name, rows in annotation_files:
    # newline='' lets the csv module control the line endings itself
    with open(os.path.join(path, csv_name), 'w', newline='') as fp:
        csv.writer(fp).writerows(rows)