# Code to create a CSV file for the labels of mel spectrograms
This code creates a csv that contains the file path for each image and the respective label taken from the image's file name.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import os
import csv
import tensorflow as tf
import pandas as pd
import numpy as np

In [2]:
# paths for training, validation, and test images
train_path = "G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/training"
val_path = "G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/validation"
test_path = "G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/test"

colab_train_path = "/content/drive/Shareddrives/BioSoundSCape_ABG/labeled_data/spectrogram_data/training"
colab_val_path = "/content/drive/Shareddrives/BioSoundSCape_ABG/labeled_data/spectrogram_data/validation"
colab_test_path = "/content/drive/Shareddrives/BioSoundSCape_ABG/labeled_data/spectrogram_data/test"

# output csv files for labels
train_labels = "G:/Shared drives/BioSoundSCape_ABG/code/train_labels.csv"
val_labels = "G:/Shared drives/BioSoundSCape_ABG/code/val_labels.csv"
test_labels = "G:/Shared drives/BioSoundSCape_ABG/code/test_labels.csv"

colab_train_labels = "/content/drive/Shareddrives/BioSoundSCape_ABG/code/colab_train_labels.csv"
colab_val_labels = "/content/drive/Shareddrives/BioSoundSCape_ABG/code/colab_val_labels.csv"
colab_test_labels = "/content/drive/Shareddrives/BioSoundSCape_ABG/code/colab_test_labels.csv"

# image dimensions 224 x 224 x 3 (RGB pixels)
IMG_WIDTH = 224
IMG_HEIGHT = 224
IMG_DEPTH = 3

In [3]:
# function to create dataset with image path and label
def create_data_set(data_path, output_file):
  img_paths = os.listdir(data_path)
  dataset = []
  
  for img_name in img_paths:
    split_img_name = img_name.split('.')
  
    img_data = split_img_name[1].split('_') # just get data of image
    img_label = img_data[2]
    img_group = img_data[4]
    
    # convert letter label to numeric
    if img_label == 'A':
      img_label = 0
    elif img_label == 'B':
      img_label = 1
    elif img_label == 'G':
      img_label = 2
    elif img_label == "I":
      img_label = 3

    # convert image to numpy
    img_path = data_path + "/" + img_name
    #img = process_path(img_path, IMG_HEIGHT, IMG_WIDTH)
    #np_img = np.array(img)
    dataset.append({'img_path': img_path, 'label': img_label, 'group': img_group})
    

  # write data into csv
  img_count = 0
  with open(output_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=['img_path', 'label', 'group'])
    writer.writeheader()
    for item in dataset:
      print('Just added', img_count, ':', item)
      writer.writerow(item)
      img_count = img_count + 1

In [4]:
# training data
create_data_set(train_path, train_labels)

Just added 0 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/training/s2lam001_230815_2023-08-19_13-20_55.07_noaug_B_sp4_584.png', 'label': 1, 'group': '584'}
Just added 1 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/training/s2lam029_231010_2023-10-11_06-00_43.69_noaug_B_sp4_595.png', 'label': 1, 'group': '595'}
Just added 2 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/training/s2lam044_230622_2023-06-24_16-30_56.79_noaug_B_sp4_596.png', 'label': 1, 'group': '596'}
Just added 3 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/training/s2lam001_230815_2023-08-19_13-20_55.57_noaug_B_sp4_584.png', 'label': 1, 'group': '584'}
Just added 4 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/training/s2lam054_230705_2023-07-10_08-30_38.48_noaug_B_sp4_593.png', 'label': 1, 'group': '593'}
Just added 5 : {'img_path': 'G:/Shared d

In [5]:
create_data_set(val_path, val_labels)

Just added 0 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/validation/s2lam095_231017_2023-10-20_08-50_34.32_noaug_B_sp6_19.png', 'label': 1, 'group': '19'}
Just added 1 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/validation/s2lam126_230812_2023-08-15_23-50_7.18_noaug_B_sp6_20.png', 'label': 1, 'group': '20'}
Just added 2 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/validation/s2lam126_230812_2023-08-15_23-50_7.43_noaug_B_sp6_20.png', 'label': 1, 'group': '20'}
Just added 3 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/validation/s2lam126_230812_2023-08-15_23-50_6.93_noaug_B_sp6_20.png', 'label': 1, 'group': '20'}
Just added 4 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/validation/s2lam118_230703_2023-07-08_12-40_3.26_noaug_B_sp6_21.png', 'label': 1, 'group': '21'}
Just added 5 : {'img_path': 'G:/Shared drive

In [6]:
# test data
create_data_set(test_path, test_labels)

Just added 0 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/test/s2lam080_220922_2022-09-22_18-50_10.45_noaug_B_sp6_19.png', 'label': 1, 'group': '19'}
Just added 1 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/test/s2lam047_220928_2022-10-07_13-10_2.74_noaug_B_sp6_20.png', 'label': 1, 'group': '20'}
Just added 2 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/test/s2lam047_220928_2022-10-07_13-10_2.99_noaug_B_sp6_20.png', 'label': 1, 'group': '20'}
Just added 3 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/test/s2lam047_220928_2022-10-07_13-10_2.49_noaug_B_sp6_20.png', 'label': 1, 'group': '20'}
Just added 4 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/test/154786163_36.62_noaug_B_sp6_21.png', 'label': 1, 'group': '21'}
Just added 5 : {'img_path': 'G:/Shared drives/BioSoundSCape_ABG/labeled_data/spectrogram_data/te