# Custom Label

This notebook contains helper functions that assist in labelling images with their species names <br>
Developed by: Prakash Palanivelu Rajmohan

## Count Unique Images
Counts the number of unique images in mewc_out.csv, ignoring repeated filenames (the same image can appear on several rows).

In [6]:
import pandas as pd

# Path to the CSV to inspect -- swap in the real location of your file.
file_path = 'your_file.csv'

# Column whose repeated values should be counted only once.
column_name = 'filename'

# Load the CSV into a pandas DataFrame.
df = pd.read_csv(file_path)

# Keep only the first row seen for each distinct value in the column.
unique_rows = df.loc[~df.duplicated(subset=column_name)]

# Number of rows left after de-duplication.
num_unique_rows = unique_rows.shape[0]

print(f"Number of unique rows based on '{column_name}': {num_unique_rows}")


## Sorting based on high probabilities

Each image is assigned the species label that has the maximum probability.

Example:
If the csv file contains data similar to:

rownum,filename,label,class_id,prob,class_name,class_rank,date_time_orig,conf <br>

0,file1.JPG,Tiger,Tiger,0.9348078,1,1.0,2023:01:01 00:00:00,<br>
1,file1.JPG,Tiger,WB,0.0548096,1,1.0,2023:01:01 00:00:00,

We remove the row with rownum = 1 for file1.JPG, since it does not have the maximum probability for that file.

In [3]:
import pandas as pd

# Load the CSV (`file_path` is the path set in the "Count Unique Images" cell),
# order rows by filename ascending and prob descending, then keep only the
# first -- i.e. highest-prob -- row of every filename.
df = (
    pd.read_csv(file_path)
    .sort_values(by=['filename', 'prob'], ascending=[True, False])
    .drop_duplicates(subset='filename', keep='first')
)

# Write the reduced table to a new CSV file, without the index column.
df.to_csv('output_file.csv', index=False)


## Rewriting JSON file

The code below replaces the category of each image's highest-confidence detection (when that category is '1') with the `class_id` from the CSV, and sorts each image's detections by confidence in ascending order.

In [4]:
import csv
import json

def relabel_top_detection(item, label_by_filename):
    """Relabel the highest-confidence detection of one image entry and sort its detections.

    Args:
        item: One entry of the JSON ``images`` list. It must have a ``file``
            key; ``detections`` may be missing, null, or empty, in which case
            the entry is returned untouched.
        label_by_filename: Mapping from bare file name (no directory part) to
            the replacement category taken from the CSV ``class_id`` column.

    Returns:
        The same ``item`` object, modified in place: every detection whose
        ``conf`` equals the maximum ``conf`` of the image and whose
        ``category`` is ``'1'`` gets the CSV label, and ``detections`` is
        sorted by ``conf`` in ascending order. Detections without a ``conf``
        key are placed first instead of raising ``KeyError``.
    """
    detections = item.get('detections')
    if not detections:
        # Nothing to relabel or sort; leave the entry exactly as it was read.
        return item

    # The CSV stores bare file names, so drop any directory prefix.
    file_name = item['file'].split('/')[-1]
    conf_list = [detection['conf'] for detection in detections if 'conf' in detection]
    new_label = label_by_filename.get(file_name)

    if conf_list and new_label is not None:
        max_conf = max(conf_list)
        for detection in detections:
            # Only category '1' is rewritten (presumably the generic "animal"
            # class -- confirm against `detection_categories` in the JSON).
            if (
                'conf' in detection
                and detection['conf'] == max_conf
                and detection.get('category') == '1'
            ):
                detection['category'] = new_label

    # Ascending confidence; a detection lacking `conf` is treated as lowest.
    item['detections'] = sorted(
        detections, key=lambda detection: detection.get('conf', float('-inf'))
    )
    return item


# In a notebook `__name__` is '__main__', so this always runs there; the guard
# only keeps the file I/O from firing if this code is imported as a module.
if __name__ == '__main__':
    # Read CSV file (newline='' is what the csv module expects for file objects)
    csv_file_path = 'output_file.csv'
    with open(csv_file_path, 'r', newline='') as csv_file:
        csv_data = list(csv.DictReader(csv_file))

    # Index the CSV once by filename; setdefault keeps the FIRST row for a
    # filename, matching a top-to-bottom scan that stops at the first match.
    label_by_filename = {}
    for csv_row in csv_data:
        label_by_filename.setdefault(csv_row['filename'], csv_row['class_id'])

    # Read JSON file
    json_file_path = 'file_path.json'
    with open(json_file_path, 'r') as json_file:
        json_data = json.load(json_file)

    # Process JSON data
    new_json_data = {
        "images": [
            relabel_top_detection(item, label_by_filename)
            for item in json_data['images']
        ],
        "detection_categories": json_data['detection_categories'],
        "info": json_data['info'],
    }

    # Write the new JSON data to a file
    # NOTE: machine-specific absolute path -- change it to a location that
    # exists on your system before running.
    new_json_file_path = '/home/s4737925/MEWC/scripts/misc/md_output_new_jsonfile.json'
    with open(new_json_file_path, 'w') as new_json_file:
        json.dump(new_json_data, new_json_file, indent=2)

    print(f"New JSON file created at: {new_json_file_path}")


## Label Names

The code below gets the label names used for drawing bounding boxes.

In [5]:
import os

def get_directories(path):
    """Return the names of the immediate sub-directories of ``path``, sorted.

    ``os.listdir`` yields entries in arbitrary order, so the names are sorted
    to give the same list on every machine and every run.

    Args:
        path: Directory to inspect.

    Returns:
        A list of sub-directory names (not full paths) in ascending string
        order; regular files are excluded. Empty if there are none.

    Raises:
        OSError: If ``path`` cannot be listed (e.g. it does not exist).
    """
    return sorted(
        entry
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )

# Point this at the directory to inspect; its sub-directory names are listed
# below (presumably one folder per label -- confirm for your data layout).
directory_path = 'your_directory_path'
directories_list = get_directories(directory_path)
print(f"Directories in {directory_path} : {directories_list}")
