In [12]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# Root folder that contains one sub-folder per animal class.
root_dir = "../Dataset_Of_animal_Images/"

# Class names. A class's position in this list is the integer label stored in the CSVs.
categories = ['Cat', 'Cow', 'Deer', 'Dog', 'Goat', 'Hen', 'Rabbit', 'Sheep']
print("Categories found:", categories)

mapping = {category: idx for idx, category in enumerate(categories)}
print("Mapping:", mapping)

# One record per image: {"filepath": <path>, "label": <int class id>}.
data_rows = []

for category in categories:
    image_dir = os.path.join(root_dir, category, "train", "images")
    if not os.path.isdir(image_dir):
        print(f"Warning: {image_dir} does not exist.")
        continue

    label = mapping[category]
    # os.listdir returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the row order (and therefore any seeded split of this
    # frame) reproducible from one machine to the next.
    image_files = sorted(os.listdir(image_dir))
    for img_file in image_files:
        img_path = os.path.join(image_dir, img_file)
        # Skip sub-directories (e.g. .ipynb_checkpoints) so that only real
        # files are recorded as images.
        if not os.path.isfile(img_path):
            continue
        data_rows.append({"filepath": img_path, "label": label})

# Declaring the columns keeps the schema intact even when nothing was found,
# so a later df["label"] fails with a meaningful error instead of a KeyError.
df = pd.DataFrame(data_rows, columns=["filepath", "label"])

# Write the complete listing to disk before it is partitioned.
df.to_csv("full_dataset.csv", index=False)
print("Full dataset saved to full_dataset.csv")

# First cut: 70 % for training, 30 % held back.
# Both cuts are stratified on the label column and use the same seed.
train_df, temp_df = train_test_split(
    df,
    stratify=df["label"],
    test_size=0.3,
    random_state=42,
)

# Second cut: the held-back 30 % is halved into validation and test.
val_df, test_df = train_test_split(
    temp_df,
    stratify=temp_df["label"],
    test_size=0.5,
    random_state=42,
)

# Save the three partitions, in the same order as before.
for split_frame, csv_name in (
    (train_df, "train_dataset.csv"),
    (val_df, "val_dataset.csv"),
    (test_df, "test_dataset.csv"),
):
    split_frame.to_csv(csv_name, index=False)

print("Train, Validation, and Test datasets saved to train_dataset.csv, val_dataset.csv, and test_dataset.csv respectively.")


Categories found: ['Cat', 'Cow', 'Deer', 'Dog', 'Goat', 'Hen', 'Rabbit', 'Sheep']
Mapping: {'Cat': 0, 'Cow': 1, 'Deer': 2, 'Dog': 3, 'Goat': 4, 'Hen': 5, 'Rabbit': 6, 'Sheep': 7}
Full dataset saved to full_dataset.csv
Train, Validation, and Test datasets saved to train_dataset.csv, val_dataset.csv, and test_dataset.csv respectively.


In [13]:
import pandas as pd
import matplotlib.pyplot as plt

# Reload the saved listing from disk and show how many rows each label has.
df = pd.read_csv("full_dataset.csv")
label_column = df["label"]
label_column.value_counts()

label
2    3882
0    2866
3    2373
6    2236
5    2006
7    1410
4     969
1     881
Name: count, dtype: int64

In [2]:
import os
import pandas as pd

# Directory used as the prefix for every night-vision file path built below.
nv_dir = "../Dataset_Of_animal_Images/NightVision/train/images"

# Hand-entered annotations, one token per line.
# The N-th non-blank line belongs to the file "NV (N).jpg" (the code after this
# literal numbers the entries starting from 1).
# "-" means the image is skipped and gets no row in the output CSV.
# NOTE(review): the digits presumably follow the class mapping of the main
# dataset (0=Cat, 1=Cow, 2=Deer, 3=Dog, 4=Goat, 5=Hen, 6=Rabbit, 7=Sheep) — confirm.
# Position is the only link between a label and its image, and blank lines are
# dropped before numbering, so do not insert empty lines or reorder entries.
labels_str = """
2
2
-
-
-
-
-
-
-
-
-
2
2
0
0
0
-
-
-
2
2
2
-
-
-
2
3
3
3
2
2
2
2
2
2
2
2
2
2
2
2
2
2
-
3
3
3
3
3
3
3
3
3
2
2
2
2
2
2
2
2
0
0
0
3
3
3
-
-
-
3
3
3
-
-
3
3
3
-
-
-
3
3
3
3
3
3
3
3
3
0
-
-
-
-
-
-
0
0
-
-
-
-
-
-
0
0
0
0
3
3
3
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
2
2
2
2
2
2
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
7
7
7
7
7
7
0
0
0
0
0
0
0
0
0
3
3
3
0
0
0
0
-
-
-
1
1
1
1
1
1
1
1
1
2
2
2
0
0
0
0
0
0
0
0
0
2
2
2
2
2
2
2
2
2
2
2
2
-
-
-
-
-
-
-
-
-
3
3
3
0
0
0
0
0
0
0
0
0
0
0
0
-
-
-
0
0
0
0
0
0
0
0
0
-
-
-
-
-
-
0
0
0
-
-
-
0
0
0
-
-
-
-
-
-
-
-
-
"""

# Split the hand-entered annotation text into one token per image.
# Blank lines are dropped *before* numbering, so a stray empty line inside
# labels_str would shift the file number of every later entry.
labels = [
    token
    for token in (line.strip() for line in labels_str.strip().splitlines())
    if token != ""
]

# One record per labelled image: {"filepath": <path>, "label": <int class id>}.
data_rows = []

# Entry idx (0-based) describes the file "NV (idx + 1).jpg".
for idx, label in enumerate(labels):
    if label == "-":
        # "-" marks an image that is left out of the CSV.
        continue
    if not label.isdecimal():
        # Fail loudly on a typo instead of writing it into the CSV as a "class".
        raise ValueError(
            f"Entry {idx + 1} of labels_str is {label!r}; "
            "expected a non-negative integer class id or '-'."
        )
    file_name = f"NV ({idx + 1}).jpg"
    filepath = os.path.join(nv_dir, file_name)
    # Store the class id as an int so the in-memory frame has the same label
    # dtype as the main dataset; the CSV text written below is unchanged.
    data_rows.append({"filepath": filepath, "label": int(label)})

# Declaring the columns keeps the CSV header even if every entry was skipped.
df = pd.DataFrame(data_rows, columns=["filepath", "label"])
df.to_csv("data.csv", index=False)
print("Data saved to data.csv")

Data saved to data.csv
