<h1 style='text-align:center; font-weight:bold'>Imports</h1>

In [1]:
import os
import gc
import cv2
import time
import pickle
import pandas as pd
from tqdm import tqdm
from rembg import remove

try:
    from kaggle.api.kaggle_api_extended import KaggleApi
except OSError:
    from kaggle.api.kaggle_api_extended import KaggleApi

<h1 style='text-align:center; font-weight:bold'>Downloading and Unpacking Data</h1>

In [2]:
def downloadData(path='./Dataset', name="fatihkgg/affectnet-yolo-format"):
    api = KaggleApi()
    if not os.path.exists('./Dataset'): os.makedirs(path)
    api.dataset_download_files(name, path=path, unzip=True)
    print("Dataset downloaded successfully")
    
# downloadData()

<h1 style='text-align:center; font-weight:bold'>Global Variables</h1>

In [3]:
LOAD_BATCH_SIZE = 500
CLASS_NAMES = [
    "Anger",
    "Contempt",
    "Disgust",
    "Fear",
    "Happy",
    "Neutral",
    "Sad",
    "Surprise",
]

<h1 style='text-align:center; font-weight:bold'>Preparing Data</h1>

In [4]:
def prepareData(type):
    imageDir = f"Dataset/YOLO_format/{type}/images"
    labelDir = f"Dataset/YOLO_format/{type}/labels"

    outputDir = f"Processed Images/{type}"
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)

    for className in CLASS_NAMES:
        classDir = os.path.join(outputDir, className)
        if not os.path.exists(classDir):
            os.makedirs(classDir)

    for imageFile in os.listdir(imageDir):
        if not (imageFile.endswith(".jpg") or imageFile.endswith(".png")):
            continue

        imagePath = os.path.join(imageDir, imageFile)
        labelFile = imageFile.replace(".jpg", ".txt").replace(".png", ".txt")
        labelPath = os.path.join(labelDir, labelFile)

        if not os.path.exists(labelPath):
            continue

        image = cv2.imread(imagePath)
        if image is None:
            print(f"Warning: Unable to read image {imagePath}. Skipping...")
            continue

        with open(labelPath, "r") as f:
            for line in f.readlines():
                try:
                    data = line.strip().split()
                    classId = int(data[0])  
                    className = CLASS_NAMES[classId]  

                    with open(imagePath, "rb") as inputImage:
                        bgRemovedImage = remove(inputImage.read())

                    classDir = os.path.join(outputDir, className)
                    fileName = f"{os.path.splitext(imageFile)[0]}_{className}.{imageFile.split('.')[-1]}"
                    savePath = os.path.join(classDir, fileName)

                    with open(savePath, "wb") as outputImage:
                        outputImage.write(bgRemovedImage)

                except Exception as e:
                    print(f"Error processing line '{line}' in {labelPath}: {e}")


In [5]:
print(f"\n\t\t\t\t     Loading Data:")
print(f"\n********************************************************************************************\n")

xTrain, yTrain = [], []
try:
    for i, batch in enumerate(tqdm(prepareData("train"), desc="Loading Training Data")):
        xTrain.extend(batch["x"].tolist())
        yTrain.extend(batch["y"].tolist())
        
    print(f"Training: {len(xTrain)} images, {len(yTrain)} labels")
    print("Training Data is Successfully Saved")
    
except Exception as e:
    print(f"Error Loading Training Data: {e}")

time.sleep(1)
try:
    del xTrain, yTrain
except Exception as e:
    print(f"Error Deleting Training Data: {e}")
        
print(f"\n********************************************************************************************\n")
gc.collect()

xVal, yVal = [], []
try:
    for i, batch in enumerate(tqdm(prepareData("valid"), desc="Loading Validation Data")):
        xVal.extend(batch["x"].tolist())
        yVal.extend(batch["y"].tolist())
        
    print(f"Validation: {len(xVal)} images, {len(yVal)} labels")
    print(f"Validation Data is Successfully Saved")
    
except Exception as e:
    print(f"Error Loading Validation Data: {e}") 

time.sleep(1)
try:
    del xVal, yVal
except Exception as e:
    print(f"Error Deleting Validation Data: {e}")
        
print(f"\n********************************************************************************************\n")
gc.collect()

xTest, yTest = [], []
try:
    for i, batch in enumerate(tqdm(prepareData("test"), desc="Loading Testing Data")):
        xTest.extend(batch["x"].tolist())
        yTest.extend(batch["y"].tolist())
        
    print(f"Testing: {len(xTest)} images, {len(yTest)} labels")
    print("Testing Data is Successfully Saved")
    
except Exception as e:
    print(f"Error Loading Testing Data: {e}")   
        
time.sleep(1)
try:
    del xTest, yTest
except Exception as e:
    print(f"Error Deleting Testing Data: {e}")

Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file 'C:\Users\dell\.u2net\u2net.onnx'.



				     Loading Data:

********************************************************************************************



100%|###############################################| 176M/176M [00:00<?, ?B/s]
Loading Training Data: 0it [00:00, ?it/s]


Error Loading Training Data: 'NoneType' object is not iterable

********************************************************************************************



Loading Validation Data: 0it [00:00, ?it/s]


Error Loading Validation Data: 'NoneType' object is not iterable

********************************************************************************************



Loading Testing Data: 0it [00:00, ?it/s]


Error Loading Testing Data: 'NoneType' object is not iterable
