<h1 style='text-align:center; font-weight:bold'>Imports</h1>

In [6]:
import os
import gc
import cv2
import time
import pickle
import pandas as pd
from tqdm import tqdm

try:
    from kaggle.api.kaggle_api_extended import KaggleApi
except OSError:
    from kaggle.api.kaggle_api_extended import KaggleApi

<h1 style='text-align:center; font-weight:bold'>Downloading and Unpacking Data</h1>

In [7]:
def downloadData(path='./Dataset', name="fatihkgg/affectnet-yolo-format"):
    """Download a Kaggle dataset and unzip it into ``path``.

    Args:
        path: Directory the archive is downloaded and extracted into. It is
            created (including parents) when it does not exist yet.
        name: Kaggle dataset identifier passed to ``dataset_download_files``.

    Notes:
        ``authenticate()`` is called explicitly because a freshly constructed
        ``KaggleApi`` has not loaded any credentials yet.
    """
    api = KaggleApi()
    api.authenticate()

    # Create the directory that was actually requested; the previous check was
    # hard-coded to './Dataset' and skipped creation for custom paths.
    os.makedirs(path, exist_ok=True)

    api.dataset_download_files(name, path=path, unzip=True)
    print("Dataset downloaded successfully")
    
# downloadData()

<h1 style='text-align:center; font-weight:bold'>Global Variables</h1>

In [8]:
# Emotion labels; prepareData() indexes this list with the integer class id
# read from each label file, so the order must not change.
CLASS_NAMES = [
    "Anger", "Contempt", "Disgust", "Fear",
    "Happy", "Neutral", "Sad", "Surprise",
]

# prepareData() yields a DataFrame once at least this many crops are buffered.
LOAD_BATCH_SIZE = 500

<h1 style='text-align:center; font-weight:bold'>Preparing Data</h1>

In [9]:
def _yoloBoxToPixelBounds(xCenter, yCenter, boxWidth, boxHeight, width, height):
    """Convert a normalised centre/size box to pixel bounds clamped to the image."""
    xMin = max(0, int((xCenter - boxWidth / 2) * width))
    yMin = max(0, int((yCenter - boxHeight / 2) * height))
    xMax = min(width, int((xCenter + boxWidth / 2) * width))
    yMax = min(height, int((yCenter + boxHeight / 2) * height))
    return xMin, yMin, xMax, yMax


def prepareData(type, imageSize=(416, 416)):
    """Crop every labelled box of a dataset split, save it, and yield batches.

    Images are read from ``Dataset/YOLO_format/<type>/images`` and their labels
    from the sibling ``labels`` directory. Each label line is expected to hold
    ``classId xCenter yCenter boxWidth boxHeight`` with normalised coordinates.
    Every crop is resized, written to ``Cropped Images/<type>/<class name>/``
    and buffered in memory as a float32 array scaled by 1/255.

    Args:
        type: Split directory name (e.g. "train", "valid", "test"). The name
            shadows the built-in but is kept for backward compatibility.
        imageSize: ``(width, height)`` handed to ``cv2.resize`` for each crop.

    Yields:
        pandas.DataFrame with column "x" (the scaled crops) and column "y"
        (the class name). A frame is emitted once at least ``LOAD_BATCH_SIZE``
        crops are buffered, plus one final frame with any remainder.

    Notes:
        Malformed label lines are reported with ``print`` and skipped; they do
        not abort the run. Images that cannot be read, or that have no label
        file, are skipped as well.
    """
    imageDir = f"Dataset/YOLO_format/{type}/images"
    labelDir = f"Dataset/YOLO_format/{type}/labels"
    outputDir = f'Cropped Images/{type}'

    os.makedirs(outputDir, exist_ok=True)
    for className in CLASS_NAMES:
        os.makedirs(os.path.join(outputDir, className), exist_ok=True)

    croppedImages = []
    classNames = []

    # Sorted so that batch contents are reproducible between runs.
    for imageFile in sorted(os.listdir(imageDir)):
        # splitext keeps any extra dots in the stem ("a.v2.jpg" -> "a.v2", ".jpg").
        stem, extension = os.path.splitext(imageFile)
        if extension not in (".jpg", ".png"):
            continue

        labelPath = os.path.join(labelDir, stem + ".txt")
        if not os.path.exists(labelPath):
            continue

        imagePath = os.path.join(imageDir, imageFile)
        image = cv2.imread(imagePath)
        if image is None:
            print(f"Warning: Unable to read image {imagePath}. Skipping...")
            continue

        height, width = image.shape[0:2]

        with open(labelPath, "r") as f:
            for line in f:
                if not line.strip():
                    # Blank lines (e.g. a trailing newline) carry no annotation.
                    continue

                try:
                    data = line.split()
                    classId = int(data[0])
                    # Reject negative ids explicitly: Python would otherwise
                    # index from the end of the list and mislabel the crop.
                    if not 0 <= classId < len(CLASS_NAMES):
                        raise ValueError(f"class id {classId} is out of range")
                    className = CLASS_NAMES[classId]

                    xCenter, yCenter, boxWidth, boxHeight = map(float, data[1:])
                    xMin, yMin, xMax, yMax = _yoloBoxToPixelBounds(
                        xCenter, yCenter, boxWidth, boxHeight, width, height
                    )

                    croppedFace = image[yMin:yMax, xMin:xMax]
                    if croppedFace.size == 0:
                        continue

                    resizedFace = cv2.resize(croppedFace, imageSize)

                    croppedImages.append((resizedFace / 255.0).astype("float32"))
                    classNames.append(className)

                    # The running crop count keeps names unique when one image
                    # carries several boxes.
                    fileName = f"{stem}_{len(croppedImages)}{extension}"
                    savePath = os.path.join(outputDir, className, fileName)

                    # Write the 8-bit image directly instead of scaling the
                    # float copy back up by 255.
                    cv2.imwrite(savePath, resizedFace)

                except Exception as e:
                    print(f"Error processing line '{line}' in {labelPath}: {e}")

        if len(croppedImages) >= LOAD_BATCH_SIZE:
            yield pd.DataFrame({"x": croppedImages, "y": classNames})
            croppedImages, classNames = [], []

    if croppedImages:
        yield pd.DataFrame({"x": croppedImages, "y": classNames})


In [10]:
# Iterating prepareData() is what writes the cropped faces to disk; this cell
# only needs the totals, so it counts each batch and lets it go instead of
# accumulating every image in memory.
_SEPARATOR = "\n********************************************************************************************\n"


def _processSplit(split, label):
    """Run prepareData() over one split and report how many crops it produced.

    Args:
        split: Directory name handed to prepareData ("train", "valid", "test").
        label: Human-readable split name used in the progress bar and messages.
    """
    imageCount = 0
    labelCount = 0
    try:
        for batch in tqdm(prepareData(split), desc=f"Loading {label} Data"):
            imageCount += len(batch["x"])
            labelCount += len(batch["y"])

        print(f"{label}: {imageCount} images, {labelCount} labels")
        print(f"{label} Data is Successfully Saved")

    except Exception as e:
        # Best effort: report the failure and continue with the next split.
        print(f"Error Loading {label} Data: {e}")

    # NOTE(review): presumably lets the progress bar finish rendering before
    # the next prints - confirm whether this pause is still needed.
    time.sleep(1)


print("\n\t\t\t\t     Loading Data:")
print(_SEPARATOR)

for position, (split, label) in enumerate(
    [("train", "Training"), ("valid", "Validation"), ("test", "Testing")]
):
    if position > 0:
        print(_SEPARATOR)
        gc.collect()
    _processSplit(split, label)


				     Loading Data:

********************************************************************************************



Loading Training Data: 0it [00:00, ?it/s]

Loading Training Data: 35it [12:36, 21.61s/it]


Training: 17101 images, 17101 labels
Training Data is Successfully Saved

********************************************************************************************



Loading Validation Data: 11it [03:34, 19.53s/it]


Validation: 5406 images, 5406 labels
Validation Data is Successfully Saved

********************************************************************************************



Loading Testing Data: 6it [01:50, 18.34s/it]


Testing: 2755 images, 2755 labels
Testing Data is Successfully Saved
