In [7]:
import cv2
import pandas as pd

# "final_features.txt" does not hold the data itself: its content is the
# location of the CSV that does. Read that location inside a context manager
# so the file handle is closed as soon as the text has been extracted.
with open("final_features.txt", "r") as path_file:
    final_data_path = path_file.read().strip()  # drop surrounding whitespace/newline

# Load the feature table from the CSV the stored location points to.
final_data = pd.read_csv(final_data_path)

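Convert the data from the dataset format into (LABEL, FEATURES) pairs, where
LABEL is the class label and FEATURES is the list of feature values, e.g.:
"LABEL1", [1, 2, 3, ...]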

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# --- 1. CREATE SAMPLE DATA (Replace with your actual data) ---
# Synthetic stand-in data: 1000 samples with 10 features each, spread over
# 3 possible classes (labels 0, 1, 2).
num_samples = 1000
num_features = 10
num_classes = 3

# Draw from an explicitly seeded generator so that every fresh run of the
# notebook produces exactly the same sample data.
SEED = 42
rng = np.random.default_rng(SEED)

# features: float32 array of shape (num_samples, num_features), values in [0, 1)
# labels: integer class ids in [0, num_classes), one per sample
features = rng.random(size=(num_samples, num_features), dtype=np.float32)
labels = rng.integers(0, num_classes, size=num_samples)

# --- 2. PREPROCESS AND SPLIT DATA ---
# Hold out 20% of the samples for validation; the remaining 80% are used for
# training. The fixed random_state keeps the split the same between runs.
X_train, X_val, y_train, y_val = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training split only
# and that same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

# --- 3. CREATE A CUSTOM PYTORCH DATASET ---
class MyDataset(Dataset):
    """Dataset that pairs each feature row with its class label.

    Args:
        features: Array-like of per-sample feature rows; stored as a
            float32 tensor.
        labels: Array-like of integer class labels, one per sample; stored
            as a long tensor (CrossEntropyLoss expects long tensors for
            labels).

    Raises:
        ValueError: If ``features`` and ``labels`` do not contain the same
            number of samples.
    """

    def __init__(self, features, labels):
        # Convert the inputs (e.g. numpy arrays) to PyTorch tensors once, up front.
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

        # Fail fast on misaligned inputs: otherwise surplus labels would be
        # silently ignored and missing labels would only surface as an
        # IndexError part-way through iteration.
        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features and labels must have the same number of samples, "
                f"got {len(self.features)} and {len(self.labels)}"
            )

    def __len__(self):
        """Return the total number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at index ``idx``."""
        return self.features[idx], self.labels[idx]

# --- 4. CREATE DATALOADERS ---
# Mini-batch size shared by both loaders (32 or 64 are common choices).
batch_size = 64

# Wrap each split in a Dataset so that a DataLoader can batch it.
train_dataset = MyDataset(X_train, y_train)
val_dataset = MyDataset(X_val, y_val)

# Training samples are served in shuffled order; validation keeps the
# dataset's own order (shuffle=False).
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)