In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
import os
from PIL import Image
from pickle import dump, load
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset


In [None]:
# Step 1: Load and preprocess images
def load_images(images_folder, save_file_to=None):
    """Load every image under ``images_folder`` as a flattened 48x48 grayscale vector.

    The directory tree is walked recursively. The label of an image is the
    name of the directory that directly contains it.

    Args:
        images_folder: Root directory to scan for ``.jpg``, ``.jpeg`` and
            ``.png`` files (extension matched case-insensitively).
        save_file_to: Optional file path. When given, the tuple
            ``(X_original, X, y)`` is pickled there as plain Python lists.

    Returns:
        Tuple ``(X_original, X, y)`` of NumPy arrays. ``X_original`` and ``X``
        contain the same pixel vectors (as independent copies) and ``y``
        contains the matching labels.
    """
    image_extensions = {'.jpg', '.jpeg', '.png'}
    X_original = []
    X = []
    y = []

    for subdir, dirs, files in os.walk(images_folder):
        # os.walk yields entries in an OS-dependent order; sorting (in place
        # for dirs, so the traversal itself is affected) keeps the sample
        # order, and therefore any seeded split downstream, reproducible.
        dirs.sort()
        label = os.path.basename(subdir)

        for file in sorted(files):
            # Compare the real extension, case-insensitively: a plain suffix
            # test misses "photo.JPG" and wrongly accepts names like "readmepng".
            if os.path.splitext(file)[1].lower() not in image_extensions:
                continue

            img_path = os.path.join(subdir, file)
            with Image.open(img_path) as source:
                # Grayscale, then resize to 48x48
                image = source.convert('L').resize((48, 48))

            pixels = np.array(image).flatten()
            X_original.append(pixels)
            X.append(pixels.copy())  # keep X independent of X_original
            y.append(label)

    if save_file_to:
        with open(save_file_to, "wb") as f:
            dump((X_original, X, y), f, protocol=5)

    return np.array(X_original), np.array(X), np.array(y)

# Location of the raw images and of the pickle that caches the parsed dataset
images_folder, dataset_file = "../images", "dataset_dump.pkl"

# Scan the image tree once and store the result for later sessions
X_original, X, y = load_images(
    images_folder,
    save_file_to=dataset_file,
)


In [None]:
# Restore the cached dataset instead of rescanning the image folder.
# NOTE: unpickling executes arbitrary code; only load files you created yourself.
with open("dataset_dump.pkl", mode="rb") as dataset_fh:
    X_original, X, y = load(dataset_fh)

In [1]:

# Features and labels may arrive as plain lists (e.g. from the pickle);
# make sure both are NumPy arrays before preprocessing
X, y = np.array(X), np.array(y)

NameError: name 'np' is not defined

In [None]:
# Preprocessing data function
def preprocessing_data(X, y, save_file_to=None):
    """Rescale pixels, standardise features and integer-encode the labels.

    Args:
        X: Array of raw pixel rows; every value is divided by 255.
        y: Sequence of labels handed to a fresh ``LabelEncoder``.
        save_file_to: Optional file path. When given, the returned tuple is
            pickled there.

    Returns:
        Tuple ``(X_scaled, y_encoded, label_encoder, scaler)`` holding the
        standardised features, the encoded labels and the two fitted objects.

    Note:
        The scaler is fitted on every row of ``X`` that is passed in.
    """
    # Pixel intensities -> [0, 1]
    unit_pixels = X / 255.0

    # Feature-wise standardisation
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(unit_pixels)

    # Labels -> integer ids
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    result = (X_scaled, y_encoded, label_encoder, scaler)

    if save_file_to:
        with open(save_file_to, "wb") as out_file:
            dump(result, out_file, protocol=5)

    return result

# Run the preprocessing step and persist its outputs next to the notebook
X_scaled, y_encoded, label_encoder, scaler = preprocessing_data(
    X,
    y,
    save_file_to="preprocessed_data.pkl",
)


In [None]:
# Hold out 20% of all samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Take 20% of what remains as the validation set
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Features become float32 tensors
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)

# Labels become int64 tensors
y_train_tensor, y_test_tensor = (
    torch.tensor(targets, dtype=torch.long) for targets in (y_train, y_test)
)

In [None]:
class CustomDataset(Dataset):
    """Dataset serving flattened images as 2-D images together with their labels.

    Args:
        X: Indexable collection of flattened images, one row per sample. Each
            row must support ``reshape``.
        y: Indexable collection of labels aligned with ``X``.
        transform: Optional callable applied to every reshaped image. Its
            result must be a tensor. When omitted, the image is converted to
            a tensor with a leading channel dimension.
        image_size: ``(height, width)`` every row is reshaped to.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length.
    """

    def __init__(self, X, y, transform=None, image_size=(48, 48)):
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y
        self.transform = transform
        self.image_size = tuple(image_size)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is float32."""
        # Restore the 2-D layout so image transforms can be applied
        image = self.X[idx].reshape(self.image_size)

        if self.transform:
            image = self.transform(image)
        else:
            # Without a transform the image is still a NumPy array, which has
            # no ``.type`` method; convert it and add a channel dimension so
            # the result is laid out as (channel, height, width).
            image = torch.as_tensor(image).unsqueeze(0)

        label = self.y[idx]
        return image.type(torch.float32), label

# Training pipeline: tensor conversion, light augmentation, then normalisation
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip(p=0.2),
        transforms.RandomRotation(5),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

# Validation/test pipeline: same conversion and normalisation, no augmentation
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

# Wrap the training and validation splits
train_dataset = CustomDataset(X_train, y_train, transform=transform)
val_dataset = CustomDataset(X_val, y_val, transform=test_transform)

# Batch them; only the training data is shuffled
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=32)


In [None]:
class SimpleNN(nn.Module):
    """Fully connected classifier: ``input_size -> 512 -> 256 -> num_classes``.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Width of the output layer. When omitted it is derived
            from the notebook-level ``y_encoded`` array (number of distinct
            values), which therefore has to exist at construction time.
        dropout: Drop probability of the dropout layer placed before the
            output layer.
    """

    def __init__(self, input_size, num_classes=None, dropout=0.5):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: infer the class count from the
            # encoded labels defined earlier in the notebook.
            num_classes = len(np.unique(y_encoded))

        self.fc1 = nn.Linear(input_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)  # Output layer (raw logits)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        # Flatten everything but the batch dimension; ``reshape`` also
        # accepts non-contiguous inputs, which ``view`` rejects.
        x = x.reshape(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [None]:
# Grid search: train one model per hyperparameter combination with early
# stopping on the validation loss, report test metrics for each, and keep
# the weights of the combination with the lowest validation loss in `model`.
criterion = nn.CrossEntropyLoss()

# Hyperparameter grid
param_grid = {
    'lr': [0.001, 0.0005, 0.0001],
    'dropout': [0.3],
    'batch_size': [32],
}

grid_search = ParameterGrid(param_grid)

patience = 10      # epochs without validation improvement before stopping
num_epochs = 100   # upper bound on epochs per combination

# Best result across all combinations (selected on validation loss only)
best_overall_val_loss = float('inf')
best_params = None
best_model_state = None

# Test samples must go through `test_transform` just like validation samples,
# otherwise the model is evaluated on inputs with a different scale than the
# ones it was trained on.
test_dataset = CustomDataset(X_test, y_test, transform=test_transform)

# Passing every class id explicitly keeps the report aligned with
# `label_encoder.classes_` even if a class is absent from the test split.
class_indices = np.arange(len(label_encoder.classes_))

# Iterate through each combination of hyperparameters
for params in grid_search:
    print(f"Training with params: {params}")

    # Same seed for every combination so runs differ only in hyperparameters
    torch.manual_seed(42)

    model = SimpleNN(input_size=48 * 48)  # 48x48 images, flattened
    # Apply the dropout rate from the grid directly on the model's dropout layer
    model.dropout.p = params['dropout']

    # Optimizer with the specified learning rate
    optimizer = optim.Adam(model.parameters(), lr=params['lr'])

    # DataLoaders with the specified batch size
    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=params['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=params['batch_size'], shuffle=False)

    # Scheduler: divide the learning rate by 10 every 30 epochs
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

    # Early-stopping state is per combination; sharing it across combinations
    # would make later runs stop against an earlier run's best loss.
    best_val_loss = float('inf')
    patience_counter = 0
    best_state = None

    # Training loop for the current parameter combination
    for epoch in range(num_epochs):
        # Training Phase
        model.train()
        train_loss = 0.0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)  # the model flattens its input itself
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation Phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        # Average losses over the number of batches
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)

        scheduler.step()

        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Early stopping based on validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Snapshot the weights so the best epoch can be restored later
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print("Early stopping triggered.")
            break

    print("Finished training with current parameter combination.\n")

    # Evaluate the best epoch of this run, not whatever epoch came last
    if best_state is not None:
        model.load_state_dict(best_state)

    if best_val_loss < best_overall_val_loss:
        best_overall_val_loss = best_val_loss
        best_params = params
        best_model_state = best_state

    # Evaluation and prediction on the test set (reporting only; model
    # selection above relies on the validation loss)
    model.eval()
    batch_predictions = []
    with torch.no_grad():
        for images, _ in test_loader:
            batch_predictions.append(model(images).argmax(dim=1))
    y_pred = torch.cat(batch_predictions).numpy()

    # Calculate accuracy and print classification report
    accuracy = accuracy_score(y_test_tensor.numpy(), y_pred)
    print(f"Accuracy for params {params}: {accuracy:.2f}")
    print("Classification Report:")
    print(classification_report(
        y_test_tensor.numpy(),
        y_pred,
        labels=class_indices,
        target_names=label_encoder.classes_,
    ))
    print("-" * 50)

# Leave the best combination's weights in `model` for the cells that follow
if best_model_state is not None:
    model.load_state_dict(best_model_state)
    model.dropout.p = best_params['dropout']
    model.eval()
    print(f"Best params by validation loss: {best_params} (val loss {best_overall_val_loss:.4f})")


In [None]:
# Final evaluation of `model` on the held-out test set.
# The test samples are passed through `test_transform`, the same pipeline the
# validation data uses, so the model sees inputs on the scale it was trained on.
test_loader = DataLoader(
    CustomDataset(X_test, y_test, transform=test_transform),
    batch_size=32,
    shuffle=False,  # keep predictions aligned with y_test_tensor
)

model.eval()
batch_predictions = []
with torch.no_grad():
    for images, _ in test_loader:
        batch_predictions.append(model(images).argmax(dim=1))
y_pred = torch.cat(batch_predictions).numpy()

# Calculate accuracy and print classification report
accuracy = accuracy_score(y_test_tensor.numpy(), y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(
    y_test_tensor.numpy(),
    y_pred,
    # Listing every class id keeps the report aligned with the class names
    # even if a class does not occur in the test split.
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
))