In [None]:
# Classification_Task1_LLM_C1C2_Control.py
# LLM controls c1, c2 parameters + provides particle position/velocity history

import os
import time
import json
import random
import requests
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

# ==========================================
# 1. CONFIGURATION
# ==========================================
class Config:
    AVALAI_API_KEY = ""
    AVALAI_BASE_URL = "https://api.avalai.ir/v1/chat/completions"
    LLM_MODEL = "gpt-5.1"

    DATASET_PATH = "/kaggle/input/waste-classification-data/DATASET"
    IMG_SIZE = (128, 128)
    BATCH_SIZE = 64
    NUM_CLASSES = 2
    TRAIN_DATA_PERCENTAGE = 0.25

    POPULATION_SIZE = 5
    MAX_ITERATIONS = 8
    
    # Initial C1, C2 (LLM will update these)
    C1_INIT = 2.0
    C2_INIT = 2.0
    
    W_MAX = 0.9
    W_MIN = 0.4

    # Search Space
    MIN_LAYERS = 1
    MAX_LAYERS = 4
    MIN_FILTERS = 16
    MAX_FILTERS = 128

    EVAL_EPOCHS = 1
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==========================================
# 2. ENHANCED MEMORY MODULE
# ==========================================
class OptimizationMemory:
    def __init__(self):
        self.history = []
        self.particle_states = []
        self.c1_c2_history = []
    
    def add_record(self, params, acc):
        self.history.append({'params': params, 'acc': acc})
    
    def add_particle_state(self, iteration, positions, velocities, fitness):
        state = {
            'iteration': iteration,
            'particles': []
        }
        for i in range(len(positions)):
            state['particles'].append({
                'id': i,
                'position': [round(positions[i][0], 2), round(positions[i][1], 2)],
                'velocity': [round(velocities[i][0], 3), round(velocities[i][1], 3)],
                'fitness': round(fitness[i], 4)
            })
        self.particle_states.append(state)
        if len(self.particle_states) > 3:
            self.particle_states.pop(0)
    
    def add_c1_c2_record(self, c1, c2, iteration):
        self.c1_c2_history.append({'iter': iteration, 'c1': c1, 'c2': c2})
    
    def get_context_string(self):
        if not self.history:
            return "No history yet."
        sorted_hist = sorted(self.history, key=lambda x: x['acc'], reverse=True)
        best_3 = sorted_hist[:3]
        worst_3 = sorted_hist[-3:]
        
        context = "=== OPTIMIZATION MEMORY ===\n"
        context += "TOP 3 Configurations (Highest Accuracy):\n"
        for i, h in enumerate(best_3):
            context += f"  {i+1}. [L={h['params'][0]}, F={h['params'][1]}] -> Acc: {h['acc']:.4f}\n"
        context += "\nWORST 3 Configurations:\n"
        for i, h in enumerate(worst_3):
            context += f"  {i+1}. [L={h['params'][0]}, F={h['params'][1]}] -> Acc: {h['acc']:.4f}\n"
        return context
    
    def get_particle_state_string(self):
        if not self.particle_states:
            return "No particle state history."
        context = "\n=== PARTICLE STATE HISTORY ===\n"
        for state in self.particle_states:
            context += f"\n--- Iteration {state['iteration']} ---\n"
            for p in state['particles']:
                context += (f"  P{p['id']}: Pos=[L:{p['position'][0]:.1f}, F:{p['position'][1]:.0f}] "
                           f"Vel=[{p['velocity'][0]:+.2f}, {p['velocity'][1]:+.2f}] "
                           f"Acc:{p['fitness']:.4f}\n")
        return context
    
    def get_c1_c2_history_string(self):
        if not self.c1_c2_history:
            return "No c1/c2 history."
        context = "\n=== C1/C2 HISTORY ===\n"
        for h in self.c1_c2_history[-5:]:
            context += f"  Iter {h['iter']}: c1={h['c1']:.2f}, c2={h['c2']:.2f}\n"
        return context

# ==========================================
# 3. DATA LOADING
# ==========================================
def get_data_loaders():
    transform = transforms.Compose([
        transforms.Resize(Config.IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    train_dir = os.path.join(Config.DATASET_PATH, 'TRAIN')
    test_dir = os.path.join(Config.DATASET_PATH, 'TEST')

    try:
        train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
        test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)
        
        if Config.TRAIN_DATA_PERCENTAGE < 1.0:
            indices = list(range(len(train_dataset)))
            split = int(np.floor(Config.TRAIN_DATA_PERCENTAGE * len(train_dataset)))
            subset_indices = random.sample(indices, split)
            train_dataset = Subset(train_dataset, subset_indices)
            print(f"  [Info] Using {len(train_dataset)} images ({Config.TRAIN_DATA_PERCENTAGE*100}%) for training.")
        
        train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)
        return train_loader, test_loader
    except Exception as e:
        print(f"Error loading data: {e}")
        return None, None

# ==========================================
# 4. DYNAMIC CNN MODEL
# ==========================================
class DynamicCNN(nn.Module):
    def __init__(self, num_layers, num_filters):
        super(DynamicCNN, self).__init__()
        self.convs = nn.ModuleList()
        in_channels = 3
        
        for _ in range(int(num_layers)):
            self.convs.append(nn.Conv2d(in_channels, int(num_filters), 3, 1, 1))
            self.convs.append(nn.MaxPool2d(2, 2))
            in_channels = int(num_filters)
        
        final_dim = Config.IMG_SIZE[0]
        for _ in range(int(num_layers)):
            final_dim = final_dim // 2
        if final_dim < 1:
            final_dim = 1
            
        self.flatten_size = in_channels * final_dim * final_dim
        self.fc1 = nn.Linear(self.flatten_size, 64)
        self.fc2 = nn.Linear(64, Config.NUM_CLASSES)

    def forward(self, x):
        for layer in self.convs:
            if isinstance(layer, nn.Conv2d):
                x = F.relu(layer(x))
            else:
                x = layer(x)
        x = x.view(-1, self.flatten_size)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# ==========================================
# 5. LLM INTERFACE (Returns c1, c2 + suggestions)
# ==========================================
def get_llm_suggestions_with_params(memory_ctx, particle_ctx, c1c2_ctx, current_c1, current_c2):
    prompt = f"""
You are an expert in PSO Hyperparameter Optimization for CNN Image Classification.

OBJECTIVE: Maximize Classification Accuracy (Organic vs Recyclable waste).

SEARCH SPACE:
- Layers: {Config.MIN_LAYERS} to {Config.MAX_LAYERS}
- Filters: {Config.MIN_FILTERS} to {Config.MAX_FILTERS}

CURRENT PSO PARAMETERS:
- c1 (cognitive): {current_c1:.2f}
- c2 (social): {current_c2:.2f}

{memory_ctx}

{particle_ctx}

{c1c2_ctx}

=== YOUR TASKS ===

1. ANALYZE the particle velocities:
   - If velocities are too high (particles overshooting), suggest LOWER c1/c2.
   - If velocities are too low (stagnation), suggest HIGHER c1/c2.
   - Typical range: c1, c2 ‚àà [1.0, 3.0]

2. SUGGEST 2 new configurations [layers, filters] based on the history.

3. RECOMMEND new c1 and c2 values.

OUTPUT FORMAT (JSON only):
{{
    "analysis": "Brief reasoning about velocity patterns...",
    "suggestions": [[layers1, filters1], [layers2, filters2]],
    "recommended_c1": float,
    "recommended_c2": float
}}
"""
    
    try:
        print("  [LLM] Consulting for suggestions + c1/c2 tuning...")
        resp = requests.post(
            Config.AVALAI_BASE_URL,
            headers={"Authorization": f"Bearer {Config.AVALAI_API_KEY}", "Content-Type": "application/json"},
            json={"model": Config.LLM_MODEL, "messages": [{"role": "user", "content": prompt}], "temperature": 0.5},
            timeout=30
        )
        
        if resp.status_code == 200:
            content = resp.json()['choices'][0]['message']['content']
            content = content.replace("```json", "").replace("```", "").strip()
            data = json.loads(content)
            
            print(f"  [LLM Analysis]: {data.get('analysis', 'N/A')}")
            
            suggestions = data.get("suggestions", [])
            new_c1 = np.clip(float(data.get("recommended_c1", current_c1)), 1.0, 3.0)
            new_c2 = np.clip(float(data.get("recommended_c2", current_c2)), 1.0, 3.0)
            
            return suggestions, new_c1, new_c2
        else:
            print(f"  [LLM Error] Status: {resp.status_code}")
            return [], current_c1, current_c2
    except Exception as e:
        print(f"  [LLM Failed]: {e}")
        return [], current_c1, current_c2

# ==========================================
# 6. PSO WITH LLM-CONTROLLED C1/C2
# ==========================================
class LLMControlledPSO:
    def __init__(self, train_loader, test_loader):
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.memory = OptimizationMemory()
        
        self.positions = np.zeros((Config.POPULATION_SIZE, 2))
        self.positions[:, 0] = np.random.uniform(Config.MIN_LAYERS, Config.MAX_LAYERS, Config.POPULATION_SIZE)
        self.positions[:, 1] = np.random.uniform(Config.MIN_FILTERS, Config.MAX_FILTERS, Config.POPULATION_SIZE)
        
        self.velocities = np.zeros((Config.POPULATION_SIZE, 2))
        self.pbest_pos = self.positions.copy()
        self.pbest_val = np.zeros(Config.POPULATION_SIZE)
        self.gbest_pos = np.zeros(2)
        self.gbest_val = 0.0
        
        self.c1 = Config.C1_INIT
        self.c2 = Config.C2_INIT

    def evaluate(self, pos):
        L = int(np.clip(pos[0], Config.MIN_LAYERS, Config.MAX_LAYERS))
        F_num = int(np.clip(pos[1], Config.MIN_FILTERS, Config.MAX_FILTERS))
        print(f"    > Eval: L={L}, F={F_num}...", end=" ")
        
        try:
            model = DynamicCNN(L, F_num).to(Config.DEVICE)
            opt = optim.Adam(model.parameters(), lr=0.001)
            model.train()
            for img, lbl in self.train_loader:
                opt.zero_grad()
                F.cross_entropy(model(img.to(Config.DEVICE)), lbl.to(Config.DEVICE)).backward()
                opt.step()
            
            model.eval()
            corr, tot = 0, 0
            with torch.no_grad():
                for img, lbl in self.test_loader:
                    out = model(img.to(Config.DEVICE))
                    corr += (out.argmax(1) == lbl.to(Config.DEVICE)).sum().item()
                    tot += lbl.size(0)
            acc = corr / tot
            print(f"Acc: {acc:.4f}")
            self.memory.add_record([L, F_num], acc)
            return acc
        except Exception as e:
            print(f"Error: {e}")
            return 0.0

    def run(self):
        print("\n=== Task 1: LLM-Controlled PSO (c1/c2 + Particle History) ===")
        
        fitness = np.array([self.evaluate(p) for p in self.positions])
        self.pbest_val = fitness.copy()
        best_idx = fitness.argmax()
        self.gbest_val = fitness[best_idx]
        self.gbest_pos = self.positions[best_idx].copy()
        
        for iteration in range(Config.MAX_ITERATIONS):
            w = Config.W_MAX - ((Config.W_MAX - Config.W_MIN) * iteration / Config.MAX_ITERATIONS)
            print(f"\nIter {iteration+1}/{Config.MAX_ITERATIONS} | Best Acc: {self.gbest_val:.4f} | c1={self.c1:.2f}, c2={self.c2:.2f}")
            
            self.memory.add_particle_state(iteration, self.positions, self.velocities, fitness)
            
            # LLM intervention
            if iteration in [2, 5]:
                mem_ctx = self.memory.get_context_string()
                particle_ctx = self.memory.get_particle_state_string()
                c1c2_ctx = self.memory.get_c1_c2_history_string()
                
                suggestions, new_c1, new_c2 = get_llm_suggestions_with_params(
                    mem_ctx, particle_ctx, c1c2_ctx, self.c1, self.c2
                )
                
                if new_c1 != self.c1 or new_c2 != self.c2:
                    print(f"  [LLM] Updating: c1={self.c1:.2f}->{new_c1:.2f}, c2={self.c2:.2f}->{new_c2:.2f}")
                    self.c1, self.c2 = new_c1, new_c2
                    self.memory.add_c1_c2_record(self.c1, self.c2, iteration)
                
                if suggestions:
                    worst_indices = np.argsort(fitness)[:len(suggestions)]
                    for idx, sug in zip(worst_indices, suggestions):
                        print(f"  [LLM] Replacing P{idx} -> {sug}")
                        self.positions[idx] = np.array(sug)
                        self.velocities[idx] = np.zeros(2)
                        new_fit = self.evaluate(self.positions[idx])
                        fitness[idx] = new_fit
                        if new_fit > self.gbest_val:
                            self.gbest_val = new_fit
                            self.gbest_pos = self.positions[idx].copy()
                            print(f"  *** LLM FOUND NEW GLOBAL BEST! ***")
            
            # PSO update
            for i in range(Config.POPULATION_SIZE):
                r1, r2 = np.random.rand(2), np.random.rand(2)
                social_term = 0 if i == Config.POPULATION_SIZE - 1 else self.c2 * r2 * (self.gbest_pos - self.positions[i])
                cognitive_term = self.c1 * r1 * (self.pbest_pos[i] - self.positions[i])
                
                self.velocities[i] = w * self.velocities[i] + cognitive_term + social_term
                self.positions[i] += self.velocities[i]
                self.positions[i, 0] = np.clip(self.positions[i, 0], Config.MIN_LAYERS, Config.MAX_LAYERS)
                self.positions[i, 1] = np.clip(self.positions[i, 1], Config.MIN_FILTERS, Config.MAX_FILTERS)
                
                acc = self.evaluate(self.positions[i])
                fitness[i] = acc
                
                if acc > self.pbest_val[i]:
                    self.pbest_val[i] = acc
                    self.pbest_pos[i] = self.positions[i].copy()
                if acc > self.gbest_val:
                    self.gbest_val = acc
                    self.gbest_pos = self.positions[i].copy()
        
        return self.gbest_pos, self.gbest_val

# ==========================================
# 7. MAIN
# ==========================================
def main():
    print("=" * 70)
    print("CLASSIFICATION TASK 1: LLM Controls c1/c2 + Particle History")
    print("=" * 70)
    
    train_loader, test_loader = get_data_loaders()
    if train_loader is None:
        return
    
    start = time.time()
    pso = LLMControlledPSO(train_loader, test_loader)
    best_pos, best_acc = pso.run()
    elapsed = time.time() - start
    
    print("\n" + "=" * 70)
    print(f"RESULT: Best Params = [L={int(best_pos[0])}, F={int(best_pos[1])}]")
    print(f"        Best Accuracy = {best_acc*100:.2f}%")
    print(f"        Time = {elapsed:.1f}s")
    print(f"        Final c1={pso.c1:.2f}, c2={pso.c2:.2f}")
    print("=" * 70)

if __name__ == "__main__":
    main()

CLASSIFICATION TASK 1: LLM Controls c1/c2 + Particle History
  [Info] Using 5641 images (25.0%) for training.

=== Task 1: LLM-Controlled PSO (c1/c2 + Particle History) ===
    > Eval: L=1, F=16... Acc: 0.8663
    > Eval: L=3, F=41... Acc: 0.8754
    > Eval: L=1, F=30... Acc: 0.8607
    > Eval: L=1, F=114... Acc: 0.7994
    > Eval: L=1, F=43... Acc: 0.8126

Iter 1/8 | Best Acc: 0.8754 | c1=2.00, c2=2.00
    > Eval: L=4, F=55... Acc: 0.8189
    > Eval: L=3, F=41... Acc: 0.8424
    > Eval: L=3, F=52... Acc: 0.8520
    > Eval: L=4, F=28... Acc: 0.8735
    > Eval: L=1, F=43... Acc: 0.7867

Iter 2/8 | Best Acc: 0.8754 | c1=2.00, c2=2.00
    > Eval: L=4, F=18... Acc: 0.8723
    > Eval: L=3, F=41... Acc: 0.8659
    > Eval: L=3, F=42... Acc: 0.8687
    > Eval: L=4, F=16... Acc: 0.8699
    > Eval: L=1, F=43... Acc: 0.8173

Iter 3/8 | Best Acc: 0.8754 | c1=2.00, c2=2.00
  [LLM] Consulting for suggestions + c1/c2 tuning...
  [LLM Analysis]: Velocities in filters dimension are large (e.g., -86.53,

In [None]:
# Classification_Task2_5D_SearchSpace.py
# Expanded search space: Layers, Filters, Learning Rate, Dropout, Epochs

import os
import time
import json
import random
import requests
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

# ==========================================
# 1. CONFIGURATION (5D SEARCH SPACE)
# ==========================================
class Config:
    AVALAI_API_KEY = ""
    AVALAI_BASE_URL = "https://api.avalai.ir/v1/chat/completions"
    LLM_MODEL = "gpt-5.1"

    DATASET_PATH = "/kaggle/input/waste-classification-data/DATASET"
    IMG_SIZE = (128, 128)
    BATCH_SIZE = 64
    NUM_CLASSES = 2
    TRAIN_DATA_PERCENTAGE = 0.25

    POPULATION_SIZE = 5
    MAX_ITERATIONS = 8
    C1 = 2.0
    C2 = 2.0
    W_MAX = 0.9
    W_MIN = 0.4

    # EXPANDED 5D SEARCH SPACE
    BOUNDS = {
        'layers_min': 1, 'layers_max': 4,
        'filters_min': 16, 'filters_max': 128,
        'lr_min': 0.0001, 'lr_max': 0.01,
        'dropout_min': 0.0, 'dropout_max': 0.5,
        'epochs_min': 1, 'epochs_max': 3
    }
    
    DIM_LAYERS = 0
    DIM_FILTERS = 1
    DIM_LR = 2
    DIM_DROPOUT = 3
    DIM_EPOCHS = 4
    NUM_DIMS = 5

    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==========================================
# 2. MEMORY MODULE
# ==========================================
class OptimizationMemory:
    def __init__(self):
        self.history = []
    
    def add_record(self, params, acc):
        self.history.append({'params': params, 'acc': float(acc)})
    
    def get_context_string(self):
        if not self.history:
            return "No history yet."
        sorted_hist = sorted(self.history, key=lambda x: x['acc'], reverse=True)
        best_5 = sorted_hist[:5]
        worst_3 = sorted_hist[-3:]
        
        context = "=== OPTIMIZATION MEMORY (5D) ===\n"
        context += "TOP 5 Configurations:\n"
        for i, h in enumerate(best_5):
            p = h['params']
            context += f"  {i+1}. [L={p[0]}, F={p[1]}, LR={p[2]:.4f}, Drop={p[3]:.2f}, Ep={p[4]}] -> Acc: {h['acc']:.4f}\n"
        context += "\nWORST 3:\n"
        for i, h in enumerate(worst_3):
            p = h['params']
            context += f"  {i+1}. [L={p[0]}, F={p[1]}, LR={p[2]:.4f}, Drop={p[3]:.2f}, Ep={p[4]}] -> Acc: {h['acc']:.4f}\n"
        return context

# ==========================================
# 3. DATA LOADING
# ==========================================
def get_data_loaders():
    transform = transforms.Compose([
        transforms.Resize(Config.IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    train_dir = os.path.join(Config.DATASET_PATH, 'TRAIN')
    test_dir = os.path.join(Config.DATASET_PATH, 'TEST')

    try:
        train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
        test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)
        
        if Config.TRAIN_DATA_PERCENTAGE < 1.0:
            indices = list(range(len(train_dataset)))
            split = int(np.floor(Config.TRAIN_DATA_PERCENTAGE * len(train_dataset)))
            subset_indices = random.sample(indices, split)
            train_dataset = Subset(train_dataset, subset_indices)
            print(f"  [Info] Using {len(train_dataset)} images for training.")
        
        train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)
        return train_loader, test_loader
    except Exception as e:
        print(f"Error: {e}")
        return None, None

# ==========================================
# 4. DYNAMIC CNN MODEL (with Dropout)
# ==========================================
class DynamicCNN(nn.Module):
    def __init__(self, num_layers, num_filters, dropout_rate):
        super(DynamicCNN, self).__init__()
        self.convs = nn.ModuleList()
        in_channels = 3
        
        for _ in range(int(num_layers)):
            self.convs.append(nn.Conv2d(in_channels, int(num_filters), 3, 1, 1))
            self.convs.append(nn.MaxPool2d(2, 2))
            in_channels = int(num_filters)
        
        final_dim = Config.IMG_SIZE[0]
        for _ in range(int(num_layers)):
            final_dim = final_dim // 2
        if final_dim < 1:
            final_dim = 1
            
        self.flatten_size = in_channels * final_dim * final_dim
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(self.flatten_size, 64)
        self.fc2 = nn.Linear(64, Config.NUM_CLASSES)

    def forward(self, x):
        for layer in self.convs:
            if isinstance(layer, nn.Conv2d):
                x = F.relu(layer(x))
            else:
                x = layer(x)
        x = x.view(-1, self.flatten_size)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# ==========================================
# 5. LLM INTERFACE (5D)
# ==========================================
def get_llm_suggestions_5d(memory_ctx):
    prompt = f"""
You are an expert in PSO Hyperparameter Optimization for CNN Image Classification.

OBJECTIVE: Maximize Accuracy (Organic vs Recyclable waste).

SEARCH SPACE (5 Dimensions):
1. Layers: {Config.BOUNDS['layers_min']} to {Config.BOUNDS['layers_max']}
2. Filters: {Config.BOUNDS['filters_min']} to {Config.BOUNDS['filters_max']}
3. Learning Rate: {Config.BOUNDS['lr_min']} to {Config.BOUNDS['lr_max']}
4. Dropout: {Config.BOUNDS['dropout_min']} to {Config.BOUNDS['dropout_max']}
5. Epochs: {Config.BOUNDS['epochs_min']} to {Config.BOUNDS['epochs_max']}

{memory_ctx}

TASK: Suggest 2 new configurations based on patterns.

OUTPUT FORMAT (JSON only):
{{
    "analysis": "Brief reasoning...",
    "suggestions": [
        [layers1, filters1, lr1, dropout1, epochs1],
        [layers2, filters2, lr2, dropout2, epochs2]
    ]
}}
"""
    
    try:
        print("  [LLM] Consulting for 5D suggestions...")
        resp = requests.post(
            Config.AVALAI_BASE_URL,
            headers={"Authorization": f"Bearer {Config.AVALAI_API_KEY}", "Content-Type": "application/json"},
            json={"model": Config.LLM_MODEL, "messages": [{"role": "user", "content": prompt}], "temperature": 0.6},
            timeout=30
        )
        
        if resp.status_code == 200:
            content = resp.json()['choices'][0]['message']['content']
            content = content.replace("```json", "").replace("```", "").strip()
            data = json.loads(content)
            print(f"  [LLM Analysis]: {data.get('analysis', 'N/A')}")
            return data.get("suggestions", [])
        return []
    except Exception as e:
        print(f"  [LLM Failed]: {e}")
        return []

# ==========================================
# 6. 5D PSO
# ==========================================
class Expanded5DPSO:
    def __init__(self, train_loader, test_loader, use_llm=True):
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.use_llm = use_llm
        self.memory = OptimizationMemory()
        
        self.positions = np.zeros((Config.POPULATION_SIZE, Config.NUM_DIMS))
        self.positions[:, Config.DIM_LAYERS] = np.random.uniform(Config.BOUNDS['layers_min'], Config.BOUNDS['layers_max'], Config.POPULATION_SIZE)
        self.positions[:, Config.DIM_FILTERS] = np.random.uniform(Config.BOUNDS['filters_min'], Config.BOUNDS['filters_max'], Config.POPULATION_SIZE)
        self.positions[:, Config.DIM_LR] = np.random.uniform(Config.BOUNDS['lr_min'], Config.BOUNDS['lr_max'], Config.POPULATION_SIZE)
        self.positions[:, Config.DIM_DROPOUT] = np.random.uniform(Config.BOUNDS['dropout_min'], Config.BOUNDS['dropout_max'], Config.POPULATION_SIZE)
        self.positions[:, Config.DIM_EPOCHS] = np.random.uniform(Config.BOUNDS['epochs_min'], Config.BOUNDS['epochs_max'], Config.POPULATION_SIZE)
        
        self.velocities = np.zeros((Config.POPULATION_SIZE, Config.NUM_DIMS))
        self.pbest_pos = self.positions.copy()
        self.pbest_val = np.zeros(Config.POPULATION_SIZE)
        self.gbest_pos = np.zeros(Config.NUM_DIMS)
        self.gbest_val = 0.0

    def clip_position(self, pos):
        pos[Config.DIM_LAYERS] = np.clip(pos[Config.DIM_LAYERS], Config.BOUNDS['layers_min'], Config.BOUNDS['layers_max'])
        pos[Config.DIM_FILTERS] = np.clip(pos[Config.DIM_FILTERS], Config.BOUNDS['filters_min'], Config.BOUNDS['filters_max'])
        pos[Config.DIM_LR] = np.clip(pos[Config.DIM_LR], Config.BOUNDS['lr_min'], Config.BOUNDS['lr_max'])
        pos[Config.DIM_DROPOUT] = np.clip(pos[Config.DIM_DROPOUT], Config.BOUNDS['dropout_min'], Config.BOUNDS['dropout_max'])
        pos[Config.DIM_EPOCHS] = np.clip(pos[Config.DIM_EPOCHS], Config.BOUNDS['epochs_min'], Config.BOUNDS['epochs_max'])
        return pos

    def decode_position(self, pos):
        return {
            'layers': int(np.round(pos[Config.DIM_LAYERS])),
            'filters': int(np.round(pos[Config.DIM_FILTERS])),
            'lr': float(pos[Config.DIM_LR]),
            'dropout': float(pos[Config.DIM_DROPOUT]),
            'epochs': int(np.round(pos[Config.DIM_EPOCHS]))
        }

    def evaluate(self, position):
        params = self.decode_position(position)
        print(f"    > Eval: L={params['layers']}, F={params['filters']}, LR={params['lr']:.4f}, Drop={params['dropout']:.2f}, Ep={params['epochs']}...", end=" ")
        
        try:
            model = DynamicCNN(params['layers'], params['filters'], params['dropout']).to(Config.DEVICE)
            opt = optim.Adam(model.parameters(), lr=params['lr'])
            
            model.train()
            for _ in range(params['epochs']):
                for img, lbl in self.train_loader:
                    opt.zero_grad()
                    F.cross_entropy(model(img.to(Config.DEVICE)), lbl.to(Config.DEVICE)).backward()
                    opt.step()
            
            model.eval()
            corr, tot = 0, 0
            with torch.no_grad():
                for img, lbl in self.test_loader:
                    out = model(img.to(Config.DEVICE))
                    corr += (out.argmax(1) == lbl.to(Config.DEVICE)).sum().item()
                    tot += lbl.size(0)
            acc = corr / tot
            print(f"Acc: {acc:.4f}")
            self.memory.add_record([params['layers'], params['filters'], params['lr'], params['dropout'], params['epochs']], acc)
            return acc
        except Exception as e:
            print(f"Error: {e}")
            return 0.0

    def run(self):
        mode = "LLM-Enhanced" if self.use_llm else "Vanilla"
        print(f"\n=== Starting 5D {mode} PSO ===")
        
        fitness = np.array([self.evaluate(p) for p in self.positions])
        self.pbest_val = fitness.copy()
        best_idx = fitness.argmax()
        self.gbest_val = fitness[best_idx]
        self.gbest_pos = self.positions[best_idx].copy()
        
        for iteration in range(Config.MAX_ITERATIONS):
            w = Config.W_MAX - ((Config.W_MAX - Config.W_MIN) * iteration / Config.MAX_ITERATIONS)
            print(f"\nIter {iteration+1}/{Config.MAX_ITERATIONS} | Best Acc: {self.gbest_val:.4f}")
            
            if self.use_llm and iteration in [2, 5]:
                suggestions = get_llm_suggestions_5d(self.memory.get_context_string())
                if suggestions:
                    worst_indices = np.argsort(fitness)[:len(suggestions)]
                    for idx, sug in zip(worst_indices, suggestions):
                        print(f"  [LLM] Replacing P{idx} -> {sug}")
                        self.positions[idx] = np.array(sug)
                        self.velocities[idx] = np.zeros(Config.NUM_DIMS)
                        new_fit = self.evaluate(self.positions[idx])
                        fitness[idx] = new_fit
                        if new_fit > self.gbest_val:
                            self.gbest_val = new_fit
                            self.gbest_pos = self.positions[idx].copy()
                            print(f"  *** NEW GLOBAL BEST! ***")
            
            for i in range(Config.POPULATION_SIZE):
                r1 = np.random.rand(Config.NUM_DIMS)
                r2 = np.random.rand(Config.NUM_DIMS)
                
                social_term = 0 if i == Config.POPULATION_SIZE - 1 else Config.C2 * r2 * (self.gbest_pos - self.positions[i])
                cognitive_term = Config.C1 * r1 * (self.pbest_pos[i] - self.positions[i])
                
                self.velocities[i] = w * self.velocities[i] + cognitive_term + social_term
                self.positions[i] += self.velocities[i]
                self.positions[i] = self.clip_position(self.positions[i])
                
                acc = self.evaluate(self.positions[i])
                fitness[i] = acc
                
                if acc > self.pbest_val[i]:
                    self.pbest_val[i] = acc
                    self.pbest_pos[i] = self.positions[i].copy()
                if acc > self.gbest_val:
                    self.gbest_val = acc
                    self.gbest_pos = self.positions[i].copy()
        
        return self.gbest_pos, self.gbest_val

# ==========================================
# 7. MAIN
# ==========================================
def main():
    print("=" * 90)
    print("CLASSIFICATION TASK 2: Expanded 5D Search Space")
    print("=" * 90)
    
    train_loader, test_loader = get_data_loaders()
    if train_loader is None:
        return
    
    # Vanilla 5D PSO
    print("\n" + "-" * 50)
    print("Running VANILLA 5D PSO...")
    start = time.time()
    pso_v = Expanded5DPSO(train_loader, test_loader, use_llm=False)
    best_v, acc_v = pso_v.run()
    time_v = time.time() - start
    
    # LLM-Enhanced 5D PSO
    print("\n" + "-" * 50)
    print("Running LLM-ENHANCED 5D PSO...")
    start = time.time()
    pso_l = Expanded5DPSO(train_loader, test_loader, use_llm=True)
    best_l, acc_l = pso_l.run()
    time_l = time.time() - start
    
    # Results
    print("\n" + "=" * 90)
    print(f"{'METHOD':<20} | {'L':<3} | {'F':<4} | {'LR':<8} | {'DROP':<5} | {'EP':<3} | {'ACC':<8} | {'TIME'}")
    print("-" * 90)
    
    v_p = pso_v.decode_position(best_v)
    l_p = pso_l.decode_position(best_l)
    
    print(f"{'Vanilla 5D PSO':<20} | {v_p['layers']:<3} | {v_p['filters']:<4} | {v_p['lr']:<8.4f} | {v_p['dropout']:<5.2f} | {v_p['epochs']:<3} | {acc_v*100:<7.2f}% | {time_v:.1f}s")
    print(f"{'LLM-Enhanced 5D':<20} | {l_p['layers']:<3} | {l_p['filters']:<4} | {l_p['lr']:<8.4f} | {l_p['dropout']:<5.2f} | {l_p['epochs']:<3} | {acc_l*100:<7.2f}% | {time_l:.1f}s")
    print("=" * 90)

if __name__ == "__main__":
    main()

CLASSIFICATION TASK 2: Expanded 5D Search Space
  [Info] Using 5641 images for training.

--------------------------------------------------
Running VANILLA 5D PSO...

=== Starting 5D Vanilla PSO ===
    > Eval: L=4, F=70, LR=0.0059, Drop=0.13, Ep=1... Acc: 0.8615
    > Eval: L=4, F=81, LR=0.0098, Drop=0.37, Ep=1... Acc: 0.5575
    > Eval: L=2, F=55, LR=0.0004, Drop=0.43, Ep=2... Acc: 0.8715
    > Eval: L=3, F=81, LR=0.0049, Drop=0.19, Ep=3... Acc: 0.7680
    > Eval: L=2, F=121, LR=0.0046, Drop=0.01, Ep=1... Acc: 0.8026

Iter 1/8 | Best Acc: 0.8715
    > Eval: L=3, F=69, LR=0.0001, Drop=0.14, Ep=2... Acc: 0.8830
    > Eval: L=3, F=74, LR=0.0001, Drop=0.23, Ep=2... Acc: 0.8794
    > Eval: L=3, F=77, LR=0.0001, Drop=0.10, Ep=2... Acc: 0.8834
    > Eval: L=3, F=79, LR=0.0001, Drop=0.19, Ep=2... Acc: 0.8743
    > Eval: L=2, F=121, LR=0.0046, Drop=0.01, Ep=1... Acc: 0.8703

Iter 2/8 | Best Acc: 0.8834
    > Eval: L=3, F=70, LR=0.0001, Drop=0.10, Ep=3... Acc: 0.8735
    > Eval: L=2, F=73, LR

In [1]:
# Classification_Task3_W_Strategies.py
# Compare multiple inertia weight strategies from the paper

import os
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

# ==========================================
# 1. CONFIGURATION
# ==========================================
class Config:
    DATASET_PATH = "/kaggle/input/waste-classification-data/DATASET"
    IMG_SIZE = (128, 128)
    BATCH_SIZE = 64
    NUM_CLASSES = 2
    TRAIN_DATA_PERCENTAGE = 0.25

    POPULATION_SIZE = 5
    MAX_ITERATIONS = 8
    C1 = 2.0
    C2 = 2.0
    W_MAX = 1.0
    W_MIN = 0.0

    MIN_LAYERS = 1
    MAX_LAYERS = 4
    MIN_FILTERS = 16
    MAX_FILTERS = 128

    EVAL_EPOCHS = 1
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==========================================
# 2. DATA LOADING
# ==========================================
def get_data_loaders():
    transform = transforms.Compose([
        transforms.Resize(Config.IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    train_dir = os.path.join(Config.DATASET_PATH, 'TRAIN')
    test_dir = os.path.join(Config.DATASET_PATH, 'TEST')

    try:
        train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
        test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)
        
        if Config.TRAIN_DATA_PERCENTAGE < 1.0:
            indices = list(range(len(train_dataset)))
            split = int(np.floor(Config.TRAIN_DATA_PERCENTAGE * len(train_dataset)))
            subset_indices = random.sample(indices, split)
            train_dataset = Subset(train_dataset, subset_indices)
            print(f"  [Info] Using {len(train_dataset)} images for training.")
        
        train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)
        return train_loader, test_loader
    except Exception as e:
        print(f"Error: {e}")
        return None, None

# ==========================================
# 3. MODEL
# ==========================================
class DynamicCNN(nn.Module):
    def __init__(self, num_layers, num_filters):
        super(DynamicCNN, self).__init__()
        self.convs = nn.ModuleList()
        in_channels = 3
        
        for _ in range(int(num_layers)):
            self.convs.append(nn.Conv2d(in_channels, int(num_filters), 3, 1, 1))
            self.convs.append(nn.MaxPool2d(2, 2))
            in_channels = int(num_filters)
        
        final_dim = Config.IMG_SIZE[0]
        for _ in range(int(num_layers)):
            final_dim = final_dim // 2
        if final_dim < 1:
            final_dim = 1
            
        self.flatten_size = in_channels * final_dim * final_dim
        self.fc1 = nn.Linear(self.flatten_size, 64)
        self.fc2 = nn.Linear(64, Config.NUM_CLASSES)

    def forward(self, x):
        for layer in self.convs:
            if isinstance(layer, nn.Conv2d):
                x = F.relu(layer(x))
            else:
                x = layer(x)
        x = x.view(-1, self.flatten_size)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# ==========================================
# 4. INERTIA WEIGHT STRATEGIES (FROM PAPER)
# ==========================================
class InertiaWeightStrategy:
    """
    Implements W strategies from:
    "A novel particle swarm optimization algorithm with adaptive inertia weight"
    """
    
    @staticmethod
    def linear_decreasing(iteration, max_iter, w_max=0.9, w_min=0.4):
        """W3: Standard linear decrease"""
        return w_max - ((w_max - w_min) * iteration / max_iter)
    
    @staticmethod
    def linear_increasing(iteration, max_iter, w_max=0.9, w_min=0.4):
        """W9: Linear increase"""
        return w_min + ((w_max - w_min) * iteration / max_iter)
    
    @staticmethod
    def random_inertia():
        """W2: Random inertia weight"""
        return 0.5 + np.random.rand() / 2
    
    @staticmethod
    def success_rate_adaptive(success_rate, w_max=1.0, w_min=0.0):
        """
        AIWPSO (Equation 20 from paper):
        w(t) = (w_max - w_min) * Ps(t) + w_min
        """
        return (w_max - w_min) * success_rate + w_min
    
    @staticmethod
    def rank_based(particle_rank, total_population, w_max=0.9, w_min=0.4):
        """
        W13: Rank-based inertia weight
        Better particles get lower w, worse particles get higher w
        """
        return w_min + (w_max - w_min) * (particle_rank / total_population)

# ==========================================
# 5. PSO WITH MULTIPLE W STRATEGIES
# ==========================================
class AdaptivePSO:
    def __init__(self, train_loader, test_loader, w_strategy='linear_decrease'):
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.w_strategy = w_strategy
        
        self.positions = np.zeros((Config.POPULATION_SIZE, 2))
        self.positions[:, 0] = np.random.uniform(Config.MIN_LAYERS, Config.MAX_LAYERS, Config.POPULATION_SIZE)
        self.positions[:, 1] = np.random.uniform(Config.MIN_FILTERS, Config.MAX_FILTERS, Config.POPULATION_SIZE)
        
        self.velocities = np.zeros((Config.POPULATION_SIZE, 2))
        self.pbest_pos = self.positions.copy()
        self.pbest_val = np.zeros(Config.POPULATION_SIZE)
        self.gbest_pos = np.zeros(2)
        self.gbest_val = 0.0
        
        self.prev_pbest_val = np.zeros(Config.POPULATION_SIZE)
        self.w_history = []

    def evaluate(self, pos):
        L = int(np.clip(np.round(pos[0]), Config.MIN_LAYERS, Config.MAX_LAYERS))
        F_num = int(np.clip(np.round(pos[1]), Config.MIN_FILTERS, Config.MAX_FILTERS))
        
        print(f"    > Eval: L={L}, F={F_num}...", end=" ")
        
        try:
            model = DynamicCNN(L, F_num).to(Config.DEVICE)
            opt = optim.Adam(model.parameters(), lr=0.001)
            
            model.train()
            for img, lbl in self.train_loader:
                opt.zero_grad()
                F.cross_entropy(model(img.to(Config.DEVICE)), lbl.to(Config.DEVICE)).backward()
                opt.step()
            
            model.eval()
            corr, tot = 0, 0
            with torch.no_grad():
                for img, lbl in self.test_loader:
                    out = model(img.to(Config.DEVICE))
                    corr += (out.argmax(1) == lbl.to(Config.DEVICE)).sum().item()
                    tot += lbl.size(0)
            
            acc = corr / tot
            print(f"Acc: {acc:.4f}")
            return acc
        except:
            return 0.0

    def calculate_success_rate(self):
        """Equation 17-18 from paper"""
        successes = 0
        for i in range(Config.POPULATION_SIZE):
            if self.pbest_val[i] > self.prev_pbest_val[i]:  # Maximization
                successes += 1
        return successes / Config.POPULATION_SIZE

    def get_inertia_weight(self, iteration, particle_idx=None, fitness=None):
        if self.w_strategy == 'linear_decrease':
            return InertiaWeightStrategy.linear_decreasing(iteration, Config.MAX_ITERATIONS)
        
        elif self.w_strategy == 'linear_increase':
            return InertiaWeightStrategy.linear_increasing(iteration, Config.MAX_ITERATIONS)
        
        elif self.w_strategy == 'random':
            return InertiaWeightStrategy.random_inertia()
        
        elif self.w_strategy == 'success_rate':
            success_rate = self.calculate_success_rate()
            return InertiaWeightStrategy.success_rate_adaptive(success_rate)
        
        elif self.w_strategy == 'rank_based':
            if fitness is not None and particle_idx is not None:
                ranks = np.argsort(np.argsort(-fitness))  # Higher is better
                return InertiaWeightStrategy.rank_based(ranks[particle_idx], Config.POPULATION_SIZE)
            return 0.7
        
        return 0.7

    def run(self):
        print(f"\n=== PSO with W Strategy: {self.w_strategy.upper()} ===")
        
        fitness = np.array([self.evaluate(p) for p in self.positions])
        self.pbest_val = fitness.copy()
        self.prev_pbest_val = fitness.copy()
        best_idx = fitness.argmax()
        self.gbest_val = fitness[best_idx]
        self.gbest_pos = self.positions[best_idx].copy()
        
        for iteration in range(Config.MAX_ITERATIONS):
            self.prev_pbest_val = self.pbest_val.copy()
            base_w = self.get_inertia_weight(iteration, fitness=fitness)
            
            if iteration > 0:
                success_rate = self.calculate_success_rate()
            else:
                success_rate = 1.0
            
            self.w_history.append(base_w)
            
            print(f"\nIter {iteration+1}/{Config.MAX_ITERATIONS} | Best Acc: {self.gbest_val:.4f} | W={base_w:.3f} | Success Rate={success_rate:.2f}")
            
            for i in range(Config.POPULATION_SIZE):
                if self.w_strategy == 'rank_based':
                    w = self.get_inertia_weight(iteration, particle_idx=i, fitness=fitness)
                else:
                    w = base_w
                
                r1, r2 = np.random.rand(2), np.random.rand(2)
                social_term = 0 if i == Config.POPULATION_SIZE - 1 else Config.C2 * r2 * (self.gbest_pos - self.positions[i])
                cognitive_term = Config.C1 * r1 * (self.pbest_pos[i] - self.positions[i])
                
                self.velocities[i] = w * self.velocities[i] + cognitive_term + social_term
                self.positions[i] += self.velocities[i]
                
                self.positions[i, 0] = np.clip(self.positions[i, 0], Config.MIN_LAYERS, Config.MAX_LAYERS)
                self.positions[i, 1] = np.clip(self.positions[i, 1], Config.MIN_FILTERS, Config.MAX_FILTERS)
                
                acc = self.evaluate(self.positions[i])
                fitness[i] = acc
                
                if acc > self.pbest_val[i]:
                    self.pbest_val[i] = acc
                    self.pbest_pos[i] = self.positions[i].copy()
                if acc > self.gbest_val:
                    self.gbest_val = acc
                    self.gbest_pos = self.positions[i].copy()
        
        return self.gbest_pos, self.gbest_val, self.w_history

# ==========================================
# 6. MAIN - COMPARE ALL STRATEGIES
# ==========================================
def main():
    print("=" * 100)
    print("CLASSIFICATION TASK 3: Compare Inertia Weight Strategies")
    print("Based on: 'A novel PSO algorithm with adaptive inertia weight'")
    print("=" * 100)
    
    train_loader, test_loader = get_data_loaders()
    if train_loader is None:
        return
    
    strategies = [
        'linear_decrease',
        'linear_increase',
        'random',
        'success_rate',
        'rank_based'
    ]
    
    results = []
    
    for strategy in strategies:
        print("\n" + "=" * 60)
        start = time.time()
        pso = AdaptivePSO(train_loader, test_loader, w_strategy=strategy)
        best_pos, best_acc, w_history = pso.run()
        elapsed = time.time() - start
        
        results.append({
            'strategy': strategy,
            'layers': int(np.round(best_pos[0])),
            'filters': int(np.round(best_pos[1])),
            'accuracy': best_acc,
            'time': elapsed,
            'avg_w': np.mean(w_history),
            'w_std': np.std(w_history)
        })
    
    # Print comparison
    print("\n" + "=" * 100)
    print("COMPARISON OF INERTIA WEIGHT STRATEGIES")
    print("=" * 100)
    print(f"{'STRATEGY':<20} | {'LAYERS':<6} | {'FILTERS':<7} | {'ACCURACY':<10} | {'TIME(s)':<8} | {'AVG W':<7} | {'W STD'}")
    print("-" * 100)
    
    for r in results:
        print(f"{r['strategy']:<20} | {r['layers']:<6} | {r['filters']:<7} | {r['accuracy']*100:<9.2f}% | {r['time']:<8.1f} | {r['avg_w']:<7.3f} | {r['w_std']:.3f}")
    
    print("=" * 100)
    
    best = max(results, key=lambda x: x['accuracy'])
    print(f"\nüèÜ BEST STRATEGY: {best['strategy'].upper()}")
    print(f"   Best Accuracy: {best['accuracy']*100:.2f}%")
    print(f"   Parameters: L={best['layers']}, F={best['filters']}")

if __name__ == "__main__":
    main()

CLASSIFICATION TASK 3: Compare Inertia Weight Strategies
Based on: 'A novel PSO algorithm with adaptive inertia weight'
  [Info] Using 5641 images for training.


=== PSO with W Strategy: LINEAR_DECREASE ===
    > Eval: L=2, F=58... Acc: 0.8524
    > Eval: L=3, F=29... Acc: 0.8766
    > Eval: L=3, F=103... Acc: 0.8548
    > Eval: L=3, F=28... Acc: 0.8508
    > Eval: L=3, F=24... Acc: 0.8424

Iter 1/8 | Best Acc: 0.8766 | W=0.900 | Success Rate=1.00
    > Eval: L=4, F=16... Acc: 0.8508
    > Eval: L=3, F=29... Acc: 0.8595
    > Eval: L=3, F=40... Acc: 0.8587
    > Eval: L=3, F=29... Acc: 0.8647
    > Eval: L=3, F=24... Acc: 0.8540

Iter 2/8 | Best Acc: 0.8766 | W=0.838 | Success Rate=0.00
    > Eval: L=2, F=22... Acc: 0.8687
    > Eval: L=3, F=29... Acc: 0.8603
    > Eval: L=3, F=16... Acc: 0.8695
    > Eval: L=3, F=30... Acc: 0.8002
    > Eval: L=3, F=24... Acc: 0.8683

Iter 3/8 | Best Acc: 0.8766 | W=0.775 | Success Rate=0.00
    > Eval: L=2, F=39... Acc: 0.8647
    > Eval: L=3, F=29.