# Different Architectures Experiment

**მიზანი**: წინა ექსპერიმენტიდან დავადგინეთ, რომ საუკეთესო მოდელი იყო 5-ლეიერიანი CNN + BatchNorm, ამიტომ ახლა მას დავუმატებ attention-ს, skipping-ს და ამ ორის კომბინაციას და მიღებულ სამ მოდელს ერთმანეთს შევადარებ.

**გამოყენებული მოდელები**:
- `5_layer_batchnorm_attention`
- `5_layer_batchnorm_skipping`
- `5_layer_batchnorm_combo`

**მოლოდინი**: წესით, საუკეთესო შედეგი `5_layer_batchnorm_combo`-ს უნდა ჰქონდეს.

In [1]:
# Mount Google Drive inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
import sys
# Add the project folder on Drive to the import path so modules stored under it can be imported.
sys.path.append('/content/drive/MyDrive/PURI/')
import torch

Mounted at /content/drive


## 1. იმპორტები

In [2]:
import torch
import torch.nn as nn
import importlib
import sys
from pathlib import Path

from train_models_temp.helper import (
    set_seed,
    get_device,
    print_model_info,
    display_my_result,
)

from train_models_temp.my_trains import(
        EmotionCNNTrainer,
        run_adv_arch
)
# Seed the run with 42 and select the compute device; `device` is stored in the training config further down.
set_seed(42)
device = get_device()


Seed set to 42
Using GPU: Tesla T4


In [3]:
# Install the Weights & Biases client (IPython shell escape, so this cell only runs inside a notebook).
!pip install wandb
import wandb
# Authenticate this session; the recorded output shows an interactive API-key prompt.
wandb.login()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkechik21[0m ([33mkechik21-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## 2. მოდელის ბილდინგი

In [4]:
def build_model(model_name, num_classes=7, input_channels=1):
    """Return the model that ``conc_model`` produces for ``model_name``.

    Args:
        model_name: Forwarded to ``conc_model`` as ``curr_model``.
        num_classes: Forwarded to ``conc_model`` as ``num_classes``.
        input_channels: Accepted as part of the signature, but this function
            does not forward it anywhere.

    Returns:
        Whatever ``conc_model`` returns for the requested architecture.

    Raises:
        Exception: Any error raised while importing or calling ``conc_model``
            is reported on stdout and then re-raised unchanged.
    """
    try:
        # Imported inside the try so an import failure is reported like any other error.
        from my_models import conc_model as make_model

        return make_model(curr_model=model_name, num_classes=num_classes)
    except Exception as err:
        print(f"Error loading {model_name}: {err}")
        raise


# Runs as soon as the helper above is defined; no model has been built at this point.
print("Model worked")

Model worked


## 3. Data-ს მიმოხილვა და ლოუდინგი

In [5]:
from data.my_data_folder.my_data import get_complete_fer_init, show_plots, plot_class_dist

def load_data(batch_size=64, augment_type='none'):
    """Fetch the FER2013 train and validation loaders and print their lengths.

    Args:
        batch_size: Forwarded to ``get_complete_fer_init``.
        augment_type: Forwarded to ``get_complete_fer_init``.

    Returns:
        The ``(train_loader, val_loader)`` pair; the third value returned by
        ``get_complete_fer_init`` is discarded.

    Note:
        The printed "size" values are ``len(loader)``. In the recorded run these
        are batch counts (449 for 28,709 training images at batch size 64),
        not image counts.
    """
    print("Loading FER2013  dataset...")
    loader_options = dict(
        batch_size=batch_size,
        num_workers=2,
        augment_type=augment_type,
    )
    # The helper returns three items; the last one is not needed here.
    train_split, val_split, _ = get_complete_fer_init(**loader_options)
    print("Data loaded correctly")
    print(f"Train size: {len(train_split)}")
    print(f"Validation size: {len(val_split)}")
    return train_split, val_split


# Loaders shared by the rest of the notebook.
train_loader, val_loader = load_data(batch_size=64)

Loading FER2013  dataset...
 Using FER2013 from MyDrive
Train : 28709
Validation : 3589
Test : 3589
Data loaded correctly
Train size: 449
Validation size: 57


## 4. თრეინინგის მახასიათებლები

In [6]:
# Settings passed to EmotionCNNTrainer when the experiment is started.
config = dict(
    learning_rate=0.001,
    num_epochs=40,
    patience=10,
    device=device,
    seed=42,
    wandb_enabled=True,
    experiment_name='advanced_arch_exp',
)
print("Training characteristics:")
# Echo every setting so the values used are visible in the saved notebook output.
for setting, value in config.items():
    print(f"   {setting}: {value}")

Training characteristics:
   learning_rate: 0.001
   num_epochs: 40
   patience: 10
   device: cuda
   seed: 42
   wandb_enabled: True
   experiment_name: advanced_arch_exp


## 5. მოდელების მახასიათებლების შედარება

In [7]:
curr_mods = ['5_layer_batchnorm_attention', '5_layer_batchnorm_skipping', '5_layer_batchnorm_combo']


def inspect_model(model_name):
    """Build ``model_name`` with 7 output classes and report its size.

    Returns:
        The ``(total_params, trainable_params)`` pair returned by
        ``print_model_info``.
    """
    # Imported lazily so a missing or broken model module surfaces as a
    # per-model error in the caller instead of aborting the whole cell.
    from my_models import conc_model

    model = conc_model(curr_model=model_name, num_classes=7)
    return print_model_info(model, model_name)


def compare_model_sizes(model_names, inspect_fn):
    """Print how each model's parameter count grows over the previous one.

    Args:
        model_names: Model names, in the order they should be compared.
        inspect_fn: Callable that takes a model name and returns
            ``(total_params, trainable_params)``.

    Returns:
        dict mapping every successfully inspected model name to a dict with
        its ``total_params`` and ``trainable_params``. Models whose inspection
        raised are reported on stdout and left out.
    """
    sizes = {}
    # Parameter count of the most recent model that loaded successfully.
    # Starting from None (not 0) means a failure of the first model no longer
    # turns every later comparison into a division by zero.
    baseline_params = None
    for name in model_names:
        try:
            total_params, trainable_params = inspect_fn(name)
        except Exception as e:
            # Best effort: report the failure and carry on with the other models.
            print(f"Modeli kvdeba help {name}: {e}")
            print("")
            continue

        if baseline_params:
            growth = total_params - baseline_params
            growth_ratio = total_params / baseline_params
            print(f"Added {growth:,} parameters ({growth_ratio:.1f}x growth from previous)")
        else:
            # First model that loaded: it is the baseline for the ones that follow.
            print("Basic Model")

        sizes[name] = {
            'total_params': total_params,
            'trainable_params': trainable_params,
        }
        baseline_params = total_params
        print()
    return sizes


print(f"Working on {len(curr_mods)} models for Different Difficult Architectures ")
print("Testing advanced methods implemented")
print("")

model_info = compare_model_sizes(curr_mods, inspect_model)



Working on 3 models for Different Difficult Architectures 
Testing advanced methods implemented

 5_layer_batchnorm_attention Info:
   Total parameters: 11,117,895
   Trainable parameters: 11,117,895
Basic Model

 5_layer_batchnorm_skipping Info:
   Total parameters: 11,250,247
   Trainable parameters: 11,250,247
Added 132,352 parameters (1.0x growth from previous)

 5_layer_batchnorm_combo Info:
   Total parameters: 11,292,007
   Trainable parameters: 11,292,007
Added 41,760 parameters (1.0x growth from previous)



## 6. ექსპერიმენტი

In [None]:
# Create the trainer from `config`, then run the architecture comparison.
trainer = EmotionCNNTrainer(config)
print("")
print("Starting Difficult Architecture Experiment...")
# build_model is passed as the model factory; presumably run_adv_arch picks the model names itself - verify in my_trains.
# NOTE(review): the recorded output stops at epoch 6 of the first model, so this cell
# apparently did not run to completion in the saved notebook - confirm before relying on later cells.
results = run_adv_arch(trainer, train_loader, val_loader, build_model)

Trainer ready for cuda

Starting Difficult Architecture Experiment...
Experiment to compare advanced architectural combinations

Comparing 3 models...
Training 5_layer_batchnorm_attention...


logging epoch 0
  Epoch   0: Train 42.5%, Val 50.6%
logging epoch 3
  Epoch   3: Train 63.6%, Val 60.9%
logging epoch 6
  Epoch   6: Train 76.4%, Val 60.0%


## 7. მნიშვნელოვანი საბოლოო პარამეტრები და მათი დალოგვა

In [None]:
import matplotlib.pyplot as plt
import importlib
import train_models_temp.plotting
# Reload the plotting module so the import below binds to its current on-disk version.
importlib.reload(train_models_temp.plotting)

# NOTE(review): plot_training_curves is listed twice below; the repeat is harmless but redundant.
from train_models_temp.plotting import (
    plot_training_curves,
    plot_validation_comparison,
    plot_parameter_effectiveness,
    plot_training_curves,
    plot_model_comparison
)



In [None]:
# Results collected by the trainer, keyed by model name. Assigned before any
# wandb call so later cells can use it even if wandb initialisation fails.
trainer_results = trainer.results

# Log one flat record with the summary metrics of every trained model.
# The run is used as a context manager so it is always closed (and marked as
# failed) if a result entry lacks one of the keys read below, instead of
# staying open in the notebook session.
with wandb.init(project="advanced_architecture_experiment",
                name="experiment3_results_summary",
                tags=["summary", "arch_experiment", "experiment3"]):
    all_metrics = {}  # "final_results/<model>/<metric>" -> value, logged in one call

    for model_name, result in trainer_results.items():
        # Train/validation gap divided by 10, clamped so it is never negative.
        overfitting_score = max(0, (result['final_train_acc'] - result['best_val_acc']) / 10)
        val_accs = result['val_accs']
        actual = result['actual_results']

        chem_metr = {
            "final_train_accuracy": result['final_train_acc'],
            # NOTE(review): despite the key name this is the best validation
            # accuracy, not the last epoch's; key kept so logged names stay stable.
            "final_val_accuracy": result['best_val_acc'],
            "final_overfitting_score": overfitting_score,
            "total_parameters": result['parameters'],
            "training_time_minutes": result['training_time'] / 60,
            "epochs_trained": len(val_accs),
            # 1-based position of the first epoch that reached the best validation accuracy.
            "best_epoch": val_accs.index(max(val_accs)) + 1,
            "model_name": model_name,
            "fit_status": actual['fit_status'],
            "performance": actual['performance'],
            "efficiency_status": actual['efficiency_status'],
            "prediction": result['prediction']['prediction'],
            "reality": actual['fit_analysis'],
            "prediction_correct": result['prediction_correct']
        }

        print(" ")
        print(f" {model_name.replace('_', ' ').title()} Results ")
        for key, value in chem_metr.items():
            if key == "model_name":
                # Already shown in the heading and encoded in the wandb key.
                continue
            print(f"{key.replace('_', ' ').title()}: {value}")
            all_metrics[f"final_results/{model_name}/{key}"] = value

    wandb.log(all_metrics)

    print(" Results processed and logged to wandb")

## 8. შედეგების ვიზუალიზაცია და დალოგვა

In [None]:
from train_models_temp.plotting import (
    plot_model_comparison,
    plot_metrics_over_epochs,
    plot_parameter_effectiveness
)



In [None]:
print("Working on Stage 3 plots")

# One entry per summary figure, in logging order:
# (wandb key, figure size, plotting function, figure title).
summary_plots = [
    ("arch_comparison_bars", (12, 8), plot_model_comparison,
     "Advanced Architecture: BatchNorm+Attention vs BatchNorm+Skip vs BatchNorm+Combo"),
    ("training_dynamics_analysis", (15, 10), plot_metrics_over_epochs,
     "Training Dynamics: Advanced Architecture Combinations"),
    ("parameter_effectiveness_scatter", (10, 8), plot_parameter_effectiveness,
     "Parameter Efficiency: Advanced Architecture Methods"),
]

# Context-managed run: it is closed (and marked as failed) if a plot raises.
with wandb.init(project="advanced_architecture_experiment",
                name="advanced_summary",
                tags=["summary", "arch_exp", "stage3"],
                notes="Comparing advanced architecture methods (BatchNorm + Attention/Skip/Combo)"):
    for wandb_key, figsize, draw_plot, title in summary_plots:
        plt.figure(figsize=figsize)
        try:
            draw_plot(trainer_results, title)
            wandb.log({wandb_key: wandb.Image(plt.gcf())})
        finally:
            # Always release the current figure, even when plotting or logging fails.
            plt.close()

    print("Advanced architecture plots completed")

In [None]:
print("Plotting done ")

## 9. ჩემი დასკვნა და საბოლოო ანალიზი

In [None]:
# Display label for each trained model, in the order the summary lists them.
ARCH_LABELS = {
    '5_layer_batchnorm_attention': 'BatchNorm+Attention',
    '5_layer_batchnorm_skipping': 'BatchNorm+Skipping',
    '5_layer_batchnorm_combo': 'BatchNorm+Combo',
}

# Collect the headline numbers per architecture. A model with no entry in
# trainer_results (for example because training was interrupted) is reported
# and skipped instead of being stored as None, which used to make the ranking
# below fail with a TypeError.
all_results = {}
for model_name, label in ARCH_LABELS.items():
    if model_name not in trainer_results:
        print(f"No results for {model_name}; leaving it out of the comparison")
        continue
    result = trainer_results[model_name]
    all_results[label] = {
        'best_val_acc': result['best_val_acc'],
        'final_train_acc': result['final_train_acc'],
        'total_params': result['parameters']
    }

# Per-architecture names kept for any later cell that reads them; None when missing.
attention_result = all_results.get('BatchNorm+Attention')
skipping_result = all_results.get('BatchNorm+Skipping')
combo_result = all_results.get('BatchNorm+Combo')

if all_results:
    # Highest validation accuracy first. The sort is stable, so on a tie the
    # architecture listed first in ARCH_LABELS stays first.
    ranked = sorted(all_results.items(), key=lambda item: item[1]['best_val_acc'], reverse=True)
    best_architecture = ranked[0]
    print("The best model among the advanced architecture combinations is:")
    print(f"{best_architecture[0]}: {best_architecture[1]['best_val_acc']:.1f}% validation accuracy")

    print("")
    print("All results:")
    for arch_name, result in ranked:
        print(f"{arch_name}: {result['best_val_acc']:.1f}% val acc, {result['total_params']:,} params")
else:
    best_architecture = None
    print("No trained models found in trainer_results; nothing to compare.")

In [None]:
# # Complete მოდელის შენახვა
# print("Saving best model: 5_layer_batchnorm_skipping")

# # მოდელის მოძებნა
# best_model = None
# # if hasattr(trainer, 'models') and '5_layer_batchnorm_skipping' in trainer.models:
# best_model = results[1]
# print("✓ Found trained model in trainer.models")
# # elif hasattr(trainer, 'best_models') and '5_layer_batchnorm_skipping' in trainer.best_models:
# #     best_model = trainer.best_models['5_layer_batchnorm_skipping']
# #     print("✓ Found trained model in trainer.best_models")
# # else:
# #     print("⚠️  Model not found, creating architecture only")
# #     best_model = build_model('5_layer_batchnorm_skipping', num_classes=7)

# # შენახვა
# # save_info = {
# #     'model_state_dict': best_model.state_dict(),
# #     'model_name': '5_layer_batchnorm_skipping',
# #     'validation_accuracy': 62.9,
# #     'train_accuracy': 98.3,
# #     'total_parameters': 11250247,
# #     'experiment': 'Advanced Architecture - Skip Connections',
# #     'final_achievement': 'Best model from systematic optimization (55.6% → 62.9%)'
# # }

# save_path = '/content/drive/MyDrive/PURI/my_models/my_best_model.pth'
# torch.save(trainer, save_path)

# print(f"✓ Model saved to: {save_path}")
# print("Ready for testing on new data!")