In [1]:
import pandas as pd
import numpy as np
import json

# Load the aggregated per-model results into `data`.
# The encoding is pinned to UTF-8 (the JSON interchange encoding): without it,
# open() falls back to the platform locale encoding, which can mis-decode or
# reject any non-ASCII text in the file.
with open(r'F:\up_git\off_plant_di_err_analyze_daok\results\all_models_comprehensive_results.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
# Bare expression: the parsed dict is echoed as the cell output.
data

{'LeNet': {'model_name': 'LeNet',
  'parameters': 9594494,
  'best_accuracy': 97.28385968142896,
  'final_accuracy': 97.2470306601602,
  'training_time_minutes': 57.51263257265091,
  'error_metrics': {'accuracy': 97.2470306601602,
   'error_rate': 2.752969339839794,
   'total_errors': 299,
   'total_samples': 10861,
   'avg_confidence_correct': 0.9873046875,
   'avg_confidence_incorrect': 0.751953125,
   'confidence_gap': 0.2353515625,
   'high_confidence_errors': 132,
   'worst_classes': [['Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot',
     20.0],
    ['Apple___Apple_scab', 13.63636363636364],
    ['Tomato___Early_blight', 13.259668508287291],
    ['Potato___healthy', 12.903225806451616],
    ['Tomato___Late_blight', 8.641975308641975]],
   'most_confused_pairs': [['Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot',
     'Corn_(maize)___Northern_Leaf_Blight',
     21],
    ['Corn_(maize)___Northern_Leaf_Blight',
     'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot',
     

In [2]:
# Report header for the walkthrough of the loaded JSON.
print("=" * 80, "JSON DATA STRUCTURE", "=" * 80, sep="\n")

# 1. Top level of `data`: one key per model, listed with a 1-based counter.
print("\n1. LIST OF MODELS:")
print(f"   Total models: {len(data)}")
for i, model_name in enumerate(data, start=1):
    print(f"   {i}. {model_name}")

# 2. Show the top-level keys of a single model record (LeNet is the sample).
print("\n" + "=" * 80, "2. STRUCTURE OF ONE MODEL (Example: LeNet)", "=" * 80, sep="\n")

# `model_example` is reused by the field-by-field printout that follows.
model_example = data['LeNet']
print("\n   Main fields (keys):")
for key in model_example:
    print(f"   - {key}")

# 3. Field-by-field walkthrough of the sample record held in `model_example`.
print("\n" + "=" * 80, "3. FIELD DETAILS", "=" * 80, sep="\n")

# a) Scalar fields: each entry is (key, format spec, trailing text).
print("\n   a) Basic information:")
for field, spec, suffix in (
    ('model_name', '', ''),
    ('parameters', ',', ''),
    ('best_accuracy', '.2f', '%'),
    ('final_accuracy', '.2f', '%'),
    ('training_time_minutes', '.2f', ''),
):
    print(f"      - {field}: {model_example[field]:{spec}}{suffix}")

# b) Names of everything stored under error_metrics.
print("\n   b) error_metrics (dict):")
error_metrics = model_example['error_metrics']
print("      Keys in error_metrics:")
for key in error_metrics:
    print(f"      - {key}")

# c) Confusion matrix: type, size and a peek at the first row.
# NOTE(review): the printed text says "square matrix", but nothing here
# verifies that the row and column counts actually match.
print("\n   c) confusion_matrix:")
cm = error_metrics['confusion_matrix']
print(f"      - Data type: {type(cm)}")
print(f"      - Dimension: {len(cm)} x {len(cm[0])} (square matrix)")
print(f"      - Example first row (first 5 elements): {cm[0][:5]}")

# d) worst_classes: two-item entries, unpacked as (class_name, error_rate).
print("\n   d) worst_classes (list of lists):")
worst = error_metrics['worst_classes']
print(f"      - Count: {len(worst)} classes")
print("      - Structure: [class_name, error_rate]")
print("      - Examples:")
for i, (class_name, error_rate) in enumerate(worst[:3], start=1):
    print(f"        {i}. {class_name}: {error_rate:.2f}%")

# e) most_confused_pairs: three-item entries (true class, predicted class, count).
print("\n   e) most_confused_pairs (list of lists):")
confused = error_metrics['most_confused_pairs']
print(f"      - Count: {len(confused)} pairs")
print("      - Structure: [true_class, predicted_class, count]")
print("      - Examples:")
for i, (true_cls, pred_cls, count) in enumerate(confused[:3], start=1):
    print(f"        {i}. {true_cls} → {pred_cls}: {count} times")

# f) Every entry of the topk_accuracy dict, printed as a percentage.
print("\n   f) topk_accuracy (dict):")
topk = model_example['topk_accuracy']
for key, value in topk.items():
    print(f"      - {key}: {value:.2f}%")

# g) Artifact locations recorded alongside the metrics.
print("\n   g) File paths:")
for path_key in ('confusion_matrix_csv', 'classification_stats_csv', 'confusion_matrix_plot'):
    print(f"      - {path_key}: {model_example[path_key]}")

# 4. Schema overview.
# The diagram below is a hand-written literal: it is not generated from `data`,
# so it has to be edited manually whenever the JSON layout changes.
print("\n" + "=" * 80, "4. TREE STRUCTURE DIAGRAM", "=" * 80, sep="\n")

print("""
data (dict)
│
├── LeNet (dict)
│   ├── model_name (str)
│   ├── parameters (int)
│   ├── best_accuracy (float)
│   ├── final_accuracy (float)
│   ├── training_time_minutes (float)
│   ├── error_metrics (dict)
│   │   ├── accuracy (float)
│   │   ├── error_rate (float)
│   │   ├── total_errors (int)
│   │   ├── total_samples (int)
│   │   ├── avg_confidence_correct (float)
│   │   ├── avg_confidence_incorrect (float)
│   │   ├── confidence_gap (float)
│   │   ├── high_confidence_errors (int)
│   │   ├── worst_classes (list of [str, float])
│   │   ├── most_confused_pairs (list of [str, str, int])
│   │   └── confusion_matrix (list of lists - 38x38)
│   ├── confusion_matrix_csv (str)
│   ├── classification_stats_csv (str)
│   ├── confusion_matrix_plot (str)
│   └── topk_accuracy (dict)
│       ├── top_1_acc (float)
│       ├── top_3_acc (float)
│       └── top_5_acc (float)
│
├── SimpleCNN (dict) - similar structure
├── AlexNet (dict) - similar structure
├── ResNet18 (dict) - similar structure
...
└── InceptionV3 (dict) - similar structure
""")

# 5. Summary statistics: one row per model, best final accuracy first.
print("\n" + "=" * 80)
print("5. SUMMARY STATISTICS")
print("=" * 80)

# Collect plain dicts and build the DataFrame once at the end.
summary_data = []
for model_name, model_info in data.items():
    summary_data.append({
        'Model': model_name,
        'Parameters': model_info['parameters'],
        'Accuracy': model_info['final_accuracy'],
        'Errors': model_info['error_metrics']['total_errors'],
        'Training_Time': model_info['training_time_minutes']
    })

df_summary = pd.DataFrame(summary_data).sort_values('Accuracy', ascending=False)

# The newline is concatenated rather than passed as a separate argument:
# print()'s default separator would put a space in front of the table's first
# line only, shifting the header one column to the right of the data rows.
print("\n" + df_summary.to_string(index=False))

print("\n" + "=" * 80)

JSON DATA STRUCTURE

1. LIST OF MODELS:
   Total models: 12
   1. LeNet
   2. SimpleCNN
   3. AlexNet
   4. ResNet18
   5. ResNet34
   6. ResNet50
   7. MobileNetV2
   8. MobileNetV3Small
   9. ShuffleNetV2
   10. EfficientNetB0
   11. DenseNet121
   12. InceptionV3

2. STRUCTURE OF ONE MODEL (Example: LeNet)

   Main fields (keys):
   - model_name
   - parameters
   - best_accuracy
   - final_accuracy
   - training_time_minutes
   - error_metrics
   - confusion_matrix_csv
   - classification_stats_csv
   - confusion_matrix_plot
   - topk_accuracy

3. FIELD DETAILS

   a) Basic information:
      - model_name: LeNet
      - parameters: 9,594,494
      - best_accuracy: 97.28%
      - final_accuracy: 97.25%
      - training_time_minutes: 57.51

   b) error_metrics (dict):
      Keys in error_metrics:
      - accuracy
      - error_rate
      - total_errors
      - total_samples
      - avg_confidence_correct
      - avg_confidence_incorrect
      - confidence_gap
      - high_confidence_

In [3]:
# Create a summary of basic model information.

# Flatten every model record in `data` into one row so that all models can be
# compared side by side and exported as a single CSV.
models_info = []

for model_name, model_data in data.items():
    # Nested sections are looked up once and reused for the fields below.
    err_metrics = model_data['error_metrics']
    topk_acc = model_data['topk_accuracy']

    model_dict = {
        # Basic model info
        'Model': model_name,
        'Parameters': model_data['parameters'],

        # Accuracy metrics
        'Best_Accuracy': model_data['best_accuracy'],
        'Final_Accuracy': model_data['final_accuracy'],
        'Top_1_Accuracy': topk_acc['top_1_acc'],
        'Top_3_Accuracy': topk_acc['top_3_acc'],
        'Top_5_Accuracy': topk_acc['top_5_acc'],

        # Training info
        'Training_Time_Minutes': model_data['training_time_minutes'],

        # Error metrics
        'Error_Rate': err_metrics['error_rate'],
        'Total_Errors': err_metrics['total_errors'],
        'Total_Samples': err_metrics['total_samples'],

        # Confidence metrics
        'Avg_Confidence_Correct': err_metrics['avg_confidence_correct'],
        'Avg_Confidence_Incorrect': err_metrics['avg_confidence_incorrect'],
        'Confidence_Gap': err_metrics['confidence_gap'],
        'High_Confidence_Errors': err_metrics['high_confidence_errors'],

        # Derived metrics
        'Accuracy_Drop': model_data['best_accuracy'] - model_data['final_accuracy'],
        'Parameters_Million': model_data['parameters'] / 1_000_000,
        # NOTE(review): raises ZeroDivisionError if a model reports a training
        # time of 0 minutes.
        'Errors_Per_Minute': err_metrics['total_errors'] / model_data['training_time_minutes']
    }

    models_info.append(model_dict)

# Create DataFrame
df_models = pd.DataFrame(models_info)

# Display the dataframe
print("=" * 120)
print("COMPREHENSIVE MODEL INFORMATION")
print("=" * 120)
# The newline is concatenated rather than passed as a separate argument:
# print()'s default separator would put a space in front of the table's first
# line only, shifting the header one column to the right of the data rows.
print("\n" + df_models.to_string(index=False))

# Save to CSV
output_path = r'F:\up_git\off_plant_di_err_analyze_daok\analyze_data_code_scrip\models_info.csv'
df_models.to_csv(output_path, index=False)
print(f"\n✓ Saved to: {output_path}")

COMPREHENSIVE MODEL INFORMATION

            Model  Parameters  Best_Accuracy  Final_Accuracy  Top_1_Accuracy  Top_3_Accuracy  Top_5_Accuracy  Training_Time_Minutes  Error_Rate  Total_Errors  Total_Samples  Avg_Confidence_Correct  Avg_Confidence_Incorrect  Confidence_Gap  High_Confidence_Errors  Accuracy_Drop  Parameters_Million  Errors_Per_Minute
           LeNet     9594494      97.283860       97.247031       97.247031       99.742197       99.907927              57.512633    2.752969           299          10861                0.987305                  0.751953        0.235352                     132       0.036829            9.594494           5.198858
       SimpleCNN     1739686      99.502808       99.484394       99.484394       99.981585       99.990793              59.792955    0.515606            56          10861                0.996094                  0.777832        0.218262                      27       0.018415            1.739686           0.936565
         AlexNet  

In [6]:
import pandas as pd
import os

BASE_PATH = r'F:\up_git\off_plant_di_err_analyze_daok\results'

# Find every confusion-matrix CSV below BASE_PATH.
# os.walk lists entries in a filesystem-dependent order, so directories and
# file names are both sorted to keep the combined row order reproducible.
files = []
for root, dirnames, filenames in os.walk(BASE_PATH):
    dirnames.sort()  # in-place sort fixes the order in which sub-folders are visited
    files.extend(
        os.path.join(root, name)
        for name in sorted(filenames)
        if name.endswith('confusion_matrix.csv')
    )

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not files:
    raise FileNotFoundError(
        f"No files ending with 'confusion_matrix.csv' found under {BASE_PATH}"
    )

# Read each file and tag its rows with the model the file belongs to.
data_list = []
for path in files:
    df = pd.read_csv(path)
    # The model name is the file name without the '_confusion_matrix.csv' suffix.
    model_name = os.path.basename(path).replace('_confusion_matrix.csv', '')

    # Add the tag only when the CSV does not already provide that column.
    if 'model_name' not in df.columns:
        df.insert(0, 'model_name', model_name)

    data_list.append(df)

# Stack the per-model frames into a single table.
combined = pd.concat(data_list, ignore_index=True)

# Written relative to the current working directory.
output = 'combined_confusion_matrix_wide.csv'
combined.to_csv(output, index=False, encoding='utf-8')

print(f"combined {len(files)} file → {output}")

combined 12 file → combined_confusion_matrix_wide.csv


In [8]:
import pandas as pd
import os

BASE_PATH = r'F:\up_git\off_plant_di_err_analyze_daok\results'

# Find every classification-stats CSV below BASE_PATH.
# os.walk lists entries in a filesystem-dependent order, so directories and
# file names are both sorted to keep the combined row order reproducible.
files = []
for root, dirnames, filenames in os.walk(BASE_PATH):
    dirnames.sort()  # in-place sort fixes the order in which sub-folders are visited
    files.extend(
        os.path.join(root, name)
        for name in sorted(filenames)
        if name.endswith('classification_stats.csv')
    )

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not files:
    raise FileNotFoundError(
        f"No files ending with 'classification_stats.csv' found under {BASE_PATH}"
    )

# Read each file, tag its rows with the model name and report its row count.
data_list = []
for path in files:
    df = pd.read_csv(path)
    # The model name is the file name without the '_classification_stats.csv' suffix.
    model_name = os.path.basename(path).replace('_classification_stats.csv', '')

    # Add the tag only when the CSV does not already provide that column.
    if 'model_name' not in df.columns:
        df.insert(0, 'model_name', model_name)

    data_list.append(df)
    print(f"  {model_name}: {len(df)} classes")

# Stack the per-model frames into a single table.
combined = pd.concat(data_list, ignore_index=True)

# Save (written relative to the current working directory).
output = 'combined_classification_stats_wide.csv'
combined.to_csv(output, index=False, encoding='utf-8')

print(f"\ncombined → {output}")
print(f"Sum: {len(combined)} lines")

  alexnet: 38 classes
  densenet121: 38 classes
  efficientnetb0: 38 classes
  inceptionv3: 38 classes
  lenet: 38 classes
  mobilenetv2: 38 classes
  mobilenetv3small: 38 classes
  resnet18: 38 classes
  resnet34: 38 classes
  resnet50: 38 classes
  shufflenetv2: 38 classes
  simplecnn: 38 classes

combined → combined_classification_stats_wide.csv
Sum: 456 lines
