# DDP_KBIT Jupyter Notebook Interface

This notebook provides a simple interface to run the DDP_KBIT distributed deep learning system without using command line arguments. It wraps the existing `main.py` functionality for easy experimentation.

## Setup and Imports

## 세션 초기화 (매번 실행 필요) / Session Initialization (run every session)

아래 셀을 매 세션마다 가장 먼저 실행하여 로컬 모듈에 연결하세요. (Run the cell below first in every session to connect to the local modules.)

In [1]:
import os
import sys
import json
import logging
from typing import Dict, Any, Optional

# Location of the DDP_KBIT module directory on the Jupyter server.
ddp_kbit_path = r"/mnt/data/DDP_KBIT"
# Path of the entry-point script, used by the exec-based fallback below.
main_py_path = os.path.join(ddp_kbit_path, "main.py")

# Re-running this cell must not keep stacking duplicate entries on sys.path.
if ddp_kbit_path not in sys.path:
    sys.path.append(ddp_kbit_path)

# Import the main functions from main.py that the later cells call.
try:
    from main import (
        setup_logging,
        load_external_config,
        run_training_mode,
        run_experiment_mode,
        create_sample_config,
    )
    print("‚úì Successfully imported DDP_KBIT modules")
except ImportError as e:
    print(f"‚ùå Error importing DDP_KBIT modules: {e}")
    print("Available modules in current path:")
    try:
        import glob
        py_files = glob.glob("*.py")
        if py_files:
            print(f"  Python files found: {py_files}")
        else:
            print("  No Python files found in current directory")
    except OSError as list_error:
        # The listing is only a diagnostic aid; report the problem and move on
        # instead of silently swallowing every exception type.
        print(f"  Could not list Python files: {list_error}")
    print("\nTrying alternative import methods...")

    # Alternative: dynamic import by executing main.py's source with exec.
    # NOTE(review): the source runs with this notebook's globals, so an
    # `if __name__ == "__main__":` guard in main.py (if it has one) would
    # presumably fire here -- confirm that is acceptable.
    try:
        if os.path.exists(main_py_path):
            # Read through a context manager so the handle is always closed,
            # and decode as UTF-8 (the default encoding of Python source).
            with open(main_py_path, encoding="utf-8") as main_file:
                main_source = main_file.read()
            # Compiling with the real filename keeps tracebacks readable.
            exec(compile(main_source, main_py_path, "exec"), globals())
            print("‚úì Successfully loaded main.py using exec method")
        else:
            print("‚ùå main.py not found in local path")
    except Exception as exec_error:
        print(f"‚ùå Exec method failed: {exec_error}")
        print("Please ensure you're running from the correct directory and all dependencies are installed.")

  from torch.distributed.optim import ZeroRedundancyOptimizer


Error importing DDP_KBIT modules: cannot import name 'get_kafka_config' from 'config.data_config' (/mnt/data/DDP_KBIT/config/data_config.py)
Please ensure you're running from the correct directory and all dependencies are installed.


AttributeError: 'tuple' object has no attribute 'tb_frame'

## Configuration Setup

In [None]:
# Setup logging via setup_logging, which the first cell imports from main.py
# (run that cell first). Presumably "INFO" selects the logging level --
# confirm against main.py.
setup_logging("INFO")

# Create a mock args object to simulate command line arguments
class NotebookArgs:
    """Attribute container used in place of parsed command-line arguments.

    The notebook passes one instance of this class to ``run_training_mode``
    and ``run_experiment_mode``. Calling ``NotebookArgs()`` with no arguments
    yields the notebook's original defaults; any field can be overridden by
    keyword.

    Args:
        config_path: Path of the JSON configuration file to use.
        distributed: Whether a distributed (rather than single node) run is
            requested.
        experiment_type: Experiment mode; the notebook uses ``"single"`` and
            ``"multiple"``.
        iterations: Number of experiment iterations.
        log_level: Log level name (presumably consumed by main.py -- verify).
    """

    def __init__(
        self,
        *,
        config_path: str = "sample_config.json",
        distributed: bool = False,
        experiment_type: str = "single",
        iterations: int = 3,
        log_level: str = "INFO",
    ) -> None:
        # Kept as plain instance attributes so vars(args) / attribute access
        # behave as they do on an argparse-style namespace object.
        self.config_path = config_path
        self.distributed = distributed
        self.experiment_type = experiment_type
        self.iterations = iterations
        self.log_level = log_level

    def __repr__(self) -> str:
        """Return a readable summary of all current settings."""
        settings = ", ".join(f"{key}={value!r}" for key, value in vars(self).items())
        return f"{type(self).__name__}({settings})"

# Build the shared argument object that the later cells read and modify.
args = NotebookArgs()

# Report the defaults that were just applied, one item per line.
print(
    "‚úì Configuration setup complete",
    f"Config path: {args.config_path}",
    f"Distributed: {args.distributed}",
    f"Iterations: {args.iterations}",
    sep="\n",
)

## Create Sample Configuration (Run this first)

In [None]:
# Create a sample configuration file using the helper imported from main.py.
create_sample_config()
print("‚úì Sample configuration created!")

# Display the configuration.
# Presumably create_sample_config() writes this file into the current working
# directory -- verify against main.py.
sample_config_path = "sample_config.json"
if os.path.exists(sample_config_path):
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(sample_config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    print("\nCurrent configuration:")
    print(json.dumps(config, indent=2))
else:
    # Say so explicitly instead of showing nothing after the message above.
    print(f"\n{sample_config_path} was not found in {os.getcwd()}; nothing to display.")

## Training Mode

Run single node or distributed training.

In [None]:
# Single node training: make sure the shared args request a non-distributed run.
args.distributed = False
print("üöÄ Starting single node training...")

try:
    run_training_mode(args)
    print("‚úÖ Training completed successfully!")
except Exception as exc:
    # Report the failure in the cell output rather than raising.
    print(f"‚ùå Training failed: {exc}")

In [None]:
# Distributed training (uncomment to run)
# print("üöÄ Starting distributed training...")
# args.distributed = True

# try:
#     run_training_mode(args)
#     print("‚úÖ Distributed training completed successfully!")
# except Exception as e:
#     print(f"‚ùå Distributed training failed: {e}")

## Experiment Mode

Run single experiments or multiple iterations with statistical analysis.

In [None]:
# Single experiment: switch the shared args to "single" mode, then run once.
args.experiment_type = "single"
print("üß™ Running single experiment...")

try:
    run_experiment_mode(args)
    print("‚úÖ Single experiment completed successfully!")
except Exception as exc:
    # Report the failure in the cell output rather than raising.
    print(f"‚ùå Single experiment failed: {exc}")

In [None]:
# Multiple experiments with statistical analysis
print("üß™ Running multiple experiments...")
# Select "multiple" mode and the iteration count (change the 5 as needed).
args.experiment_type, args.iterations = "multiple", 5

try:
    run_experiment_mode(args)
    print(f"‚úÖ {args.iterations} experiments completed successfully!")
except Exception as exc:
    # Report the failure in the cell output rather than raising.
    print(f"‚ùå Multiple experiments failed: {exc}")

## Custom Configuration

Modify configuration parameters for your specific needs.

In [None]:
# Destination file for the customized settings.
custom_config_path = "custom_config.json"

# Customized settings, grouped into Spark, training and data sections.
custom_config = {
    "spark_config": {
        "master": "local[*]",
        "app_name": "DDP_KBIT_Custom",
        "executor_instances": 4,
        "executor_cores": 2,
        "executor_memory": "8g"
    },
    "training_config": {
        "epochs": 10,
        "batch_size": 128,
        "learning_rate": 0.0001
    },
    "data_config": {
        "kafka_servers": ["localhost:9092"],
        "topic": "custom_topic",
        "batch_size": 64
    }
}

# Serialize once; the same text is written to disk and echoed below.
serialized_config = json.dumps(custom_config, indent=2)
with open(custom_config_path, "w") as f:
    f.write(serialized_config)

# Point the shared args at the file that was just written.
args.config_path = custom_config_path

print(
    f"‚úì Custom configuration saved to: {custom_config_path}",
    "\nCustom configuration:",
    serialized_config,
    sep="\n",
)

## Utility Functions

Helper functions for notebook usage.

In [None]:
def quick_train(distributed=False, config_path="sample_config.json"):
    """Run training once after applying the given settings to the shared args.

    Args:
        distributed: Value stored in ``args.distributed`` before the run.
        config_path: Value stored in ``args.config_path`` before the run.

    Exceptions raised by ``run_training_mode`` are caught and printed, not
    re-raised. The function returns ``None``.
    """
    # Push the requested settings onto the notebook-wide argument object.
    args.config_path = config_path
    args.distributed = distributed

    print(f"üöÄ Quick training - Distributed: {distributed}")
    try:
        run_training_mode(args)
        print("‚úÖ Training completed!")
    except Exception as exc:
        print(f"‚ùå Training failed: {exc}")

def quick_experiment(experiment_type="single", iterations=3):
    """Run the experiment mode after applying the given settings to the shared args.

    Args:
        experiment_type: Value stored in ``args.experiment_type`` before the run.
        iterations: Value stored in ``args.iterations`` before the run.

    Exceptions raised by ``run_experiment_mode`` are caught and printed, not
    re-raised. The function returns ``None``.
    """
    # Push the requested settings onto the notebook-wide argument object.
    args.iterations = iterations
    args.experiment_type = experiment_type

    print(f"üß™ Quick experiment - Type: {experiment_type}, Iterations: {iterations}")
    try:
        run_experiment_mode(args)
        print("‚úÖ Experiment completed!")
    except Exception as exc:
        print(f"‚ùå Experiment failed: {exc}")

# Confirm the helpers are defined and show how to call them.
print(
    "‚úì Utility functions loaded!",
    "\nUse these functions for quick execution:",
    "- quick_train(distributed=False)",
    "- quick_experiment(experiment_type='multiple', iterations=5)",
    sep="\n",
)

## Quick Execution Examples

Use the utility functions for quick execution.

In [None]:
# Example: quick training with the defaults of quick_train
# (distributed=False, config_path="sample_config.json")
# quick_train()

# Example: quick run in "multiple" experiment mode with 3 iterations
# quick_experiment(experiment_type="multiple", iterations=3)

print("üí° Uncomment the lines above to run quick examples!")

In [None]:
import os

# Show where the notebook kernel is running and which entries sit beside it.
print("Current working directory:", os.getcwd())
print("Files in current directory:")
for entry in os.listdir('.'):
    print(f"  {entry}")