# Flow SDK Getting Started Notebook

An interactive introduction to GPU computing with the Flow SDK.

## Prerequisites
- Flow SDK installed: `pip install flow-compute`
- API key from https://app.mithril.ai/account/apikeys

## 1. Setup and Authentication

In [None]:
# Install Flow SDK if needed
!pip install flow-sdk --quiet

# Import required libraries
import json
from datetime import datetime

import flow
from flow import TaskConfig

In [None]:
# Set up authentication
# Both options below are left commented out; uncomment one only if your API key
# is not already configured. Each of them needs `import os`, which this
# notebook does not import by default.
# NOTE(review): presumably flow.Flow() reads FLOW_API_KEY from the environment
# when it is set - confirm against the SDK documentation.

# Option 1: Set API key directly (not recommended for shared notebooks)
# import os
# os.environ['FLOW_API_KEY'] = 'your-api-key'

# Option 2: Load from secure location
# import os
# with open(os.path.expanduser('~/.flow/credentials'), 'r') as f:
#     os.environ['FLOW_API_KEY'] = f.read().strip()

# Initialize Flow client (reused by every later cell as `flow_client`)
flow_client = flow.Flow()
print("✅ Flow SDK initialized successfully!")

## 2. GPU Validation

Let's verify GPU access and explore available instances.

In [None]:
# Quick GPU test: list the visible GPUs with nvidia-smi, then have PyTorch
# report its version, CUDA availability, and the first device's name/memory.
gpu_probe_command = """
    nvidia-smi -L && echo "---" &&
    python -c "
import torch
print(f'PyTorch version: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
    print(f'Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')
"
    """

validation_config = TaskConfig(
    name="notebook-gpu-test",
    command=gpu_probe_command,
    instance_type="h100-80gb.sxm.8x",
    region="us-central1-b",
    max_run_time_seconds=60,
    max_price_per_hour=10.00,
)

# wait=True is passed so the logs and cost are read right after run() returns.
print("🚀 Running GPU validation...")
task = flow_client.run(validation_config, wait=True)

# Show what the remote machine reported, followed by what the probe cost.
print("\n📋 GPU Information:")
validation_logs = task.logs()
print(validation_logs)
validation_cost = task.total_cost
print(f"\n💰 Test cost: ${validation_cost:.4f}")

In [None]:
# Available Mithril instance types
instance_types = [
    {"name": "a100", "vram": "80GB", "gpus": 1, "use_case": "Training, fine-tuning, inference"},
    {
        "name": "2xa100",
        "vram": "160GB",
        "gpus": 2,
        "use_case": "Larger models, distributed training",
    },
    {"name": "4xa100", "vram": "320GB", "gpus": 4, "use_case": "Large-scale distributed training"},
    {"name": "8xa100", "vram": "640GB", "gpus": 8, "use_case": "Massive models and workloads"},
    {"name": "h100", "vram": "640GB", "gpus": 8, "use_case": "Cutting-edge performance (8× H100)"},
]

# Display as formatted table
import pandas as pd

df = pd.DataFrame(instance_types)
print("💡 Note: Mithril uses dynamic auction-based pricing.")
print("   Use 'flow instances' to see current spot prices.\n")
df

## 3. Interactive GPU Selection

Choose the right GPU for your workload.

In [None]:
# Interactive instance selector
from ipywidgets import Checkbox, Dropdown, FloatSlider, interact


def recommend_instance(workload_type, model_size_gb, max_budget):
    """Print the smallest instance whose VRAM fits the workload.

    The memory requirement is ``model_size_gb`` scaled by a per-workload
    factor (inference 1.2, fine-tuning 2.5, training 3.0). Every entry of the
    notebook-level ``instance_types`` list with at least that much VRAM is a
    candidate; the one with the least spare memory is reported. If nothing
    fits, memory-saving suggestions are printed instead.

    Args:
        workload_type: "inference", "training" or "fine-tuning"; any other
            value raises KeyError.
        model_size_gb: Model size in GB.
        max_budget: Accepted for the widget, but not used by the
            recommendation (the instance table carries no prices).

    Returns:
        None. All output goes to stdout.
    """
    overhead_factor = {"inference": 1.2, "training": 3.0, "fine-tuning": 2.5}
    required_gb = model_size_gb * overhead_factor[workload_type]

    # Pair every instance with its numeric VRAM ("80GB" -> 80.0).
    capacities = [(spec, float(spec["vram"].replace("GB", ""))) for spec in instance_types]
    candidates = [
        {**spec, "memory_headroom": capacity_gb - required_gb}
        for spec, capacity_gb in capacities
        if capacity_gb >= required_gb
    ]

    if not candidates:
        print("❌ No suitable instances found. Try:")
        print("   - Using model quantization (8-bit or 4-bit)")
        print("   - Gradient checkpointing for training")
        print("   - Smaller batch sizes")
        return

    # Least waste wins; on a tie min() keeps the earliest entry of the table.
    pick = min(candidates, key=lambda candidate: candidate["memory_headroom"])
    plural = "s" if pick["gpus"] > 1 else ""
    print(f"💡 Recommended: {pick['name']}")
    print(f"   VRAM: {pick['vram']} ({pick['gpus']} GPU{plural})")
    print(f"   Memory headroom: {pick['memory_headroom']:.1f}GB")
    print(f"   Use case: {pick['use_case']}")
    print("\n📌 Set your max price with --max-price to control costs")


# One control per recommend_instance parameter; the dict keys must match the
# function's parameter names because they are passed on as keyword arguments.
selector_controls = {
    "workload_type": Dropdown(
        options=["inference", "training", "fine-tuning"], value="inference"
    ),
    "model_size_gb": FloatSlider(
        min=1, max=200, step=1, value=7, description="Model Size (GB)"
    ),
    "max_budget": FloatSlider(min=10, max=100, step=5, value=30, description="Max $/hour"),
}
interact(recommend_instance, **selector_controls)

## 4. Running Your First GPU Task

Let's run a simple matrix multiplication benchmark.

In [None]:
# Matrix multiplication benchmark
# The script is stored as a plain (non-f) string, so the braces inside it are
# not interpolated in this notebook. After one small warm-up matmul it times a
# single square matmul for each size (synchronizing the GPU before and after),
# prints the achieved TFLOPS, and dumps all results as JSON to
# /tmp/benchmark_results.json.
benchmark_script = """
import torch
import time

# GPU warmup
device = torch.device("cuda")
_ = torch.randn(100, 100).to(device) @ torch.randn(100, 100).to(device)
torch.cuda.synchronize()

# Benchmark different sizes
sizes = [1000, 2000, 4000, 8000]
results = []

for size in sizes:
    A = torch.randn(size, size).to(device)
    B = torch.randn(size, size).to(device)
    
    # Time the operation
    torch.cuda.synchronize()
    start = time.time()
    
    C = A @ B
    
    torch.cuda.synchronize()
    elapsed = time.time() - start
    
    # Calculate TFLOPS
    flops = 2 * size**3  # Matrix multiplication FLOPs
    tflops = flops / elapsed / 1e12
    
    results.append({
        "size": size,
        "time": elapsed,
        "tflops": tflops
    })
    
    print(f"Size {size}x{size}: {elapsed:.3f}s ({tflops:.1f} TFLOPS)")

# Save results
import json
with open("/tmp/benchmark_results.json", "w") as f:
    json.dump(results, f)
"""

# Run benchmark
import shlex

benchmark_config = TaskConfig(
    name="gpu-benchmark",
    # shlex.quote escapes the script for the shell, so it stays a single
    # `-c` argument even if the script later gains single quotes.
    command=f"python -c {shlex.quote(benchmark_script)}",
    instance_type="h100-80gb.sxm.8x",
    region="us-central1-b",
    max_run_time_minutes=5,
    max_price_per_hour=20.00,
    # The script writes its results here; listing the path makes it
    # available to the download step that follows.
    output_artifacts=["/tmp/benchmark_results.json"],
)

print("🏃 Running GPU benchmark...")
benchmark_task = flow_client.run(benchmark_config, wait=True)

# Display results
print("\n📊 Benchmark Results:")
print(benchmark_task.logs())
print(f"\n💰 Benchmark cost: ${benchmark_task.total_cost:.4f}")

In [None]:
# Download and visualize results
if benchmark_task.status == "completed":
    # Download results file
    results_path = benchmark_task.download_artifact(
        "/tmp/benchmark_results.json", "./benchmark_results.json"
    )

    # Load and visualize
    with open(results_path, encoding="utf-8") as f:
        results = json.load(f)

    import matplotlib.pyplot as plt

    sizes = [r["size"] for r in results]
    tflops = [r["tflops"] for r in results]

    # Plot the sizes as evenly spaced categories. Using the raw sizes
    # (1000..8000) as x coordinates with the default bar width of 0.8 would
    # draw bars too thin to see.
    positions = list(range(len(sizes)))
    labels = [f"{size}×{size}" for size in sizes]

    plt.figure(figsize=(10, 6))
    plt.bar(positions, tflops, color="#2E86AB", tick_label=labels)
    plt.xlabel("Matrix Size")
    plt.ylabel("Performance (TFLOPS)")
    plt.title("GPU Matrix Multiplication Performance")
    plt.grid(axis="y", alpha=0.3)

    # Add value labels on bars; va="bottom" rests each label on top of its
    # bar regardless of the y-axis scale.
    for x, t in zip(positions, tflops, strict=True):
        plt.text(x, t, f"{t:.1f}", ha="center", va="bottom")

    plt.show()
else:
    # Say why there is no chart instead of finishing the cell silently.
    print(f"⚠️ Benchmark status is '{benchmark_task.status}'; no results to plot.")

## 5. Cost Estimation Tool

Estimate costs for your workloads before running them.

In [None]:
# Interactive cost calculator
def calculate_costs(instance_type, hours_per_day, days, use_spot, price_per_hour=None):
    """Print a cost estimate for running an instance on a given schedule.

    Args:
        instance_type: Value of the "name" key of an ``instance_types`` entry.
        hours_per_day: Hours the instance runs each day.
        days: Number of days in the run.
        use_spot: Whether the estimate is for spot capacity.
        price_per_hour: Hourly price in dollars to use for the estimate. When
            omitted, the instance entry's "spot_price" (spot) or "price" key
            is used if present.

    Returns:
        None. The estimate, or a message explaining why there is none, is
        printed to stdout.
    """
    # Find instance pricing
    instance = next((i for i in instance_types if i["name"] == instance_type), None)
    if instance is None:
        print("Instance type not found")
        return

    # The instance table in this notebook carries no price columns, so the
    # keys are optional: an explicit price_per_hour always wins.
    list_price = instance.get("price")
    if price_per_hour is not None:
        hourly_rate = price_per_hour
    else:
        hourly_rate = instance.get("spot_price") if use_spot else list_price

    if hourly_rate is None:
        print(f"No pricing data available for '{instance_type}'.")
        print("   Check current prices with 'flow instances' and pass price_per_hour.")
        return

    # Calculate costs
    daily_cost = hourly_rate * hours_per_day
    total_cost = daily_cost * days
    monthly_cost = hourly_rate * 24 * 30  # If running 24/7

    # Display results (":g" prints slider floats such as 8.0 as "8")
    print(f"💻 Instance: {instance_type} ({instance['vram']} VRAM)")
    print(f"💵 Hourly rate: ${hourly_rate:.2f}")
    print(f"📅 Daily cost: ${daily_cost:.2f} ({hours_per_day:g}h/day)")
    print(f"📊 Total cost: ${total_cost:.2f} ({days:g} days)")
    print(f"📆 Monthly 24/7: ${monthly_cost:.2f}")

    # Savings need a list price to compare against; the discount is computed
    # from the two rates rather than assumed.
    if use_spot and list_price:
        saved_per_hour = list_price - hourly_rate
        savings = saved_per_hour * hours_per_day * days
        print(f"\n✨ Spot savings: ${savings:.2f} ({saved_per_hour / list_price:.0%} off)")

    # ROI calculation (rough rules of thumb, as the labels say)
    print("\n📈 ROI Considerations:")
    print(f"   - Inference: ~${hourly_rate / 100:.4f} per 1K requests")
    print(f"   - Training: ~${total_cost / 10:.2f} per epoch (estimated)")


# Create interactive widget
from ipywidgets import Checkbox, Dropdown, FloatSlider, interact

# The default selection must be one of the dropdown options, so it is taken
# from the instance table instead of being hard-coded.
instance_names = [i["name"] for i in instance_types]

interact(
    calculate_costs,
    instance_type=Dropdown(
        options=instance_names,
        value=instance_names[0] if instance_names else None,
        description="Instance:",
    ),
    hours_per_day=FloatSlider(min=1, max=24, step=1, value=8, description="Hours/day:"),
    days=FloatSlider(min=1, max=30, step=1, value=7, description="Days:"),
    use_spot=Checkbox(value=True, description="Use spot instances"),
    # Prices are auction-based, so the user supplies the hourly price.
    price_per_hour=FloatSlider(min=0.5, max=100, step=0.5, value=20, description="$/hour:"),
)

## 6. Task Management

Monitor and manage your running tasks.

In [None]:
# List recent tasks
def list_my_tasks(limit=5):
    """Return a pandas DataFrame summarising recent tasks.

    Args:
        limit: Passed to ``flow_client.list_tasks`` as its ``limit`` argument.

    Returns:
        A DataFrame with one row per task and the columns ID (first eight
        characters of the task id), Name, Status, Instance, Runtime (minutes)
        and Cost. Runtime and Cost read "N/A" when the value is missing or zero.
    """

    def summarize(job):
        # One table row; falsy runtime/cost values are shown as "N/A".
        return {
            "ID": job.task_id[:8],
            "Name": job.name,
            "Status": job.status,
            "Instance": job.instance_type,
            "Runtime": f"{job.runtime_seconds / 60:.1f}m" if job.runtime_seconds else "N/A",
            "Cost": f"${job.total_cost:.3f}" if job.total_cost else "N/A",
        }

    rows = [summarize(job) for job in flow_client.list_tasks(limit=limit)]
    return pd.DataFrame(rows)


# Show the most recent tasks (default limit of 5). The bare call is the last
# expression of the cell, so the notebook renders the returned DataFrame.
print("📋 Recent Tasks:")
list_my_tasks()

In [None]:
# Task monitoring function
def monitor_task(task_id, poll_interval=5):
    """Poll a task and redraw its status until it reaches a final state.

    Args:
        task_id: Identifier passed to ``flow_client.get_task``.
        poll_interval: Seconds to sleep between polls (default 5).

    Returns:
        None. The loop ends once the status is "completed", "failed" or
        "cancelled"; until then it keeps polling.
    """
    import time

    try:
        from IPython.display import clear_output
    except ImportError:
        # Outside Jupyter there is nothing to clear; output is just appended.
        def clear_output(wait=False):
            pass

    while True:
        task = flow_client.get_task(task_id)

        # Runtime and cost can be unset (list_my_tasks guards for that too),
        # so format them defensively instead of dividing None.
        if task.runtime_seconds is not None:
            runtime = f"{task.runtime_seconds / 60:.1f} minutes"
        else:
            runtime = "N/A"
        cost = f"${task.total_cost:.4f}" if task.total_cost is not None else "N/A"

        clear_output(wait=True)
        print(f"🔄 Task: {task.name} ({task.task_id[:8]})")
        print(f"📊 Status: {task.status}")
        print(f"⏱️  Runtime: {runtime}")
        print(f"💰 Cost: {cost}")

        if task.status in ["completed", "failed", "cancelled"]:
            # Only a successful run gets the check mark.
            icon = "✅" if task.status == "completed" else "❌"
            print(f"\n{icon} Task {task.status}!")
            break

        time.sleep(poll_interval)


# Example: monitor the last task
# monitor_task(benchmark_task.task_id)

## 7. Best Practices Summary

Key recommendations for using Flow SDK effectively.

In [None]:
# Best practices checklist: (category, practices) pairs in display order.
_practice_groups = (
    (
        "Cost Management",
        [
            "Always set max_price_per_hour",
            "Use spot instances for fault-tolerant workloads",
            "Set max_total_cost for budget control",
            "Monitor costs with flow_client.get_spending()",
        ],
    ),
    (
        "Performance",
        [
            "Choose instance types based on VRAM needs",
            "Use mixed precision (fp16) for 2x speedup",
            "Enable gradient checkpointing for memory savings",
            "Batch operations for better GPU utilization",
        ],
    ),
    (
        "Reliability",
        [
            "Implement checkpointing for long runs",
            "Use retry policies for spot instances",
            "Save outputs as artifacts",
            "Set appropriate timeouts",
        ],
    ),
    (
        "Development",
        [
            "Test on smaller instances first",
            "Use interactive notebooks for experimentation",
            "Version control your TaskConfigs",
            "Log metrics for analysis",
        ],
    ),
)

best_practices = [
    {"category": category, "practices": practices} for category, practices in _practice_groups
]

# Print each category as a heading followed by its ticked-off practices.
for section in best_practices:
    print(f"\n📌 {section['category']}:")
    for tip in section["practices"]:
        print(f"   ✓ {tip}")

## 8. Next Steps

Ready to do more? Check out these resources:

1. **[Inference Notebook](./inference.ipynb)** - Deploy model servers
2. **[Training Notebook](./training.ipynb)** - Train models from scratch
3. **[Fine-tuning Notebook](./fine-tuning.ipynb)** - Adapt pre-trained models
4. **[Flow SDK Docs](https://docs.flow.ai)** - Complete documentation

### Quick Links
- [GPU Instance Types](../index.md#gpu-instances)
- [Cost Estimates](../_shared/cost-estimates.md)
- [API Reference](https://docs.flow.ai/api)

In [None]:
# Save your session info
# Fetch the task list once and reuse it for both the count and the cost total.
recent_tasks = list(flow_client.list_tasks(limit=100))

session_info = {
    "timestamp": datetime.now().isoformat(),
    # Fall back to a placeholder if the package does not expose a version.
    "flow_version": getattr(flow, "__version__", "unknown"),
    "total_tasks_run": len(recent_tasks),
    # Tasks are objects read by attribute (task.total_cost) everywhere else in
    # this notebook, not dicts, and the cost can be unset - count that as 0.
    "estimated_total_cost": sum(
        (getattr(t, "total_cost", None) or 0) for t in recent_tasks
    ),
}

print("📊 Session Summary:")
print(json.dumps(session_info, indent=2))

# Save for future reference
with open("flow_session.json", "w", encoding="utf-8") as f:
    json.dump(session_info, f, indent=2)

print("\n✅ Session info saved to flow_session.json")