# FoxZero: Sevens (排七) RL Agent - Exploration Mode

This notebook runs the **FoxZero** reinforcement learning training pipeline on Google Colab with **Adjustable Exploration Parameters**.

**New Features:**
- **Temperature Control:** Adjust softmax temperature to control randomness.
- **Dirichlet Noise:** Inject noise at the start of games to encourage exploration.
- **Top-K Sampling:** Keep only the k most probable moves and prune the rest (e.g., set k=1 for greedy play).
- **Auto-Persistence:** Saves checkpoints to Google Drive.

In [None]:
# 1. Check GPU
# Runs the NVIDIA system-management CLI in a subshell and shows its report.
# NOTE(review): if this command fails, the runtime presumably has no GPU
# attached — confirm the Colab runtime type before training.
!nvidia-smi

In [None]:
# 2. Mount Google Drive
import os

from google.colab import drive

# Attach Google Drive under /content/drive so files written there are stored on Drive.
drive.mount('/content/drive')

# Make sure the checkpoint folder exists (no error if it is already there).
DRIVE_DIR = '/content/drive/MyDrive/FoxZero_Checkpoints'
os.makedirs(DRIVE_DIR, exist_ok=True)
print(f"Checkpoints will be saved to: {DRIVE_DIR}")

In [None]:
# 3. Clone Repository
!git clone https://github.com/shihte/FoxZero.git
%cd FoxZero
!git pull

In [None]:
# 4. Install Dependencies & Build C++ Core
!pip install pybind11

# Build the C++ extension in-place
!python setup.py build_ext --inplace

# Verify installation
!python verify_cpp_binding.py

In [None]:
# 5. Run Training with Exploration Parameters
# @title Training Configuration

temperature = 1.5 # @param {type:"slider", min:0.1, max:5.0, step:0.1}
dirichlet_alpha = 0.3 # @param {type:"slider", min:0.0, max:1.0, step:0.05}
top_k = 0 # @param {type:"integer"}

import os
DRIVE_DIR = '/content/drive/MyDrive/FoxZero_Checkpoints'
weights_path = f"{DRIVE_DIR}/foxzero_weights.pth"
log_path = f"{DRIVE_DIR}/train_log.csv"

print(f"Starting Training...")
print(f"Weights: {weights_path}")
print(f"Logs: {log_path}")
print(f"Exploration: Temp={temperature}, Dirichlet={dirichlet_alpha}, Top-k={top_k}")

# Run Training Loop
!PYTHONPATH=. python3 foxzero/train_colab.py \
    --weights_path "{weights_path}" \
    --log_path "{log_path}" \
    --temperature {temperature} \
    --dirichlet {dirichlet_alpha} \
    --top_k {top_k}

In [None]:
# 6. Monitor Progress
# Reads the training CSV log and plots loss and replay-buffer size against
# the training step. Re-run this cell to refresh the view.
import pandas as pd
import matplotlib.pyplot as plt

log_path = "/content/drive/MyDrive/FoxZero_Checkpoints/train_log.csv"

try:
    # Simple manual refresh
    df = pd.read_csv(log_path)
except FileNotFoundError:
    print("No log file found yet. Wait for training to proceed.")
except pd.errors.EmptyDataError:
    # read_csv raises this when the file exists but has no content at all
    # (e.g. it may have been created before anything was logged).
    print("Log file is empty so far. Wait for training to proceed.")
else:
    # Only plot once the log was read successfully, so that errors in the
    # plotting code are not mistaken for a missing log file.
    print(df.tail())

    fig, (loss_ax, buffer_ax) = plt.subplots(1, 2, figsize=(12, 5))

    loss_ax.plot(df['step'], df['loss'], label='Loss')
    loss_ax.set_title('Training Loss')
    loss_ax.set_xlabel('step')
    loss_ax.legend()

    buffer_ax.plot(df['step'], df['buffer_size'], label='Buffer Size', color='orange')
    buffer_ax.set_title('Buffer Growth')
    buffer_ax.set_xlabel('step')
    buffer_ax.legend()

    plt.show()