In [None]:
# 1. Install Dependencies
!pip install ultralytics mplfinance pandas opencv-python matplotlib seaborn -q

import os
import sys
import shutil
from pathlib import Path
import torch
import yaml
from ultralytics import YOLO
import matplotlib.pyplot as plt
import cv2

# Make the current working directory importable so the `src.*` imports below resolve.
# The entry is only appended when it is not already on sys.path.
current_dir = Path.cwd()
root_entry = str(current_dir)
if root_entry not in sys.path:
    sys.path.append(root_entry)

# Report the PyTorch build and whether a CUDA device is visible.
print(f"PyTorch Version: {torch.__version__}")
# The device name is only queried when CUDA is available (the conditional is lazy).
gpu_message = (
    f"✅ GPU Detected: {torch.cuda.get_device_name(0)}"
    if torch.cuda.is_available()
    else "⚠️ No GPU detected. Training will be slow!"
)
print(gpu_message)

# Import project modules
from src.config import (
    RAW_DATA_DIR, DATASET_YAML, PROCESSED_DATA_DIR, 
    WEIGHTS_DIR, MODEL_CONFIG, create_directories
)
from src.data_utils import verify_dataset_structure, create_data_yaml
from src.pattern_detector import PatternDetector

# Initialize Directories
# create_directories() is imported from src.config; presumably it creates the
# project's data/model folders used below — confirm against src/config.py.
create_directories()
print("Directories initialized.")

In [None]:
print("=== Step 1: Data Preparation ===")

# Stop immediately when the dataset check from src.data_utils fails
if not verify_dataset_structure():
    raise FileNotFoundError("Raw data not found in data/raw/. Please upload data.")
print("✅ Raw dataset found.")

# Generate the YOLO configuration file (data.yaml)
yaml_path = create_data_yaml()
print(f"✅ Configuration generated at: {yaml_path}")

# Echo the generated file back so its content can be inspected in the notebook
yaml_text = Path(yaml_path).read_text()
print("\nData YAML Content:")
print(yaml_text)

In [None]:
print("=== Step 2: Model Training ===")

# Start from the pre-trained YOLOv8 Nano checkpoint (efficient for candlesticks)
model = YOLO('yolov8n.pt')

# Training options gathered in one place so they are easy to scan and tweak
train_options = dict(
    data=str(DATASET_YAML),
    epochs=30,                # Adjustable: 30-50 is usually sufficient
    batch=16,                 # Adjust based on GPU VRAM
    imgsz=640,
    patience=10,              # Early stopping
    project='models',         # Save to models/ folder
    name='candlestick_run',
    exist_ok=True,            # Overwrite existing run
    verbose=True,
)

# Launch training; `model` and `results` are reused by later cells
results = model.train(**train_options)

print("✅ Training Complete.")

In [None]:
print("=== Step 3: Evaluation & Artifacts ===")

# Path to the best weights from the run we just finished. It mirrors the
# project='models' / name='candlestick_run' arguments of the training call.
run_weights = Path('models/candlestick_run/weights/best.pt')
final_weights = WEIGHTS_DIR / 'best.pt'

# Copy weights to the standard 'production' path
if run_weights.exists():
    # Make sure the destination folder exists so the copy cannot fail on a missing directory
    final_weights.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(run_weights, final_weights)
    print(f"✅ Best weights saved to: {final_weights}")
else:
    # Best-effort: warn and carry on so the in-memory model can still be validated below
    print("⚠️ Weights file not found. Check training logs.")

# Validation Run
# split='val' evaluates the validation split, so the status message says so
# (it previously claimed to run on the test set).
print("\nRunning Validation on Validation Set...")
val_results = model.val(data=str(DATASET_YAML), split='val')
print(f"mAP@50: {val_results.box.map50:.4f}")

In [None]:
print("=== Step 4: Inference Pipeline ===")

# Initialize detector with our new weights
detector = PatternDetector(model_path=final_weights)

# Pick a sample image from the validation set. The glob result is sorted so the
# same image is selected on every run; raw glob order depends on the filesystem.
valid_images = sorted((RAW_DATA_DIR / "valid" / "images").glob("*.jpg"))
if valid_images:
    sample_img = valid_images[0]

    # Run detection
    detections, annotated_img = detector.detect(str(sample_img))

    # Show Results
    # NOTE(review): plt.imshow interprets arrays as RGB. If detect() returns an
    # OpenCV-style BGR image the colours will look swapped — confirm against
    # src/pattern_detector.py.
    plt.figure(figsize=(10, 10))
    plt.imshow(annotated_img)
    plt.axis('off')
    plt.title(f"Detections: {len(detections)}")
    plt.show()

    detector.print_detections(detections)
else:
    print("No validation images found to test.")