# ML Model Training for Exercise Form Analysis

Bu notebook, toplanan egzersiz verileriyle ML modelini eğitmek için kullanılır.

## 🚀 GPU/CPU Desteği

- **CPU Mode:** scikit-learn (hızlı, Random Forest paralel çalışır)
- **GPU Mode:** RAPIDS cuML (GPU-accelerated sklearn, çok daha hızlı)

**Google Colab Runtime Seçimi:**
- Runtime > Change runtime type > Hardware accelerator > **GPU** (GPU için)
- Runtime > Change runtime type > Hardware accelerator > **CPU** (CPU için - varsayılan)

## Kullanım:
1. **Runtime seçin:** GPU veya CPU (yukarıdaki menüden)
2. Dosyaları Google Drive'a yükleyin veya Colab'a doğrudan yükleyin
3. Gerekli dosyaları yükleyin:
   - `dataset_collector.py`
   - `ml_trainer.py`
   - `imu_feature_extractor.py` (IMU modeli i√ßin)
   - `MLTRAINCAMERA/{exercise}/` klasörleri
4. Aşağıdaki hücreleri sırayla çalıştırın


In [None]:
# Hardware check and training-mode selection
import os
import subprocess


def gpu_info_line(smi_output, line_index=9):
    """Return one line of ``nvidia-smi`` output, or None if it is too short.

    Args:
        smi_output: Full text captured from ``nvidia-smi``.
        line_index: Zero-based index of the line to return. The default (9)
            is the row this notebook has always printed as the GPU summary;
            the table layout can differ between driver versions.

    Returns:
        The requested line, or None when the output has fewer lines.
    """
    lines = smi_output.split('\n')
    if 0 <= line_index < len(lines):
        return lines[line_index]
    return None


# Probe for an NVIDIA GPU by running `nvidia-smi`. Only failures to launch
# the tool are treated as "no GPU"; a short or unexpected output layout must
# not flip the detection result.
try:
    result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
except (OSError, subprocess.SubprocessError):
    # nvidia-smi is missing or could not be executed.
    GPU_AVAILABLE = False
else:
    GPU_AVAILABLE = result.returncode == 0

if GPU_AVAILABLE:
    print("‚úÖ GPU detected (NVIDIA)")
    info_line = gpu_info_line(result.stdout)
    if info_line is not None:
        print(info_line)  # GPU info line
else:
    print("‚ÑπÔ∏è  No GPU detected - CPU mode")

# Runtime selection: use the GPU when one is present, otherwise the CPU.
# Edit this line to force a mode; the guard below corrects an impossible choice.
USE_GPU = GPU_AVAILABLE

if not GPU_AVAILABLE and USE_GPU:
    print("‚ö†Ô∏è  GPU not available. Switching to CPU mode.")
    print("   To use GPU: Runtime > Change runtime type > Hardware accelerator > GPU")
    USE_GPU = False

# Report the mode only after the fallback so the banner reflects what will run.
print(f"\nüîß Training mode: {'GPU (RAPIDS cuML)' if USE_GPU else 'CPU (scikit-learn)'}")


In [None]:
# Install the required libraries for the selected training mode.
# Reads USE_GPU, which must already be defined by an earlier cell.
# `%pip` is an IPython magic, so this cell only runs inside a notebook kernel.
if USE_GPU:
    # GPU mode: RAPIDS cuML (GPU-accelerated sklearn)
    # NOTE(review): the `-cu11` wheels presumably target CUDA 11 - confirm they
    # match the CUDA version of the runtime in use.
    print("üì¶ Installing GPU-accelerated libraries (RAPIDS cuML)...")
    %pip install cuml-cu11 cudf-cu11 --extra-index-url=https://pypi.nvidia.com -q
    print("‚úÖ RAPIDS cuML installed (GPU mode)")
else:
    # CPU mode: Standard scikit-learn
    print("üì¶ Installing CPU libraries...")
    %pip install scikit-learn numpy scipy pandas joblib -q
    print("‚úÖ scikit-learn installed (CPU mode)")

# Always required, regardless of mode (already covered by the CPU branch above,
# so this is a repeat install in CPU mode).
%pip install numpy pandas joblib -q
print("‚úÖ Base libraries installed")


In [None]:
# Load the project files (from Google Drive or by direct upload).
# This cell only prints instructions; uncomment one of the options below.
# Option 1: mount Google Drive
# from google.colab import drive
# drive.mount('/content/drive')
# %cd /content/drive/MyDrive/path/to/project/CameraV2

# Option 2: upload the files directly
# Upload the following files:
# - dataset_collector.py
# - ml_trainer.py
# - imu_feature_extractor.py (optional)
# - MLTRAINCAMERA/ folder

print("üìÅ Upload files using the file browser on the left")
print("   Or mount Google Drive and navigate to your project folder")


In [None]:
# Training setup - imports, GPU/CPU backend selection, and target exercise
import sys
sys.path.insert(0, '.')  # make the uploaded project modules importable

from pathlib import Path
from dataset_collector import DatasetCollector
from ml_trainer import FormScorePredictor, BaselineCalculator

# Start from the CPU default; GPU_MODE is switched on only after every
# cuML/cuDF import below has succeeded.
GPU_MODE = False
if not USE_GPU:
    print("‚ÑπÔ∏è  Using CPU mode (scikit-learn)")
else:
    try:
        from cuml.ensemble import RandomForestRegressor as cuRF
        from cuml.model_selection import train_test_split as cu_train_test_split
        from cuml.metrics import mean_squared_error as cu_mse, mean_absolute_error as cu_mae, r2_score as cu_r2
        import cudf
        import cupy as cp
    except ImportError as import_error:
        # RAPIDS is not installed or not importable: fall back to scikit-learn.
        print(f"‚ö†Ô∏è  RAPIDS cuML import failed: {import_error}")
        print("   Falling back to CPU mode (scikit-learn)")
        USE_GPU = False
    else:
        GPU_MODE = True
        print("‚úÖ RAPIDS cuML loaded (GPU mode)")

# Exercise to train on
EXERCISE = 'bicep_curls'  # Change as needed: bicep_curls, squats, lunges, etc.

print(f"\nüìπ Training Camera Model for {EXERCISE}...")
print(f"   Mode: {'GPU (RAPIDS cuML)' if GPU_MODE else 'CPU (scikit-learn)'}")


In [None]:
# Data loading: read every stored sample for the selected exercise
collector = DatasetCollector("MLTRAINCAMERA")
samples = collector.load_dataset(exercise=EXERCISE)

_sample_count = len(samples)
print(f"   Found {_sample_count} samples for {EXERCISE}")

# Report success, or point the user at the folder that should hold the data.
if _sample_count:
    print(f"‚úÖ {_sample_count} samples loaded")
else:
    print(f"‚ùå No samples found for exercise: {EXERCISE}")
    print(f"   MLTRAINCAMERA/{EXERCISE}/ klas√∂r√ºn√º kontrol edin.")


In [None]:
# Auto-labeling (only when the dataset carries no labels at all)
# A sample counts as labeled when it has an expert score or a perfect-form flag.
labeled_samples = [
    s for s in samples
    if s.expert_score is not None or s.is_perfect_form is not None
]

if labeled_samples:
    print(f"‚úÖ {len(labeled_samples)} samples already labeled")
else:
    print("   Auto-labeling samples based on regional scores...")
    for sample in samples:
        # Samples without regional scores cannot be scored; leave them
        # unlabeled and keep them out of the training set below.
        if not sample.regional_scores:
            continue
        avg_score = sum(sample.regional_scores.values()) / len(sample.regional_scores)
        sample.expert_score = avg_score
        sample.is_perfect_form = (avg_score >= 90)
        # Only samples that actually received a label are collected; the
        # previous `labeled_samples = samples` also passed unlabeled ones on.
        labeled_samples.append(sample)
    print(f"‚úÖ {len(labeled_samples)} samples labeled")


In [None]:
# Feature extraction: fill in features for every labeled sample that lacks them
_total = len(labeled_samples)
print(f"   Extracting features for {_total} samples...")

for _done, sample in enumerate(labeled_samples, start=1):
    # Samples that already carry features are left untouched.
    if sample.features is None:
        collector.extract_features(sample)
    # Progress line after every tenth sample.
    if _done % 10 == 0:
        print(f"      Processed {_done}/{_total} samples...")

print("‚úÖ Features extracted")


In [None]:
# Model training (GPU or CPU)
def _format_metric(value):
    """Render a metric with three decimals; pass non-numeric placeholders through.

    Applying ``:.3f`` directly to the ``'N/A'`` fallback raises ValueError, so
    anything that cannot be formatted as a float is returned as plain text.
    """
    try:
        return f"{value:.3f}"
    except (TypeError, ValueError):
        return str(value)


if len(labeled_samples) < 10:
    print(f"‚ùå Not enough labeled samples (need >=10, got {len(labeled_samples)})")
else:
    print(f"   Training model with {len(labeled_samples)} samples...")
    print(f"   Using: {'GPU (RAPIDS cuML)' if GPU_MODE else 'CPU (scikit-learn)'}")

    if GPU_MODE:
        # GPU mode: train with cuML
        print("\nüöÄ GPU Training Mode (RAPIDS cuML)")

        # Prepare the data
        import numpy as np
        X_list = []
        y_list = []

        for sample in labeled_samples:
            # Skip samples without features or without a regression target.
            # A missing expert_score used to be trained as score 0, which
            # teaches the model that unlabeled reps are the worst possible form.
            if not sample.features or sample.expert_score is None:
                continue
            X_list.append(list(sample.features.values()))
            y_list.append(sample.expert_score)

        if not X_list:
            raise ValueError(
                "No samples with both features and an expert_score to train on"
            )

        X = np.array(X_list)
        y = np.array(y_list)

        # Convert to cuDF containers
        X_df = cudf.DataFrame(X)
        y_df = cudf.Series(y)

        # Train/test split
        X_train_df, X_test_df, y_train_df, y_test_df = cu_train_test_split(
            X_df, y_df, test_size=0.2, random_state=42
        )

        # Fit the model
        print("   Training Random Forest on GPU...")
        model = cuRF(n_estimators=100, max_depth=10, random_state=42)
        model.fit(X_train_df, y_train_df)

        # Predict, then bring everything back to NumPy for the metrics
        y_train_pred = model.predict(X_train_df).to_numpy()
        y_test_pred = model.predict(X_test_df).to_numpy()
        y_train = y_train_df.to_numpy()
        y_test = y_test_df.to_numpy()

        # Metrics
        train_mse = cu_mse(y_train, y_train_pred)
        test_mse = cu_mse(y_test, y_test_pred)
        train_mae = cu_mae(y_train, y_train_pred)
        test_mae = cu_mae(y_test, y_test_pred)
        train_r2 = cu_r2(y_train, y_train_pred)
        test_r2 = cu_r2(y_test, y_test_pred)

        results = {
            'train_r2': float(train_r2),
            'test_r2': float(test_r2),
            'train_mae': float(train_mae),
            'test_mae': float(test_mae),
            'train_mse': float(train_mse),
            'test_mse': float(test_mse)
        }

        # Wrap the model in a predictor object (used by the save step)
        predictor = FormScorePredictor(model_type="random_forest")
        predictor.model = model  # keep the cuML model (pickling it may not work)
        predictor.is_trained = True

        print(f"\n‚úÖ GPU Training completed!")

    else:
        # CPU mode: standard scikit-learn via the project's predictor
        print("\nüöÄ CPU Training Mode (scikit-learn)")
        predictor = FormScorePredictor(model_type="random_forest")
        results = predictor.train(labeled_samples, verbose=True, use_imu_features=False)
        print(f"\n‚úÖ CPU Training completed!")

    # Metrics missing from `results` are shown as N/A instead of crashing.
    print(f"\nüìä Performance Metrics:")
    print(f"   Train R¬≤: {_format_metric(results.get('train_r2', 'N/A'))}")
    print(f"   Test R¬≤: {_format_metric(results.get('test_r2', 'N/A'))}")
    print(f"   Test MAE: {_format_metric(results.get('test_mae', 'N/A'))}")
    print(f"   Test MSE: {_format_metric(results.get('test_mse', 'N/A'))}")


In [None]:
# Model saving: persist the trained predictor under models/<exercise>/...
from datetime import datetime
import json

model_dir = Path("models") / EXERCISE / "form_score_camera_random_forest"
model_dir.mkdir(parents=True, exist_ok=True)

if not GPU_MODE:
    # CPU mode: the predictor trained earlier is saved as-is.
    predictor.save(
        str(model_dir),
        exercise=EXERCISE,
        training_samples=len(labeled_samples),
        performance_metrics=results
    )
    print(f"‚úÖ Model saved to {model_dir}")
else:
    # GPU mode: the cuML model itself is not written out. Its metrics are
    # recorded as metadata and a CPU model is trained and saved for deployment.
    print("‚ö†Ô∏è  GPU mode: cuML models cannot be pickled directly")
    print("   Saving metadata and performance metrics...")
    print("   Note: For production, retrain in CPU mode or use model conversion")

    # Describe the GPU run
    gpu_run_info = {
        'model_type': 'random_forest',
        'training_mode': 'GPU (RAPIDS cuML)',
        'exercise': EXERCISE,
        'training_date': datetime.now().isoformat(),
        'training_samples': len(labeled_samples),
        'performance': results,
        'note': 'Model trained with cuML. Cannot be pickled. Retrain in CPU mode for deployment.'
    }

    metadata_file = model_dir / "metadata.json"
    with metadata_file.open('w') as handle:
        json.dump(gpu_run_info, handle, indent=2)

    print(f"‚úÖ Metadata saved to {metadata_file}")
    print(f"   Model performance metrics saved")
    print(f"   For deployment, retrain in CPU mode using train_ml_models.py")

    # Train again on the CPU so that a saveable model exists.
    print("\nüîÑ Training CPU version for deployment...")
    cpu_predictor = FormScorePredictor(model_type="random_forest")
    cpu_results = cpu_predictor.train(labeled_samples, verbose=False, use_imu_features=False)

    cpu_predictor.save(
        str(model_dir),
        exercise=EXERCISE,
        training_samples=len(labeled_samples),
        performance_metrics=cpu_results
    )

    print(f"‚úÖ CPU model saved to {model_dir} (for deployment)")


In [None]:
# Baseline calculation from the samples flagged as perfect form
import json

perfect_samples = [s for s in labeled_samples if s.is_perfect_form == True]

if not perfect_samples:
    print("‚ö†Ô∏è  No perfect samples found for baseline calculation")
else:
    baselines = BaselineCalculator.calculate_baselines(perfect_samples)
    baseline_file = model_dir / "baselines.json"
    # default=str turns any value json cannot encode into its string form.
    with baseline_file.open('w') as handle:
        json.dump(baselines, handle, indent=2, default=str)
    print(f"‚úÖ Baselines saved to {baseline_file}")
    print(f"   Perfect samples: {len(perfect_samples)}")


In [None]:
# Download the model files (or save them to Google Drive).
# This cell only prints where the files are; uncomment one option below.
# from google.colab import files
# import shutil
#
# # Zip the model folder
# shutil.make_archive(f"model_{EXERCISE}", 'zip', model_dir)
# files.download(f"model_{EXERCISE}.zip")

# OR save to Google Drive:
# drive.mount('/content/drive')
# shutil.copytree(model_dir, f"/content/drive/MyDrive/models/{EXERCISE}", dirs_exist_ok=True)

print("‚úÖ Model files ready for download")
print(f"   Location: {model_dir}")
