# 🧠 N-Beats Model Eğitimi - Google Colab

Bu notebook N-Beats (Neural Basis Expansion Analysis for Time Series) modelini JetX verileri üzerinde eğitir.

## 📋 N-Beats Özellikleri:
- **Interpretable**: Trend ve seasonality ayrıştırması
- **JetX Optimized**: Crash/pump pattern detection
- **GPU Accelerated**: PyTorch CUDA desteği
- **Multi-output**: Value, probability, confidence

---

**⚠️ Önce `colab_setup.ipynb` notebook'unu çalıştırın!**

In [None]:
# 🔧 Kurulum ve Import'lar
import sys
import os
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import time
from datetime import datetime

# Proje path'i ekle
sys.path.insert(0, '/content/predictor_1/src')

# Model import'ları
from models.deep_learning.n_beats.n_beats_model import NBeatsPredictor
from data_processing.loader import load_data_from_sqlite
from data_processing.splitter import create_sequences

# Device selection: use CUDA when PyTorch reports it as available, else CPU.
cuda_available = torch.cuda.is_available()
device = torch.device('cuda' if cuda_available else 'cpu')
print(f"🎯 Device: {device}")
if cuda_available:
    print(f"📱 GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is scaled by 1024**3 before being shown with the GB label.
    gpu_total_memory = torch.cuda.get_device_properties(0).total_memory
    print(f"💾 GPU Memory: {gpu_total_memory / 1024**3:.1f} GB")

# Global plotting style: matplotlib style sheet first, then the seaborn
# colour palette (the order is kept because both touch the colour cycle).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

print("✅ N-Beats kurulumu tamamlandı!")

In [None]:
# 📊 Data loading
print("📊 JETX VERİSİ YÜKLEME")
print("=" * 50)

# Read the SQLite database from its fixed location in the Colab runtime.
db_path = "/content/colab_jetx_data.db"
df = load_data_from_sqlite(db_path)

# Flatten the loaded records into a plain list of values.
# NOTE(review): assumes every item of `df.data` is row-like with the JetX
# value at index 1 — confirm against load_data_from_sqlite.
jetx_data = [record[1] for record in df.data]
print(f"✅ {len(jetx_data)} JetX verisi yüklendi")

# Summary statistics over the whole series.
jetx_array = np.array(jetx_data)
print(f"📈 İstatistikler:")
print(f"   - Min: {np.min(jetx_array):.2f}")
print(f"   - Max: {np.max(jetx_array):.2f}")
print(f"   - Mean: {np.mean(jetx_array):.2f}")
print(f"   - Std: {np.std(jetx_array):.2f}")
# Share of values below the 1.5 threshold, expressed as a percentage.
print(f"   - Crash rate (<1.5): {np.mean(jetx_array < 1.5)*100:.1f}%")

# 2x2 overview figure: series, histogram, box plot, autocorrelation.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(series_ax, hist_ax), (box_ax, acf_ax) = axes

# Top-left: the first 200 values with the 1.5 threshold drawn as a line.
series_ax.plot(jetx_data[:200])
series_ax.set(title='İlk 200 JetX Değeri', xlabel='Zaman', ylabel='JetX Değeri')
series_ax.axhline(y=1.5, color='r', linestyle='--', alpha=0.7, label='Threshold')
series_ax.legend()

# Top-right: value distribution with the same threshold marked.
hist_ax.hist(jetx_data, bins=50, alpha=0.7, edgecolor='black')
hist_ax.set(title='JetX Değer Dağılımı', xlabel='JetX Değeri', ylabel='Frekans')
hist_ax.axvline(x=1.5, color='r', linestyle='--', alpha=0.7, label='Threshold')
hist_ax.legend()

# Bottom-left: box plot of all values.
box_ax.boxplot(jetx_data)
box_ax.set(title='JetX Değer Box Plot', ylabel='JetX Değeri')

# Bottom-right: Pearson correlation between the series and itself shifted
# by each lag; lags stop below 50 and below a tenth of the series length.
lags = range(1, min(50, len(jetx_data)//10))
autocorr = [
    np.corrcoef(jetx_array[:-lag], jetx_array[lag:])[0, 1]
    for lag in lags
]
acf_ax.plot(lags, autocorr)
acf_ax.set(title='Autocorrelation', xlabel='Lag', ylabel='Correlation')

plt.tight_layout()
plt.show()

print("🎯 Veri analizi tamamlandı!")

In [None]:
# 🔧 Data preparation
print("🔧 N-BEATS İÇİN VERİ HAZIRLAMA")
print("=" * 50)

# Configuration constants read by the later cells.
SEQUENCE_LENGTH = 100  # kept short to limit GPU memory use
TRAIN_SPLIT = 0.8
VALIDATION_SPLIT = 0.1
BATCH_SIZE = 32

# Contiguous split in stored order: leading train slice, then validation,
# and whatever remains becomes the test slice.
total_samples = len(jetx_data)
n_train = int(total_samples * TRAIN_SPLIT)
n_val = int(total_samples * VALIDATION_SPLIT)
val_end = n_train + n_val

train_data, val_data, test_data = (
    jetx_data[:n_train],
    jetx_data[n_train:val_end],
    jetx_data[val_end:],
)

print(f"📊 Veri bölümü:")
print(f"   - Train: {len(train_data)} samples")
print(f"   - Validation: {len(val_data)} samples")
print(f"   - Test: {len(test_data)} samples")

# Normalisation statistics taken from the training slice only.
# This cell only reports them; it does not rescale the data.
train_array = np.asarray(train_data)
train_mean = train_array.mean()
train_std = train_array.std()
print(f"📏 Normalizasyon parametreleri:")
print(f"   - Mean: {train_mean:.3f}")
print(f"   - Std: {train_std:.3f}")

print("✅ Veri hazırlandı!")

In [None]:
# 🧠 N-Beats model creation
print("🧠 N-BEATS MODEL OLUŞTURMA")
print("=" * 50)

# Keyword arguments handed to NBeatsPredictor.
model_params = dict(
    sequence_length=SEQUENCE_LENGTH,
    hidden_size=256,  # chosen with GPU memory in mind
    num_stacks=3,
    num_blocks=2,  # fewer blocks to save GPU memory
    learning_rate=0.001,
    threshold=1.5,
    crash_weight=2.0,
)

print(f"🔧 Model parametreleri:")
for key, value in model_params.items():
    print(f"   - {key}: {value}")

# Build the predictor wrapper.
nbeats_model = NBeatsPredictor(**model_params)

# Move the wrapped network to the GPU when one is available.
if torch.cuda.is_available():
    nbeats_model.model = nbeats_model.model.cuda()
    print("✅ Model GPU'ya taşındı")

# Walk the parameters once, recording (element count, trainable flag).
param_counts = [
    (param.numel(), param.requires_grad)
    for param in nbeats_model.model.parameters()
]
total_params = sum(count for count, _ in param_counts)
trainable_params = sum(count for count, needs_grad in param_counts if needs_grad)

print(f"📊 Model istatistikleri:")
print(f"   - Total parameters: {total_params:,}")
print(f"   - Trainable parameters: {trainable_params:,}")
# Size estimate counts 4 bytes per parameter.
print(f"   - Model size: ~{total_params * 4 / 1024**2:.1f} MB")

print("✅ N-Beats model hazır!")

In [None]:
# 🚀 Model training
print("🚀 N-BEATS MODEL EĞİTİMİ")
print("=" * 50)

# Training settings.
EPOCHS = 50  # kept modest because of Colab session time limits
VERBOSE = True

print(f"🎯 Eğitim başlıyor...")
print(f"   - Epochs: {EPOCHS}")
print(f"   - Batch size: {BATCH_SIZE}")
print(f"   - Device: {device}")

# Wall-clock reference for the duration report below.
start_time = time.time()

# Everything, including the result summary, stays inside the try so that
# any failure is reported with a traceback instead of aborting the cell.
try:
    # NOTE(review): the separate `val_data` slice is not passed here; the
    # trainer receives validation_split=0.2 instead — presumably it holds
    # out part of `train_data` itself. Confirm against NBeatsPredictor.train.
    train_kwargs = {
        'data': train_data,
        'epochs': EPOCHS,
        'batch_size': BATCH_SIZE,
        'validation_split': 0.2,
        'verbose': VERBOSE,
    }
    history = nbeats_model.train(**train_kwargs)

    # Elapsed training time, reported in minutes.
    training_time = time.time() - start_time
    print(f"\n⏱️ Eğitim süresi: {training_time/60:.2f} dakika")

    # Last recorded value of each loss series.
    train_losses = history['train_losses']
    final_train_loss = train_losses[-1]
    final_val_loss = history['val_losses'][-1]

    print(f"📊 Final sonuçlar:")
    print(f"   - Train Loss: {final_train_loss:.6f}")
    print(f"   - Val Loss: {final_val_loss:.6f}")
    print(f"   - Epochs completed: {len(train_losses)}")

    print("✅ Model eğitimi tamamlandı!")

except Exception as exc:
    print(f"❌ Eğitim hatası: {exc}")
    import traceback
    traceback.print_exc()

In [None]:
# 📈 Training results visualization
# Skip plotting when the training cell did not leave a usable `history`.
if 'history' not in locals() or not history:
    print("⚠️ Eğitim history bulunamadı.")
else:
    print("📈 EĞİTİM SONUÇLARI GÖRSELLEŞTİRME")
    print("=" * 50)

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    train_losses = history['train_losses']
    val_losses = history['val_losses']

    # Epoch numbers start at 1 on the x axis.
    epochs_range = range(1, len(train_losses) + 1)

    # Top-left: full training and validation loss curves.
    full_ax = axes[0, 0]
    full_ax.plot(epochs_range, train_losses, 'b-', label='Training Loss')
    full_ax.plot(epochs_range, val_losses, 'r-', label='Validation Loss')
    full_ax.set(title='Model Loss', xlabel='Epochs', ylabel='Loss')
    full_ax.legend()
    full_ax.grid(True)

    # Top-right: the same curves restricted to the last 20% of epochs.
    start_idx = max(0, int(len(train_losses) * 0.8))
    tail_epochs = epochs_range[start_idx:]
    tail_ax = axes[0, 1]
    tail_ax.plot(tail_epochs, train_losses[start_idx:], 'b-', label='Training Loss')
    tail_ax.plot(tail_epochs, val_losses[start_idx:], 'r-', label='Validation Loss')
    tail_ax.set(title='Model Loss (Son %20)', xlabel='Epochs', ylabel='Loss')
    tail_ax.legend()
    tail_ax.grid(True)

    # Bottom-left: training loss on its own (no learning-rate data is
    # plotted here, only the training loss series again).
    trend_ax = axes[1, 0]
    trend_ax.plot(epochs_range, train_losses, 'g-')
    trend_ax.set(title='Training Loss Trend', xlabel='Epochs', ylabel='Loss')
    trend_ax.grid(True)

    # Bottom-right: best and final losses rendered as a table.
    loss_stats = {
        'Min Train Loss': min(train_losses),
        'Min Val Loss': min(val_losses),
        'Final Train Loss': train_losses[-1],
        'Final Val Loss': val_losses[-1]
    }

    table_ax = axes[1, 1]
    table_ax.axis('off')
    table_data = [[metric, f"{amount:.6f}"] for metric, amount in loss_stats.items()]
    table = table_ax.table(cellText=table_data, colLabels=['Metric', 'Value'], loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.5)

    plt.tight_layout()
    plt.show()

    print("✅ Eğitim sonuçları görselleştirildi!")

In [None]:
# 🧪 Model testing and evaluation
print("🧪 MODEL TEST VE DEĞERLENDİRME")
print("=" * 50)

# Accumulators for the per-window results on the test slice.
test_predictions = []
test_actuals = []
test_probabilities = []
test_confidences = []

print("🔍 Test verisi üzerinde tahminler yapılıyor...")

# Slide a window of SEQUENCE_LENGTH values over the test slice; the value
# immediately after each window is the target for that prediction.
for i in tqdm(range(len(test_data) - SEQUENCE_LENGTH)):
    window_end = i + SEQUENCE_LENGTH
    sequence = test_data[i:window_end]
    actual = test_data[window_end]

    try:
        predicted_value, probability, confidence = nbeats_model.predict(sequence)
    except Exception as exc:
        # The first failure is reported and ends the evaluation loop;
        # results collected so far are kept.
        print(f"⚠️ Tahmin hatası: {exc}")
        break

    test_predictions.append(predicted_value)
    test_actuals.append(actual)
    test_probabilities.append(probability)
    test_confidences.append(confidence)

print(f"✅ {len(test_predictions)} tahmin yapıldı")

# Metrics are only computed when at least one prediction was collected.
if test_predictions:
    test_predictions = np.array(test_predictions)
    test_actuals = np.array(test_actuals)
    test_probabilities = np.array(test_probabilities)
    test_confidences = np.array(test_confidences)

    # Regression metrics on the predicted values.
    residuals = test_predictions - test_actuals
    mae = np.mean(np.abs(residuals))
    mse = np.mean(residuals ** 2)
    rmse = np.sqrt(mse)

    # Classification metrics: actual >= 1.5 is the positive class, and a
    # probability >= 0.5 counts as a positive prediction.
    actual_binary = (test_actuals >= 1.5).astype(int)
    pred_binary = (test_probabilities >= 0.5).astype(int)

    accuracy = np.mean(actual_binary == pred_binary)

    true_positives = np.sum((pred_binary == 1) & (actual_binary == 1))
    predicted_positives = np.sum(pred_binary == 1)
    actual_positives = np.sum(actual_binary == 1)

    # Each ratio falls back to 0 when its denominator would be zero.
    precision = true_positives / predicted_positives if predicted_positives > 0 else 0
    recall = true_positives / actual_positives if actual_positives > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print(f"📊 TEST SONUÇLARI:")
    print(f"   Regression Metrics:")
    print(f"   - MAE: {mae:.4f}")
    print(f"   - RMSE: {rmse:.4f}")
    print(f"   Classification Metrics:")
    print(f"   - Accuracy: {accuracy:.4f}")
    print(f"   - Precision: {precision:.4f}")
    print(f"   - Recall: {recall:.4f}")
    print(f"   - F1-Score: {f1:.4f}")
    print(f"   Confidence:")
    print(f"   - Mean Confidence: {np.mean(test_confidences):.4f}")
    print(f"   - Std Confidence: {np.std(test_confidences):.4f}")

print("✅ Model değerlendirmesi tamamlandı!")

In [None]:
# 📊 Prediction results visualization
# Skip plotting when the evaluation cell produced no predictions.
if 'test_predictions' not in locals() or len(test_predictions) == 0:
    print("⚠️ Tahmin sonuçları bulunamadı.")
else:
    print("📊 TAHMİN SONUÇLARI GÖRSELLEŞTİRME")
    print("=" * 50)

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    (scatter_ax, compare_ax), (error_ax, conf_ax) = axes

    # Top-left: actual vs predicted, with the y = x line over the range
    # of actual values as a reference.
    scatter_ax.scatter(test_actuals, test_predictions, alpha=0.6)
    actual_span = [test_actuals.min(), test_actuals.max()]
    scatter_ax.plot(actual_span, actual_span, 'r--', alpha=0.8)
    scatter_ax.set(xlabel='Actual Values', ylabel='Predicted Values', title='Actual vs Predicted')
    scatter_ax.grid(True)

    # Top-right: the first (up to) 100 predictions against their targets.
    n_show = min(100, len(test_predictions))
    x_axis = range(n_show)
    compare_ax.plot(x_axis, test_actuals[:n_show], 'b-', label='Actual', alpha=0.7)
    compare_ax.plot(x_axis, test_predictions[:n_show], 'r-', label='Predicted', alpha=0.7)
    compare_ax.axhline(y=1.5, color='g', linestyle='--', alpha=0.5, label='Threshold')
    compare_ax.set(
        xlabel='Time Steps',
        ylabel='JetX Value',
        title=f'Time Series Comparison (İlk {n_show} tahmin)',
    )
    compare_ax.legend()
    compare_ax.grid(True)

    # Bottom-left: histogram of signed errors (predicted minus actual).
    errors = test_predictions - test_actuals
    error_ax.hist(errors, bins=30, alpha=0.7, edgecolor='black')
    error_ax.axvline(x=0, color='r', linestyle='--', alpha=0.7)
    error_ax.set(xlabel='Prediction Error', ylabel='Frequency', title='Error Distribution')
    error_ax.grid(True)

    # Bottom-right: reported confidence against absolute error.
    conf_ax.scatter(test_confidences, np.abs(errors), alpha=0.6)
    conf_ax.set(xlabel='Confidence', ylabel='Absolute Error', title='Confidence vs Absolute Error')
    conf_ax.grid(True)

    plt.tight_layout()
    plt.show()

    print("✅ Tahmin sonuçları görselleştirildi!")

In [None]:
# 💾 Model saving
print("💾 MODEL KAYDETME")
print("=" * 50)

# Output directory for the checkpoint and its metadata file.
model_dir = "/content/trained_models/"
os.makedirs(model_dir, exist_ok=True)

# A second-resolution timestamp keeps file names from different runs apart.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
model_filename = f"nbeats_model_{timestamp}.pth"
model_path = os.path.join(model_dir, model_filename)


def _json_safe(value):
    """Return *value* in a form ``json.dump`` can serialize.

    Built-in ``str``/``int``/``float`` values (including subclasses) pass
    through unchanged, so output for already-serializable values is the
    same as before. Anything else — for example a non-builtin numeric
    scalar — is converted with ``float()``.

    Raises:
        TypeError, ValueError: if *value* cannot be converted to ``float``.
    """
    if isinstance(value, (str, int, float)):
        return value
    return float(value)


# Save the model, then write a JSON side-car describing the run. Any
# failure in either step is reported with a traceback.
try:
    nbeats_model.save_model(model_path)
    print(f"✅ Model kaydedildi: {model_path}")

    # `history` may be missing (training cell not run or failed) or falsy;
    # in both cases the losses are recorded as 'Unknown'.
    has_history = 'history' in locals() and bool(history)

    # Run metadata; values from earlier cells fall back to 'Unknown' when
    # those cells did not define them.
    model_info = {
        'model_type': 'N-Beats',
        'timestamp': timestamp,
        'parameters': model_params,
        'training_time': f"{training_time/60:.2f} minutes" if 'training_time' in locals() else 'Unknown',
        'final_losses': {
            'train': _json_safe(history['train_losses'][-1]) if has_history else 'Unknown',
            'val': _json_safe(history['val_losses'][-1]) if has_history else 'Unknown'
        },
        'test_metrics': {
            'mae': _json_safe(mae) if 'mae' in locals() else 'Unknown',
            'rmse': _json_safe(rmse) if 'rmse' in locals() else 'Unknown',
            'accuracy': _json_safe(accuracy) if 'accuracy' in locals() else 'Unknown',
            'f1_score': _json_safe(f1) if 'f1' in locals() else 'Unknown'
        },
        'data_info': {
            'sequence_length': SEQUENCE_LENGTH,
            'train_size': len(train_data),
            'val_size': len(val_data),
            'test_size': len(test_data)
        }
    }

    # Write the metadata next to the checkpoint as JSON.
    import json
    info_filename = f"nbeats_info_{timestamp}.json"
    info_path = os.path.join(model_dir, info_filename)

    with open(info_path, 'w', encoding='utf-8') as f:
        json.dump(model_info, f, indent=2)

    print(f"✅ Model bilgileri kaydedildi: {info_path}")

    # Only prints the copy commands; nothing is copied to Drive here.
    print("\n💡 Google Drive'a kaydetmek için:")
    print(f"   !cp {model_path} /content/drive/MyDrive/")
    print(f"   !cp {info_path} /content/drive/MyDrive/")

except Exception as e:
    print(f"❌ Model kaydetme hatası: {e}")
    import traceback
    traceback.print_exc()

print("\n🎉 N-Beats model eğitimi tamamlandı!")

# 🎯 N-Beats Eğitimi Özeti

## ✅ Tamamlanan İşlemler:
1. **Veri Hazırlama** - JetX verisi yüklendi ve işlendi
2. **Model Oluşturma** - N-Beats modeli oluşturuldu ve CUDA mevcutsa GPU'ya taşındı
3. **Eğitim** - Model JetX verisi üzerinde eğitildi
4. **Değerlendirme** - Test verisi üzerinde performans ölçüldü
5. **Görselleştirme** - Eğitim ve test sonuçları görselleştirildi
6. **Kaydetme** - Model ve bilgiler kaydedildi

## 📊 Model Özellikleri:
- **Architecture**: Neural Basis Expansion Analysis
- **Input**: JetX değer dizileri
- **Output**: Değer, olasılık, güven skoru
- **Optimization**: JetX crash/pump pattern detection
- **Device**: GPU accelerated (CUDA yoksa CPU kullanılır)

## 🚀 Sonraki Adımlar:
1. **Diğer Modeller** - TFT, LSTM, Ensemble eğitimi
2. **Hyperparameter Tuning** - Optuna ile optimizasyon
3. **Production Deployment** - Model serving
4. **Real-time Testing** - Canlı veri testi

---

**🎉 N-Beats eğitimi başarıyla tamamlandı!**