In [None]:
"""
============================================================================
IMPROVED SVM - MULTI-FEATURE STRATEGY
Claude
============================================================================
✅ Data leakage fixed
✅ Alternative feature sets added
✅ Ensemble approach
============================================================================
"""

import sys
import subprocess
# Bootstrap: install the third-party packages this notebook imports below,
# using the pip that belongs to the interpreter currently running.
print("üì¶ K√ºt√ºphaneler y√ºkleniyor...")
required_packages = ["yfinance", "ta", "scikit-learn", "pandas", "numpy",
                     "scikit-optimize"]
pip_command = [sys.executable, "-m", "pip", "install", "-q", *required_packages]
subprocess.check_call(pip_command)

import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, classification_report
from skopt import BayesSearchCV
from skopt.space import Real
import warnings
# Suppress every warning category in this process for the rest of the run.
warnings.filterwarnings('ignore')

# Status message: installation and imports finished.
print("‚úÖ Kurulum tamamlandƒ±!\n")

# ============================================================================
# DATA DOWNLOAD
# ============================================================================
print("="*80)
print("VERƒ∞ √áEKME")
print("="*80)

# Display name -> Yahoo Finance symbol.
tickers = {
    'KSE100': '^KSE',
    'KOSPI': '^KS11',
    'Nikkei225': '^N225',
    # The Shenzhen Component index is '399001.SZ' on Yahoo Finance.
    # '000001.SS' (used previously) is the SSE (Shanghai) Composite, which
    # does not match the 'SZSE' label.
    'SZSE': '399001.SZ'
}

# name -> OHLCV DataFrame; only indices that downloaded successfully appear.
all_data = {}
for name, ticker in tickers.items():
    print(f"{name}...", end=" ")
    try:
        data = yf.download(ticker, start="2011-01-01", end="2020-09-27",
                          progress=False, auto_adjust=True)

        if data.empty:
            print("‚ùå")
            continue

        # Flatten a (field, ticker) MultiIndex down to the field names so the
        # column selection below works for either column layout.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
        data = data.dropna()
        all_data[name] = data
        print(f"‚úÖ {len(data)} g√ºn")
    except Exception as e:
        # Best effort: report the failure and continue with the next index.
        print(f"‚ùå {e}")

print(f"‚úÖ {len(all_data)} borsa\n")

# ============================================================================
# FEATURE SET 1: TECHNICAL INDICATORS (Original)
# ============================================================================

def feature_set_1_technical(df):
    """Return a copy of ``df`` extended with the original technical indicators.

    Reads the ``High``, ``Low`` and ``Close`` columns and appends, in order:
    ``Stochastic_K``, ``Stochastic_D``, ``ROC``, ``Williams_R``, ``Momentum``,
    ``Disparity_5``, ``Disparity_14``, ``OSCP``, ``CCI``, ``RSI``,
    ``Pivot_Point``, ``S1``, ``S2``, ``R1`` and ``R2``.  The input frame is
    not modified.
    """
    out = df.copy()

    hi = out['High'].squeeze()
    lo = out['Low'].squeeze()
    px = out['Close'].squeeze()

    # Stochastic oscillator: %K over 14 rows and its 3-row signal line (%D).
    stochastic = ta.momentum.StochasticOscillator(hi, lo, px, window=14, smooth_window=3)
    out['Stochastic_K'] = stochastic.stoch()
    out['Stochastic_D'] = stochastic.stoch_signal()

    # Rate of change over 10 rows.
    roc_indicator = ta.momentum.ROCIndicator(px, window=10)
    out['ROC'] = roc_indicator.roc()

    # Williams %R with a 14-row look-back.
    williams = ta.momentum.WilliamsRIndicator(hi, lo, px, lbp=14)
    out['Williams_R'] = williams.williams_r()

    # Momentum: close minus the close four rows earlier.
    out['Momentum'] = px.diff(4)

    # Simple moving averages shared by the disparity and OSCP features.
    sma5 = px.rolling(5).mean()
    sma14 = px.rolling(14).mean()
    sma10 = px.rolling(10).mean()
    sma5_nonzero = sma5 != 0

    # Disparity: close as a percentage of its moving average (100 when the
    # average is exactly zero).
    out['Disparity_5'] = np.where(sma5_nonzero, (px / sma5) * 100, 100)
    out['Disparity_14'] = np.where(sma14 != 0, (px / sma14) * 100, 100)

    # OSCP: relative gap between the 5- and 10-row averages (0 when the
    # 5-row average is exactly zero).
    out['OSCP'] = np.where(sma5_nonzero, ((sma5 - sma10) / sma5), 0)

    # Commodity Channel Index over 20 rows.
    out['CCI'] = ta.trend.CCIIndicator(hi, lo, px, window=20).cci()

    # Relative Strength Index over 14 rows.
    out['RSI'] = ta.momentum.RSIIndicator(px, window=14).rsi()

    # Pivot levels are computed from the previous row's high/low/close.
    last_high, last_low, last_close = hi.shift(1), lo.shift(1), px.shift(1)
    last_range = last_high - last_low

    out['Pivot_Point'] = (last_high + last_low + last_close) / 3
    pivot = out['Pivot_Point']
    out['S1'] = (pivot * 2) - last_high
    out['S2'] = pivot - last_range
    out['R1'] = (pivot * 2) - last_low
    out['R2'] = pivot + last_range

    return out

# ============================================================================
# FEATURE SET 2: SIMPLIFIED MOMENTUM/VOLATILITY
# ============================================================================

def feature_set_2_simplified(df):
    """Return a copy of ``df`` with simplified momentum/volatility features.

    Reads the ``Close`` and ``Volume`` columns and adds:

    * ``Daily_Momentum``: +1 when the close is above the previous row's
      close, otherwise -1 (an unchanged close counts as -1).  NaN when either
      close is missing, which includes the first row.
    * ``Daily_Volatility``: one-row fractional change of the close.
    * ``Index_Momentum_5D`` / ``Index_Momentum_10D``: 5- and 10-row rolling
      means of ``Daily_Momentum``.
    * ``Index_Volatility_5D`` / ``Index_Volatility_10D``: 5- and 10-row
      rolling means of ``Daily_Volatility``.
    * ``Volume_Change``: one-row percentage change of ``Volume``.
    * ``Volume_Momentum_5D``: 5-row rolling mean of ``Volume_Change``.
    * ``Price_Position``: close divided by its 20-row rolling maximum.

    The input frame is not modified.
    """
    df = df.copy()
    close = df['Close']
    prev_close = close.shift(1)

    # Binary daily momentum (+1 / -1).  A comparison against NaN evaluates to
    # False, which would label the first row (no previous close) as a "down"
    # day and leak that made-up value into the rolling means below, so rows
    # without both closes are masked to NaN instead.
    direction = (close > prev_close).astype(int) * 2 - 1
    df['Daily_Momentum'] = direction.where(close.notna() & prev_close.notna())

    # Daily volatility (fractional change of the close).
    df['Daily_Volatility'] = (close - prev_close) / prev_close

    # 5-row averages.
    df['Index_Momentum_5D'] = df['Daily_Momentum'].rolling(5).mean()
    df['Index_Volatility_5D'] = df['Daily_Volatility'].rolling(5).mean()

    # 10-row averages.
    df['Index_Momentum_10D'] = df['Daily_Momentum'].rolling(10).mean()
    df['Index_Volatility_10D'] = df['Daily_Volatility'].rolling(10).mean()

    # Volume momentum.
    df['Volume_Change'] = df['Volume'].pct_change()
    df['Volume_Momentum_5D'] = df['Volume_Change'].rolling(5).mean()

    # Price position (current close / 20-row maximum close).
    df['Price_Position'] = close / close.rolling(20).max()

    return df

# ============================================================================
# FEATURE SET 3: TREND AND PATTERN
# ============================================================================

def feature_set_3_trends(df):
    """Return a copy of ``df`` with trend and pattern features.

    Reads the ``Close``, ``High`` and ``Low`` columns and adds:

    * ``MA5_20_Cross`` / ``MA5_50_Cross``: 1 when the 5-row moving average is
      above the 20- / 50-row average, 0 when it is not, and NaN while the
      slower average is not yet available.
    * ``Price_MA20_Ratio``: close divided by its 20-row moving average.
    * ``BB_High``, ``BB_Low``, ``BB_Width``: Bollinger Band (window 20,
      2 deviations) upper/lower band indicators and band width.
    * ``ATR``: Average True Range over 14 rows.
    * ``ADX``: Average Directional Index over 14 rows.
    * ``MACD``, ``MACD_Signal``, ``MACD_Diff``: MACD line, signal line and
      their difference, using the library's default windows.

    The input frame is not modified.
    """
    df = df.copy()
    close = df['Close']
    high = df['High']
    low = df['Low']

    # Moving-average crossovers.
    ma5 = close.rolling(5).mean()
    ma20 = close.rolling(20).mean()
    ma50 = close.rolling(50).mean()

    # A comparison against NaN is False, so without the mask every warm-up
    # row would report "no crossover" (0) and pass a later dropna as if it
    # were a real signal.  Mask on the slower average; its window contains
    # the faster one, so it is the last of the two to become available.
    df['MA5_20_Cross'] = (ma5 > ma20).astype(int).where(ma20.notna())
    df['MA5_50_Cross'] = (ma5 > ma50).astype(int).where(ma50.notna())
    df['Price_MA20_Ratio'] = close / ma20

    # Bollinger Bands.
    bb = ta.volatility.BollingerBands(close, window=20, window_dev=2)
    df['BB_High'] = bb.bollinger_hband_indicator()
    df['BB_Low'] = bb.bollinger_lband_indicator()
    df['BB_Width'] = bb.bollinger_wband()

    # ATR (Average True Range).
    df['ATR'] = ta.volatility.AverageTrueRange(high, low, close, window=14).average_true_range()

    # ADX (trend strength).
    df['ADX'] = ta.trend.ADXIndicator(high, low, close, window=14).adx()

    # MACD.
    macd = ta.trend.MACD(close)
    df['MACD'] = macd.macd()
    df['MACD_Signal'] = macd.macd_signal()
    df['MACD_Diff'] = macd.macd_diff()

    return df

# ============================================================================
# DATA PREPARATION (DATA LEAKAGE FIXED!)
# ============================================================================

def prepare_data_no_leakage(df, feature_set='set1', test_ratio=0.2):
    """Build lagged, train-scaled features and next-row direction labels.

    Steps, in order:

    1. Compute the requested feature set on a copy of ``df``.
    2. Label each row with ``Target`` = 1 when the next row's ``Close`` is
       higher than this row's, else 0; the last row (no next close) is dropped.
    3. Lag every feature by one row, then drop rows whose lagged features
       are missing or infinite.
    4. Split chronologically: the first ``1 - test_ratio`` share of rows is
       the training set, the remainder the test set.
    5. Fit a ``MinMaxScaler`` on the training rows only and apply it to both.

    Args:
        df: OHLCV frame with ``Open``/``High``/``Low``/``Close``/``Volume``
            columns, ordered oldest to newest.
        feature_set: ``'set1'``, ``'set2'`` or ``'set3'`` for a single
            feature set; any other value (conventionally ``'all'``) combines
            all three.
        test_ratio: share of the rows, taken from the end, used for testing.
            Must be strictly between 0 and 1.

    Returns:
        ``(X_train_scaled, X_test_scaled, y_train, y_test)`` where the X
        values are DataFrames of ``<feature>_lag1`` columns and the y values
        are NumPy arrays of 0/1 labels.

    Raises:
        ValueError: if ``test_ratio`` is outside (0, 1) or if either side of
            the split would be empty.
    """
    if not 0 < test_ratio < 1:
        raise ValueError(f"test_ratio must be between 0 and 1 (exclusive), got {test_ratio!r}")

    df = df.copy()

    set1_features = ['Stochastic_K', 'Stochastic_D', 'ROC', 'Williams_R',
                     'Momentum', 'Disparity_5', 'Disparity_14', 'OSCP',
                     'CCI', 'RSI', 'Pivot_Point', 'S1', 'S2', 'R1', 'R2']
    set2_features = ['Daily_Momentum', 'Daily_Volatility',
                     'Index_Momentum_5D', 'Index_Volatility_5D',
                     'Index_Momentum_10D', 'Index_Volatility_10D',
                     'Volume_Momentum_5D', 'Price_Position']
    set3_features = ['MA5_20_Cross', 'MA5_50_Cross', 'Price_MA20_Ratio',
                     'BB_High', 'BB_Low', 'BB_Width', 'ATR', 'ADX',
                     'MACD', 'MACD_Signal', 'MACD_Diff']

    # Select the feature set.
    if feature_set == 'set1':
        df = feature_set_1_technical(df)
        features = set1_features
    elif feature_set == 'set2':
        df = feature_set_2_simplified(df)
        features = set2_features
    elif feature_set == 'set3':
        df = feature_set_3_trends(df)
        features = set3_features
    else:  # 'all' (or any other value): combine every feature set
        df = feature_set_1_technical(df)
        df = feature_set_2_simplified(df)
        df = feature_set_3_trends(df)
        features = set1_features + set2_features + set3_features

    # Target: does the next row close higher than this one?
    df['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)
    df = df.iloc[:-1].copy()

    # Treat infinities (e.g. from a division by zero) as missing values.
    df = df.replace([np.inf, -np.inf], np.nan)

    # 1. Lag BEFORE dropping any row.  Shifting after a dropna would pull the
    #    "previous" value from the previous surviving row, which is more than
    #    one trading day back wherever an interior row was removed.
    lagged_features = [f'{feat}_lag1' for feat in features]
    for feat, lagged_col in zip(features, lagged_features):
        df[lagged_col] = df[feat].shift(1)

    # Only the lagged columns feed the model, so only they decide which rows
    # are usable.
    df = df.dropna(subset=lagged_features + ['Target'])

    X = df[lagged_features].copy()
    y = df['Target'].copy()

    # 2. Chronological train/test split (no shuffling).
    n_train = int(len(X) * (1 - test_ratio))
    if n_train == 0 or n_train == len(X):
        raise ValueError(
            f"Not enough usable rows ({len(X)}) to split with test_ratio={test_ratio!r}"
        )
    X_train = X.iloc[:n_train].copy()
    X_test = X.iloc[n_train:].copy()
    y_train = y.iloc[:n_train].values
    y_test = y.iloc[n_train:].values

    # 3. Fit the scaler on the training rows only, so test-set ranges never
    #    influence the transformation.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    X_train_scaled = pd.DataFrame(X_train_scaled, columns=lagged_features, index=X_train.index)
    X_test_scaled = pd.DataFrame(X_test_scaled, columns=lagged_features, index=X_test.index)

    print(f"  Veri: {len(X)} | Train: {len(X_train)} | Test: {len(X_test)}")
    print(f"  Features: {len(lagged_features)} | Up%: {y_train.mean()*100:.1f}%")

    return X_train_scaled, X_test_scaled, y_train, y_test

# ============================================================================
# MODEL TRAINING
# ============================================================================

def train_model(X_train, y_train, X_test, y_test, model_name):
    """Tune a linear SVM's ``C`` with Bayesian search and score it on the test set.

    Args:
        X_train: training feature matrix; rows are assumed to be in
            chronological order (oldest first), as the forward-chaining
            cross-validation below relies on it.
        y_train: training labels (0/1).
        X_test: held-out feature matrix.
        y_test: held-out labels (0/1).
        model_name: label printed in the progress message.

    Returns:
        dict with keys ``'model'`` (the refitted best estimator),
        ``'best_C'``, ``'cv_score'`` (best mean cross-validated accuracy),
        ``'test_acc'``, ``'test_f1'`` and ``'y_pred'`` (test predictions).
    """
    # Imported locally so this function does not depend on the module-level
    # import list.
    from sklearn.model_selection import TimeSeriesSplit

    # Forward-chaining folds: every validation fold lies strictly after its
    # training fold.  A shuffled K-fold would validate on days that sit
    # between training days, so C would be tuned with look-ahead information
    # and the reported CV score would be optimistic.
    cv = TimeSeriesSplit(n_splits=10)

    svm = SVC(kernel='linear', max_iter=50000, random_state=42)

    # C is searched on a log scale between 1e-4 and 1e3.
    search_spaces = {'C': Real(1e-4, 1e3, prior='log-uniform')}

    bayes_search = BayesSearchCV(
        svm, search_spaces, n_iter=50, cv=cv,
        scoring='accuracy', n_jobs=-1, random_state=42
    )

    print(f"\n  {model_name} - Bayesian Optimization...")
    bayes_search.fit(X_train, y_train)

    best_C = bayes_search.best_params_['C']
    cv_score = bayes_search.best_score_

    # Evaluate the refitted best estimator on the held-out test set.
    y_pred = bayes_search.best_estimator_.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred)
    test_f1 = f1_score(y_test, y_pred, zero_division=0)

    print(f"  ‚úì Best C: {best_C:.4f}")
    print(f"  ‚úì CV Score: {cv_score:.4f}")
    print(f"  ‚úì Test Acc: {test_acc:.4f}")
    print(f"  ‚úì Test F1: {test_f1:.4f}")

    return {
        'model': bayes_search.best_estimator_,
        'best_C': best_C,
        'cv_score': cv_score,
        'test_acc': test_acc,
        'test_f1': test_f1,
        'y_pred': y_pred
    }

# ============================================================================
# RUN
# ============================================================================

rule = "=" * 80
print("\n" + rule)
print("MODEL Eƒûƒ∞Tƒ∞Mƒ∞ - FEATURE SET KAR≈ûILA≈ûTIRMASI")
print(rule)

# index name -> {feature-set key -> result dict returned by train_model}
results_all = {}

# (feature-set key, display label) pairs, evaluated in this order per index.
feature_set_labels = [
    ('set1', 'Technical Indicators'),
    ('set2', 'Simplified Momentum'),
    ('set3', 'Trend & Pattern'),
    ('all', 'Combined All'),
]

# Only two exchanges are evaluated; any that failed to download are skipped.
available_indices = [idx for idx in ['KOSPI', 'Nikkei225'] if idx in all_data]

for index_name in available_indices:
    print(f"\n{rule}")
    print(f"{index_name}")
    print(f"{rule}")

    index_results = results_all[index_name] = {}

    for feature_set, set_name in feature_set_labels:
        print(f"\nüìä {set_name}")
        print("-" * 80)

        try:
            X_train, X_test, y_train, y_test = prepare_data_no_leakage(
                all_data[index_name],
                feature_set=feature_set
            )
            result = train_model(X_train, y_train, X_test, y_test, set_name)
        except Exception as e:
            # Best effort: report the failure and move on to the next set.
            print(f"  ‚ùå Error: {e}")
        else:
            index_results[feature_set] = result

# ============================================================================
# RESULTS
# ============================================================================

print("\n" + "="*80)
print("üìä FINAL RESULTS - FEATURE SET COMPARISON")
print("="*80)

# Feature-set key -> short label for the table; also fixes the row order.
set_names = {'set1': 'Technical', 'set2': 'Simplified',
            'set3': 'Trend', 'all': 'Combined'}

for index_name, results in results_all.items():
    print(f"\n{index_name}")
    print("-" * 80)
    print(f"{'Feature Set':<25} {'Best C':<12} {'CV Score':<12} {'Test Acc':<12} {'Test F1':<12}")
    print("-" * 80)

    for fset in set_names:
        if fset in results:
            r = results[fset]
            print(f"{set_names[fset]:<25} {r['best_C']:<12.4f} {r['cv_score']:<12.4f} "
                  f"{r['test_acc']:<12.4f} {r['test_f1']:<12.4f}")

    # Best model by test accuracy.  `results` is empty when every feature set
    # failed for this index, and max() on an empty sequence raises
    # ValueError, so skip the summary line in that case.
    if not results:
        print("\n(no successful models)")
        continue

    best = max(results.items(), key=lambda x: x[1]['test_acc'])
    print(f"\n‚≠ê BEST: {set_names[best[0]]} (Acc: {best[1]['test_acc']:.4f})")

print("\n" + "="*80)
print("‚úÖ T√úM TESTLER TAMAMLANDI")
print("="*80)

üì¶ K√ºt√ºphaneler y√ºkleniyor...
‚úÖ Kurulum tamamlandƒ±!

VERƒ∞ √áEKME
KSE100... ‚úÖ 2346 g√ºn
KOSPI... ‚úÖ 2397 g√ºn
Nikkei225... ‚úÖ 2382 g√ºn
SZSE... ‚úÖ 2366 g√ºn
‚úÖ 4 borsa


MODEL Eƒûƒ∞Tƒ∞Mƒ∞ - FEATURE SET KAR≈ûILA≈ûTIRMASI

KOSPI

üìä Technical Indicators
--------------------------------------------------------------------------------
  Veri: 2376 | Train: 1900 | Test: 476
  Features: 15 | Up%: 51.4%

  Technical Indicators - Bayesian Optimization...
