In [None]:
"""
============================================================================
VERSƒ∞YON 1: NORMALIZATION LAG √ñNCESƒ∞
============================================================================
Hipotez: Normalization indicator'lar hesaplandƒ±ktan hemen sonra yapƒ±lmalƒ±,
         lag'den √ñNCE!
============================================================================
"""

import sys
import subprocess
print("üì¶ VERSƒ∞YON 1: Normalization LAG √∂ncesi")
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                      "yfinance", "ta", "scikit-learn", "pandas", "numpy"])

import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Kurulum tamamlandƒ±!\n")

# ============================================================================
# LEVEL 1: DATA DOWNLOAD
# ============================================================================
# Pulls the price history of every index in `tickers` through yfinance. An
# index whose download comes back empty or raises is reported and simply left
# out of `all_data`; the run continues with the remaining indices.
print("=" * 80)
print("LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)")
print("=" * 80)

tickers = dict(
    KSE100='^KSE',
    KOSPI='^KS11',
    Nikkei225='^N225',
    SZSE='000001.SS',
)

all_data = {}
for name, ticker in tickers.items():
    print(f"\nüìä {name} ({ticker})...", end=" ")
    try:
        data = yf.download(ticker, start="2011-01-01", end="2020-09-27",
                           progress=False, auto_adjust=True)

        if not data.empty:
            # When the columns come back as a MultiIndex, keep only level 0 so
            # the plain column names selected below can be used.
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)

            data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy().dropna()

            all_data[name] = data
            print(f"‚úÖ {len(data)} g√ºn")
        else:
            print("‚ùå VERƒ∞ YOK!")
    except Exception as e:
        # Best effort: report the failure and move on to the next index.
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ {len(all_data)} borsa √ßekildi")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 2: TECHNICAL INDICATORS (Table 1)
# ============================================================================
print("=" * 80)
print("LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Table 1)")
print("=" * 80)

def hesapla_teknik_gostergeler(df):
    """Return a copy of ``df`` with the 15 technical-indicator columns added.

    The indicators follow the formulas of Table 1 of the paper being
    replicated. Infinite values anywhere in the result are replaced by NaN;
    NaN rows are not dropped here, that is left to the caller.

    Args:
        df: price frame providing 'High', 'Low' and 'Close' columns.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    out = df.copy()

    # squeeze() turns a single-column frame into a Series.
    hi = out['High'].squeeze()
    lo = out['Low'].squeeze()
    cl = out['Close'].squeeze()

    # 1-2. Stochastic oscillator (%K and its signal line %D)
    stochastic = ta.momentum.StochasticOscillator(
        hi, lo, cl, window=14, smooth_window=3)
    out['Stochastic_K'] = stochastic.stoch()
    out['Stochastic_D'] = stochastic.stoch_signal()

    # 3. Rate of change, 10 periods
    out['ROC'] = ta.momentum.ROCIndicator(cl, window=10).roc()

    # 4. Williams %R, 14 periods
    williams = ta.momentum.WilliamsRIndicator(hi, lo, cl, lbp=14)
    out['Williams_R'] = williams.williams_r()

    # 5. Momentum, 4 periods: C_t - C_{t-4}
    out['Momentum'] = cl.diff(4)

    # Simple moving averages shared by the disparity and OSCP formulas.
    sma5 = cl.rolling(5).mean()
    sma14 = cl.rolling(14).mean()
    sma10 = cl.rolling(10).mean()

    # 6. Disparity 5 (falls back to 100 when the average is exactly zero)
    out['Disparity_5'] = np.where(sma5 != 0, (cl / sma5) * 100, 100)

    # 7. Disparity 14 (the paper says 15, but 14 is what is used)
    out['Disparity_14'] = np.where(sma14 != 0, (cl / sma14) * 100, 100)

    # 8. OSCP (price oscillator); falls back to 0 when the 5-day average is zero
    out['OSCP'] = np.where(sma5 != 0, ((sma5 - sma10) / sma5), 0)

    # 9. CCI, 20 periods
    out['CCI'] = ta.trend.CCIIndicator(hi, lo, cl, window=20).cci()

    # 10. RSI, 14 periods
    out['RSI'] = ta.momentum.RSIIndicator(cl, window=14).rsi()

    # 11-15. Pivot points (Table 1 formulas), built from the previous row's
    # high, low and close.
    last_hi, last_lo, last_cl = hi.shift(1), lo.shift(1), cl.shift(1)
    last_range = last_hi - last_lo

    out['Pivot_Point'] = (last_hi + last_lo + last_cl) / 3
    out['S1'] = (out['Pivot_Point'] * 2) - last_hi
    out['S2'] = out['Pivot_Point'] - last_range
    out['R1'] = (out['Pivot_Point'] * 2) - last_lo
    out['R2'] = out['Pivot_Point'] + last_range

    # Turn +/-inf into NaN.
    return out.replace([np.inf, -np.inf], np.nan)

# Compute the indicator columns for every downloaded index. A failure is
# printed and that index is left out of `all_data_indicators`.
all_data_indicators = {}
for name, data in all_data.items():
    print(f"\n{name}...", end=" ")
    try:
        all_data_indicators[name] = result = hesapla_teknik_gostergeler(data)
        print(f"‚úÖ {len(result)} satƒ±r")
    except Exception as e:
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ G√∂stergeler hesaplandƒ±")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 3: DATA PREPARATION - NORMALIZATION BEFORE LAG
# ============================================================================
print("=" * 80)
print("LEVEL 3: VERƒ∞ HAZIRLAMA (NORMALIZATION LAG √ñNCESƒ∞)")
print("=" * 80)

def veri_hazirla(df, test_ratio=0.2):
    """Build chronological train/test sets (VERSION 1: normalize before lag).

    Steps, in order:
      1. label each row 1 when the next row's close is higher, else 0;
      2. drop rows with missing indicator values;
      3. min-max scale the indicator columns over the WHOLE frame
         (deliberate for this version, although the scaler then sees the
         rows that later become the test set);
      4. shift the scaled indicators by one row to obtain the features;
      5. split by position: the first rows train, the last rows test.

    Args:
        df: frame with a 'Close' column and the 15 indicator columns.
        test_ratio: fraction of the prepared rows placed in the test split.

    Returns:
        (X_train, X_test, y_train, y_test); the X parts are DataFrames of
        the ``*_lag1`` columns, the y parts are NumPy arrays of 0/1 labels.
    """
    frame = df.copy()

    indicator_cols = ['Stochastic_K', 'Stochastic_D', 'ROC', 'Williams_R',
                      'Momentum', 'Disparity_5', 'Disparity_14', 'OSCP',
                      'CCI', 'RSI', 'Pivot_Point', 'S1', 'S2', 'R1', 'R2']
    lag_cols = [f'{col}_lag1' for col in indicator_cols]

    # Forward-looking label; the last row has no next close, so it is dropped.
    frame['Next_Close'] = frame['Close'].shift(-1)
    frame['Target'] = (frame['Next_Close'] > frame['Close']).astype(int)
    frame = frame.iloc[:-1].copy()

    # Remove rows with infinite or missing indicator values.
    frame = frame.replace([np.inf, -np.inf], np.nan)
    frame = frame.dropna(subset=[*indicator_cols, 'Target'])

    # Normalize FIRST, over the whole data set (data leakage, but the paper
    # may be doing it this way).
    scaler = MinMaxScaler()
    frame[indicator_cols] = scaler.fit_transform(frame[indicator_cols])

    # Only now apply the one-row lag.
    for source, lagged in zip(indicator_cols, lag_cols):
        frame[lagged] = frame[source].shift(1)

    # The lag leaves the first row without features.
    frame = frame.dropna(subset=lag_cols)

    X, y = frame[lag_cols].copy(), frame['Target'].copy()

    print(f" Veri: {len(X)} satƒ±r | Up: {y.mean()*100:.1f}%")

    # Time-ordered split (no shuffling).
    n_train = int(len(X) * (1 - test_ratio))
    X_train, X_test = X.iloc[:n_train].copy(), X.iloc[n_train:].copy()
    y_train, y_test = y.iloc[:n_train].values, y.iloc[n_train:].values

    print(f" Train: {len(X_train)} | Test: {len(X_test)}")

    return X_train, X_test, y_train, y_test

# Prepare the train/test splits of every index. A failure is printed and
# that index is left out of `prepared_data`.
prepared_data = {}
for name, data in all_data_indicators.items():
    print(f"\n{name}:")
    try:
        X_train, X_test, y_train, y_test = veri_hazirla(data)
        prepared_data[name] = dict(
            X_train=X_train, X_test=X_test,
            y_train=y_train, y_test=y_test,
        )
    except Exception as e:
        print(f" ‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ {len(prepared_data)} borsa hazƒ±r")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 4: SVM, LINEAR KERNEL ONLY (QUICK TEST)
# ============================================================================
# Grid-searches the SVC penalty C with 10-fold stratified cross-validation on
# the training split, then scores the best estimator on the held-out test
# split.
print("="*80)
print("LEVEL 4: SVM LINEAR KERNEL (Hƒ±zlƒ± Test)")
print("="*80)

for name in ['KOSPI']:  # quick test on KOSPI only
    print(f"\n{name}:")
    print(f" {'-'*70}")

    # The download, indicator and preparation stages print failures and skip
    # the index instead of raising, so the entry may be missing. A bare
    # prepared_data[name] here would raise KeyError outside the try below and
    # abort the whole cell, hence the guarded lookup.
    data = prepared_data.get(name)
    if data is None:
        print(f" ‚ùå {name}: no prepared data (an earlier stage failed), skipping")
        continue

    try:
        # Candidate values of the penalty parameter C.
        param_grid = {
            'C': [0.001, 0.01, 0.1, 1, 4, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
        }

        # NOTE(review): shuffle=True mixes earlier and later rows of the time
        # series inside the folds - confirm this matches the paper's protocol.
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
        svm = SVC(kernel='linear', max_iter=50000, random_state=42)
        grid = GridSearchCV(svm, param_grid, cv=cv, scoring='accuracy', n_jobs=-1, verbose=0)
        grid.fit(data['X_train'], data['y_train'])

        # Evaluate the winning configuration on the chronologically later rows.
        best_model = grid.best_estimator_
        y_pred = best_model.predict(data['X_test'])

        acc = accuracy_score(data['y_test'], y_pred)
        f1 = f1_score(data['y_test'], y_pred, zero_division=0)

        print(f" Best C: {grid.best_params_['C']}")
        print(f" CV Score: {grid.best_score_:.4f}")
        print(f" Test Acc: {acc:.4f} | F1: {f1:.4f}")

    except Exception as e:
        # Best effort: report the failure and keep the cell running.
        print(f" ‚ùå {e}")

print("\n" + "="*80)
print("‚úÖ VERSƒ∞YON 1 TEST TAMAMLANDI")
print("="*80)

üì¶ VERSƒ∞YON 1: Normalization LAG √∂ncesi
‚úÖ Kurulum tamamlandƒ±!

LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)

üìä KSE100 (^KSE)... ‚úÖ 2346 g√ºn

üìä KOSPI (^KS11)... ‚úÖ 2397 g√ºn

üìä Nikkei225 (^N225)... ‚úÖ 2382 g√ºn

üìä SZSE (000001.SS)... ‚úÖ 2366 g√ºn

‚úÖ 4 borsa √ßekildi

LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Table 1)

KSE100... ‚úÖ 2346 satƒ±r

KOSPI... ‚úÖ 2397 satƒ±r

Nikkei225... ‚úÖ 2382 satƒ±r

SZSE... ‚úÖ 2366 satƒ±r

‚úÖ G√∂stergeler hesaplandƒ±

LEVEL 3: VERƒ∞ HAZIRLAMA (NORMALIZATION LAG √ñNCESƒ∞)

KSE100:
 Veri: 2325 satƒ±r | Up: 53.5%
 Train: 1860 | Test: 465

KOSPI:
 Veri: 2376 satƒ±r | Up: 52.4%
 Train: 1900 | Test: 476

Nikkei225:
 Veri: 2361 satƒ±r | Up: 53.1%
 Train: 1888 | Test: 473

SZSE:
 Veri: 2345 satƒ±r | Up: 52.8%
 Train: 1876 | Test: 469

‚úÖ 4 borsa hazƒ±r

LEVEL 4: SVM LINEAR KERNEL (Hƒ±zlƒ± Test)

KOSPI:
 ----------------------------------------------------------------------
 Best C: 0.001
 CV Score: 0.5137
 Test Acc: 0.5630 | F1: 0.7204

‚ú

In [None]:
"""
============================================================================
VERSƒ∞YON 2: TARGET SAME-DAY
============================================================================
Hipotez: Target aynƒ± g√ºn i√ßinde olmalƒ±!
         Feature: T-1 g√ºn√º indicator'larƒ±
         Target: T g√ºn√º Close > T g√ºn√º Open mi? (veya T > T-1)
============================================================================
"""

import sys
import subprocess
print("üì¶ VERSƒ∞YON 2: Target Same-Day")
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                      "yfinance", "ta", "scikit-learn", "pandas", "numpy"])

import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Kurulum tamamlandƒ±!\n")

# ============================================================================
# LEVEL 1: DATA DOWNLOAD
# ============================================================================
# Pulls the price history of every index in `tickers` through yfinance. An
# index whose download comes back empty or raises is reported and simply left
# out of `all_data`; the run continues with the remaining indices.
print("=" * 80)
print("LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)")
print("=" * 80)

tickers = dict(
    KSE100='^KSE',
    KOSPI='^KS11',
    Nikkei225='^N225',
    SZSE='000001.SS',
)

all_data = {}
for name, ticker in tickers.items():
    print(f"\nüìä {name} ({ticker})...", end=" ")
    try:
        data = yf.download(ticker, start="2011-01-01", end="2020-09-27",
                           progress=False, auto_adjust=True)

        if not data.empty:
            # When the columns come back as a MultiIndex, keep only level 0 so
            # the plain column names selected below can be used.
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)

            data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy().dropna()

            all_data[name] = data
            print(f"‚úÖ {len(data)} g√ºn")
        else:
            print("‚ùå VERƒ∞ YOK!")
    except Exception as e:
        # Best effort: report the failure and move on to the next index.
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ {len(all_data)} borsa √ßekildi")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 2: TECHNICAL INDICATORS (Table 1)
# ============================================================================
print("=" * 80)
print("LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Table 1)")
print("=" * 80)

def hesapla_teknik_gostergeler(df):
    """Return a copy of ``df`` with the 15 technical-indicator columns added.

    The indicators follow the formulas of Table 1 of the paper being
    replicated. Infinite values anywhere in the result are replaced by NaN;
    NaN rows are not dropped here, that is left to the caller.

    Args:
        df: price frame providing 'High', 'Low' and 'Close' columns.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    out = df.copy()

    # squeeze() turns a single-column frame into a Series.
    hi = out['High'].squeeze()
    lo = out['Low'].squeeze()
    cl = out['Close'].squeeze()

    # 1-2. Stochastic oscillator (%K and its signal line %D)
    stochastic = ta.momentum.StochasticOscillator(
        hi, lo, cl, window=14, smooth_window=3)
    out['Stochastic_K'] = stochastic.stoch()
    out['Stochastic_D'] = stochastic.stoch_signal()

    # 3. Rate of change, 10 periods
    out['ROC'] = ta.momentum.ROCIndicator(cl, window=10).roc()

    # 4. Williams %R, 14 periods
    williams = ta.momentum.WilliamsRIndicator(hi, lo, cl, lbp=14)
    out['Williams_R'] = williams.williams_r()

    # 5. Momentum, 4 periods: C_t - C_{t-4}
    out['Momentum'] = cl.diff(4)

    # Simple moving averages shared by the disparity and OSCP formulas.
    sma5 = cl.rolling(5).mean()
    sma14 = cl.rolling(14).mean()
    sma10 = cl.rolling(10).mean()

    # 6. Disparity 5 (falls back to 100 when the average is exactly zero)
    out['Disparity_5'] = np.where(sma5 != 0, (cl / sma5) * 100, 100)

    # 7. Disparity 14 (the paper says 15, but 14 is what is used)
    out['Disparity_14'] = np.where(sma14 != 0, (cl / sma14) * 100, 100)

    # 8. OSCP (price oscillator); falls back to 0 when the 5-day average is zero
    out['OSCP'] = np.where(sma5 != 0, ((sma5 - sma10) / sma5), 0)

    # 9. CCI, 20 periods
    out['CCI'] = ta.trend.CCIIndicator(hi, lo, cl, window=20).cci()

    # 10. RSI, 14 periods
    out['RSI'] = ta.momentum.RSIIndicator(cl, window=14).rsi()

    # 11-15. Pivot points (Table 1 formulas), built from the previous row's
    # high, low and close.
    last_hi, last_lo, last_cl = hi.shift(1), lo.shift(1), cl.shift(1)
    last_range = last_hi - last_lo

    out['Pivot_Point'] = (last_hi + last_lo + last_cl) / 3
    out['S1'] = (out['Pivot_Point'] * 2) - last_hi
    out['S2'] = out['Pivot_Point'] - last_range
    out['R1'] = (out['Pivot_Point'] * 2) - last_lo
    out['R2'] = out['Pivot_Point'] + last_range

    # Turn +/-inf into NaN.
    return out.replace([np.inf, -np.inf], np.nan)

# Compute the indicator columns for every downloaded index. A failure is
# printed and that index is left out of `all_data_indicators`.
all_data_indicators = {}
for name, data in all_data.items():
    print(f"\n{name}...", end=" ")
    try:
        all_data_indicators[name] = result = hesapla_teknik_gostergeler(data)
        print(f"‚úÖ {len(result)} satƒ±r")
    except Exception as e:
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ G√∂stergeler hesaplandƒ±")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 3: DATA PREPARATION - TARGET SAME-DAY
# ============================================================================
print("=" * 80)
print("LEVEL 3: VERƒ∞ HAZIRLAMA (TARGET SAME-DAY)")
print("=" * 80)

def veri_hazirla(df, test_ratio=0.2):
    """Build chronological train/test sets (VERSION 2: same-day target).

    Every row pairs the previous row's indicator values (day T-1) with a
    label that is 1 when the row's close (day T) is above the previous close.
    The min-max scaler is fitted on the training rows only and then applied
    to the test rows.

    Args:
        df: frame with a 'Close' column and the 15 indicator columns.
        test_ratio: fraction of the prepared rows placed in the test split.

    Returns:
        (X_train, X_test, y_train, y_test); the X parts are scaled DataFrames
        of the ``*_lag1`` columns, the y parts are NumPy arrays of 0/1 labels.
    """
    frame = df.copy()

    indicator_cols = ['Stochastic_K', 'Stochastic_D', 'ROC', 'Williams_R',
                      'Momentum', 'Disparity_5', 'Disparity_14', 'OSCP',
                      'CCI', 'RSI', 'Pivot_Point', 'S1', 'S2', 'R1', 'R2']
    lag_cols = [f'{col}_lag1' for col in indicator_cols]

    # Label: did day T close above day T-1?
    frame['Target'] = frame['Close'].gt(frame['Close'].shift(1)).astype(int)

    # Features: the indicator values of day T-1.
    for source, lagged in zip(indicator_cols, lag_cols):
        frame[lagged] = frame[source].shift(1)

    # Remove rows with infinite or missing feature values.
    frame = frame.replace([np.inf, -np.inf], np.nan)
    frame = frame.dropna(subset=[*lag_cols, 'Target'])

    X, y = frame[lag_cols].copy(), frame['Target'].copy()

    print(f" Veri: {len(X)} satƒ±r | Up: {y.mean()*100:.1f}%")

    # Time-ordered split (no shuffling).
    n_train = int(len(X) * (1 - test_ratio))
    X_train, X_test = X.iloc[:n_train].copy(), X.iloc[n_train:].copy()
    y_train, y_test = y.iloc[:n_train].copy(), y.iloc[n_train:].copy()

    def _as_frame(values, like):
        # Re-wrap the scaler's array output with the original labels.
        return pd.DataFrame(values, columns=lag_cols, index=like.index)

    # Normalization: fitted on train, merely applied to test.
    scaler = MinMaxScaler()
    X_train_scaled = _as_frame(scaler.fit_transform(X_train), X_train)
    X_test_scaled = _as_frame(scaler.transform(X_test), X_test)

    print(f" Train: {len(X_train_scaled)} | Test: {len(X_test_scaled)}")

    return X_train_scaled, X_test_scaled, y_train.values, y_test.values

# Prepare the train/test splits of every index. A failure is printed and
# that index is left out of `prepared_data`.
prepared_data = {}
for name, data in all_data_indicators.items():
    print(f"\n{name}:")
    try:
        X_train, X_test, y_train, y_test = veri_hazirla(data)
        prepared_data[name] = dict(
            X_train=X_train, X_test=X_test,
            y_train=y_train, y_test=y_test,
        )
    except Exception as e:
        print(f" ‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ {len(prepared_data)} borsa hazƒ±r")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 4: SVM, LINEAR KERNEL ONLY (QUICK TEST)
# ============================================================================
# Grid-searches the SVC penalty C with 10-fold stratified cross-validation on
# the training split, then scores the best estimator on the held-out test
# split.
print("="*80)
print("LEVEL 4: SVM LINEAR KERNEL (Hƒ±zlƒ± Test)")
print("="*80)

for name in ['KOSPI']:  # quick test on KOSPI only
    print(f"\n{name}:")
    print(f" {'-'*70}")

    # The download, indicator and preparation stages print failures and skip
    # the index instead of raising, so the entry may be missing. A bare
    # prepared_data[name] here would raise KeyError outside the try below and
    # abort the whole cell, hence the guarded lookup.
    data = prepared_data.get(name)
    if data is None:
        print(f" ‚ùå {name}: no prepared data (an earlier stage failed), skipping")
        continue

    try:
        # Candidate values of the penalty parameter C.
        param_grid = {
            'C': [0.001, 0.01, 0.1, 1, 4, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
        }

        # NOTE(review): shuffle=True mixes earlier and later rows of the time
        # series inside the folds - confirm this matches the paper's protocol.
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
        svm = SVC(kernel='linear', max_iter=50000, random_state=42)
        grid = GridSearchCV(svm, param_grid, cv=cv, scoring='accuracy', n_jobs=-1, verbose=0)
        grid.fit(data['X_train'], data['y_train'])

        # Evaluate the winning configuration on the chronologically later rows.
        best_model = grid.best_estimator_
        y_pred = best_model.predict(data['X_test'])

        acc = accuracy_score(data['y_test'], y_pred)
        f1 = f1_score(data['y_test'], y_pred, zero_division=0)

        print(f" Best C: {grid.best_params_['C']}")
        print(f" CV Score: {grid.best_score_:.4f}")
        print(f" Test Acc: {acc:.4f} | F1: {f1:.4f}")

    except Exception as e:
        # Best effort: report the failure and keep the cell running.
        print(f" ‚ùå {e}")

print("\n" + "="*80)
print("‚úÖ VERSƒ∞YON 2 TEST TAMAMLANDI")
print("="*80)

üì¶ VERSƒ∞YON 2: Target Same-Day
‚úÖ Kurulum tamamlandƒ±!

LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)

üìä KSE100 (^KSE)... ‚úÖ 2346 g√ºn

üìä KOSPI (^KS11)... ‚úÖ 2397 g√ºn

üìä Nikkei225 (^N225)... ‚úÖ 2382 g√ºn

üìä SZSE (000001.SS)... ‚úÖ 2366 g√ºn

‚úÖ 4 borsa √ßekildi

LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Table 1)

KSE100... ‚úÖ 2346 satƒ±r

KOSPI... ‚úÖ 2397 satƒ±r

Nikkei225... ‚úÖ 2382 satƒ±r

SZSE... ‚úÖ 2366 satƒ±r

‚úÖ G√∂stergeler hesaplandƒ±

LEVEL 3: VERƒ∞ HAZIRLAMA (TARGET SAME-DAY)

KSE100:
 Veri: 2326 satƒ±r | Up: 53.5%
 Train: 1860 | Test: 466

KOSPI:
 Veri: 2377 satƒ±r | Up: 52.3%
 Train: 1901 | Test: 476

Nikkei225:
 Veri: 2362 satƒ±r | Up: 53.1%
 Train: 1889 | Test: 473

SZSE:
 Veri: 2346 satƒ±r | Up: 52.9%
 Train: 1876 | Test: 470

‚úÖ 4 borsa hazƒ±r

LEVEL 4: SVM LINEAR KERNEL (Hƒ±zlƒ± Test)

KOSPI:
 ----------------------------------------------------------------------
 Best C: 900
 CV Score: 0.5187
 Test Acc: 0.5147 | F1: 0.5295

‚úÖ VERSƒ∞YON 2 TEST TAMA

In [None]:
"""
============================================================================
VERSƒ∞YON 3: PIVOT POINTS DOUBLE-LAG D√úZELTMESƒ∞
============================================================================
Hipotez: Pivot Points zaten shift(1) ile hesaplanƒ±yor,
         sonra bir de biz lag yapƒ±nca double-lag oluyor!
============================================================================
"""

import sys
import subprocess
print("üì¶ VERSƒ∞YON 3: Pivot Points Double-Lag Fix")
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                      "yfinance", "ta", "scikit-learn", "pandas", "numpy"])

import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Kurulum tamamlandƒ±!\n")

# ============================================================================
# LEVEL 1: DATA DOWNLOAD
# ============================================================================
# Pulls the price history of every index in `tickers` through yfinance. An
# index whose download comes back empty or raises is reported and simply left
# out of `all_data`; the run continues with the remaining indices.
print("=" * 80)
print("LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)")
print("=" * 80)

tickers = dict(
    KSE100='^KSE',
    KOSPI='^KS11',
    Nikkei225='^N225',
    SZSE='000001.SS',
)

all_data = {}
for name, ticker in tickers.items():
    print(f"\nüìä {name} ({ticker})...", end=" ")
    try:
        data = yf.download(ticker, start="2011-01-01", end="2020-09-27",
                           progress=False, auto_adjust=True)

        if not data.empty:
            # When the columns come back as a MultiIndex, keep only level 0 so
            # the plain column names selected below can be used.
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)

            data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy().dropna()

            all_data[name] = data
            print(f"‚úÖ {len(data)} g√ºn")
        else:
            print("‚ùå VERƒ∞ YOK!")
    except Exception as e:
        # Best effort: report the failure and move on to the next index.
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ {len(all_data)} borsa √ßekildi")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 2: TECHNICAL INDICATORS - PIVOT POINTS FIXED
# ============================================================================
print("=" * 80)
print("LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Pivot Points D√ºzeltildi)")
print("=" * 80)

def hesapla_teknik_gostergeler(df):
    """Return a copy of ``df`` with the 15 technical-indicator columns added.

    The indicators follow the formulas of Table 1 of the paper being
    replicated, except that the pivot points are built from each row's own
    high, low and close (no shift). Infinite values anywhere in the result
    are replaced by NaN; NaN rows are not dropped here.

    Args:
        df: price frame providing 'High', 'Low' and 'Close' columns.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    out = df.copy()

    # squeeze() turns a single-column frame into a Series.
    hi = out['High'].squeeze()
    lo = out['Low'].squeeze()
    cl = out['Close'].squeeze()

    # 1-2. Stochastic oscillator (%K and its signal line %D)
    stochastic = ta.momentum.StochasticOscillator(
        hi, lo, cl, window=14, smooth_window=3)
    out['Stochastic_K'] = stochastic.stoch()
    out['Stochastic_D'] = stochastic.stoch_signal()

    # 3. Rate of change, 10 periods
    out['ROC'] = ta.momentum.ROCIndicator(cl, window=10).roc()

    # 4. Williams %R, 14 periods
    williams = ta.momentum.WilliamsRIndicator(hi, lo, cl, lbp=14)
    out['Williams_R'] = williams.williams_r()

    # 5. Momentum, 4 periods: C_t - C_{t-4}
    out['Momentum'] = cl.diff(4)

    # Simple moving averages shared by the disparity and OSCP formulas.
    sma5 = cl.rolling(5).mean()
    sma14 = cl.rolling(14).mean()
    sma10 = cl.rolling(10).mean()

    # 6. Disparity 5 (falls back to 100 when the average is exactly zero)
    out['Disparity_5'] = np.where(sma5 != 0, (cl / sma5) * 100, 100)

    # 7. Disparity 14 (the paper says 15, but 14 is what is used)
    out['Disparity_14'] = np.where(sma14 != 0, (cl / sma14) * 100, 100)

    # 8. OSCP (price oscillator); falls back to 0 when the 5-day average is zero
    out['OSCP'] = np.where(sma5 != 0, ((sma5 - sma10) / sma5), 0)

    # 9. CCI, 20 periods
    out['CCI'] = ta.trend.CCIIndicator(hi, lo, cl, window=20).cci()

    # 10. RSI, 14 periods
    out['RSI'] = ta.momentum.RSIIndicator(cl, window=14).rsi()

    # 11-15. Pivot points - NO shift here, because the features get lagged
    # later on anyway.
    bar_range = hi - lo

    out['Pivot_Point'] = (hi + lo + cl) / 3
    out['S1'] = (out['Pivot_Point'] * 2) - hi
    out['S2'] = out['Pivot_Point'] - bar_range
    out['R1'] = (out['Pivot_Point'] * 2) - lo
    out['R2'] = out['Pivot_Point'] + bar_range

    # Turn +/-inf into NaN.
    return out.replace([np.inf, -np.inf], np.nan)

# Compute the indicator columns for every downloaded index. A failure is
# printed and that index is left out of `all_data_indicators`.
all_data_indicators = {}
for name, data in all_data.items():
    print(f"\n{name}...", end=" ")
    try:
        all_data_indicators[name] = result = hesapla_teknik_gostergeler(data)
        print(f"‚úÖ {len(result)} satƒ±r")
    except Exception as e:
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ G√∂stergeler hesaplandƒ±")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 3: DATA PREPARATION (standard)
# ============================================================================
print("=" * 80)
print("LEVEL 3: VERƒ∞ HAZIRLAMA")
print("=" * 80)

def veri_hazirla(df, test_ratio=0.2):
    """Build chronological train/test sets (VERSION 3: pivot double-lag fix).

    Every row pairs the previous row's indicator values (day T-1) with a
    label that is 1 when the next row's close (day T+1) is above the row's
    own close (day T). The min-max scaler is fitted on the training rows only
    and then applied to the test rows.

    Args:
        df: frame with a 'Close' column and the 15 indicator columns.
        test_ratio: fraction of the prepared rows placed in the test split.

    Returns:
        (X_train, X_test, y_train, y_test); the X parts are scaled DataFrames
        of the ``*_lag1`` columns, the y parts are NumPy arrays of 0/1 labels.
    """
    frame = df.copy()

    indicator_cols = ['Stochastic_K', 'Stochastic_D', 'ROC', 'Williams_R',
                      'Momentum', 'Disparity_5', 'Disparity_14', 'OSCP',
                      'CCI', 'RSI', 'Pivot_Point', 'S1', 'S2', 'R1', 'R2']
    lag_cols = [f'{col}_lag1' for col in indicator_cols]

    # Features: the indicator values of day T-1.
    for source, lagged in zip(indicator_cols, lag_cols):
        frame[lagged] = frame[source].shift(1)

    # Label: does day T+1 close above day T? The last row has no next close,
    # so it is dropped.
    frame['Next_Close'] = frame['Close'].shift(-1)
    frame['Target'] = (frame['Next_Close'] > frame['Close']).astype(int)
    frame = frame.iloc[:-1].copy()

    # Remove rows with infinite or missing feature values.
    frame = frame.replace([np.inf, -np.inf], np.nan)
    frame = frame.dropna(subset=[*lag_cols, 'Target'])

    X, y = frame[lag_cols].copy(), frame['Target'].copy()

    print(f" Veri: {len(X)} satƒ±r | Up: {y.mean()*100:.1f}%")

    # Time-ordered split (no shuffling).
    n_train = int(len(X) * (1 - test_ratio))
    X_train, X_test = X.iloc[:n_train].copy(), X.iloc[n_train:].copy()
    y_train, y_test = y.iloc[:n_train].copy(), y.iloc[n_train:].copy()

    def _as_frame(values, like):
        # Re-wrap the scaler's array output with the original labels.
        return pd.DataFrame(values, columns=lag_cols, index=like.index)

    # Normalization: fitted on train, merely applied to test.
    scaler = MinMaxScaler()
    X_train_scaled = _as_frame(scaler.fit_transform(X_train), X_train)
    X_test_scaled = _as_frame(scaler.transform(X_test), X_test)

    print(f" Train: {len(X_train_scaled)} | Test: {len(X_test_scaled)}")

    return X_train_scaled, X_test_scaled, y_train.values, y_test.values

# Prepare the train/test splits of every index. A failure is printed and
# that index is left out of `prepared_data`.
prepared_data = {}
for name, data in all_data_indicators.items():
    print(f"\n{name}:")
    try:
        X_train, X_test, y_train, y_test = veri_hazirla(data)
        prepared_data[name] = dict(
            X_train=X_train, X_test=X_test,
            y_train=y_train, y_test=y_test,
        )
    except Exception as e:
        print(f" ‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ {len(prepared_data)} borsa hazƒ±r")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 4: SVM, LINEAR KERNEL ONLY (QUICK TEST)
# ============================================================================
# Grid-searches the SVC penalty C with 10-fold stratified cross-validation on
# the training split, then scores the best estimator on the held-out test
# split.
print("="*80)
print("LEVEL 4: SVM LINEAR KERNEL (Hƒ±zlƒ± Test)")
print("="*80)

for name in ['KOSPI']:  # quick test on KOSPI only
    print(f"\n{name}:")
    print(f" {'-'*70}")

    # The download, indicator and preparation stages print failures and skip
    # the index instead of raising, so the entry may be missing. A bare
    # prepared_data[name] here would raise KeyError outside the try below and
    # abort the whole cell, hence the guarded lookup.
    data = prepared_data.get(name)
    if data is None:
        print(f" ‚ùå {name}: no prepared data (an earlier stage failed), skipping")
        continue

    try:
        # Candidate values of the penalty parameter C.
        param_grid = {
            'C': [0.001, 0.01, 0.1, 1, 4, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
        }

        # NOTE(review): shuffle=True mixes earlier and later rows of the time
        # series inside the folds - confirm this matches the paper's protocol.
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
        svm = SVC(kernel='linear', max_iter=50000, random_state=42)
        grid = GridSearchCV(svm, param_grid, cv=cv, scoring='accuracy', n_jobs=-1, verbose=0)
        grid.fit(data['X_train'], data['y_train'])

        # Evaluate the winning configuration on the chronologically later rows.
        best_model = grid.best_estimator_
        y_pred = best_model.predict(data['X_test'])

        acc = accuracy_score(data['y_test'], y_pred)
        f1 = f1_score(data['y_test'], y_pred, zero_division=0)

        print(f" Best C: {grid.best_params_['C']}")
        print(f" CV Score: {grid.best_score_:.4f}")
        print(f" Test Acc: {acc:.4f} | F1: {f1:.4f}")

    except Exception as e:
        # Best effort: report the failure and keep the cell running.
        print(f" ‚ùå {e}")

print("\n" + "="*80)
print("‚úÖ VERSƒ∞YON 3 TEST TAMAMLANDI")
print("="*80)

üì¶ VERSƒ∞YON 3: Pivot Points Double-Lag Fix
‚úÖ Kurulum tamamlandƒ±!

LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)

üìä KSE100 (^KSE)... ‚úÖ 2346 g√ºn

üìä KOSPI (^KS11)... ‚úÖ 2397 g√ºn

üìä Nikkei225 (^N225)... ‚úÖ 2382 g√ºn

üìä SZSE (000001.SS)... ‚úÖ 2366 g√ºn

‚úÖ 4 borsa √ßekildi

LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Pivot Points D√ºzeltildi)

KSE100... ‚úÖ 2346 satƒ±r

KOSPI... ‚úÖ 2397 satƒ±r

Nikkei225... ‚úÖ 2382 satƒ±r

SZSE... ‚úÖ 2366 satƒ±r

‚úÖ G√∂stergeler hesaplandƒ±

LEVEL 3: VERƒ∞ HAZIRLAMA

KSE100:
 Veri: 2325 satƒ±r | Up: 53.5%
 Train: 1860 | Test: 465

KOSPI:
 Veri: 2376 satƒ±r | Up: 52.4%
 Train: 1900 | Test: 476

Nikkei225:
 Veri: 2361 satƒ±r | Up: 53.1%
 Train: 1888 | Test: 473

SZSE:
 Veri: 2345 satƒ±r | Up: 52.8%
 Train: 1876 | Test: 469

‚úÖ 4 borsa hazƒ±r

LEVEL 4: SVM LINEAR KERNEL (Hƒ±zlƒ± Test)

KOSPI:
 ----------------------------------------------------------------------
 Best C: 0.001
 CV Score: 0.5137
 Test Acc: 0.5630 | F1: 0.7204

‚úÖ VERSƒ∞YO

In [None]:
"""
============================================================================
VERSƒ∞YON 1 FULL: NORMALIZATION LAG √ñNCESƒ∞ - T√úM SVM KERNELS
============================================================================
Hipotez: Makale normalization'ƒ± LAG √∂ncesi yapƒ±yor (data leakage var)
         Bu y√ºzden y√ºksek accuracy elde ediyorlar!
============================================================================
"""

import sys
import subprocess
print("üì¶ VERSƒ∞YON 1 FULL: Normalization LAG √∂ncesi - T√úM SVM")
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                      "yfinance", "ta", "scikit-learn", "pandas", "numpy"])

import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Kurulum tamamlandƒ±!\n")

# ============================================================================
# LEVEL 1: DATA DOWNLOAD
# ============================================================================
# Pulls the price history of every index in `tickers` through yfinance. An
# index whose download comes back empty or raises is reported and simply left
# out of `all_data`; the run continues with the remaining indices.
print("=" * 80)
print("LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)")
print("=" * 80)

tickers = dict(
    KSE100='^KSE',
    KOSPI='^KS11',
    Nikkei225='^N225',
    SZSE='000001.SS',
)

all_data = {}
for name, ticker in tickers.items():
    print(f"\nüìä {name} ({ticker})...", end=" ")
    try:
        data = yf.download(ticker, start="2011-01-01", end="2020-09-27",
                           progress=False, auto_adjust=True)

        if not data.empty:
            # When the columns come back as a MultiIndex, keep only level 0 so
            # the plain column names selected below can be used.
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)

            data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy().dropna()

            all_data[name] = data
            print(f"‚úÖ {len(data)} g√ºn")
        else:
            print("‚ùå VERƒ∞ YOK!")
    except Exception as e:
        # Best effort: report the failure and move on to the next index.
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ {len(all_data)} borsa √ßekildi")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 2: TECHNICAL INDICATORS (Table 1)
# ============================================================================
print("=" * 80)
print("LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Table 1)")
print("=" * 80)

def hesapla_teknik_gostergeler(df):
    """Return a copy of ``df`` extended with 15 technical-indicator columns.

    Reads the 'High', 'Low' and 'Close' columns and appends, in this order:
    Stochastic_K, Stochastic_D, ROC, Williams_R, Momentum, Disparity_5,
    Disparity_14, OSCP, CCI, RSI, Pivot_Point, S1, S2, R1, R2.

    Infinite values are replaced by NaN before returning; no rows are
    dropped here.
    """
    out = df.copy()

    hi = out['High'].squeeze()
    lo = out['Low'].squeeze()
    cl = out['Close'].squeeze()

    # 1-2. Stochastic oscillator (%K over 14 rows, %D as its 3-row smoothing).
    stochastic = ta.momentum.StochasticOscillator(
        hi, lo, cl, window=14, smooth_window=3
    )
    out['Stochastic_K'] = stochastic.stoch()
    out['Stochastic_D'] = stochastic.stoch_signal()

    # 3. Rate of change over 10 rows.
    out['ROC'] = ta.momentum.ROCIndicator(cl, window=10).roc()

    # 4. Williams %R with a 14-row look-back.
    out['Williams_R'] = ta.momentum.WilliamsRIndicator(hi, lo, cl, lbp=14).williams_r()

    # 5. Momentum: C_t - C_{t-4}.
    out['Momentum'] = cl - cl.shift(4)

    # Simple moving averages shared by the disparity and OSCP features.
    sma = {window: cl.rolling(window).mean() for window in (5, 10, 14)}

    # 6-7. Disparity: close as a percentage of its moving average
    # (falls back to 100 where the average is exactly zero).
    # The original author notes the paper says 15 but 14 is used here.
    for window in (5, 14):
        out[f'Disparity_{window}'] = np.where(
            sma[window] != 0, cl / sma[window] * 100, 100
        )

    # 8. Price oscillator: (MA5 - MA10) / MA5, 0 where MA5 is exactly zero.
    out['OSCP'] = np.where(sma[5] != 0, (sma[5] - sma[10]) / sma[5], 0)

    # 9. Commodity Channel Index over 20 rows.
    out['CCI'] = ta.trend.CCIIndicator(hi, lo, cl, window=20).cci()

    # 10. Relative Strength Index over 14 rows.
    out['RSI'] = ta.momentum.RSIIndicator(cl, window=14).rsi()

    # 11-15. Pivot levels computed from the previous row's high/low/close.
    prev_hi, prev_lo, prev_cl = hi.shift(1), lo.shift(1), cl.shift(1)
    pivot = (prev_hi + prev_lo + prev_cl) / 3
    prev_range = prev_hi - prev_lo

    out['Pivot_Point'] = pivot
    out['S1'] = (pivot * 2) - prev_hi
    out['S2'] = pivot - prev_range
    out['R1'] = (pivot * 2) - prev_lo
    out['R2'] = pivot + prev_range

    # Turn +/-inf into NaN so later dropna() calls remove those rows.
    return out.replace([np.inf, -np.inf], np.nan)

# Indicator-augmented DataFrame per index; indices that fail are left out.
all_data_indicators = {}
for name, data in all_data.items():
    print(f"\n{name}...", end=" ")
    try:
        result = hesapla_teknik_gostergeler(data)
        all_data_indicators[name] = result
        print(f"‚úÖ {len(result)} satƒ±r")
    except Exception as e:
        # Best effort: report and continue with the remaining indices.
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print("‚úÖ G√∂stergeler hesaplandƒ±")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 3: DATA PREPARATION - NORMALIZATION BEFORE LAG
# ============================================================================
print("=" * 80)
print("LEVEL 3: VERƒ∞ HAZIRLAMA (NORMALIZATION LAG √ñNCESƒ∞)")
print("=" * 80)

def veri_hazirla(df, test_ratio=0.2):
    """Build scaled, lagged features and a direction target, then split by time.

    VERSION 1 ordering (normalization BEFORE the lag). The scaler is fitted
    on every remaining row, train and test alike, so this variant leaks
    test-set statistics on purpose; that is the hypothesis under test.

    Steps:
        1. Target = 1 when the next row's Close is above this row's Close.
        2. Drop rows with NaN/inf in any indicator.
        3. Min-max scale the indicators on the full sample.
        4. Shift every scaled indicator down by one row (``*_lag1``).
        5. Chronological split; the last ``test_ratio`` share is the test set.

    Args:
        df: frame containing 'Close' and the 15 indicator columns.
        test_ratio: share of rows (taken from the end) used for testing.

    Returns:
        (X_train, X_test, y_train, y_test); X_* are DataFrames of the lagged
        columns, y_* are NumPy arrays of 0/1 labels.
    """
    features = ['Stochastic_K', 'Stochastic_D', 'ROC', 'Williams_R',
                'Momentum', 'Disparity_5', 'Disparity_14', 'OSCP',
                'CCI', 'RSI', 'Pivot_Point', 'S1', 'S2', 'R1', 'R2']

    frame = df.copy()

    # Forward-looking label; the final row has no successor and is removed.
    frame['Next_Close'] = frame['Close'].shift(-1)
    frame['Target'] = (frame['Next_Close'] > frame['Close']).astype(int)
    frame = frame.iloc[:-1].copy()

    # Remove indicator warm-up rows and anything non-finite.
    frame = frame.replace([np.inf, -np.inf], np.nan)
    frame = frame.dropna(subset=features + ['Target'])

    # WARNING: fitted on the whole sample (train + test) -> data leakage.
    frame[features] = MinMaxScaler().fit_transform(frame[features])

    # Lag after scaling: each row sees the previous row's scaled indicators.
    lagged_features = [f'{feat}_lag1' for feat in features]
    for feat, lagged_col in zip(features, lagged_features):
        frame[lagged_col] = frame[feat].shift(1)

    # The shift leaves the first row without lagged values.
    frame = frame.dropna(subset=lagged_features)

    X = frame[lagged_features].copy()
    y = frame['Target'].copy()

    print(f" Veri: {len(X)} satƒ±r | Up: {y.mean()*100:.1f}%")

    # Time-ordered split: earliest rows train, latest rows test.
    n_train = int(len(X) * (1 - test_ratio))
    X_train, X_test = X.iloc[:n_train].copy(), X.iloc[n_train:].copy()
    y_train, y_test = y.iloc[:n_train].values, y.iloc[n_train:].values

    print(f" Train: {len(X_train)} | Test: {len(X_test)}")

    return X_train, X_test, y_train, y_test

# Train/test splits per index; indices whose preparation fails are left out.
prepared_data = {}
for name, data in all_data_indicators.items():
    print(f"\n{name}:")
    try:
        X_train, X_test, y_train, y_test = veri_hazirla(data)
        prepared_data[name] = dict(
            X_train=X_train, X_test=X_test,
            y_train=y_train, y_test=y_test,
        )
    except Exception as e:
        # Best effort: report and continue with the remaining indices.
        print(f" ‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ {len(prepared_data)} borsa hazƒ±r")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 4: SVM, ALL KERNELS (Linear, RBF, Poly)
# ============================================================================
print("=" * 80)
print("LEVEL 4: SVM T√úM KERNELS (10-Fold CV + Grid Search)")
print("=" * 80)

def grid_search_svm(X_train, y_train, kernel='linear', n_folds=10):
    """Tune an ``SVC`` for one kernel with stratified k-fold grid search.

    Previously only the linear kernel had a parameter grid, so every call
    with ``kernel='rbf'`` or ``kernel='poly'`` failed with
    ``UnboundLocalError``. Grids for both are now defined, and an unknown
    kernel is rejected up front with a clear ``ValueError``.

    Args:
        X_train: training feature matrix.
        y_train: training labels.
        kernel: one of 'linear', 'rbf' or 'poly'.
        n_folds: number of stratified CV folds (shuffled, seed 42).

    Returns:
        (best_estimator, best_params, best_cv_accuracy) from the grid search.

    Raises:
        ValueError: if ``kernel`` has no parameter grid defined here.
    """
    # Linear C values; described by the original author as the ranges of the
    # paper's Figures 3-6.
    c_linear = [0.001, 0.01, 0.1, 1, 4, 10, 50, 100, 200, 300, 400, 500,
                600, 700, 800, 900, 1000]

    # NOTE(review): the rbf/poly ranges below are placeholder grids added so
    # these kernels can run at all - confirm them against the paper's figures.
    c_nonlinear = [0.1, 1, 10, 100, 1000]
    param_grids = {
        'linear': {'C': c_linear},
        'rbf': {'C': c_nonlinear, 'gamma': [0.001, 0.01, 0.1, 1, 10]},
        'poly': {'C': c_nonlinear, 'degree': [1, 2, 3, 4]},
    }

    # Validate before building any estimator so bad input fails fast.
    try:
        param_grid = param_grids[kernel]
    except KeyError:
        raise ValueError(
            f"Unsupported kernel {kernel!r}; expected one of {sorted(param_grids)}"
        ) from None

    cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
    svm = SVC(kernel=kernel, max_iter=50000, random_state=42)
    grid = GridSearchCV(svm, param_grid, cv=cv, scoring='accuracy',
                        n_jobs=-1, verbose=0)
    grid.fit(X_train, y_train)

    return grid.best_estimator_, grid.best_params_, grid.best_score_

# svm_results[index][kernel] -> metrics dict, or None when that run failed.
svm_results = {}

for name in prepared_data:
    print("\n" + "=" * 80)
    print(f"{name}")
    print("=" * 80)

    data = prepared_data[name]
    svm_results[name] = {}

    for kernel in ('linear', 'rbf', 'poly'):
        print(f"\n üîç {kernel.upper()} Kernel:")
        print(" " + "-" * 70)

        try:
            best_model, best_params, cv_score = grid_search_svm(
                data['X_train'], data['y_train'], kernel=kernel
            )

            # Score the refit best estimator on the hold-out rows.
            y_pred = best_model.predict(data['X_test'])

            acc = accuracy_score(data['y_test'], y_pred)
            prec = precision_score(data['y_test'], y_pred, zero_division=0)
            rec = recall_score(data['y_test'], y_pred, zero_division=0)
            f1 = f1_score(data['y_test'], y_pred, zero_division=0)

            svm_results[name][kernel] = dict(
                model=best_model,
                params=best_params,
                cv_score=cv_score,
                acc=acc,
                precision=prec,
                recall=rec,
                f1=f1,
            )

            print(f" Best Params: {best_params}")
            print(f" CV Score: {cv_score:.4f}")
            print(f" Test Acc: {acc:.4f} | F1: {f1:.4f}")

        except Exception as e:
            # Record the failure so the reporting code can skip this kernel.
            print(f" ‚ùå {e}")
            svm_results[name][kernel] = None

print("\n" + "=" * 80)
print("‚úÖ SVM tamamlandƒ±")
print("=" * 80 + "\n")

# ============================================================================
# RESULTS (Table 11 format - comparison with the paper)
# ============================================================================
print("\n" + "="*80)
print(" "*20 + "üìä FINAL SONU√áLAR - SVM")
print(" "*15 + "(Table 11 Makale Kar≈üƒ±la≈ütƒ±rmasƒ±)")
print("="*80)

# Per-index table: one row per kernel that produced a result.
for name in prepared_data.keys():
    print(f"\n{'='*80}")
    print(f" {name}")
    print(f"{'='*80}")

    print(f"\n {'Kernel':<15} {'Accuracy':<12} {'F-Score':<12} {'Best Params'}")
    print(f" {'-'*70}")

    for kernel in ['linear', 'rbf', 'poly']:
        # Failed runs were stored as None and are skipped here.
        if svm_results[name][kernel] is not None:
            r = svm_results[name][kernel]
            # Truncate long parameter dicts to 35 characters for the table.
            params_str = str(r['params'])[:35] + "..." if len(str(r['params'])) > 35 else str(r['params'])
            print(f" {kernel.upper():<15} {r['acc']:<12.4f} {r['f1']:<12.4f} {params_str}")

# Averages across indices.
print(f"\n{'='*80}")
print(" üìà ORTALAMA PERFORMANS (4 Borsa)")
print(f"{'='*80}\n")

if len(prepared_data) > 0:
    # Each mean only includes indices whose run for that kernel succeeded.
    # If a kernel failed everywhere the list is empty and np.mean yields nan.
    avg_svm_linear_acc = np.mean([svm_results[n]['linear']['acc'] for n in prepared_data.keys() if svm_results[n]['linear']])
    avg_svm_rbf_acc = np.mean([svm_results[n]['rbf']['acc'] for n in prepared_data.keys() if svm_results[n]['rbf']])
    avg_svm_poly_acc = np.mean([svm_results[n]['poly']['acc'] for n in prepared_data.keys() if svm_results[n]['poly']])

    avg_svm_linear_f1 = np.mean([svm_results[n]['linear']['f1'] for n in prepared_data.keys() if svm_results[n]['linear']])
    avg_svm_rbf_f1 = np.mean([svm_results[n]['rbf']['f1'] for n in prepared_data.keys() if svm_results[n]['rbf']])
    avg_svm_poly_f1 = np.mean([svm_results[n]['poly']['f1'] for n in prepared_data.keys() if svm_results[n]['poly']])

    print(f" {'Kernel':<15} {'Avg Accuracy':<15} {'Avg F-Score':<15}")
    print(f" {'-'*50}")
    print(f" {'Linear':<15} {avg_svm_linear_acc:<15.4f} {avg_svm_linear_f1:<15.4f}")
    print(f" {'RBF':<15} {avg_svm_rbf_acc:<15.4f} {avg_svm_rbf_f1:<15.4f}")
    print(f" {'Poly':<15} {avg_svm_poly_acc:<15.4f} {avg_svm_poly_f1:<15.4f}")

# Reference figures hard-coded from the paper (Table 11).
# NOTE(review): the KOSPI and Nikkei 225 rows below are identical - looks
# like a copy-paste slip; verify both rows against the paper.
print(f"\n{'='*80}")
print(" üéØ MAKALE SONU√áLARI (Table 11 - SVM)")
print(f"{'='*80}\n")
print(" Index       Linear   RBF      Poly")
print(" " + "-"*45)
print(" KSE-100     0.8519   0.7688   0.8438")
print(" KOSPI       0.8022   0.7626   0.7828")
print(" Nikkei 225  0.8022   0.7626   0.7828")
print(" SZSE        0.8998   0.8720   0.8941")

print(f"\n{'='*80}")
print("‚úÖ VERSƒ∞YON 1 FULL SVM ANALƒ∞Zƒ∞ TAMAMLANDI")
print(f"{'='*80}\n")

# Closing reminder printed to the user: this variant scales on train+test.
print("üí° √ñNEMLƒ∞ NOT:")
print(" ‚ö†Ô∏è Bu versiyonda DATA LEAKAGE var!")
print(" ‚ö†Ô∏è Normalization t√ºm veri setinde yapƒ±ldƒ± (train+test)")
print(" ‚ö†Ô∏è Eƒüer sonu√ßlar makaleye yakƒ±nsa, makale de aynƒ± hatayƒ± yapƒ±yor olabilir!")

üì¶ VERSƒ∞YON 1 FULL: Normalization LAG √∂ncesi - T√úM SVM
‚úÖ Kurulum tamamlandƒ±!

LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)

üìä KSE100 (^KSE)... ‚úÖ 2346 g√ºn

üìä KOSPI (^KS11)... ‚úÖ 2397 g√ºn

üìä Nikkei225 (^N225)... ‚úÖ 2382 g√ºn

üìä SZSE (000001.SS)... ‚úÖ 2366 g√ºn

‚úÖ 4 borsa √ßekildi

LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Table 1)

KSE100... ‚úÖ 2346 satƒ±r

KOSPI... ‚úÖ 2397 satƒ±r

Nikkei225... ‚úÖ 2382 satƒ±r

SZSE... ‚úÖ 2366 satƒ±r

‚úÖ G√∂stergeler hesaplandƒ±

LEVEL 3: VERƒ∞ HAZIRLAMA (NORMALIZATION LAG √ñNCESƒ∞)

KSE100:
 Veri: 2325 satƒ±r | Up: 53.5%
 Train: 1860 | Test: 465

KOSPI:
 Veri: 2376 satƒ±r | Up: 52.4%
 Train: 1900 | Test: 476

Nikkei225:
 Veri: 2361 satƒ±r | Up: 53.1%
 Train: 1888 | Test: 473

SZSE:
 Veri: 2345 satƒ±r | Up: 52.8%
 Train: 1876 | Test: 469

‚úÖ 4 borsa hazƒ±r

LEVEL 4: SVM T√úM KERNELS (10-Fold CV + Grid Search)

KSE100

 üîç LINEAR Kernel:
 ----------------------------------------------------------------------
 Best Params: {'C'

In [None]:
"""
============================================================================
ADVANCED GRID SEARCH: Two-Stage + Bayesian + Randomized
============================================================================
Hypothesis: the best parameter may be a non-round (fractional) value, so let's use smarter search strategies!
============================================================================
"""

import sys
import subprocess
print("üì¶ Advanced Grid Search Y√ºkleniyor...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                      "yfinance", "ta", "scikit-learn", "pandas", "numpy",
                      "scikit-optimize"])  # Bayesian i√ßin

import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from scipy.stats import uniform, loguniform
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Kurulum tamamlandƒ±!\n")

# ============================================================================
# LEVEL 1: DATA DOWNLOAD
# ============================================================================
print("="*80)
print("LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)")
print("="*80)

# Display name -> Yahoo Finance symbol for each index that is downloaded.
tickers = {
    'KSE100': '^KSE',
    'KOSPI': '^KS11',
    'Nikkei225': '^N225',
    'SZSE': '000001.SS'
}

# Filled with one OHLCV DataFrame per index that downloaded successfully.
all_data = {}
for name, ticker in tickers.items():
    print(f"\nüìä {name} ({ticker})...", end=" ")
    try:
        data = yf.download(ticker, start="2011-01-01", end="2020-09-27",
                          progress=False, auto_adjust=True)

        # Nothing came back for this symbol: report it and skip the index.
        if data.empty:
            print("‚ùå VERƒ∞ YOK!")
            continue

        # Keep only the first level when the columns come back as a MultiIndex.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        # Restrict to the OHLCV columns and discard rows with any gap.
        data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
        data = data.dropna()

        all_data[name] = data
        print(f"‚úÖ {len(data)} g√ºn")
    except Exception as e:
        # Best effort: report the failure and move on to the next index.
        print(f"‚ùå {e}")

print(f"\n{'='*80}")
print(f"‚úÖ {len(all_data)} borsa √ßekildi")
print(f"{'='*80}\n")

# ============================================================================
# LEVEL 2: TECHNICAL INDICATORS (Table 1)
# ============================================================================
print("="*80)
print("LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Table 1)")
print("="*80)

def hesapla_teknik_gostergeler(df):
    """Append the 15 technical indicators to a copy of ``df`` and return it.

    Uses the 'High', 'Low' and 'Close' columns. New columns, in order:
    Stochastic_K, Stochastic_D, ROC, Williams_R, Momentum, Disparity_5,
    Disparity_14, OSCP, CCI, RSI, Pivot_Point, S1, S2, R1, R2.
    +/-inf values are converted to NaN; rows are not dropped.
    """
    enriched = df.copy()

    high_s = enriched['High'].squeeze()
    low_s = enriched['Low'].squeeze()
    close_s = enriched['Close'].squeeze()

    # Stochastic %K (14 rows) and %D (3-row smoothing of %K).
    osc = ta.momentum.StochasticOscillator(
        high_s, low_s, close_s, window=14, smooth_window=3
    )
    enriched['Stochastic_K'] = osc.stoch()
    enriched['Stochastic_D'] = osc.stoch_signal()

    # 10-row rate of change and 14-row Williams %R.
    enriched['ROC'] = ta.momentum.ROCIndicator(close_s, window=10).roc()
    enriched['Williams_R'] = ta.momentum.WilliamsRIndicator(
        high_s, low_s, close_s, lbp=14
    ).williams_r()

    # Momentum: C_t - C_{t-4}.
    enriched['Momentum'] = close_s - close_s.shift(4)

    # Moving averages reused by the disparity and OSCP features.
    avg5 = close_s.rolling(5).mean()
    avg10 = close_s.rolling(10).mean()
    avg14 = close_s.rolling(14).mean()

    # Disparity = close / MA * 100, defaulting to 100 where the MA is zero.
    enriched['Disparity_5'] = np.where(avg5 != 0, close_s / avg5 * 100, 100)
    enriched['Disparity_14'] = np.where(avg14 != 0, close_s / avg14 * 100, 100)

    # Price oscillator: (MA5 - MA10) / MA5, defaulting to 0 where MA5 is zero.
    enriched['OSCP'] = np.where(avg5 != 0, (avg5 - avg10) / avg5, 0)

    # 20-row CCI and 14-row RSI.
    enriched['CCI'] = ta.trend.CCIIndicator(high_s, low_s, close_s, window=20).cci()
    enriched['RSI'] = ta.momentum.RSIIndicator(close_s, window=14).rsi()

    # Pivot levels derived from the previous row's high/low/close.
    h1, l1, c1 = high_s.shift(1), low_s.shift(1), close_s.shift(1)
    pp = (h1 + l1 + c1) / 3
    spread = h1 - l1

    enriched['Pivot_Point'] = pp
    enriched['S1'] = (pp * 2) - h1
    enriched['S2'] = pp - spread
    enriched['R1'] = (pp * 2) - l1
    enriched['R2'] = pp + spread

    return enriched.replace([np.inf, -np.inf], np.nan)

# Indicator-augmented DataFrame per index; indices that fail are left out.
all_data_indicators = {}
for name, data in all_data.items():
    print(f"\n{name}...", end=" ")
    try:
        result = hesapla_teknik_gostergeler(data)
        all_data_indicators[name] = result
        print(f"‚úÖ {len(result)} satƒ±r")
    except Exception as e:
        # Best effort: report and continue with the remaining indices.
        print(f"‚ùå {e}")

print(f"\n{'='*80}")
print(f"‚úÖ G√∂stergeler hesaplandƒ±")
print(f"{'='*80}\n")

# ============================================================================
# LEVEL 3: DATA PREPARATION
# ============================================================================
print("="*80)
print("LEVEL 3: VERƒ∞ HAZIRLAMA (NORMALIZATION LAG √ñNCESƒ∞)")
print("="*80)

def veri_hazirla(df, test_ratio=0.2):
    """Prepare lagged, min-max-scaled features and a chronological split.

    Normalization happens BEFORE the lag and is fitted on all rows, so the
    test rows influence the scaling (data leakage, kept deliberately).

    Args:
        df: frame containing 'Close' and the 15 indicator columns.
        test_ratio: share of rows (taken from the end) used for testing.

    Returns:
        (X_train, X_test, y_train, y_test); X_* are DataFrames of the
        ``*_lag1`` columns, y_* are NumPy arrays of 0/1 labels.
    """
    features = ['Stochastic_K', 'Stochastic_D', 'ROC', 'Williams_R',
                'Momentum', 'Disparity_5', 'Disparity_14', 'OSCP',
                'CCI', 'RSI', 'Pivot_Point', 'S1', 'S2', 'R1', 'R2']
    lagged_features = [f'{feat}_lag1' for feat in features]

    work = df.copy()

    # Label: 1 when the next row closes higher; the last row has no successor.
    work['Next_Close'] = work['Close'].shift(-1)
    work['Target'] = (work['Next_Close'] > work['Close']).astype(int)
    work = work.iloc[:-1].copy()

    # Drop indicator warm-up rows and non-finite values.
    work = work.replace([np.inf, -np.inf], np.nan)
    work = work.dropna(subset=features + ['Target'])

    # Scale first (whole sample), then lag.
    work[features] = MinMaxScaler().fit_transform(work[features])
    for source_col, lagged_col in zip(features, lagged_features):
        work[lagged_col] = work[source_col].shift(1)

    # The shift leaves the first row without lagged values.
    work = work.dropna(subset=lagged_features)

    X = work[lagged_features].copy()
    y = work['Target'].copy()

    print(f" Veri: {len(X)} satƒ±r | Up: {y.mean()*100:.1f}%")

    # Earliest rows train, latest rows test.
    cut = int(len(X) * (1 - test_ratio))
    X_train, X_test = X.iloc[:cut].copy(), X.iloc[cut:].copy()
    y_train, y_test = y.iloc[:cut].values, y.iloc[cut:].values

    print(f" Train: {len(X_train)} | Test: {len(X_test)}")

    return X_train, X_test, y_train, y_test

# Train/test splits per index; indices whose preparation fails are left out.
prepared_data = {}
for name, data in all_data_indicators.items():
    print(f"\n{name}:")
    try:
        X_train, X_test, y_train, y_test = veri_hazirla(data)
        prepared_data[name] = {
            'X_train': X_train, 'X_test': X_test,
            'y_train': y_train, 'y_test': y_test
        }
    except Exception as e:
        # Best effort: report and continue with the remaining indices.
        print(f" ‚ùå {e}")

print(f"\n{'='*80}")
print(f"‚úÖ {len(prepared_data)} borsa hazƒ±r")
print(f"{'='*80}\n")

# ============================================================================
# LEVEL 4: ADVANCED PARAMETER SEARCH
# ============================================================================
print("="*80)
print("LEVEL 4: ADVANCED PARAMETER SEARCH")
print("="*80)

def _fine_c_grid(best_c):
    """Return distinct, strictly positive C values for the fine search stage.

    The window around ``best_c`` is [0.5x, 2x] with 15 points below 1,
    +/-50 with 20 points below 100, and +/-200 with 30 points otherwise.
    The lower bound is clipped to 1e-4 BEFORE spacing the points, so a
    window that reaches below zero no longer collapses into repeated copies
    of the clip value.
    """
    if best_c < 1:
        low, high, n_points = best_c * 0.5, best_c * 2, 15
    elif best_c < 100:
        low, high, n_points = best_c - 50, best_c + 50, 20
    else:
        low, high, n_points = best_c - 200, best_c + 200, 30

    low = max(1e-4, low)  # C must stay positive
    return np.linspace(low, high, n_points).tolist()


def _summarize_search(search, X_test, y_test):
    """Score a fitted search's best estimator on the test split and pack the result."""
    y_pred = search.best_estimator_.predict(X_test)
    return {
        'best_C': search.best_params_['C'],
        'cv_score': search.best_score_,
        'test_acc': accuracy_score(y_test, y_pred),
        'test_f1': f1_score(y_test, y_pred, zero_division=0),
        'model': search.best_estimator_,
    }


def advanced_search_linear_svm(X_train, y_train, X_test, y_test, index_name):
    """Tune C of a linear SVC with three search strategies and compare them.

    Strategies (all use 10-fold stratified CV, shuffled, seed 42, accuracy):
        1. Two-stage grid search: coarse decade grid, then a fine grid
           around the coarse winner.
        2. Randomized search: 100 draws from a log-uniform C in [1e-4, 1e3].
        3. Bayesian optimisation: 50 iterations over the same log-uniform range.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: hold-out features and labels used for reporting.
        index_name: currently unused; kept for call compatibility.

    Returns:
        dict keyed by 'two_stage', 'randomized' and 'bayesian'; each value
        holds 'best_C', 'cv_score', 'test_acc', 'test_f1' and 'model'.
    """
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    # One template estimator shared by every search.
    svm = SVC(kernel='linear', max_iter=50000, random_state=42)
    results = {}

    # ------------------------------------------------------------------
    # Strategy 1: two-stage grid search
    # ------------------------------------------------------------------
    print(f"\n {'='*70}")
    print(f" STRATEGY 1: TWO-STAGE GRID SEARCH")
    print(f" {'='*70}")

    print("\n [Stage 1] Coarse Grid...")
    coarse_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}
    grid_coarse = GridSearchCV(svm, coarse_grid, cv=cv, scoring='accuracy', n_jobs=-1)
    grid_coarse.fit(X_train, y_train)

    best_C_coarse = grid_coarse.best_params_['C']
    print(f" ‚úì Best C (Coarse): {best_C_coarse}")
    print(f" ‚úì CV Score: {grid_coarse.best_score_:.4f}")

    print(f"\n [Stage 2] Fine-tuning around {best_C_coarse}...")
    fine_grid = {'C': _fine_c_grid(best_C_coarse)}
    grid_fine = GridSearchCV(svm, fine_grid, cv=cv, scoring='accuracy', n_jobs=-1)
    grid_fine.fit(X_train, y_train)

    results['two_stage'] = _summarize_search(grid_fine, X_test, y_test)
    print(f" ‚úì Best C (Fine): {results['two_stage']['best_C']:.4f}")
    print(f" ‚úì CV Score: {grid_fine.best_score_:.4f}")
    print(f" ‚úì Test Acc: {results['two_stage']['test_acc']:.4f} | F1: {results['two_stage']['test_f1']:.4f}")

    # ------------------------------------------------------------------
    # Strategy 2: randomized search
    # ------------------------------------------------------------------
    print(f"\n {'='*70}")
    print(f" STRATEGY 2: RANDOMIZED SEARCH")
    print(f" {'='*70}")

    print("\n [Randomized] 100 iterations with log-uniform distribution...")
    param_dist = {'C': loguniform(1e-4, 1e3)}  # log-uniform between 0.0001 and 1000
    random_search = RandomizedSearchCV(
        svm, param_dist, n_iter=100, cv=cv,
        scoring='accuracy', n_jobs=-1, random_state=42
    )
    random_search.fit(X_train, y_train)

    results['randomized'] = _summarize_search(random_search, X_test, y_test)
    print(f" ‚úì Best C (Random): {results['randomized']['best_C']:.4f}")
    print(f" ‚úì CV Score: {random_search.best_score_:.4f}")
    print(f" ‚úì Test Acc: {results['randomized']['test_acc']:.4f} | F1: {results['randomized']['test_f1']:.4f}")

    # ------------------------------------------------------------------
    # Strategy 3: Bayesian optimisation
    # ------------------------------------------------------------------
    print(f"\n {'='*70}")
    print(f" STRATEGY 3: BAYESIAN OPTIMIZATION")
    print(f" {'='*70}")

    print("\n [Bayesian] 50 iterations with intelligent exploration...")
    search_spaces = {'C': Real(1e-4, 1e3, prior='log-uniform')}
    bayes_search = BayesSearchCV(
        svm, search_spaces, n_iter=50, cv=cv,
        scoring='accuracy', n_jobs=-1, random_state=42
    )
    bayes_search.fit(X_train, y_train)

    results['bayesian'] = _summarize_search(bayes_search, X_test, y_test)
    print(f" ‚úì Best C (Bayes): {results['bayesian']['best_C']:.4f}")
    print(f" ‚úì CV Score: {bayes_search.best_score_:.4f}")
    print(f" ‚úì Test Acc: {results['bayesian']['test_acc']:.4f} | F1: {results['bayesian']['test_f1']:.4f}")

    return results

# ============================================================================
# RUN
# ============================================================================
import traceback

# Strategy results per index; an index is absent when its run failed.
all_results = {}

for name in ['KOSPI']:  # Only KOSPI for now, as a first test
    print(f"\n{'='*80}")
    print(f" {name}")
    print(f"{'='*80}")

    try:
        # The lookup is inside the try so that a failed download/preparation
        # for this index is reported below rather than aborting the cell.
        data = prepared_data[name]

        results = advanced_search_linear_svm(
            data['X_train'],
            data['y_train'],
            data['X_test'],
            data['y_test'],
            name
        )
        all_results[name] = results

    except Exception as e:
        print(f"\n ‚ùå Error: {e}")
        traceback.print_exc()

# ============================================================================
# FINAL COMPARISON
# ============================================================================

print("\n" + "="*80)
print(" "*20 + "üìä STRATEGY COMPARISON")
print("="*80)

# One table per index, one row per search strategy.
for name, results in all_results.items():
    print(f"\n {name}")
    print(" " + "-"*75)
    print(f" {'Strategy':<20} {'Best C':<15} {'CV Score':<12} {'Test Acc':<12} {'F1':<12}")
    print(" " + "-"*75)

    for strategy in ['two_stage', 'randomized', 'bayesian']:
        r = results[strategy]
        print(f" {strategy.upper():<20} {r['best_C']:<15.4f} {r['cv_score']:<12.4f} {r['test_acc']:<12.4f} {r['test_f1']:<12.4f}")

    # The winner is the strategy with the highest TEST accuracy.
    # NOTE(review): ranking on the test split makes the reported winner
    # optimistic; consider ranking on 'cv_score' instead.
    best_strategy = max(results.items(), key=lambda x: x[1]['test_acc'])
    print(f"\n ‚≠ê WINNER: {best_strategy[0].upper()} (Acc: {best_strategy[1]['test_acc']:.4f})")

print("\n" + "="*80)
print("‚úÖ ADVANCED SEARCH TAMAMLANDI")
print("="*80)

üì¶ Advanced Grid Search Y√ºkleniyor...
‚úÖ Kurulum tamamlandƒ±!

LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)

üìä KSE100 (^KSE)... ‚úÖ 2346 g√ºn

üìä KOSPI (^KS11)... ‚úÖ 2397 g√ºn

üìä Nikkei225 (^N225)... ‚úÖ 2382 g√ºn

üìä SZSE (000001.SS)... ‚úÖ 2366 g√ºn

‚úÖ 4 borsa √ßekildi

LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Table 1)

KSE100... ‚úÖ 2346 satƒ±r

KOSPI... ‚úÖ 2397 satƒ±r

Nikkei225... ‚úÖ 2382 satƒ±r

SZSE... ‚úÖ 2366 satƒ±r

‚úÖ G√∂stergeler hesaplandƒ±

LEVEL 3: VERƒ∞ HAZIRLAMA (NORMALIZATION LAG √ñNCESƒ∞)

KSE100:
 Veri: 2325 satƒ±r | Up: 53.5%
 Train: 1860 | Test: 465

KOSPI:
 Veri: 2376 satƒ±r | Up: 52.4%
 Train: 1900 | Test: 476

Nikkei225:
 Veri: 2361 satƒ±r | Up: 53.1%
 Train: 1888 | Test: 473

SZSE:
 Veri: 2345 satƒ±r | Up: 52.8%
 Train: 1876 | Test: 469

‚úÖ 4 borsa hazƒ±r

LEVEL 4: ADVANCED PARAMETER SEARCH

 KOSPI

 STRATEGY 1: TWO-STAGE GRID SEARCH

 [Stage 1] Coarse Grid...
 ‚úì Best C (Coarse): 0.001
 ‚úì CV Score: 0.5137

 [Stage 2] Fine-tuning around 0.001

In [None]:
"""
============================================================================
VERSION 4: RETURNS-BASED STATIONARY APPROACH
============================================================================
Hypothesis: Financial time series are non-stationary!
            We should use RETURNS-BASED indicators, not price-based ones!

Changes:
- Close price -> Log Returns
- Technical indicators computed on returns
- Volatility normalization
- Stationary features
============================================================================
"""

import sys
import subprocess
print("üì¶ VERSƒ∞YON 4: Returns-Based Stationary Approach")
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                      "yfinance", "ta", "scikit-learn", "pandas", "numpy"])

import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Kurulum tamamlandƒ±!\n")

# ============================================================================
# LEVEL 1: DATA DOWNLOAD
# ============================================================================
print("=" * 80)
print("LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)")
print("=" * 80)

# Display name -> Yahoo Finance symbol for each index that is downloaded.
tickers = {'KSE100': '^KSE', 'KOSPI': '^KS11', 'Nikkei225': '^N225', 'SZSE': '000001.SS'}

# Filled with one OHLCV DataFrame per index that downloaded successfully.
all_data = {}
for name, ticker in tickers.items():
    print(f"\nüìä {name} ({ticker})...", end=" ")
    try:
        data = yf.download(ticker, start="2011-01-01", end="2020-09-27",
                           progress=False, auto_adjust=True)

        if not data.empty:
            # Keep only the first level when the columns come back as a MultiIndex.
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)

            # Restrict to the OHLCV columns and discard rows with any gap.
            data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy().dropna()

            all_data[name] = data
            print(f"‚úÖ {len(data)} g√ºn")
        else:
            print("‚ùå VERƒ∞ YOK!")
    except Exception as e:
        # Best effort: report the failure and move on to the next index.
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print(f"‚úÖ {len(all_data)} borsa √ßekildi")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 2: RETURNS-BASED STATIONARY INDICATORS
# ============================================================================
print("=" * 80)
print("LEVEL 2: RETURNS-BASED STATIONARY INDICATORS")
print("=" * 80)

def hesapla_stationary_indicators(df):
    """Return a copy of ``df`` extended with 17 returns-based indicator columns.

    Reads 'High', 'Low' and 'Close'. Columns are added in this order:
    Returns, Pct_Returns, Volatility, Vol_Adj_Returns, Returns_Momentum,
    Return_Accel, Cum_Returns_10, MACD_Returns, MACD_Signal,
    MA_Diff_Returns, RSI_Returns, Stoch_Returns, Stoch_Returns_MA,
    Vol_Ratio, HL_Range, Williams_R, CCI.

    +/-inf values are converted to NaN before returning; rows are not dropped.
    """
    eps = 1e-8  # added to denominators to avoid division by zero
    out = df.copy()
    close = out['Close']

    # ------------------------------------------------------------------
    # Base series: returns and volatility
    # ------------------------------------------------------------------
    log_ret = np.log(close / close.shift(1))
    out['Returns'] = log_ret                      # 1. log return
    out['Pct_Returns'] = close.pct_change()       # 2. simple percentage return
    volatility = log_ret.rolling(20).std()
    out['Volatility'] = volatility                # 3. 20-row std of log returns
    out['Vol_Adj_Returns'] = log_ret / (volatility + eps)  # 4. return per unit of volatility

    # ------------------------------------------------------------------
    # Momentum on returns
    # ------------------------------------------------------------------
    out['Returns_Momentum'] = log_ret.diff(4)     # 5. change versus 4 rows earlier
    out['Return_Accel'] = log_ret.diff()          # 6. one-row change in return
    out['Cum_Returns_10'] = log_ret.rolling(10).sum()  # 7. 10-row cumulative return

    # ------------------------------------------------------------------
    # Moving-average features on returns
    # ------------------------------------------------------------------
    # 8-9. MACD-style spread of two EMAs of returns, plus its 3-span signal line.
    fast_ema = log_ret.ewm(span=5, adjust=False).mean()
    slow_ema = log_ret.ewm(span=14, adjust=False).mean()
    out['MACD_Returns'] = fast_ema - slow_ema
    out['MACD_Signal'] = out['MACD_Returns'].ewm(span=3, adjust=False).mean()

    # 10. Difference between the 5-row and 10-row simple means of returns.
    out['MA_Diff_Returns'] = log_ret.rolling(5).mean() - log_ret.rolling(10).mean()

    # ------------------------------------------------------------------
    # Oscillators on returns
    # ------------------------------------------------------------------
    # 11. RSI fed with returns (NaN -> 0, scaled by 100) instead of prices.
    out['RSI_Returns'] = ta.momentum.RSIIndicator(
        log_ret.fillna(0) * 100, window=14
    ).rsi()

    # 12-13. Position of the return inside its own 14-row min/max range,
    # in percent, plus a 3-row mean of that value.
    window14 = log_ret.rolling(14)
    low14, high14 = window14.min(), window14.max()
    out['Stoch_Returns'] = (log_ret - low14) / (high14 - low14 + eps) * 100
    out['Stoch_Returns_MA'] = out['Stoch_Returns'].rolling(3).mean()

    # ------------------------------------------------------------------
    # Volatility features
    # ------------------------------------------------------------------
    # 14. Current volatility relative to its own 20-row mean.
    out['Vol_Ratio'] = volatility / (volatility.rolling(20).mean() + eps)

    # 15. High-low range as a fraction of the close.
    out['HL_Range'] = (out['High'] - out['Low']) / (close + eps)

    # ------------------------------------------------------------------
    # Price-based indicators kept from the original feature set
    # ------------------------------------------------------------------
    # 16. Williams %R with a 14-row look-back.
    out['Williams_R'] = ta.momentum.WilliamsRIndicator(
        out['High'], out['Low'], close, lbp=14
    ).williams_r()

    # 17. Commodity Channel Index over 20 rows.
    out['CCI'] = ta.trend.CCIIndicator(
        out['High'], out['Low'], close, window=20
    ).cci()

    return out.replace([np.inf, -np.inf], np.nan)

# Indicator-augmented DataFrame per index; indices that fail are left out.
all_data_indicators = {}
for name, data in all_data.items():
    print(f"\n{name}...", end=" ")
    try:
        result = hesapla_stationary_indicators(data)
        all_data_indicators[name] = result
        print(f"‚úÖ {len(result)} satƒ±r")
    except Exception as e:
        # Best effort: report and continue with the remaining indices.
        print(f"‚ùå {e}")

print("\n" + "=" * 80)
print("‚úÖ Stationary indicators hesaplandƒ±")
print("=" * 80 + "\n")

# ============================================================================
# LEVEL 3: DATA PREPARATION (returns-based features)
# ============================================================================
print("=" * 80)
print("LEVEL 3: VERƒ∞ HAZIRLAMA (Returns-based features)")
print("=" * 80)

def veri_hazirla_stationary(df, test_ratio=0.2):
    """
    Turn an indicator DataFrame into a lag-1 feature matrix with a chronological split.

    Steps, in order: build the binary target, drop rows with missing/infinite
    feature values, min-max scale the features, shift every feature down by one
    row, drop the rows the shift left empty, then cut the rows into a leading
    train part and a trailing test part (no shuffling).

    Args:
        df: DataFrame containing a 'Returns' column plus every column named in
            the feature list below. The input is copied, not modified.
        test_ratio: Fraction of the final rows placed in the test split.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``; the X parts are DataFrames
        of ``<feature>_lag1`` columns, the y parts are NumPy arrays of 0/1.

    Notes:
        - The scaler is fitted on all rows before the split. This leaks test-set
          ranges into training and is kept on purpose to mirror the paper setup.
        - NOTE(review): the target of row t is the sign of 'Returns' at t+1,
          while the features of row t come from row t-1, so there is a two-row
          gap between features and target. Confirm this is intended.
    """
    # Returns-based features
    features = [
        'Returns', 'Pct_Returns', 'Volatility', 'Vol_Adj_Returns',
        'Returns_Momentum', 'Return_Accel', 'Cum_Returns_10',
        'MACD_Returns', 'MACD_Signal', 'MA_Diff_Returns',
        'RSI_Returns', 'Stoch_Returns', 'Stoch_Returns_MA',
        'Vol_Ratio', 'HL_Range', 'Williams_R', 'CCI'
    ]
    lagged_features = [f'{feat}_lag1' for feat in features]

    frame = df.copy()

    # 1) Target: direction of the following row's return (positive = 1, otherwise 0).
    #    The last row has no following return, so it is removed.
    frame['Next_Return'] = frame['Returns'].shift(-1)
    frame['Target'] = (frame['Next_Return'] > 0).astype(int)
    frame = frame.iloc[:-1].copy()

    # 2) Treat infinities as missing and drop incomplete rows.
    frame = frame.replace([np.inf, -np.inf], np.nan)
    frame = frame.dropna(subset=features + ['Target'])

    # 3) Normalize BEFORE lagging (leaks test statistics; kept for comparison with the paper).
    frame[features] = MinMaxScaler().fit_transform(frame[features])

    # 4) Lag every feature by one row.
    for feat, lagged_col in zip(features, lagged_features):
        frame[lagged_col] = frame[feat].shift(1)

    # 5) The shift leaves the first remaining row without lagged values.
    frame = frame.dropna(subset=lagged_features)

    X = frame[lagged_features].copy()
    y = frame['Target'].copy()

    print(f" Veri: {len(X)} satƒ±r | Up: {y.mean()*100:.1f}%")

    # 6) Time-ordered split: leading rows train, trailing rows test.
    n_train = int(len(X) * (1 - test_ratio))
    X_train, X_test = X.iloc[:n_train].copy(), X.iloc[n_train:].copy()
    y_train, y_test = y.iloc[:n_train].values, y.iloc[n_train:].values

    print(f" Train: {len(X_train)} | Test: {len(X_test)}")

    return X_train, X_test, y_train, y_test

# Prepare train/test splits for every index; a failing index is reported and skipped.
prepared_data = {}
for name, data in all_data_indicators.items():
    print(f"\n{name}:")
    try:
        X_train, X_test, y_train, y_test = veri_hazirla_stationary(data)
    except Exception as e:
        print(f" ‚ùå {e}")
    else:
        prepared_data[name] = dict(
            X_train=X_train, X_test=X_test,
            y_train=y_train, y_test=y_test,
        )

print("\n" + "="*80)
print(f"‚úÖ {len(prepared_data)} borsa hazƒ±r (Returns-based)")
print("="*80 + "\n")

# ============================================================================
# LEVEL 4: SVM LINEAR KERNEL (Quick Test)
# ============================================================================
print("="*80, "LEVEL 4: SVM LINEAR KERNEL (Returns-based Test)", "="*80, sep="\n")

def test_svm_linear(X_train, y_train, X_test, y_test, name):
    """
    Tune a linear-kernel SVC over C with 10-fold CV and score it on the test split.

    Args:
        X_train, y_train: Training features and 0/1 labels used for the grid search.
        X_test, y_test: Held-out features and labels used for the final metrics.
        name: Label printed as the heading of this run.

    Returns:
        Dict with keys 'best_C', 'cv_score', 'test_acc', 'precision',
        'recall' and 'f1'.

    NOTE(review): the folds are stratified and shuffled; if the rows are in
    time order, each fold mixes earlier and later samples. Confirm that this
    matches the intended evaluation protocol.
    """
    print(f"\n {name}")
    print(" " + "-"*70)

    # Candidate C values: a few small steps, then 100..1000 in steps of 100.
    c_candidates = [0.001, 0.01, 0.1, 1, 4, 10, 50] + list(range(100, 1001, 100))

    search = GridSearchCV(
        estimator=SVC(kernel='linear', max_iter=50000, random_state=42),
        param_grid={'C': c_candidates},
        cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=42),
        scoring='accuracy',
        n_jobs=-1,
        verbose=0,
    )
    search.fit(X_train, y_train)

    # Score the refit best estimator on the held-out rows.
    y_pred = search.best_estimator_.predict(X_test)

    summary = {
        'best_C': search.best_params_['C'],
        'cv_score': search.best_score_,
        'test_acc': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, zero_division=0),
        'recall': recall_score(y_test, y_pred, zero_division=0),
        'f1': f1_score(y_test, y_pred, zero_division=0),
    }

    print(f" Best C: {summary['best_C']}")
    print(f" CV Score: {summary['cv_score']:.4f} ({summary['cv_score']*100:.2f}%)")
    print(f" Test Acc: {summary['test_acc']:.4f} ({summary['test_acc']*100:.2f}%)")
    print(f" Precision: {summary['precision']:.4f}")
    print(f" Recall: {summary['recall']:.4f}")
    print(f" F1-Score: {summary['f1']:.4f}")

    return summary

# Run the linear-SVM test per prepared index; errors are printed and that index is skipped.
results = {}

for name, data in prepared_data.items():
    try:
        results[name] = test_svm_linear(
            X_train=data['X_train'], y_train=data['y_train'],
            X_test=data['X_test'], y_test=data['y_test'],
            name=name,
        )
    except Exception as e:
        print(f" ‚ùå {name}: {e}")

# ============================================================================
# RESULTS
# ============================================================================
print("\n" + "="*80)
print(" "*20 + "üìä RETURNS-BASED SONU√áLAR")
print("="*80)

print(f"\n {'Index':<15} {'Best C':<10} {'CV Score':<12} {'Test Acc':<12} {'F1-Score':<12}")
print(" " + "-"*70)

# One table row per index whose SVM run finished.
for name, result in results.items():
    print(f" {name:<15} {result['best_C']:<10} {result['cv_score']:<12.4f} {result['test_acc']:<12.4f} {result['f1']:<12.4f}")

# Unweighted averages across indices; skipped when no run succeeded.
if results:
    avg_cv, avg_test, avg_f1 = (
        np.mean([r[metric] for r in results.values()])
        for metric in ('cv_score', 'test_acc', 'f1')
    )

    print(" " + "-"*70)
    print(f" {'AVERAGE':<15} {'-':<10} {avg_cv:<12.4f} {avg_test:<12.4f} {avg_f1:<12.4f}")

# Reference accuracies printed for comparison with the paper (hard-coded values).
print("\n " + "="*80)
print(" üéØ MAKALE SONU√áLARI (Table 11)")
print(" " + "="*80 + "\n")
print(
    " Index       Linear SVM (Paper)",
    " " + "-"*35,
    " KSE-100     0.8519",
    " KOSPI       0.8022",
    " Nikkei 225  0.8022",
    " SZSE        0.8998",
    sep="\n",
)

print("\n" + "="*80)
print("‚úÖ RETURNS-BASED ANALƒ∞Z TAMAMLANDI")
print("="*80)

# Closing notes: what this version changed, and what to try if accuracy stays low.
print(
    "\nüí° KRƒ∞Tƒ∞K FARK:",
    " ‚úÖ Returns-based stationary features kullanƒ±ldƒ±",
    " ‚úÖ Log returns ‚Üí Non-stationary problem √ß√∂z√ºld√º",
    " ‚úÖ Volatility adjustment eklendi",
    " ‚úÖ Financial time series best practices uygulandƒ±",
    sep="\n",
)
print(
    "\nüî¨ Eƒüer sonu√ßlar hala d√º≈ü√ºkse:",
    " ‚Üí Veri kalitesi problemi (Yahoo Finance vs Professional data)",
    " ‚Üí Makale metodolojisinde gizli adƒ±mlar olabilir",
    " ‚Üí Farklƒ± model (Random Forest, Neural Net) denenebilir",
    sep="\n",
)

📦 VERSİYON 4: Returns-Based Stationary Approach
✅ Kurulum tamamlandı!

LEVEL 1: VERİ ÇEKME (2011-01-01 to 2020-09-27)

📊 KSE100 (^KSE)... ✅ 2346 gün

📊 KOSPI (^KS11)... ✅ 2397 gün

📊 Nikkei225 (^N225)... ✅ 2382 gün

📊 SZSE (000001.SS)... ✅ 2366 gün

✅ 4 borsa çekildi

LEVEL 2: RETURNS-BASED STATIONARY INDICATORS

KSE100... ✅ 2346 satır

KOSPI... ✅ 2397 satır

Nikkei225... ✅ 2382 satır

SZSE... ✅ 2366 satır

✅ Stationary indicators hesaplandı

LEVEL 3: VERİ HAZIRLAMA (Returns-based features)

KSE100:
 Veri: 2305 satır | Up: 53.6%
 Train: 1844 | Test: 461

KOSPI:
 Veri: 2356 satır | Up: 52.5%
 Train: 1884 | Test: 472

Nikkei225:
 Veri: 2341 satır | Up: 53.0%
 Train: 1872 | Test: 469

SZSE:
 Veri: 2325 satır | Up: 52.7%
 Train: 1860 | Test: 465

✅ 4 borsa hazır (Returns-based)

LEVEL 4: SVM LINEAR KERNEL (Returns-based Test)

 KSE100
 ----------------------------------------------------------------------
 Best C: 0.001
 CV Score: 

In [None]:
"""
============================================================================
VERSION 5: MULTIPLE LAGS APPROACH
============================================================================
Hypothesis: We should use MULTIPLE LAGS for each indicator, not JUST 1 lag!
            An SVM does not understand time series, so we have to teach it
            the temporal pattern through lags!

What we learned from Medium:
"SVMs do not inherently understand time series data"
→ Solution: Create multiple lag features (lag_1, lag_2, ..., lag_N)

Change:
- OLD: 15 indicators × 1 lag = 15 features
- NEW: 15 indicators × 10 lags = 150 features!
============================================================================
"""

# Cell setup: install the third-party packages with pip, then import them.
import sys
import subprocess
print("üì¶ VERSƒ∞YON 5: Multiple Lags (10 lags per indicator)")
# Install through the interpreter that runs this cell (sys.executable), quietly (-q).
# check_call raises CalledProcessError if pip exits with a non-zero status.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                      "yfinance", "ta", "scikit-learn", "pandas", "numpy"])

import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
# NOTE(review): this silences every warning for the rest of the session, which
# would also hide library warnings raised during indicator computation or model
# fitting. Confirm that this is wanted.
warnings.filterwarnings('ignore')

print("‚úÖ Kurulum tamamlandƒ±!\n")

# ============================================================================
# LEVEL 1: DATA DOWNLOAD
# ============================================================================
print("="*80, "LEVEL 1: VERƒ∞ √áEKME (2011-01-01 to 2020-09-27)", "="*80, sep="\n")

# Display name -> ticker symbol passed to yfinance.
tickers = dict(
    KSE100='^KSE',
    KOSPI='^KS11',
    Nikkei225='^N225',
    SZSE='000001.SS',
)

# Download each index; an empty result or any error is reported and that index is skipped.
all_data = {}
for name, ticker in tickers.items():
    print(f"\nüìä {name} ({ticker})...", end=" ")
    try:
        data = yf.download(ticker, start="2011-01-01", end="2020-09-27",
                           progress=False, auto_adjust=True)

        if not data.empty:
            # When the columns come back as a MultiIndex, keep only the first
            # level (presumably the price-field names — verify against yfinance).
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)

            # Keep the OHLCV columns and drop rows with any missing value.
            data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy().dropna()

            all_data[name] = data
            print(f"‚úÖ {len(data)} g√ºn")
        else:
            print("‚ùå VERƒ∞ YOK!")
    except Exception as e:
        print(f"‚ùå {e}")

print("\n" + "="*80)
print(f"‚úÖ {len(all_data)} borsa √ßekildi")
print("="*80 + "\n")

# ============================================================================
# LEVEL 2: TECHNICAL INDICATORS (Table 1 - Original)
# ============================================================================
print("="*80, "LEVEL 2: 15 TEKNƒ∞K G√ñSTERGE (Table 1 - Original)", "="*80, sep="\n")

def hesapla_teknik_gostergeler(df):
    """
    Add the 15 price-based technical indicator columns (the paper's Table 1 set).

    Args:
        df: DataFrame with 'High', 'Low' and 'Close' columns. The input is
            copied, not modified.

    Returns:
        A copy of ``df`` with these columns added: Stochastic_K, Stochastic_D,
        ROC, Williams_R, Momentum, Disparity_5, Disparity_14, OSCP, CCI, RSI,
        Pivot_Point, S1, S2, R1, R2. Infinite values anywhere in the frame are
        replaced by NaN; warm-up rows of the rolling windows stay NaN.
    """
    out = df.copy()
    high, low, close = (out[col].squeeze() for col in ('High', 'Low', 'Close'))

    # 1-2. Stochastic oscillator: %K over 14 rows, %D as its 3-row smoothing
    stochastic = ta.momentum.StochasticOscillator(
        high, low, close, window=14, smooth_window=3
    )
    out['Stochastic_K'] = stochastic.stoch()
    out['Stochastic_D'] = stochastic.stoch_signal()

    # 3. Rate of change over 10 rows
    out['ROC'] = ta.momentum.ROCIndicator(close, window=10).roc()

    # 4. Williams %R with a 14-row lookback
    out['Williams_R'] = ta.momentum.WilliamsRIndicator(
        high, low, close, lbp=14
    ).williams_r()

    # 5. Momentum: C_t - C_{t-4}
    out['Momentum'] = close.diff(4)

    # Simple moving averages shared by the disparity and price-oscillator columns
    sma5, sma10, sma14 = (close.rolling(span).mean() for span in (5, 10, 14))

    # 6-7. Disparity: close as a percentage of its moving average (100 where the average is 0)
    out['Disparity_5'] = np.where(sma5 != 0, (close / sma5) * 100, 100)
    out['Disparity_14'] = np.where(sma14 != 0, (close / sma14) * 100, 100)

    # 8. OSCP (price oscillator): relative gap between the 5- and 10-row averages
    out['OSCP'] = np.where(sma5 != 0, ((sma5 - sma10) / sma5), 0)

    # 9. CCI over 20 rows
    out['CCI'] = ta.trend.CCIIndicator(high, low, close, window=20).cci()

    # 10. RSI over 14 rows
    out['RSI'] = ta.momentum.RSIIndicator(close, window=14).rsi()

    # 11-15. Pivot point and support/resistance levels, all taken from the previous row's prices
    prev_high, prev_low, prev_close = high.shift(1), low.shift(1), close.shift(1)
    pivot = (prev_high + prev_low + prev_close) / 3
    prev_range = prev_high - prev_low

    out['Pivot_Point'] = pivot
    out['S1'] = (pivot * 2) - prev_high
    out['S2'] = pivot - prev_range
    out['R1'] = (pivot * 2) - prev_low
    out['R2'] = pivot + prev_range

    return out.replace([np.inf, -np.inf], np.nan)

# Compute the technical indicators for every downloaded index.
# A failure on one index is reported and skipped so the remaining ones still run.
all_data_indicators = {}
for name, data in all_data.items():
    print(f"\n{name}...", end=" ")
    try:
        all_data_indicators[name] = hesapla_teknik_gostergeler(data)
        print(f"‚úÖ {len(all_data_indicators[name])} satƒ±r")
    except Exception as e:
        print(f"‚ùå {e}")

print("\n" + "="*80)
print("‚úÖ G√∂stergeler hesaplandƒ±")
print("="*80 + "\n")

# ============================================================================
# LEVEL 3: MULTIPLE LAGS FEATURE ENGINEERING
# ============================================================================
print("="*80, "LEVEL 3: MULTIPLE LAGS FEATURE ENGINEERING", "="*80, sep="\n")

def veri_hazirla_multiple_lags(df, test_ratio=0.2, n_lags=10):
    """
    Build a multi-lag feature matrix and a chronological train/test split.

    For every base indicator, columns ``<indicator>_lag1`` through
    ``<indicator>_lag<n_lags>`` are created by shifting the min-max scaled
    indicator down by 1..n_lags rows (e.g. RSI -> RSI_lag1, ..., RSI_lag10).
    With the 15 base indicators and the default ``n_lags=10`` this yields
    150 feature columns.

    Args:
        df: DataFrame containing a 'Close' column plus every column named in
            the base indicator list below. The input is copied, not modified.
        test_ratio: Fraction of the final rows placed in the test split.
        n_lags: Number of lagged copies to create per indicator (>= 1).

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``; the X parts are DataFrames
        of lagged feature columns, the y parts are NumPy arrays of 0/1.

    Raises:
        ValueError: If ``n_lags`` is smaller than 1 (there would be no features).

    Notes:
        - The target of row t is 1 when Close[t+1] > Close[t]; the features of
          row t come from rows t-1 .. t-n_lags (row t itself is not used).
        - The scaler is fitted on all rows before the split. This leaks test-set
          ranges into training and is kept on purpose to mirror the paper setup.
    """
    if n_lags < 1:
        raise ValueError(f"n_lags must be >= 1, got {n_lags}")

    df = df.copy()

    # Base indicators
    base_features = [
        'Stochastic_K', 'Stochastic_D', 'ROC', 'Williams_R',
        'Momentum', 'Disparity_5', 'Disparity_14', 'OSCP',
        'CCI', 'RSI', 'Pivot_Point', 'S1', 'S2', 'R1', 'R2'
    ]

    # 1) Target: next-row direction. The last row has no next close, so drop it.
    df['Next_Close'] = df['Close'].shift(-1)
    df['Target'] = (df['Next_Close'] > df['Close']).astype(int)
    df = df.iloc[:-1].copy()

    # 2) Treat infinities as missing and drop incomplete rows.
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(subset=base_features + ['Target'])

    # 3) Normalize BEFORE lagging (leaks test statistics; kept for comparison with the paper).
    scaler = MinMaxScaler()
    df[base_features] = scaler.fit_transform(df[base_features])

    # 4) Create the lagged columns.
    print(f" Creating {n_lags} lags for each of {len(base_features)} indicators...")

    # Build every shifted column first and attach them in a single concat.
    # Inserting 150 columns one at a time fragments the DataFrame's internal
    # blocks (pandas emits a PerformanceWarning for exactly this pattern).
    lagged_columns = {
        f'{indicator}_lag{lag}': df[indicator].shift(lag)
        for indicator in base_features
        for lag in range(1, n_lags + 1)
    }
    lagged_features = list(lagged_columns)
    df = pd.concat([df, pd.DataFrame(lagged_columns, index=df.index)], axis=1)

    print(f" Total lagged features created: {len(lagged_features)}")

    # 5) Shifting leaves the first n_lags remaining rows incomplete; drop them.
    df = df.dropna(subset=lagged_features)

    X = df[lagged_features].copy()
    y = df['Target'].copy()

    print(f" Veri: {len(X)} satƒ±r | Features: {X.shape[1]} | Up: {y.mean()*100:.1f}%")

    # 6) Time-ordered split: leading rows train, trailing rows test.
    n_train = int(len(X) * (1 - test_ratio))
    X_train = X.iloc[:n_train].copy()
    X_test = X.iloc[n_train:].copy()
    y_train = y.iloc[:n_train].values
    y_test = y.iloc[n_train:].values

    print(f" Train: {len(X_train)} | Test: {len(X_test)}")

    return X_train, X_test, y_train, y_test

# Build the multi-lag train/test splits for every index; a failing index is reported and skipped.
prepared_data = {}
for name, data in all_data_indicators.items():
    print(f"\n{name}:")
    try:
        X_train, X_test, y_train, y_test = veri_hazirla_multiple_lags(data, n_lags=10)
    except Exception as e:
        print(f" ‚ùå {e}")
    else:
        prepared_data[name] = dict(
            X_train=X_train, X_test=X_test,
            y_train=y_train, y_test=y_test,
        )

print("\n" + "="*80)
print(f"‚úÖ {len(prepared_data)} borsa hazƒ±r (Multiple lags)")
print("="*80 + "\n")

# ============================================================================
# LEVEL 4: SVM LINEAR KERNEL
# ============================================================================
print("="*80, "LEVEL 4: SVM LINEAR KERNEL (Multiple Lags Test)", "="*80, sep="\n")

def test_svm_linear(X_train, y_train, X_test, y_test, name):
    """
    Grid-search C for a linear-kernel SVC (10-fold CV) and evaluate on the test split.

    Args:
        X_train, y_train: Training features and 0/1 labels used for the grid search.
        X_test, y_test: Held-out features and labels used for the final metrics.
        name: Label printed as the heading of this run.

    Returns:
        Dict with keys 'best_C', 'cv_score', 'test_acc', 'precision',
        'recall' and 'f1'.

    NOTE(review): the folds are stratified and shuffled; if the rows are in
    time order, each fold mixes earlier and later samples. Confirm that this
    matches the intended evaluation protocol.
    """
    print(f"\n {name}")
    print(" " + "-"*70)

    # Candidate C values: a few small steps, then 100..1000 in steps of 100.
    small_steps = [0.001, 0.01, 0.1, 1, 4, 10, 50]
    hundreds = list(range(100, 1001, 100))

    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    base_model = SVC(kernel='linear', max_iter=50000, random_state=42)

    search = GridSearchCV(
        base_model,
        {'C': small_steps + hundreds},
        cv=folds,
        scoring='accuracy',
        n_jobs=-1,
        verbose=0,
    )
    search.fit(X_train, y_train)

    # Predict with the refit best estimator and collect the test metrics.
    y_pred = search.best_estimator_.predict(X_test)
    outcome = {
        'best_C': search.best_params_['C'],
        'cv_score': search.best_score_,
        'test_acc': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, zero_division=0),
        'recall': recall_score(y_test, y_pred, zero_division=0),
        'f1': f1_score(y_test, y_pred, zero_division=0),
    }

    print(f" Best C: {outcome['best_C']}")
    print(f" CV Score: {outcome['cv_score']:.4f} ({outcome['cv_score']*100:.2f}%)")
    print(f" Test Acc: {outcome['test_acc']:.4f} ({outcome['test_acc']*100:.2f}%)")
    print(f" Precision: {outcome['precision']:.4f}")
    print(f" Recall: {outcome['recall']:.4f}")
    print(f" F1-Score: {outcome['f1']:.4f}")

    return outcome

# Run the linear-SVM test per prepared index; errors are printed and that index is skipped.
results = {}

for name, data in prepared_data.items():
    try:
        results[name] = test_svm_linear(
            X_train=data['X_train'], y_train=data['y_train'],
            X_test=data['X_test'], y_test=data['y_test'],
            name=name,
        )
    except Exception as e:
        print(f" ‚ùå {name}: {e}")

# ============================================================================
# RESULTS
# ============================================================================
print("\n" + "="*80)
print(" "*20 + "üìä MULTIPLE LAGS SONU√áLAR")
print("="*80)

print(f"\n {'Index':<15} {'Best C':<10} {'CV Score':<12} {'Test Acc':<12} {'F1-Score':<12}")
print(" " + "-"*70)

# One table row per index whose SVM run finished.
for name, result in results.items():
    print(f" {name:<15} {result['best_C']:<10} {result['cv_score']:<12.4f} {result['test_acc']:<12.4f} {result['f1']:<12.4f}")

# Unweighted averages across indices; skipped when no run succeeded.
if results:
    avg_cv, avg_test, avg_f1 = (
        np.mean([r[metric] for r in results.values()])
        for metric in ('cv_score', 'test_acc', 'f1')
    )

    print(" " + "-"*70)
    print(f" {'AVERAGE':<15} {'-':<10} {avg_cv:<12.4f} {avg_test:<12.4f} {avg_f1:<12.4f}")

# Comparison with earlier versions (their averages are hard-coded here).
print("\n " + "="*80)
print(" üìà VERSƒ∞YON KAR≈ûILA≈ûTIRMASI")
print(" " + "="*80 + "\n")

print(
    " Versiyon                        Avg Test Acc",
    " " + "-"*50,
    " V1: Norm LAG √∂ncesi (1 lag)    0.5630",
    " V2: Target same-day             0.5147",
    " V3: Pivot double-lag fix        0.5630",
    " V4: Returns-based               0.5323",
    sep="\n",
)
# The V5 row needs this run's average, so it is only printed when results exist.
if results:
    print(f" V5: Multiple lags (10)          {avg_test:.4f}")

# Reference accuracies printed for comparison with the paper (hard-coded values).
print("\n " + "="*80)
print(" üéØ MAKALE SONU√áLARI (Table 11)")
print(" " + "="*80 + "\n")
print(
    " Index       Linear SVM (Paper)",
    " " + "-"*35,
    " KSE-100     0.8519",
    " KOSPI       0.8022",
    " Nikkei 225  0.8022",
    " SZSE        0.8998",
    sep="\n",
)

print("\n" + "="*80)
print("‚úÖ MULTIPLE LAGS ANALƒ∞Z TAMAMLANDI")
print("="*80)

# Closing notes: what this version changed, and how to read the outcome.
print(
    "\nüí° KRƒ∞Tƒ∞K FARK:",
    " ‚úÖ Her indicator i√ßin 10 lag kullanƒ±ldƒ± (150 features)",
    " ‚úÖ Temporal pattern'leri yakalamak i√ßin SVM'e 'ge√ßmi≈ü' verildi",
    " ‚úÖ Medium makalesindeki 'lag features' yakla≈üƒ±mƒ± uygulandƒ±",
    sep="\n",
)
print(
    "\nüî¨ Sonu√ßlarƒ±n yorumu:",
    " ‚Üí Eƒüer accuracy ARTTI: Multiple lags i≈üe yarƒ±yor! ‚úÖ",
    " ‚Üí Eƒüer accuracy AYNI: Problem ba≈üka yerde (veri kalitesi)",
    " ‚Üí Eƒüer accuracy D√ú≈ûT√ú: Overfitting (150 feature √ßok fazla)",
    sep="\n",
)

📦 VERSİYON 5: Multiple Lags (10 lags per indicator)
✅ Kurulum tamamlandı!

LEVEL 1: VERİ ÇEKME (2011-01-01 to 2020-09-27)

📊 KSE100 (^KSE)... ✅ 2346 gün

📊 KOSPI (^KS11)... ✅ 2397 gün

📊 Nikkei225 (^N225)... ✅ 2382 gün

📊 SZSE (000001.SS)... ✅ 2366 gün

✅ 4 borsa çekildi

LEVEL 2: 15 TEKNİK GÖSTERGE (Table 1 - Original)

KSE100... ✅ 2346 satır

KOSPI... ✅ 2397 satır

Nikkei225... ✅ 2382 satır

SZSE... ✅ 2366 satır

✅ Göstergeler hesaplandı

LEVEL 3: MULTIPLE LAGS FEATURE ENGINEERING

KSE100:
 Creating 10 lags for each of 15 indicators...
 Total lagged features created: 150
 Veri: 2316 satır | Features: 150 | Up: 53.6%
 Train: 1852 | Test: 464

KOSPI:
 Creating 10 lags for each of 15 indicators...
 Total lagged features created: 150
 Veri: 2367 satır | Features: 150 | Up: 52.4%
 Train: 1893 | Test: 474

Nikkei225:
 Creating 10 lags for each of 15 indicators...
 Total lagged features created: 150
 Veri: 2352 satƒ±r | Features: 150 |