Find the best combination of 3 features

In [None]:
import pandas as pd
import numpy as np
from itertools import combinations
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

# === Load the Excel workbook ===
file_path = r"C:\Users\41799\Desktop\Kopie von market_data.xlsx"
df = pd.read_excel(file_path)

# Strip stray whitespace from string headers so that a column stored as
# "MOV " (or "MOV") in the workbook matches the feature names listed below.
df = df.rename(columns=lambda name: name.strip() if isinstance(name, str) else name)

# === Target variable & candidate features for the CNN ===
target_col = "_MKT"
allowed_features = [
    "GDP", "UN", "CPI", "M2",          # macro trends
    "Y02", "STP", "IR", "RR",          # short-term & dynamic interest rates
    "DIL", "MOV", "NYF",               # sentiment / volatility
    "_TY", "_OIL", "_DXY", "_LCP", "_AU"  # markets
]

# === Date handling: parse, sort chronologically, use as index ===
if "Date" in df.columns:
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.sort_values("Date")
    df = df.set_index("Date")

# === Keep numeric columns only and drop incomplete rows ===
df = df.select_dtypes(include=["number"]).dropna()

# === Split: first 15% of the rows for training, the rest for validation ===
split_index = int(len(df) * 0.15)

# === Normalise ===
# The scaler is fitted on the training rows only; fitting it on the full frame
# would leak the validation min/max into the training data. Validation values
# can therefore fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(df.iloc[:split_index])
df_scaled = pd.DataFrame(scaler.transform(df), columns=df.columns, index=df.index)

train_df = df_scaled.iloc[:split_index]
val_df = df_scaled.iloc[split_index:]

# === Build sliding-window samples from a time series ===
def create_dataset(X, y, seq_len=5):
    """Slice X into overlapping windows and pair each with the next y value.

    Window k covers X[k:k + seq_len]; its label is y[k + seq_len].

    Returns:
        (windows, labels) as NumPy arrays. Both are empty arrays when
        len(X) <= seq_len.
    """
    window_starts = range(len(X) - seq_len)
    windows = [X[start:start + seq_len] for start in window_starts]
    labels = [y[start + seq_len] for start in window_starts]
    return np.array(windows), np.array(labels)

# === Evaluate a small CNN on every 3-feature combination ===
# Seed NumPy and TensorFlow so repeated runs start from the same random state.
np.random.seed(42)
tf.random.set_seed(42)

# Report features that are absent from the data once, instead of failing
# separately for every combination that contains them.
missing_features = [name for name in allowed_features if name not in train_df.columns]
if missing_features:
    print(f"⚠️ Übersprungen (Spalte fehlt): {missing_features}")
candidate_features = [name for name in allowed_features if name not in missing_features]

results = []
for combo in combinations(candidate_features, 3):
    combo = list(combo)
    # Hundreds of models are built in this loop; clearing the Keras backend
    # state between them keeps memory use from growing with every iteration.
    tf.keras.backend.clear_session()
    try:
        X_train, y_train = create_dataset(train_df[combo].values, train_df[target_col].values)
        X_val, y_val = create_dataset(val_df[combo].values, val_df[target_col].values)

        # One Conv1D layer over the time axis, followed by a small dense head
        # that outputs a single regression value.
        model = tf.keras.Sequential([
            tf.keras.layers.Conv1D(32, kernel_size=2, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(1)
        ])
        model.compile(optimizer="adam", loss="mse")
        early_stop = tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

        history = model.fit(X_train, y_train,
                            validation_data=(X_val, y_val),
                            epochs=50,
                            batch_size=16,
                            verbose=0,
                            callbacks=[early_stop])

        # Score the combination by its best validation loss across epochs.
        val_loss = min(history.history["val_loss"])
        results.append((combo, val_loss))
        print(f"✅ Getestet: {combo} | val_loss: {val_loss:.5f}")

    except Exception as e:
        # Best effort: a failing combination is reported and skipped so the
        # remaining combinations are still evaluated.
        print(f"⚠️ Fehler bei Kombination {combo}: {str(e)}")

# === Show the five best combinations (lowest validation loss first) ===
results.sort(key=lambda x: x[1])
print("\n🏆 Beste 5 Kombinationen mit genau 3 Features:")
for i, (combo, loss) in enumerate(results[:5], 1):
    print(f"{i}. {combo} ➞ val_loss: {loss:.5f}")

1. ['Y02', '_TY', '_AU'] ➞ val_loss: 0.01596
2. ['GDP', '_TY', '_DXY'] ➞ val_loss: 0.03067
3. ['IR', '_TY', '_LCP'] ➞ val_loss: 0.03293
4. ['UN', '_TY', '_OIL'] ➞ val_loss: 0.03300
5. ['Y02', 'IR', '_TY'] ➞ val_loss: 0.03404


Find the best configuration for each feature combination


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import pearsonr
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, GlobalAveragePooling1D, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from itertools import product
import random
from sklearn.linear_model import LinearRegression

# === Fixed seeds for reproducibility (Python, NumPy, TensorFlow) ===
tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)

# === Load the Excel data: drop incomplete rows, parse dates, sort by date ===
df = (
    pd.read_excel(r"C:\Users\41799\Desktop\Kopie von market_data.xlsx")
    .dropna()
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .sort_values("Date")
    .reset_index(drop=True)
)

# === Target column and numeric-only view of the data ===
target_col = "_MKT"
df_numeric = df.select_dtypes(include=[np.number])

# === Feature combinations to test ===
combinations_to_test = [
    ['Y02', '_TY', '_AU'], ['GDP', '_TY', '_DXY'],
    ['IR', '_TY', '_LCP'], ['UN', '_TY', '_OIL'],
    ['Y02', 'IR', '_TY'],
]

# === WindowGenerator: cuts a time series into (input, label) windows ===
class WindowGenerator():
    """Turn a 2-D (time, column) array into batched (inputs, labels) windows.

    Each window spans ``input_width + shift`` consecutive rows. The first
    ``input_width`` rows are the inputs; the last ``label_width`` rows are the
    labels.
    """

    def __init__(self, input_width, label_width, shift, input_columns=None, label_columns=None, df_train=None):
        """Store the window geometry and the column lookups.

        Args:
            input_width: Number of time steps fed to the model.
            label_width: Number of time steps taken as labels (from the window end).
            shift: Number of steps added after the inputs to form the full window.
            input_columns: Optional list of column names to keep as inputs.
            label_columns: Optional list of column names to keep as labels.
            df_train: Optional frame whose column order defines the
                name -> position lookup used by ``split_window``.
        """
        self.label_columns = label_columns
        self.input_columns = input_columns
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift
        self.input_slice = slice(0, input_width)
        self.label_start = self.total_window_size - self.label_width

        # The lookups are always created (empty without df_train), so the
        # object is never left half-initialised. Previously the attributes were
        # missing unless df_train was passed, which surfaced later as an
        # AttributeError.
        column_positions = {}
        if df_train is not None:
            column_positions = {name: i for i, name in enumerate(df_train.columns)}
        self.train_input_indices = column_positions
        self.train_label_indices = dict(column_positions)

    def split_window(self, features):
        """Split a batch of windows into (inputs, labels).

        ``features`` is indexed as [batch, time, column]. When column lists
        were given, only those columns are kept, in the listed order. A name
        that is not in the lookup raises KeyError.
        """
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.label_start:, :]
        if self.input_columns:
            inputs = tf.stack([inputs[:, :, self.train_input_indices[name]] for name in self.input_columns], axis=-1)
        if self.label_columns:
            labels = tf.stack([labels[:, :, self.train_label_indices[name]] for name in self.label_columns], axis=-1)
        return inputs, labels

    def make_dataset(self, data, shuffle=False, batchsize=64):
        """Build a batched dataset of (inputs, labels) windows from ``data``.

        ``data`` is converted to a float32 array; its column order must match
        the ``df_train`` passed to the constructor when column names are used.
        """
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            sampling_rate=1,
            shuffle=shuffle,
            batch_size=batchsize
        )
        return ds.map(self.split_window)

# === Hyperparameter search space ===
hyperparams = list(product(
    [10, 20, 30, 45],                    # input window sizes
    [(2, 3, 5), (3, 5, 7), (2, 4, 6)],   # kernel sizes per layer
    [(32, 64, 128)],                    # filters per Conv1D layer
    [(0.1, 0.3), (0.2, 0.4)],           # dropout values
    [32, 64, 128]                  # number of neurons in dense layer
))
# Randomly keep at most 30 configurations; capping at the grid size keeps
# random.sample from raising if the grid above is made smaller.
hyperparams = random.sample(hyperparams, min(30, len(hyperparams)))

# === Collected results: (features, val_loss, corr, config) per combination ===
final_results = []

# === Test the sampled configurations for each feature combination ===
for features in combinations_to_test:
    print(f"\n🧪 Testing combination: {features}")
    selected_cols = features + [target_col]
    data = df_numeric[selected_cols].copy()

    # Chronological 80/20 split. The scaler is fitted on the training rows
    # only, so validation min/max do not leak into training.
    split = int(len(data) * 0.8)
    scaler = MinMaxScaler()
    scaler.fit(data.iloc[:split])
    data_scaled = pd.DataFrame(scaler.transform(data), columns=selected_cols)

    train_df = data_scaled[:split]
    val_df = data_scaled[split:]

    best_loss = np.inf
    best_model = None
    best_config = None
    best_corr = -1

    for input_width, ksizes, filters, drops, dense in hyperparams:
        # Many models are built in this loop; clear the Keras backend state
        # so memory does not grow with every configuration.
        tf.keras.backend.clear_session()

        window = WindowGenerator(input_width=input_width, label_width=1, shift=1,
                                 input_columns=features, label_columns=[target_col], df_train=train_df)
        train_data = window.make_dataset(train_df, shuffle=True)
        # Validation data is not shuffled, so predictions and labels read from
        # it below come out in the same order.
        val_data = window.make_dataset(val_df)

        # Three causal Conv1D stages, global average pooling over time, then a
        # dense head producing one regression value.
        model = Sequential([
            Conv1D(filters[0], kernel_size=ksizes[0], activation='relu', padding='causal'),
            BatchNormalization(),
            Dropout(drops[0]),

            Conv1D(filters[1], kernel_size=ksizes[1], activation='relu', padding='causal'),
            BatchNormalization(),
            Dropout(drops[1]),

            Conv1D(filters[2], kernel_size=ksizes[2], activation='relu', padding='causal'),
            BatchNormalization(),

            GlobalAveragePooling1D(),
            Dense(dense, activation='relu'),
            Dropout(0.2),
            Dense(1)
        ])

        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss='mse')
        early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

        history = model.fit(
            train_data,
            validation_data=val_data,
            epochs=50,
            callbacks=[early_stop],
            verbose=0
        )

        # verbose=0 keeps the per-configuration progress bars out of the output.
        y_pred_val = model.predict(val_data, verbose=0)
        y_true_val = np.concatenate([y for x, y in val_data], axis=0)

        # Reduce (batch, time, 1) arrays to the last time step.
        if y_pred_val.ndim == 3:
            y_pred_val = y_pred_val[:, -1, :]
        if y_true_val.ndim == 3:
            y_true_val = y_true_val[:, -1, :]

        corr, _ = pearsonr(np.ravel(y_true_val), np.ravel(y_pred_val))
        val_loss = min(history.history['val_loss'])

        # A NaN correlation (e.g. constant predictions) can never win the
        # comparison below, so skip it explicitly.
        if np.isnan(corr):
            continue

        # Rank by correlation first; validation loss only breaks exact ties.
        if corr > best_corr or (corr == best_corr and val_loss < best_loss):
            best_loss = val_loss
            best_model = model
            best_config = (input_width, ksizes, filters, drops, dense)
            best_corr = corr

    print(f"✅ Best Config: {best_config} | val_loss: {best_loss:.5f} | corr: {best_corr:.3f}")
    final_results.append((features, best_loss, best_corr, best_config))

# === Show the results sorted by correlation (highest first) ===
final_results.sort(key=lambda x: -x[2])
print("\n🏆 Combinations ranked by validation correlation:")
for rank, (ranked_features, ranked_loss, ranked_corr, ranked_config) in enumerate(final_results, 1):
    print(f"{rank}. {ranked_features} | corr: {ranked_corr:.3f} | val_loss: {ranked_loss:.5f} | config: {ranked_config}")

Testing combination: ['Y02', '_TY', '_AU']
Best Config: (10, (3, 5, 7), (32, 64, 128), (0.2, 0.4), 128) | val_loss: 0.21160 | corr: 0.855
Testing combination: ['GDP', '_TY', '_DXY']
Best Config: (45, (3, 5, 7), (32, 64, 128), (0.1, 0.3), 128) | val_loss: 0.24084 | corr: 0.535
Testing combination: ['IR', '_TY', '_LCP']
Best Config: (45, (3, 5, 7), (32, 64, 128), (0.2, 0.4), 128) | val_loss: 0.18293 | corr: 0.851
Testing combination: ['UN', '_TY', '_OIL']
Best Config: (30, (2, 4, 6), (32, 64, 128), (0.2, 0.4), 128) | val_loss: 0.32756 | corr: 0.747
Testing combination: ['Y02', 'IR', '_TY']
Best Config: (45, (3, 5, 7), (32, 64, 128), (0.2, 0.4), 64) | val_loss: 0.37596 | corr: 0.872




Now we select only ['Y02', '_TY', '_AU'] and ['UN', '_TY', '_OIL'], because they are the two best ones.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from scipy.stats import spearmanr

# === Load the Excel file ===
file_path = r"C:\Users\41799\Desktop\Kopie von market_data.xlsx"
df = pd.read_excel(file_path)

# === Experiment configurations ===
# Each config is (input window, kernel sizes, filters, dropouts, dense units),
# i.e. the tuple layout produced by the hyperparameter search. The second
# entry uses the configuration recorded for ['UN', '_TY', '_OIL'] in that
# search; it previously carried the configuration of a different combination.
experiments = [
    {
        "features": ['Y02', '_TY', '_AU'],
        "config": (10, (3, 5, 7), (32, 64, 128), (0.2, 0.4), 128)
    },
    {
        "features": ['UN', '_TY', '_OIL'],
        "config": (30, (2, 4, 6), (32, 64, 128), (0.2, 0.4), 128)
    }
]

target = '_MKT'

# Mini-batch size for training. The fifth config entry is the dense-layer
# width, not a batch size, so the batch size is set explicitly here.
batch_size = 64

for exp in experiments:
    features = exp["features"]
    seq_len, kernel_sizes, filters, dropouts, dense_units = exp["config"]

    print("=" * 70)
    print(f"🔍 Features: {features}")
    print(f"🧠 CNN Config: Kernels={kernel_sizes}, Filters={filters}, Dropouts={dropouts}, Seq={seq_len}")
    print("=" * 70)

    # --- Prepare data ---
    df_clean = df[features + [target]].dropna().reset_index(drop=True)
    df_ret = df_clean.diff().dropna()

    # Keep the raw first difference of the target for the backtest and derive
    # the up/down label from it. Previously the label overwrote the target
    # column and the 0/1 labels were then used as "returns".
    # NOTE(review): assumes _MKT is a level series, so diff() is the
    # per-period change; use pct_change() for percentage returns -- confirm.
    market_change = df_ret[target].to_numpy(dtype=float)
    labels = (market_change > 0).astype(int)

    # --- Split (chronological 70/30 over the windows) ---
    n_windows = len(df_ret) - seq_len
    split = int(n_windows * 0.7)
    if split < 1:
        print(f"⚠️ Skipping {features}: not enough rows for Seq={seq_len}")
        continue

    # Fit the scaler only on rows touched by training windows; the last
    # training window ends at row split + seq_len - 2.
    scaler = StandardScaler()
    scaler.fit(df_ret[features].iloc[:split + seq_len - 1])
    scaled = scaler.transform(df_ret[features])

    # Window i holds rows i .. i + seq_len - 1; its label and its market
    # change are those of row i + seq_len (the next step).
    X = np.stack([scaled[i:i + seq_len] for i in range(n_windows)])
    y = labels[seq_len:].astype(float)
    next_change = market_change[seq_len:]

    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]
    true_returns = next_change[split:]

    # --- Model ---
    model = Sequential()
    model.add(Conv1D(filters=filters[0], kernel_size=kernel_sizes[0], padding='same', activation='relu',
                     input_shape=(seq_len, len(features)), kernel_regularizer=l2(0.003)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(dropouts[0]))
    model.add(Conv1D(filters=filters[1], kernel_size=kernel_sizes[1], padding='same', activation='relu',
                     kernel_regularizer=l2(0.003)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(dropouts[1]))
    model.add(Conv1D(filters=filters[2], kernel_size=kernel_sizes[2], padding='same', activation='relu',
                     kernel_regularizer=l2(0.003)))
    model.add(Flatten())
    # Dense hidden layer sized by the fifth config entry.
    model.add(Dense(dense_units, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
    early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    history = model.fit(X_train, y_train, validation_split=0.2, epochs=50,
                        batch_size=batch_size, callbacks=[early_stop], verbose=0)

    # --- Evaluation ---
    y_pred_proba = model.predict(X_test).flatten()
    # Map the probability in [0, 1] to a position in [-1, 1].
    signal = 2 * y_pred_proba - 1
    strategy_return = signal * true_returns

    ic, _ = spearmanr(y_test, y_pred_proba)
    mean_ret = np.mean(strategy_return)
    std_ret = np.std(strategy_return)
    # Per-period ratio (not annualised); 0 when the strategy has no variance.
    sharpe = mean_ret / std_ret if std_ret > 0 else 0

    print(f"✅ Done for {features}")
    print(f"Sharpe Ratio: {sharpe:.3f} | Expected Return: {mean_ret:.4f} | Volatility: {std_ret:.4f}")
    print(f"Corr(raw): {ic:.3f}")
    print(f"Return Series Sample: {strategy_return[:5]}")
    print()

    # --- PLOTS ---
    # Training vs. validation loss per epoch.
    plt.figure(figsize=(10, 5))
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.title(f"Train vs Validation Loss\n{features}")
    plt.xlabel("Epochs")
    plt.ylabel("Loss (binary_crossentropy)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # Predicted probability against the true up/down label on the test set.
    plt.figure(figsize=(14, 6))
    plt.plot(y_pred_proba, label="Predicted Signal", color='blue')
    plt.plot(y_test, label="True Label", alpha=0.5, color='orange')
    plt.axhline(np.mean(y_pred_proba), linestyle='--', color='gray', label="Mean Prediction")
    plt.title(f"Prediction vs. True (Test)\n{features}")
    plt.xlabel("Zeitindex")
    plt.ylabel("Signal-Wahrscheinlichkeit")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # Cumulative strategy PnL against the cumulative market change.
    cum_strat = np.cumsum(strategy_return)
    cum_mkt = np.cumsum(true_returns)
    plt.figure(figsize=(14, 6))
    plt.plot(cum_strat, label="Strategy PnL", linewidth=2)
    plt.plot(cum_mkt, label="Market Return", linewidth=2)
    plt.axhline(np.mean(cum_strat), linestyle='--', color='gray', label="Mean Strategy PnL")
    plt.title(f"Strategy Backtest\n{features}")
    plt.xlabel("Zeitindex")
    plt.ylabel("Kumulierte Rendite")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # 20-period rolling mean/std ratio of the strategy returns.
    rolling = pd.Series(strategy_return)
    rolling_sharpe = rolling.rolling(20).mean() / rolling.rolling(20).std()
    plt.figure(figsize=(14, 6))
    plt.plot(rolling_sharpe, label='Rolling Sharpe Ratio')
    plt.axhline(rolling_sharpe.mean(), linestyle='--', color='gray', label='⟨Sharpe⟩')
    plt.title(f"Rolling Sharpe Ratio\n{features}")
    plt.xlabel("Zeitindex")
    plt.ylabel("Sharpe Ratio (20d)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


✅ Done for ['Y02', '_TY', '_AU']
Sharpe Ratio: 0.635 | Expected Return: 0.0578 | Volatility: 0.0911
Corr(raw): 0.029
Return Series Sample: [-0.03940409  0.          0.         -0.0462988   0.        ]

======================================================================
🔍 Features: ['UN', '_TY', '_OIL']
🧠 CNN Config: Kernels=(3, 5, 7), Filters=(32, 64, 128), Dropouts=(0.2, 0.4), Seq=45
======================================================================
18/18 [==============================] - 0s 2ms/step
✅ Done for ['UN', '_TY', '_OIL']
Sharpe Ratio: 0.536 | Expected Return: 0.1017 | Volatility: 0.1897
Corr(raw): -0.038
Return Series Sample: [0.00513327 0.02170122 0.0868088  0.         0.12348986]
