In [None]:



import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, LSTM, Bidirectional, Dense, Dropout, Flatten, concatenate

# === Budowa modelu CNN+BiLSTM ===
def build_cnn_bilstm_model(seq_len_blstm=60, seq_len_cnn=30, lstm_units=64, cnn_filters=32, dropout_rate=0.3):
    input_blstm = Input(shape=(seq_len_blstm, 1))
    x_blstm = Bidirectional(LSTM(lstm_units, return_sequences=False))(input_blstm)

    input_cnn = Input(shape=(seq_len_cnn, 1))
    x_cnn = Conv1D(cnn_filters, kernel_size=3, activation='relu')(input_cnn)
    x_cnn = Flatten()(x_cnn)

    x = concatenate([x_blstm, x_cnn])
    x = Dense(64, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    output = Dense(1, activation='linear')(x)

    model = Model(inputs=[input_blstm, input_cnn], outputs=output)
    model.compile(optimizer='adam', loss='mse')

    return model


def prepare_weekly_prediction_data(pivoted_prices, forecast_horizon=5, seq_len_blstm=60, seq_len_cnn=30):
    X_blstm, X_cnn, y = [], [], []
    tickers = pivoted_prices.columns
    for ticker in tickers:
        series = pivoted_prices[ticker].dropna().values
        if len(series) < seq_len_blstm + forecast_horizon:
            continue
        returns = np.diff(series) / series[:-1]
        for i in range(seq_len_blstm, len(returns) - forecast_horizon):
            blstm_window = returns[i - seq_len_blstm:i]
            cnn_window = returns[i - seq_len_cnn:i]
            future_return = (series[i + forecast_horizon] - series[i]) / series[i]
            X_blstm.append(blstm_window)
            X_cnn.append(cnn_window)
            y.append(future_return)
    X_blstm = np.array(X_blstm)[..., np.newaxis]
    X_cnn = np.array(X_cnn)[..., np.newaxis]
    y = np.array(y)
    return X_blstm, X_cnn, y


def select_top_50_assets(pivot_df, window_size=60):
    log_ret = np.log(pivot_df / pivot_df.shift(1)).dropna()
    ret = pivot_df.iloc[-1] / pivot_df.iloc[-window_size] - 1
    vol = log_ret.tail(window_size).std()
    sharpe = ret / (vol + 1e-8)
    score_df = pd.DataFrame({"RET": ret, "VOL": vol, "SHARPE": sharpe})
    score_df["SCORE"] = score_df["SHARPE"]
    selected = score_df.sort_values("SCORE", ascending=False).head(50)
    return selected.index.tolist()


def test_top50_with_cnn_bilstm(pivot_df, params, forecast_horizon=5, rebalance_period=5, start_date="2025-01-01", end_date="2025-04-30"):
    tickers = select_top_50_assets(pivot_df)
    pivot_top = pivot_df[tickers].copy()
    returns = pivot_top.pct_change().dropna()
    test_returns = returns[(returns.index >= start_date) & (returns.index <= end_date)]
    fee_series = pd.Series(index=test_returns.index, dtype=float)
    capital_series = pd.Series(index=test_returns.index, dtype=float)
    capital = 1.0
    previous_weights = pd.Series(1.0 / len(tickers), index=tickers)

    for start in range(0, len(test_returns), rebalance_period):
        date = test_returns.index[start]
        data_until_now = pivot_top[pivot_top.index < date]
        if len(data_until_now) < 90:
            continue

        model = build_cnn_bilstm_model(**params)
        X_blstm, X_cnn, y = prepare_weekly_prediction_data(data_until_now, forecast_horizon, params['seq_len_blstm'], params['seq_len_cnn'])
        if len(X_blstm) < 100:
            continue
        model.fit([X_blstm, X_cnn], y, epochs=10, batch_size=64, verbose=0)

        preds = {}
        for ticker in tickers:
            series = data_until_now[ticker].dropna().values
            if len(series) >= max(params['seq_len_blstm'], params['seq_len_cnn']):
                returns_series = np.diff(series) / series[:-1]
                Xb = returns_series[-params['seq_len_blstm']:].reshape(1, -1, 1)
                Xc = returns_series[-params['seq_len_cnn']:].reshape(1, -1, 1)
                pred = model.predict([Xb, Xc], verbose=0).flatten()[0]
                preds[ticker] = pred

        preds = pd.Series(preds).sort_values(ascending=False)
        top_preds = preds[preds > 0]
        if top_preds.empty:
            weights = previous_weights.copy()
        else:
            weights = top_preds / top_preds.sum()
            weights = weights.clip(upper=0.2)
            weights /= weights.sum()
            weights = weights.reindex(tickers).fillna(0)
            turnover = (weights - previous_weights).abs().sum()
            fee = 0.001 * turnover * capital
            capital -= fee
            fee_series.loc[date] = fee

        ret_slice = test_returns.iloc[start:start + rebalance_period]
        for date_i in ret_slice.index:
            daily_return = (ret_slice.loc[date_i] * weights).sum()
            capital *= (1 + daily_return)
            capital_series.loc[date_i] = capital

        previous_weights = weights.copy()

    return capital_series.dropna()


def load_data(base_dir, sample_n=500):
    all_data = []
    tickers_seen = set()
    for root, _, files in os.walk(base_dir):
        np.random.shuffle(files)
        for file in files:
            if file.endswith('.csv'):
                path = os.path.join(root, file)
                try:
                    ticker = os.path.splitext(file)[0]
                    if ticker in tickers_seen:
                        continue
                    df = pd.read_csv(path, skiprows=2, header=None,
                                     names=["Date", "Close", "High", "Low", "Open", "Volume"],
                                     usecols=["Date", "Close"])
                    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
                    df = df.dropna(subset=["Date", "Close"])
                    if df.shape[0] > 100:
                        df["TICKER"] = ticker
                        all_data.append(df)
                        tickers_seen.add(ticker)
                        if len(tickers_seen) >= sample_n:
                            break
                except:
                    continue
        if len(tickers_seen) >= sample_n:
            break
    if not all_data:
        raise Exception("Brak danych")
    return pd.concat(all_data, ignore_index=True)

def prepare_pivot(df_all, start_year=2023, end_year=2025):
    df_all = df_all[(df_all["Date"].dt.year >= start_year) & (df_all["Date"].dt.year <= end_year)]
    pivot = df_all.pivot(index="Date", columns="TICKER", values="Close").sort_index()
    pivot = pivot[pivot.index.weekday < 5]
    pivot = pivot.ffill().bfill()
    return pivot




In [None]:

historical_window = 60
historical_data = pivot_top.loc[: "2025-04-30"].tail(historical_window)

stress_price_paths = []

for stress_path in generate_bootstrap_stress_paths(
    stress_data_real[top_50_tickers],
    path_length=22,
    n_paths=n_iterations,
    block_size=block_size):

    stress_test_dates = pd.date_range(start="2025-05-01", periods=22, freq="B")
    stress_prices_future = pd.DataFrame(
        data=np.cumprod(1 + stress_path.values, axis=0) * last_prices.values,
        columns=top_50_tickers,
        index=stress_test_dates
    )

    stress_prices_full = pd.concat([historical_data, stress_prices_future])
    stress_price_paths.append(stress_prices_full)
stress_price_paths

[             ABEV3.SA    EOAN.DE    S63.SI     IDR.MC    CHILE.SN  \
 2025-02-06  10.662843  11.060604  4.771421  16.530001  114.302124   
 2025-02-07  10.653025  11.210007  4.939359  16.100000  114.839363   
 2025-02-10  10.770846  11.210007  4.840572  16.440001  115.135773   
 2025-02-11  10.761027  11.060604  4.880087  16.620001  115.321022   
 2025-02-12  10.721754  10.959395  4.899844  16.559999  116.432549   
 ...               ...        ...       ...        ...         ...   
 2025-05-26  14.225311  14.793769  6.828320  28.258223  146.834507   
 2025-05-27  14.283810  14.662850  6.809508  27.617499  148.155641   
 2025-05-28  14.381311  14.694021  6.865940  27.507027  149.099304   
 2025-05-29  14.441686  14.839266  6.989810  27.400706  147.861452   
 2025-05-30  14.429612  14.984512  6.989810  27.172870  145.552639   
 
                001800.KQ    TPE.WA      ABEV     ORA.PA   0775.HK  ...  \
 2025-02-06  14365.375977  4.259000  1.822701  10.450000  0.530000  ...   
 2025-02

In [20]:
for i, stress_prices in enumerate(stress_price_paths):
    capital_series = test_top50_with_cnn_bilstm(
        pivot_df=stress_prices,
        params=best_params,
        forecast_horizon=5,
        rebalance_period=5,
        start_date="2025-05-01",  # <- tylko maj
        end_date="2025-06-01"
    )
    capital_series.to_csv(f"{output_dir}/capital_bilstm_iter_{i+1}.csv")
    results.append(capital_series)


In [None]:
def select_top_50_assets(pivot_df, window_size=60):
    if len(pivot_df) < window_size + 1:
        return pivot_df.columns.tolist()
    log_ret = np.log(pivot_df / pivot_df.shift(1)).dropna()
    ret = pivot_df.iloc[-1] / pivot_df.iloc[-window_size] - 1
    vol = log_ret.tail(window_size).std()
    sharpe = ret / (vol + 1e-8)
    score_df = pd.DataFrame({"RET": ret, "VOL": vol, "SHARPE": sharpe})
    score_df["SCORE"] = score_df["SHARPE"]
    selected = score_df.sort_values("SCORE", ascending=False).head(50)
    return selected.index.tolist()


In [None]:


n_iterations = 100
path_length = 22
block_size = 5
output_dir = "rolling_results_bilstm_v2/"
os.makedirs(output_dir, exist_ok=True)


df_all = load_data(data_dir, sample_n=771)
pivot = prepare_pivot(df_all)
top_50_tickers = select_top_50_assets(pivot.loc[: "2025-04-30"])
pivot_top = pivot[top_50_tickers]


statistical_stress_periods = generate_statistical_stress_periods(pivot_top)
stress_data_real = generate_stress_data(pivot_top, statistical_stress_periods)
last_prices = pivot_top.loc[pivot_top.index < "2025-05-01"].iloc[-1]
historical_data = pivot_top.loc[: "2025-04-30"].tail(60)


results = []

for i in tqdm(range(n_iterations), desc="Iteracje BILSTM+CNN"):
    stress_paths = generate_bootstrap_stress_paths(
        stress_data_real[top_50_tickers],
        path_length=path_length,
        n_paths=1,
        block_size=block_size
    )
    stress_path = stress_paths[0]
    stress_test_dates = pd.date_range(start="2025-05-01", periods=path_length, freq="B")
    stress_prices_future = pd.DataFrame(
        data=np.cumprod(1 + stress_path.values, axis=0) * last_prices.values,
        columns=top_50_tickers,
        index=stress_test_dates
    )

    combined_data = pd.concat([pivot_top.loc[: "2025-04-30"], stress_prices_future])
    
    try:
        cap_series = test_top50_with_cnn_bilstm(
            pivot_df=combined_data,
            params=best_params,
            forecast_horizon=5,
            rebalance_period=5,
            start_date="2025-05-01",
            end_date="2025-05-31"
        )
        cap_series.name = f"iter_{i+1}"
        cap_series.to_csv(os.path.join(output_dir, f"capital_bilstm_iter_{i+1}.csv"))
        results.append(cap_series)
    except Exception as e:
        print(f"Błąd w iteracji {i+1}: {e}")

# Zbiorczy zapis
if results:
    combined_df = pd.concat(results, axis=1)
    combined_df.to_csv(os.path.join(output_dir, "capital_bilstm_all.csv"))
  

  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce