In [None]:


import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, LSTM, Bidirectional, Dense, Dropout, Flatten, concatenate

# === CNN+BiLSTM model construction ===
def build_cnn_bilstm_model(seq_len_blstm=60, seq_len_cnn=30, lstm_units=64, cnn_filters=32, dropout_rate=0.3):
    """Build and compile a two-input regression network.

    One branch feeds a univariate sequence of length ``seq_len_blstm`` through
    a bidirectional LSTM; the other feeds a univariate sequence of length
    ``seq_len_cnn`` through a single Conv1D layer (kernel size 3, ReLU) that is
    then flattened. The two branch outputs are concatenated and passed through
    Dense(64, ReLU) -> Dropout -> Dense(1, linear).

    Args:
        seq_len_blstm: Number of time steps of the BiLSTM input.
        seq_len_cnn: Number of time steps of the CNN input.
        lstm_units: Units per LSTM direction.
        cnn_filters: Number of Conv1D filters.
        dropout_rate: Dropout rate applied before the output layer.

    Returns:
        A Keras ``Model`` taking ``[blstm_input, cnn_input]``, compiled with
        the Adam optimizer and mean-squared-error loss.
    """
    # Recurrent branch: only the final state is kept (return_sequences=False).
    recurrent_in = Input(shape=(seq_len_blstm, 1))
    recurrent_features = Bidirectional(
        LSTM(lstm_units, return_sequences=False)
    )(recurrent_in)

    # Convolutional branch over the (typically shorter) recent window.
    conv_in = Input(shape=(seq_len_cnn, 1))
    conv_features = Conv1D(cnn_filters, kernel_size=3, activation='relu')(conv_in)
    conv_features = Flatten()(conv_features)

    # Shared regression head on top of both branches.
    merged = concatenate([recurrent_features, conv_features])
    hidden = Dense(64, activation='relu')(merged)
    hidden = Dropout(dropout_rate)(hidden)
    prediction = Dense(1, activation='linear')(hidden)

    network = Model(inputs=[recurrent_in, conv_in], outputs=prediction)
    network.compile(optimizer='adam', loss='mse')
    return network


def prepare_weekly_prediction_data(pivoted_prices, forecast_horizon=5, seq_len_blstm=60, seq_len_cnn=30):
    """Turn a price table into supervised samples for the two-input model.

    For every column of ``pivoted_prices`` the prices (NaNs dropped) are
    converted to simple one-step returns. For each step ``i`` two trailing
    return windows ending at ``i`` are extracted (one per model input) and the
    target is the simple return from price ``i`` to price
    ``i + forecast_horizon``.

    Args:
        pivoted_prices: DataFrame with one price column per ticker.
        forecast_horizon: Number of steps ahead used for the target return.
        seq_len_blstm: Length of the window fed to the BiLSTM branch.
        seq_len_cnn: Length of the window fed to the CNN branch.

    Returns:
        Tuple ``(X_blstm, X_cnn, y)`` with shapes
        ``(n, seq_len_blstm, 1)``, ``(n, seq_len_cnn, 1)`` and ``(n,)``.
        When no ticker has enough history, ``n`` is 0 and the arrays still
        carry the window dimensions.
    """
    # Both windows end at the same step, so the first usable step is dictated
    # by the longer one. Starting at seq_len_blstm alone would give a negative
    # slice start (and therefore empty/ragged windows) if seq_len_cnn is larger.
    longest_window = max(seq_len_blstm, seq_len_cnn)

    X_blstm, X_cnn, y = [], [], []
    for ticker in pivoted_prices.columns:
        series = pivoted_prices[ticker].dropna().values
        if len(series) < longest_window + forecast_horizon:
            continue
        returns = np.diff(series) / series[:-1]
        for i in range(longest_window, len(returns) - forecast_horizon):
            X_blstm.append(returns[i - seq_len_blstm:i])
            X_cnn.append(returns[i - seq_len_cnn:i])
            y.append((series[i + forecast_horizon] - series[i]) / series[i])

    if not y:
        # Keep the rank/shape contract even when there is nothing to train on.
        return (
            np.empty((0, seq_len_blstm, 1)),
            np.empty((0, seq_len_cnn, 1)),
            np.empty((0,)),
        )

    X_blstm = np.array(X_blstm)[..., np.newaxis]
    X_cnn = np.array(X_cnn)[..., np.newaxis]
    y = np.array(y)
    return X_blstm, X_cnn, y


def select_top_50_assets(pivot_df, window_size=60, top_n=200):
    """Rank assets by a trailing return-to-volatility score and keep the best.

    The score of each column is its simple return between the row
    ``window_size`` positions from the end and the last row, divided by the
    standard deviation of its daily log returns over the last ``window_size``
    log-return rows (plus a small epsilon to avoid division by zero).

    Args:
        pivot_df: Price table with one column per ticker, ordered by date.
        window_size: Look-back length, in rows, for both return and volatility.
        top_n: Maximum number of tickers to return.
            NOTE(review): the function name says "top 50" but the original
            code kept 200; the default preserves that behaviour - confirm
            which one is intended.

    Returns:
        List of column labels sorted by descending score, at most ``top_n``
        long.
    """
    log_ret = np.log(pivot_df / pivot_df.shift(1)).dropna()
    trailing_return = pivot_df.iloc[-1] / pivot_df.iloc[-window_size] - 1
    trailing_vol = log_ret.tail(window_size).std()
    score = trailing_return / (trailing_vol + 1e-8)
    ranked = score.sort_values(ascending=False)
    return ranked.head(top_n).index.tolist()


def test_top50_with_cnn_bilstm(pivot_df, params, forecast_horizon=5, rebalance_period=5, start_date="2025-01-01", end_date="2025-04-30"):
    """Walk-forward backtest of a long-only portfolio driven by the CNN+BiLSTM model.

    Every ``rebalance_period`` rows of the test window the model is rebuilt and
    refit on all prices strictly before the rebalance date, a
    ``forecast_horizon``-step return is predicted for each selected ticker,
    and capital is allocated proportionally to the positive predictions. A
    0.1% fee on turnover is charged at each rebalance that produces new
    weights. Starting capital is 1.0.

    Args:
        pivot_df: Price table (dates x tickers).
        params: Keyword arguments for ``build_cnn_bilstm_model``; must contain
            ``'seq_len_blstm'`` and ``'seq_len_cnn'``.
        forecast_horizon: Horizon of the training target, in rows.
        rebalance_period: Number of test rows between rebalances.
        start_date: First date (inclusive) of the test window.
        end_date: Last date (inclusive) of the test window.

    Returns:
        Series of portfolio capital indexed by test date. Periods skipped for
        lack of training data are absent from the result.

    Raises:
        ValueError: If ``pivot_df`` has no rows before ``start_date``.
    """
    # Rank assets using only prices observed before the test window. Ranking
    # on the full table would let test-period performance decide which assets
    # are traded (look-ahead bias).
    history = pivot_df[pivot_df.index < start_date]
    if history.empty:
        raise ValueError("No price history before start_date to select assets from")
    tickers = select_top_50_assets(history)

    pivot_top = pivot_df[tickers].copy()
    returns = pivot_top.pct_change().dropna()
    test_returns = returns[(returns.index >= start_date) & (returns.index <= end_date)]
    capital_series = pd.Series(index=test_returns.index, dtype=float)
    capital = 1.0
    previous_weights = pd.Series(1.0 / len(tickers), index=tickers)

    seq_len_blstm = params['seq_len_blstm']
    seq_len_cnn = params['seq_len_cnn']
    # n prices yield n - 1 returns, so a full input window needs one extra price.
    min_prices = max(seq_len_blstm, seq_len_cnn) + 1

    for start in range(0, len(test_returns), rebalance_period):
        date = test_returns.index[start]
        data_until_now = pivot_top[pivot_top.index < date]
        if len(data_until_now) < 90:
            continue

        X_blstm, X_cnn, y = prepare_weekly_prediction_data(data_until_now, forecast_horizon, seq_len_blstm, seq_len_cnn)
        if len(X_blstm) < 100:
            continue

        # Build the model only once we know it will be trained, and drop the
        # Keras state left over from the previous refit so memory does not
        # grow across rebalances / grid-search runs.
        tf.keras.backend.clear_session()
        model = build_cnn_bilstm_model(**params)
        model.fit([X_blstm, X_cnn], y, epochs=10, batch_size=64, verbose=0)

        # Collect the latest windows of every eligible ticker and predict them
        # in a single batch instead of one predict() call per ticker.
        eligible, blstm_inputs, cnn_inputs = [], [], []
        for ticker in tickers:
            series = data_until_now[ticker].dropna().values
            if len(series) < min_prices:
                continue
            returns_series = np.diff(series) / series[:-1]
            eligible.append(ticker)
            blstm_inputs.append(returns_series[-seq_len_blstm:])
            cnn_inputs.append(returns_series[-seq_len_cnn:])

        if eligible:
            Xb = np.array(blstm_inputs)[..., np.newaxis]
            Xc = np.array(cnn_inputs)[..., np.newaxis]
            pred_values = model.predict([Xb, Xc], verbose=0).flatten()
        else:
            pred_values = []

        preds = pd.Series(pred_values, index=eligible, dtype=float).sort_values(ascending=False)
        top_preds = preds[preds > 0]
        if top_preds.empty:
            # No positive forecast: keep the current allocation, no trading fee.
            weights = previous_weights.copy()
        else:
            weights = top_preds / top_preds.sum()
            # Cap single positions at 20%; note that the renormalisation below
            # can push capped weights back above the cap.
            weights = weights.clip(upper=0.2)
            weights /= weights.sum()
            weights = weights.reindex(tickers).fillna(0)
            turnover = (weights - previous_weights).abs().sum()
            fee = 0.001 * turnover * capital
            capital -= fee

        # Apply the fixed weights to each daily return of the holding period.
        ret_slice = test_returns.iloc[start:start + rebalance_period]
        for date_i, daily_return in (ret_slice * weights).sum(axis=1).items():
            capital *= (1 + daily_return)
            capital_series.loc[date_i] = capital

        previous_weights = weights.copy()

    return capital_series.dropna()


def load_data(base_dir, sample_n=500):
    """Load closing prices from per-ticker CSV files found under ``base_dir``.

    The directory tree is walked and, within each directory, files are visited
    in random order (``np.random.shuffle``). A file ``<TICKER>.csv`` is read
    with its first two lines skipped and its columns interpreted as
    ``Date, Close, High, Low, Open, Volume``; only ``Date`` and ``Close`` are
    kept. Rows whose date or close cannot be parsed are dropped, and a ticker
    is accepted only if more than 100 rows remain. Loading stops once
    ``sample_n`` tickers have been accepted.

    Args:
        base_dir: Root directory to search for ``.csv`` files.
        sample_n: Maximum number of distinct tickers to load.

    Returns:
        Long-format DataFrame with columns ``Date``, ``Close`` and ``TICKER``.

    Raises:
        ValueError: If no ticker could be loaded.
    """
    import warnings  # local import keeps this block self-contained

    all_data = []
    tickers_seen = set()
    for root, _, files in os.walk(base_dir):
        np.random.shuffle(files)
        for file in files:
            if not file.endswith('.csv'):
                continue
            ticker = os.path.splitext(file)[0]
            if ticker in tickers_seen:
                continue
            path = os.path.join(root, file)
            try:
                df = pd.read_csv(path, skiprows=2, header=None,
                                 names=["Date", "Close", "High", "Low", "Open", "Volume"],
                                 usecols=["Date", "Close"])
                df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
                # Leftover header text would otherwise make the column object-typed
                # and break later arithmetic on prices.
                df["Close"] = pd.to_numeric(df["Close"], errors="coerce")
            except (OSError, ValueError, TypeError) as exc:
                # Best effort: one unreadable/malformed file must not abort the
                # whole load, but the skip should be visible.
                warnings.warn(f"Skipping {path}: {exc}")
                continue
            df = df.dropna(subset=["Date", "Close"])
            if df.shape[0] > 100:
                df["TICKER"] = ticker
                all_data.append(df)
                tickers_seen.add(ticker)
                if len(tickers_seen) >= sample_n:
                    break
        if len(tickers_seen) >= sample_n:
            break
    if not all_data:
        raise ValueError("Brak danych")
    return pd.concat(all_data, ignore_index=True)

def prepare_pivot(df_all, start_year=2023, end_year=2025):
    """Reshape long-format prices into a date x ticker table of closes.

    Rows whose ``Date`` year lies outside ``[start_year, end_year]`` are
    discarded, the remainder is pivoted (index ``Date``, columns ``TICKER``,
    values ``Close``) and sorted by date. Saturday/Sunday rows are removed and
    remaining gaps are filled forward, then backward.

    Args:
        df_all: DataFrame with ``Date`` (datetime), ``TICKER`` and ``Close``.
        start_year: First calendar year to keep (inclusive).
        end_year: Last calendar year to keep (inclusive).

    Returns:
        Wide DataFrame indexed by date with one column per ticker.
    """
    years = df_all["Date"].dt.year
    in_range = df_all[years.between(start_year, end_year)]

    wide = in_range.pivot(index="Date", columns="TICKER", values="Close")
    wide = wide.sort_index()

    # weekday: Monday=0 ... Sunday=6, so < 5 keeps Monday-Friday only.
    business_days = wide.loc[wide.index.weekday < 5]
    return business_days.ffill().bfill()

# --- Cell driver: load prices, run the backtest, compare with a benchmark ---
# Machine-specific absolute Windows path to the folder of per-ticker CSV files.
data_dir = r"C:\Users\Basia\Do przejrzenia\am_sem2\mgr\kody\downloaded_data"
# Load at most 771 tickers and reshape them into a date x ticker price table.
df_all = load_data(data_dir, sample_n=771)
pivot = prepare_pivot(df_all)
# Metrics recorded for the parameter set below (presumably copied from an
# earlier grid-search result file - TODO confirm). Columns, then values:
# cnn_filters  dropout_rate  lstm_units  seq_len_blstm  seq_len_cnn  final_value  total_return  sharpe_ratio  volatility  max_drawdown
# 32           0.1           64          120            30           1.063423     0.063454      11.654336     0.060710    -0.003108
best_params = {
    'cnn_filters': 32,
    'dropout_rate': 0.1,
    'lstm_units': 64,
    'seq_len_blstm': 120,
    'seq_len_cnn': 30 
}

# Backtest with the default test window and rebalance settings of the function.
capital_series = test_top50_with_cnn_bilstm(pivot_df=pivot, params=best_params)

# Comparison against equal weights: the benchmark holds every column of
# `pivot` (not only the selected tickers) at a constant 1/N weight, on the
# same dates as the strategy, and pays no fees.
returns = pivot.pct_change().dropna()
equal_weights = pd.Series(1.0 / len(pivot.columns), index=pivot.columns)
returns_benchmark = returns.loc[capital_series.index]
capital_eq = (1 + (returns_benchmark * equal_weights).sum(axis=1)).cumprod()

# Plot both equity curves on one chart.
# NOTE(review): the label says "Top 50" while select_top_50_assets keeps up
# to 200 tickers - confirm which is intended.
plt.figure(figsize=(12, 6))
capital_series.plot(label="CNN+BiLSTM Top 50", linewidth=2)
capital_eq.plot(label="Equal Weights", linewidth=2)
plt.title("Porównanie kapitału: CNN+BiLSTM Top 50 vs Equal Weights")
plt.xlabel("Data")
plt.ylabel("Kapitał")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()




  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce

In [None]:
# Persist the strategy's equity curve to CSV (with a header row), then leave
# the Series as the last expression so the notebook displays it.
capital_series.to_csv("cnn_bilstm_roll1.csv", header=True)
capital_series

Date
2025-01-01    0.999164
2025-01-02    0.996705
2025-01-03    0.989969
2025-01-06    0.997580
2025-01-07    1.006249
                ...   
2025-04-24    1.317665
2025-04-25    1.327387
2025-04-28    1.333826
2025-04-29    1.341005
2025-04-30    1.341512
Length: 86, dtype: float64

In [None]:
import pandas as pd
from sklearn.model_selection import ParameterGrid
import os

def compute_stats(series):
    """Summarise an equity curve with a few performance statistics.

    Missing values are dropped first. Period returns are the percentage
    changes between consecutive remaining values; the Sharpe ratio and the
    volatility are annualised with a factor of sqrt(252) and use the
    population standard deviation (ddof=0).

    Args:
        series: pandas Series of portfolio values ordered in time.

    Returns:
        Dict with keys ``final_value``, ``total_return``, ``sharpe_ratio``,
        ``volatility`` and ``max_drawdown``. All values are 0 for an empty
        (or all-NaN) series; Sharpe ratio and volatility are 0 when fewer
        than two values are available.
    """
    values = series.dropna()
    if values.empty:
        # Nothing was traded/recorded: report neutral statistics instead of
        # failing on positional indexing.
        return {
            "final_value": 0,
            "total_return": 0,
            "sharpe_ratio": 0,
            "volatility": 0,
            "max_drawdown": 0,
        }

    returns = values.pct_change().dropna()
    annualisation = np.sqrt(252)
    if len(returns) > 0:
        mean_ret = np.mean(returns.to_numpy())
        std_ret = np.std(returns.to_numpy())
    else:
        mean_ret, std_ret = 0.0, 0.0

    first, last = values.iloc[0], values.iloc[-1]
    return {
        "final_value": last,
        "total_return": last / first - 1 if first > 0 else 0,
        "sharpe_ratio": mean_ret / std_ret * annualisation if std_ret > 0 else 0,
        "volatility": std_ret * annualisation,
        "max_drawdown": (values / values.cummax() - 1).min(),
    }
def grid_search(pivoted_prices, output_path="cnn_bilstm_grid_search_partial.csv"):
    """Backtest every hyper-parameter combination and rank them by Sharpe ratio.

    Each combination of the fixed grid below is backtested with
    ``test_top50_with_cnn_bilstm`` over 2025-01-01..2025-01-31 and summarised
    with ``compute_stats``. Every finished row is appended to ``output_path``
    immediately, so partial results survive an interrupted run. A combination
    that raises is reported on stdout and skipped.

    Args:
        pivoted_prices: Price table (dates x tickers) passed to the backtest.
        output_path: CSV file that receives one appended row per finished
            combination; the header is written only if the file does not
            exist yet.

    Returns:
        DataFrame of parameters and statistics sorted by descending
        ``sharpe_ratio``; an empty DataFrame if no combination succeeded.
    """
    param_grid = {
        'seq_len_blstm': [60, 120],
        'seq_len_cnn': [10,  30],
        'lstm_units': [32, 64],
        'cnn_filters': [16, 32],
        'dropout_rate': [0.1,  0.3]
    }
    all_results = []

    for i, params in enumerate(ParameterGrid(param_grid), start=1):
        print(f"🔍 Iteracja {i} | Parametry: {params}")
        try:
            cap_series = test_top50_with_cnn_bilstm(
                pivot_df=pivoted_prices,
                params=params,
                forecast_horizon=5,
                rebalance_period=5,
                start_date="2025-01-01",
                end_date="2025-01-31"
            )
            stats = compute_stats(cap_series)
            result_row = {**params, **stats}
            all_results.append(result_row)

            # Checkpoint: append this row right away so a crash later in the
            # search does not lose completed work.
            pd.DataFrame([result_row]).to_csv(
                output_path,
                mode='a',
                index=False,
                header=not os.path.exists(output_path)
            )

        except Exception as e:
            # Top-level boundary of a long-running search: report and move on.
            print(f"❌ Błąd przy parametrach {params}: {e}")

    df_all = pd.DataFrame(all_results)
    if df_all.empty:
        # Every combination failed: there is no "sharpe_ratio" column to sort by.
        return df_all
    return df_all.sort_values("sharpe_ratio", ascending=False)


# --- Cell driver: reload the data and run the hyper-parameter grid search ---
# Machine-specific absolute Windows path to the folder of per-ticker CSV files.
data_dir = r"C:\Users\Basia\Do przejrzenia\am_sem2\mgr\kody\downloaded_data"
# Load at most 771 tickers and reshape them into a date x ticker price table.
df_all = load_data(data_dir, sample_n=771)
pivot = prepare_pivot(df_all)
# Rows are appended to this CSV as each configuration finishes.
results = grid_search(pivot, output_path="cnn_bilstm_partial_results.csv")


# Write the full ranking returned by grid_search and print its first rows.
results.to_csv("cnn_bilstm_final_sorted_results.csv", index=False)
print(results.head())

  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
  df["Date"] = pd.to_datetime(df["Date"], errors="coerce

🔍 Iteracja 1 | Parametry: {'cnn_filters': 16, 'dropout_rate': 0.1, 'lstm_units': 32, 'seq_len_blstm': 60, 'seq_len_cnn': 10}
🔍 Iteracja 2 | Parametry: {'cnn_filters': 16, 'dropout_rate': 0.1, 'lstm_units': 32, 'seq_len_blstm': 60, 'seq_len_cnn': 30}
🔍 Iteracja 3 | Parametry: {'cnn_filters': 16, 'dropout_rate': 0.1, 'lstm_units': 32, 'seq_len_blstm': 120, 'seq_len_cnn': 10}
🔍 Iteracja 4 | Parametry: {'cnn_filters': 16, 'dropout_rate': 0.1, 'lstm_units': 32, 'seq_len_blstm': 120, 'seq_len_cnn': 30}
🔍 Iteracja 5 | Parametry: {'cnn_filters': 16, 'dropout_rate': 0.1, 'lstm_units': 64, 'seq_len_blstm': 60, 'seq_len_cnn': 10}
🔍 Iteracja 6 | Parametry: {'cnn_filters': 16, 'dropout_rate': 0.1, 'lstm_units': 64, 'seq_len_blstm': 60, 'seq_len_cnn': 30}
🔍 Iteracja 7 | Parametry: {'cnn_filters': 16, 'dropout_rate': 0.1, 'lstm_units': 64, 'seq_len_blstm': 120, 'seq_len_cnn': 10}
🔍 Iteracja 8 | Parametry: {'cnn_filters': 16, 'dropout_rate': 0.1, 'lstm_units': 64, 'seq_len_blstm': 120, 'seq_len_cnn': 