In [2]:
import os
import pandas as pd
import numpy as np
import os
import pandas as pd
import numpy as np

def load_data(base_dir, sample_n=50):
    """Load closing prices for up to ``sample_n`` tickers found under ``base_dir``.

    The directory tree is walked with ``os.walk``. Inside every directory the
    file list is shuffled with ``np.random.shuffle``, so which tickers end up
    in the sample depends on NumPy's global random state.

    Each ``*.csv`` file is read without a header after skipping its first two
    lines; the six columns are taken positionally as Date, Close, High, Low,
    Open, Volume and only Date and Close are kept. Rows whose date does not
    parse as ``%Y-%m-%d`` or whose close is not numeric are dropped. A file
    contributes only if more than 100 valid rows remain.

    Parameters
    ----------
    base_dir : str or path-like
        Root directory that is searched recursively for CSV files.
    sample_n : int, default 50
        Stop as soon as this many distinct tickers have been loaded.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with the columns ``Date``, ``Close`` and ``TICKER``.

    Raises
    ------
    Exception
        If no file produced usable data.
    """
    import warnings  # local import: the module-level import cell is left untouched

    all_data = []
    tickers_seen = set()

    for root, _, files in os.walk(base_dir):
        np.random.shuffle(files)
        for file in files:
            if not file.endswith('.csv'):
                continue

            # The ticker symbol is the file name without its extension.
            ticker = os.path.splitext(file)[0]
            if ticker in tickers_seen:
                continue

            path = os.path.join(root, file)
            try:
                # Read the data according to the new file format.
                df = pd.read_csv(path, skiprows=2, header=None,
                                 names=["Date", "Close", "High", "Low", "Open", "Volume"],
                                 usecols=["Date", "Close"])
                df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d", errors="coerce")
                # A single non-numeric cell would otherwise leave the whole
                # column as ``object`` dtype and slip through ``dropna``.
                df["Close"] = pd.to_numeric(df["Close"], errors="coerce")
                df = df.dropna(subset=["Date", "Close"])
            except Exception as exc:
                # Loading stays best-effort, but a skipped file is reported
                # instead of disappearing silently.
                warnings.warn(f"Skipping {path}: {exc!r}", RuntimeWarning, stacklevel=2)
                continue

            if df.shape[0] > 100:
                all_data.append(df.assign(TICKER=ticker))
                tickers_seen.add(ticker)

                if len(tickers_seen) >= sample_n:
                    break
        if len(tickers_seen) >= sample_n:
            break

    if not all_data:
        raise Exception("Brak danych")

    return pd.concat(all_data, ignore_index=True)


def prepare_pivot(df, start_year=2020, end_year=2025, max_nan_percent=5.0):
    """Turn long-format prices into a date x ticker matrix of closing prices.

    Steps:
      1. keep rows whose ``Date`` year lies in ``[start_year, end_year]``;
      2. pivot to one column per ``TICKER`` (``DataFrame.pivot`` raises if a
         (Date, TICKER) pair occurs more than once);
      3. drop Saturday/Sunday rows;
      4. drop tickers whose share of missing values exceeds
         ``max_nan_percent`` (in percent of the remaining rows);
      5. fill the remaining gaps forward, then backward.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime ``Date`` column plus ``TICKER`` and ``Close``.
    start_year, end_year : int
        Inclusive year range to keep.
    max_nan_percent : float
        Maximum tolerated percentage of missing prices per ticker.

    Returns
    -------
    pandas.DataFrame
        Price matrix indexed by date with one column per retained ticker.
    """
    years = df["Date"].dt.year
    df = df[(years >= start_year) & (years <= end_year)]
    pivot = df.pivot(index="Date", columns="TICKER", values="Close").sort_index()
    pivot = pivot[pivot.index.weekday < 5]  # remove weekends
    nans = pivot.isna().sum() / len(pivot) * 100
    good_tickers = nans[nans <= max_nan_percent].index.tolist()
    # ``fillna(method=...)`` is deprecated in pandas; ``ffill``/``bfill`` are
    # the supported equivalents and give the same result.
    return pivot[good_tickers].ffill().bfill()


In [3]:
def extract_features(price_df: pd.DataFrame, window: int = 20):
    """Compute per-asset features from the last ``window`` simple returns.

    Parameters
    ----------
    price_df : pandas.DataFrame
        Price matrix with one column per asset.
    window : int, default 20
        Number of most recent return rows used for every statistic.

    Returns
    -------
    pandas.DataFrame
        One row per asset with the columns

        * ``MOM``    - mean return,
        * ``VOL``    - standard deviation of returns,
        * ``SHARPE`` - ``MOM / VOL`` (0 when the ratio is undefined),
        * ``MDD``    - minimum of the drawdown series of the compounded
          returns; the running peak starts at the first compounded value, so
          a loss on the first row of the window is not counted,
        * ``CORR``   - sum of the asset's correlations with all other assets.

        Undefined values (NaN or +/-inf) are replaced by 0.
    """
    returns = price_df.pct_change().dropna()
    recent = returns.tail(window)

    momentum = recent.mean()
    volatility = recent.std()
    sharpe = momentum / volatility  # +/-inf when volatility is exactly zero

    cum_returns = (1 + recent).cumprod()
    peak = cum_returns.cummax()
    drawdown = (cum_returns - peak) / peak
    mdd = drawdown.min()

    corr_matrix = recent.corr()
    # Remove the self-correlation actually present on the diagonal. For an
    # asset whose correlations are undefined the diagonal is NaN, and a fixed
    # "- 1" would wrongly score it as -1.
    self_corr = pd.Series(np.diag(corr_matrix.to_numpy()), index=corr_matrix.index).fillna(0)
    total_corr = corr_matrix.sum() - self_corr

    features = pd.DataFrame({
        'MOM': momentum,
        'VOL': volatility,
        'SHARPE': sharpe,
        'MDD': mdd,
        'CORR': total_corr
    })
    # ``fillna`` does not touch infinities, so map them to NaN first; a zero
    # volatility asset then gets SHARPE = 0, matching the zero-volatility
    # convention used by the portfolio fitness function.
    return features.replace([np.inf, -np.inf], np.nan).fillna(0)


In [4]:
def pso_fitness_advanced(weights, features, lambda_mom=0.0, lambda_vol=0.4, lambda_mdd=0.3, lambda_sharpe=0.2, lambda_corr=0.1):
    """Score a weight vector; lower values are better.

    The score adds penalties for portfolio volatility, absolute drawdown and
    correlation exposure, and subtracts rewards for momentum and for the
    momentum/volatility ratio. Each term is scaled by its ``lambda_*``.

    Portfolio volatility is the root of the summed squared ``weight * VOL``
    products, i.e. no cross terms between assets enter the formula.

    Parameters
    ----------
    weights : array-like
        One weight per row of ``features``.
    features : pandas.DataFrame
        Must provide the columns ``MOM``, ``VOL``, ``MDD`` and ``CORR``.
    lambda_mom, lambda_vol, lambda_mdd, lambda_sharpe, lambda_corr : float
        Relative importance of the individual terms.

    Returns
    -------
    float
        The fitness value to be minimised.
    """
    def weighted_total(column):
        # Weighted sum of a single feature column.
        return np.sum(weights * features[column])

    port_mom = weighted_total("MOM")
    port_mdd = weighted_total("MDD")
    port_corr = weighted_total("CORR")

    scaled_vol = weights * features["VOL"]
    port_vol = np.sqrt(np.sum(scaled_vol ** 2))

    # A portfolio without volatility gets a neutral ratio instead of a
    # division by zero.
    if port_vol != 0:
        port_sharpe = port_mom / port_vol
    else:
        port_sharpe = 0

    momentum_term = -lambda_mom * port_mom
    volatility_term = lambda_vol * port_vol
    drawdown_term = lambda_mdd * abs(port_mdd)
    sharpe_term = lambda_sharpe * port_sharpe
    correlation_term = lambda_corr * port_corr

    return momentum_term + volatility_term + drawdown_term - sharpe_term + correlation_term

def pso_optimize_advanced(features: pd.DataFrame, n_particles=100, n_iterations=300, max_weight=0.2,
                          lambda_mom=0.0, lambda_vol=0.4, lambda_mdd=0.3, lambda_sharpe=0.2, lambda_corr=0.1):
    n_assets = len(features)
    particles = np.random.dirichlet(np.ones(n_assets), size=n_particles)
    velocities = np.zeros_like(particles)
    personal_best = particles.copy()
    personal_scores = np.array([
        pso_fitness_advanced(p, features, lambda_mom, lambda_vol, lambda_mdd, lambda_sharpe, lambda_corr)
        for p in particles
    ])
    global_best_idx = np.argmin(personal_scores)
    global_best = personal_best[global_best_idx].copy()

    for _ in range(n_iterations):
        for i in range(n_particles):
            r1, r2 = np.random.rand(n_assets), np.random.rand(n_assets)
            velocities[i] = 0.5 * velocities[i] + 1.4 * r1 * (personal_best[i] - particles[i]) + 1.4 * r2 * (global_best - particles[i])
            particles[i] += velocities[i]
            particles[i] = np.clip(particles[i], 0, max_weight)
            particles[i] /= particles[i].sum() if particles[i].sum() > 0 else 1
            score = pso_fitness_advanced(particles[i], features, lambda_mom, lambda_vol, lambda_mdd, lambda_sharpe, lambda_corr)
            if score < personal_scores[i]:
                personal_best[i] = particles[i].copy()
                personal_scores[i] = score
                if score < personal_scores[global_best_idx]:
                    global_best = particles[i].copy()
                    global_best_idx = i

    return global_best


In [None]:
def realistic_pso_ema_advanced(pivoted_prices: pd.DataFrame,
                                feature_window: int = 20,
                                rebalance_period: int = 5,
                                n_particles: int = 100,
                                n_iterations: int = 300,
                                max_weight: float = 0.2,
                                lambda_mom: float = 0.0,
                                lambda_vol: float = 0.4,
                                lambda_mdd: float = 0.3,
                                lambda_sharpe: float = 0.2,
                                lambda_corr: float = 0.1,
                                fee_rate: float = 0.001,
                                ema_span: int = 20,
                                test_year=2025):
    """Backtest a periodically rebalanced PSO portfolio on EMA-smoothed prices.

    Prices are smoothed with an exponential moving average (``ema_span``).
    All trading, valuation and feature extraction use the smoothed series.
    Starting from a capital of 1.0, the portfolio is re-optimised every
    ``rebalance_period`` rows of the test period. Features are computed from
    the last ``feature_window + 1`` smoothed prices up to and including the
    rebalance date; if fewer rows exist, the features use what is available.

    Transaction costs are ``fee_rate`` times the traded notional. Turnover is
    measured against the weights actually held at the rebalance date (share
    holdings valued at current prices), not against the previous target
    weights, so rebalancing after price moves is charged.

    Parameters
    ----------
    pivoted_prices : pandas.DataFrame
        Price matrix with a ``DatetimeIndex`` and one column per asset.
    feature_window : int
        Number of returns used for feature extraction.
    rebalance_period : int
        Number of test rows between two rebalances.
    n_particles, n_iterations, max_weight : optimiser settings, forwarded to
        ``pso_optimize_advanced``.
    lambda_mom, lambda_vol, lambda_mdd, lambda_sharpe, lambda_corr : float
        Fitness weights, forwarded to ``pso_optimize_advanced``.
    fee_rate : float
        Proportional transaction cost.
    ema_span : int
        Span of the exponential moving average.
    test_year : int or None, default 2025
        Calendar year whose dates form the test period; ``None`` uses every
        date that has a return.

    Returns
    -------
    (pandas.Series, pandas.DataFrame)
        Portfolio value per test date, and a long-format log of the target
        weights with the columns ``Date``, ``Ticker`` and ``Weight``.
    """
    smoothed_prices = pivoted_prices.ewm(span=ema_span, adjust=False).mean()

    returns = smoothed_prices.pct_change().dropna()
    if test_year is None:
        returns_test = returns
    else:
        returns_test = returns[returns.index.year == test_year]
    dates = returns_test.index

    initial_capital = 1.0
    capital_timeline = []
    weights_log = []
    shares = None  # no holdings before the first rebalance

    for start in range(0, len(dates), rebalance_period):
        period_dates = dates[start:start + rebalance_period]
        rebalance_date = period_dates[0]
        current_prices = smoothed_prices.loc[rebalance_date]

        price_history = smoothed_prices.loc[:rebalance_date].tail(feature_window + 1)
        features = extract_features(price_history, window=feature_window)

        new_weights = np.asarray(pso_optimize_advanced(
            features,
            n_particles=n_particles,
            n_iterations=n_iterations,
            max_weight=max_weight,
            lambda_mom=lambda_mom,
            lambda_vol=lambda_vol,
            lambda_mdd=lambda_mdd,
            lambda_sharpe=lambda_sharpe,
            lambda_corr=lambda_corr
        ), dtype=float)

        if shares is None:
            total_value = initial_capital
            held_weights = np.zeros(len(new_weights))
        else:
            position_values = shares * current_prices
            total_value = position_values.sum()
            if total_value != 0:
                # Weights after price drift since the previous rebalance.
                held_weights = (position_values / total_value).to_numpy()
            else:
                held_weights = np.zeros(len(new_weights))

        turnover = np.sum(np.abs(new_weights - held_weights))
        cost = fee_rate * total_value * turnover
        total_value_after_cost = total_value - cost

        shares = (new_weights * total_value_after_cost) / current_prices

        for date in period_dates:
            prices_today = smoothed_prices.loc[date]
            portfolio_value = np.sum(shares * prices_today)
            capital_timeline.append((date, portfolio_value))

        for ticker, w in zip(pivoted_prices.columns, new_weights):
            weights_log.append({
                "Date": rebalance_date,
                "Ticker": ticker,
                "Weight": w
            })

    capital_series = pd.Series(dict(capital_timeline))
    weights_df = pd.DataFrame(weights_log)
    return capital_series, weights_df


In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
# Directory holding the downloaded per-ticker CSV files. The machine-specific
# default can be overridden through the PSO_DATA_DIR environment variable so
# the notebook also runs on other machines without editing this cell.
base_dir = os.environ.get(
    "PSO_DATA_DIR",
    r"C:\Users\Basia\Do przejrzenia\am_sem2\mgr\kody\downloaded_data",
)

# Load up to 771 tickers and turn them into a date x ticker price matrix.
df_all = load_data(base_dir, sample_n=771)
pivoted = prepare_pivot(df_all)




  pivot = pivot[good_tickers].fillna(method="ffill").fillna(method="bfill")


In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm


# Number of bootstrap stress scenarios evaluated by the loop further down
# (not to be confused with the optimiser's own ``n_iterations`` argument).
n_iterations = 100
# Number of rows (business days) in every simulated price path.
path_length = 22
# Length of the contiguous return blocks drawn by the block bootstrap.
block_size = 5
# Folder receiving the per-iteration CSV results; created if missing.
output_dir = "rolling_results_ema/"
os.makedirs(output_dir, exist_ok=True)


def generate_stress_data(pivoted_df):
    """Return the simple returns of ``pivoted_df`` tagged with a scenario label.

    Rows containing any missing return are dropped (this always includes the
    first row, which has no predecessor). A ``SCENARIO`` column holding the
    constant ``"Dummy"`` is appended; the input frame is left unchanged.
    """
    pct_moves = pivoted_df.pct_change()
    complete_rows = pct_moves.dropna()
    return complete_rows.assign(SCENARIO="Dummy")


def generate_bootstrap_stress_paths(returns_df, path_length, n_paths, block_size, rng=None):
    """Build return paths by resampling contiguous blocks of historical returns.

    A ``SCENARIO`` column, if present, and every column containing a missing
    value are removed first. For each path, ``path_length // block_size + 1``
    block start positions are drawn with replacement; the corresponding
    ``block_size``-row slices are concatenated and cut to ``path_length`` rows.

    Parameters
    ----------
    returns_df : pandas.DataFrame
        Historical returns, one column per asset.
    path_length : int
        Number of rows in every generated path.
    n_paths : int
        Number of paths to generate.
    block_size : int
        Number of consecutive rows per sampled block (at least 1).
    rng : numpy.random.Generator, optional
        Source of randomness for reproducible paths. When omitted, NumPy's
        global random state is used.

    Returns
    -------
    list of pandas.DataFrame
        ``n_paths`` frames with a fresh 0..path_length-1 index.

    Raises
    ------
    ValueError
        If ``block_size`` is smaller than 1 or the history has fewer than
        ``block_size`` rows.
    """
    returns_df = returns_df.drop(columns=["SCENARIO"], errors="ignore").dropna(axis=1)

    if block_size < 1:
        raise ValueError(f"block_size must be at least 1, got {block_size}")
    n_rows = len(returns_df)
    if n_rows < block_size:
        # Without this check the sampler fails with an unrelated-looking
        # "cannot be empty" error.
        raise ValueError(
            f"need at least block_size={block_size} rows of returns, got {n_rows}"
        )

    block_starts = np.arange(n_rows - block_size + 1)
    blocks_per_path = path_length // block_size + 1
    draw = np.random.choice if rng is None else rng.choice

    paths = []
    for _ in range(n_paths):
        starts = draw(block_starts, size=blocks_per_path, replace=True)
        sampled_blocks = [returns_df.iloc[s:s + block_size] for s in starts]
        sampled_path = pd.concat(sampled_blocks).iloc[:path_length]
        paths.append(sampled_path.reset_index(drop=True))

    return paths


# Rolling stress test: repeatedly simulate a bootstrapped price path that
# starts at the last observed prices, run the PSO/EMA strategy on it and
# store the resulting capital curve and weights.

# Last observed price of every asset; every simulated path starts from here.
last_prices = pivoted.iloc[-1]
# NOTE(review): assigned but not used anywhere in this cell.
historical_prices = pivoted.copy()

# Historical returns (plus a constant SCENARIO label column) that serve as
# the sampling pool for the block bootstrap.
stress_data_real = generate_stress_data(pivoted)
sample_columns = pivoted.columns
results = []

# NOTE(review): no random seed is set in the visible cells, so paths and
# optimiser results differ between runs.
for i in tqdm(range(n_iterations), desc="Rolling stress test iterations - PSO EMA"):
    # Selecting the price columns leaves the SCENARIO label out of the pool.
    stress_paths = generate_bootstrap_stress_paths(
        stress_data_real[sample_columns],
        path_length=path_length,
        n_paths=1,
        block_size=block_size
    )
    stress_path = stress_paths[0]

    # Compound the sampled returns from the last observed prices and place
    # the path on business days starting 2025-05-01.
    # NOTE(review): np.cumprod(...) * last_prices.values relies on the path
    # columns matching sample_columns in number and order; the bootstrap
    # drops columns containing NaN, so confirm none are dropped here.
    stress_prices = pd.DataFrame(
        data=np.cumprod(1 + stress_path.values, axis=0) * last_prices.values,
        columns=sample_columns,
        index=pd.date_range(start="2025-05-01", periods=path_length, freq="B")
    )

    # NOTE(review): the simulated frame has only path_length rows while
    # feature_window=20 and the strategy builds its features from the rows up
    # to each rebalance date, so early rebalances see far fewer than 20
    # returns - confirm this is intended.
    capital_series, weights_df = realistic_pso_ema_advanced(
        pivoted_prices=stress_prices,
        feature_window=20,
        rebalance_period=5,
        n_particles=200,
        n_iterations=300,
        max_weight=0.15,
        lambda_mom=0.2,
        lambda_vol=0.3,
        lambda_mdd=0.3,
        lambda_sharpe=0.3,
        lambda_corr=0.3,
        fee_rate=0.001
    )

    # output_dir already ends with "/", so these paths contain "//";
    # presumably harmless on common file systems.
    capital_series.to_csv(f"{output_dir}/capital_iteration_{i+1}.csv")
    weights_df.to_csv(f"{output_dir}/weights_iteration_{i+1}.csv")
    results.append(capital_series)

# One column per stress iteration, aligned on the simulated dates.
capital_series_combined = pd.concat(results, axis=1)

import matplotlib.pyplot as plt
capital_series_combined.plot(figsize=(14, 7), title="Kapitał w czasie - PSO EMA z okresami stresowymi")
plt.grid(True)
plt.tight_layout()
plt.show()


Rolling stress test iterations - PSO EMA:  30%|███       | 30/100 [1:59:08<5:11:06, 266.66s/it]