Sieć perceptronowa - nasz

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Ustawienie ziarna dla powtarzalności
np.random.seed(42)

# ---- WCZYTYWANIE DANYCH ----
data = pd.read_csv('XAU_1d_data.csv', delimiter=";")
data['Date'] = pd.to_datetime(data['Date']).dt.date

# ---- FEATURE ENGINEERING (Zgodnie z wytycznymi dla szeregów czasowych) ----
# Przewidujemy 'Close' na jutro na podstawie dzisiejszych danych
data['Target'] = data['Close'].shift(-1)
data = data.dropna()

features = ['Open', 'High', 'Low', 'Volume', 'Close']
target = 'Target'

# Usuwanie outlierów 
for col in features + [target]:
    mean = data[col].mean()
    std = data[col].std()
    data = data[(data[col] >= mean - 3*std) & (data[col] <= mean + 3*std)]

X_raw = data[features].values
y_raw = data[target].values.reshape(-1, 1)

# ---- PODZIAŁ CHRONOLOGICZNY (Wymóg: pierwsze x% train, potem y% val, z% test) ----
def split_time_series(X, y, train_ratio=0.7, val_ratio=0.15):
    n = len(X)
    train_end = int(n * train_ratio)
    val_end = int(n * (train_ratio + val_ratio))
    
    return (X[:train_end], y[:train_end], 
            X[train_end:val_end], y[train_end:val_end], 
            X[val_end:], y[val_end:])

X_train_raw, y_train_raw, X_val_raw, y_val_raw, X_test_raw, y_test_raw = split_time_series(X_raw, y_raw)

# ---- SKALOWANIE ----
scaler_x = StandardScaler()
scaler_y = StandardScaler()

X_train = scaler_x.fit_transform(X_train_raw)
X_val = scaler_x.transform(X_val_raw)
X_test = scaler_x.transform(X_test_raw)

y_train = scaler_y.fit_transform(y_train_raw)
y_val = scaler_y.transform(y_val_raw)
y_test = scaler_y.transform(y_test_raw)

print(f"Rozmiary: Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

Rozmiary: Train: (3669, 5), Val: (786, 5), Test: (787, 5)


In [None]:
# Funkcje aktywacji i pomocnicze 
def relu(x, derivative=False):
    if derivative:
        return (x > 0).astype(float)
    return np.maximum(0, x)

def initialize_weights_bias(input_size, hidden_layers_sizes, output_size):
    weights = []
    biases = []
    layer_sizes = [input_size] + hidden_layers_sizes + [output_size]
    for i in range(len(layer_sizes) - 1):
        weights.append(np.random.randn(layer_sizes[i], layer_sizes[i+1]) * np.sqrt(2 / layer_sizes[i]))
        biases.append(np.zeros((1, layer_sizes[i+1])))
    return weights, biases

def adjust_learning_rate(learning_rate, mse, previous_mse, learning_rate_adjust, threshold=1e-6):
    if mse < previous_mse:
        learning_rate *= 1.05
    else:
        learning_rate *= 0.7
    if abs(mse - previous_mse) < threshold:
        learning_rate *= learning_rate_adjust
    return learning_rate

def regression_metrics(y_true, y_pred):
    return {
        'mse': mean_squared_error(y_true, y_pred),
        'mae': mean_absolute_error(y_true, y_pred),
        'r2':  r2_score(y_true, y_pred)
    }

def train_mlp_custom(X, y, learning_rate, learning_rate_adjust, epochs, hidden_layers_sizes, optimizer='gd', momentum=0.9, beta1=0.9, beta2=0.999, epsilon=1e-8):
    input_size = X.shape[1]
    output_size = y.shape[1]
    weights, biases = initialize_weights_bias(input_size, hidden_layers_sizes, output_size)

    velocities_w = [np.zeros_like(w) for w in weights]
    velocities_b = [np.zeros_like(b) for b in biases]

    m_w = [np.zeros_like(w) for w in weights]
    v_w = [np.zeros_like(w) for w in weights]
    m_b = [np.zeros_like(b) for b in biases]
    v_b = [np.zeros_like(b) for b in biases]

    prev_loss = np.inf

    for epoch in range(1, epochs + 1):
        # ---- Forward pass ----
        activations = [X]
        zs = []
        for i in range(len(weights)):
            z = np.dot(activations[-1], weights[i]) + biases[i]
            zs.append(z)
            activations.append(z if i == len(weights)-1 else relu(z))

        # ---- Backpropagation ----
        error = activations[-1] - y
        loss = np.mean(error ** 2)
        learning_rate = adjust_learning_rate(learning_rate, loss, prev_loss, learning_rate_adjust)
        prev_loss = loss

        deltas = [error]
        for i in range(len(weights)-1, 0, -1):
            delta = deltas[-1].dot(weights[i].T) * relu(activations[i], derivative=True)
            deltas.append(delta)
        deltas.reverse()

        # ---- Aktualizacja wag i biasów ----
        for i in range(len(weights)):
            grad_w = activations[i].T.dot(deltas[i]) / X.shape[0]
            grad_b = np.mean(deltas[i], axis=0, keepdims=True)

            if optimizer == 'gd':
                weights[i] -= learning_rate * grad_w
                biases[i]  -= learning_rate * grad_b
            elif optimizer == 'momentum':
                velocities_w[i] = momentum * velocities_w[i] - learning_rate * grad_w
                velocities_b[i] = momentum * velocities_b[i] - learning_rate * grad_b
                weights[i] += velocities_w[i]
                biases[i]  += velocities_b[i]
            elif optimizer == 'adam':
                # dla wag
                m_w[i] = beta1 * m_w[i] + (1 - beta1) * grad_w
                v_w[i] = beta2 * v_w[i] + (1 - beta2) * (grad_w ** 2)
                m_hat_w = m_w[i] / (1 - beta1 ** epoch)
                v_hat_w = v_w[i] / (1 - beta2 ** epoch)
                weights[i] -= learning_rate * m_hat_w / (np.sqrt(v_hat_w) + epsilon)

                # dla biasów
                m_b[i] = beta1 * m_b[i] + (1 - beta1) * grad_b
                v_b[i] = beta2 * v_b[i] + (1 - beta2) * (grad_b ** 2)
                m_hat_b = m_b[i] / (1 - beta1 ** epoch)
                v_hat_b = v_b[i] / (1 - beta2 ** epoch)
                biases[i]  -= learning_rate * m_hat_b / (np.sqrt(v_hat_b) + epsilon)

    return weights, biases

def predict_mlp_custom(X, weights, biases):
    output = X
    for i in range(len(weights)):
        output = np.dot(output, weights[i]) + biases[i]
        if i < len(weights) - 1:
            output = relu(output)
    return output

# ---- EKSPERYMENTY MLP (Zgodnie z wymogami: 4 wartości parametrów, 5 powtórzeń) ----
results_mlp = []
lrs = [0.001, 0.01, 0.05, 0.1] # 4 wartości
epochs_options = [500, 1000, 1500, 2000] # 4 wartości
hidden_configs = [[10], [10, 10], [20, 10], [32, 16, 8]] # 4 wartości

print("Rozpoczynam trening MLP (Własna implementacja)...")
for lr in lrs: 
    for hid in hidden_configs:
        for r in range(5): # Wymóg: 5 powtórzeń
            w, b = train_mlp_custom(
                X_train, y_train, lr, 0.0005, 1000, hid, 'momentum', 0.9)
            # --- PREDYKCJE ---
            pred_train = predict_mlp_custom(X_train, w, b)
            pred_val   = predict_mlp_custom(X_val, w, b)
            pred_test  = predict_mlp_custom(X_test, w, b)

            # --- ODSKALOWANIE ---
            y_train_real = scaler_y.inverse_transform(y_train)
            y_val_real   = scaler_y.inverse_transform(y_val)
            y_test_real  = scaler_y.inverse_transform(y_test)

            pred_train_real = scaler_y.inverse_transform(pred_train)
            pred_val_real   = scaler_y.inverse_transform(pred_val)
            pred_test_real  = scaler_y.inverse_transform(pred_test)

            # --- METRYKI ---
            train_metrics = regression_metrics(y_train_real, pred_train_real)
            val_metrics   = regression_metrics(y_val_real,   pred_val_real)
            test_metrics  = regression_metrics(y_test_real,  pred_test_real)

            # --- ZAPIS WYNIKÓW ---
            results_mlp.append({
                'model': 'Custom_MLP',
                'lr': lr,
                'layers': str(hid),
                'run': r,

                # TRAIN
                'train_mse': train_metrics['mse'],
                'train_mae': train_metrics['mae'],
                'train_r2':  train_metrics['r2'],

                # VALIDATION
                'val_mse': val_metrics['mse'],
                'val_mae': val_metrics['mae'],
                'val_r2':  val_metrics['r2'],

                # TEST
                'test_mse': test_metrics['mse'],
                'test_mae': test_metrics['mae'],
                'test_r2':  test_metrics['r2'],
            })


Rozpoczynam trening MLP (Własna implementacja)...


In [None]:
# ---- Tworzenie DataFrame dla szczegółowych wyników ----
df_mlp = pd.DataFrame(results_mlp)

# ---- ZAPIS SZCZEGÓŁOWY (każdy run osobno) ----
df_mlp.to_excel('wyniki_regresja_szczegolowe.xlsx', sheet_name='MLP_szczegolowy', index=False)

# ---- AGREGACJA (średnie, min, max) po wszystkich run i konfiguracjach ----
summary_mlp = df_mlp.groupby(['lr', 'layers']).agg({
    # TRAIN
    'train_mse': ['mean', 'min', 'max'],
    'train_mae': ['mean', 'min', 'max'],
    'train_r2':  ['mean', 'min', 'max'],
    # VALIDATION
    'val_mse': ['mean', 'min', 'max'],
    'val_mae': ['mean', 'min', 'max'],
    'val_r2':  ['mean', 'min', 'max'],
    # TEST
    'test_mse': ['mean', 'min', 'max'],
    'test_mae': ['mean', 'min', 'max'],
    'test_r2':  ['mean', 'min', 'max']
}).reset_index()

# ---- ZAPIS ZAGREGOWANY ----
summary_mlp.to_excel('wyniki_regresja_zagregowane.xlsx', sheet_name='MLP_zagregowany', index=False)

print("Pliki Excel zostały wygenerowane:")
print("1. wyniki_regresja_szczegolowe.xlsx  (wszystkie runy)")
print("2. wyniki_regresja_zagregowane.xlsx  (średnie, min, max)")

In [4]:
def create_sequences(X, y, time_steps=5):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:(i + time_steps)])
        ys.append(y[i + time_steps])
    return np.array(Xs), np.array(ys)

TIME_STEPS = 7 # Analiza ostatniego tygodnia (7 dni)
X_train_seq, y_train_seq = create_sequences(X_train, y_train, TIME_STEPS)
X_val_seq, y_val_seq = create_sequences(X_val, y_val, TIME_STEPS)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, TIME_STEPS)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv1D, Flatten, Input

def run_keras_experiment(model_type, param_list, param_name):
    results = []
    for val in param_list:
        for r in range(5):
            model = Sequential()
            model.add(Input(shape=(TIME_STEPS, X_train.shape[1])))
            
            if model_type == 'LSTM':
                model.add(LSTM(val, activation='relu'))
            elif model_type == 'CNN':
                model.add(Conv1D(filters=val, kernel_size=3, activation='relu'))
                model.add(Flatten())
            
            model.add(Dense(1))
            model.compile(optimizer='adam', loss='mse')
            
            model.fit(X_train_seq, y_train_seq, epochs=20, verbose=0, batch_size=32)
            
            pred = model.predict(X_test_seq, verbose=0)
            p_real = scaler_y.inverse_transform(pred)
            t_real = scaler_y.inverse_transform(y_test_seq)
            
            mse = mean_squared_error(t_real, p_real)
            r2 = r2_score(t_real, p_real)
            results.append({param_name: val, 'run': r, 'mse': mse, 'r2': r2})
    return results

# Wykonanie badań
lstm_results = run_keras_experiment('LSTM', [16, 32, 64, 128], 'units')
cnn_results = run_keras_experiment('CNN', [16, 32, 64, 128], 'filters')

2026-01-02 20:09:22.317457: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [7]:
# Łączenie wyników w tabele
df_mlp = pd.DataFrame(results_mlp)
df_lstm = pd.DataFrame(lstm_results)
df_cnn = pd.DataFrame(cnn_results)

# wyciągania średnich i najlepszych wyników 
summary_lstm = df_lstm.groupby('units').agg({'mse': ['mean', 'min'], 'r2': 'mean'})
print("PODSUMOWANIE LSTM:")
print(summary_lstm)

# Eksport do Excela dla sprawozdania
with pd.ExcelWriter('wyniki_regresja_final.xlsx') as writer:
    df_mlp.to_excel(writer, sheet_name='MLP_Wlasny')
    df_lstm.to_excel(writer, sheet_name='LSTM')
    df_cnn.to_excel(writer, sheet_name='CNN')

print("\nPlik 'wyniki_regresja_final.xlsx' został wygenerowany.")

PODSUMOWANIE LSTM:
                mse                     r2
               mean          min      mean
units                                     
16      9787.000649  3153.343998  0.899299
32     10159.380027  5340.790149  0.895468
64     11804.270649  3274.055325  0.878543
128     9370.512709  2503.672161  0.903584

Plik 'wyniki_regresja_final.xlsx' został wygenerowany.
