In [None]:
import numpy as np
import pandas as pd
import time
from itertools import product
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# Seed used for the train/validation/test splits so the partitions are reproducible
RANDOM_STATE = 42

In [None]:
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Element-wise ReLU slope as float32: 1.0 where ``x > 0``, otherwise 0.0."""
    positive_mask = x > 0
    return positive_mask.astype(np.float32)

def softmax(x):
    """Row-wise softmax of a 2-D array (normalises along axis 1).

    The per-row maximum is subtracted before exponentiating so that large
    inputs do not overflow in ``np.exp``.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    shifted_exp = np.exp(x - row_max)
    row_total = np.sum(shifted_exp, axis=1, keepdims=True)
    return shifted_exp / row_total

def initialize_parameters(layer_sizes, rng=None):
    """Create He-initialised weights and zero biases for a dense network.

    Args:
        layer_sizes: Sequence of layer widths, input layer first and output
            layer last. ``len(layer_sizes) - 1`` weight matrices are created.
        rng: Optional source of randomness. ``None`` (default) draws from
            NumPy's global ``np.random`` state, exactly as before. An int seed
            or a ``numpy.random.Generator`` gives an isolated, reproducible
            initialisation.

    Returns:
        ``(weights, biases)``: two lists of float32 arrays. ``weights[i]`` has
        shape ``(layer_sizes[i], layer_sizes[i + 1])`` and ``biases[i]`` has
        shape ``(1, layer_sizes[i + 1])``.
    """
    generator = None if rng is None else np.random.default_rng(rng)

    weights, biases = [], []
    for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        # He initialisation: standard deviation sqrt(2 / fan_in).
        he_std = np.sqrt(2.0 / fan_in)
        if generator is None:
            draw = np.random.randn(fan_in, fan_out)
        else:
            draw = generator.standard_normal((fan_in, fan_out))
        # Cast AFTER scaling: multiplying a float32 array by the np.float64
        # scalar returned by np.sqrt promotes the result to float64 under
        # NumPy 2 promotion rules, which left weights and biases with
        # different dtypes.
        weights.append((draw * he_std).astype(np.float32))
        biases.append(np.zeros((1, fan_out), dtype=np.float32))
    return weights, biases

def train_custom_mlp(X, y_oh, hidden_layers, lr, epochs, optimizer="sgd", momentum=0.9):
    """Train a fully connected ReLU network with a softmax output layer.

    Each epoch runs one forward and one backward pass over the whole of ``X``
    (full-batch training) and then applies one parameter update per layer.

    Args:
        X: 2-D array of training samples, one row per sample.
        y_oh: One-hot targets, one row per sample and one column per class.
        hidden_layers: Sequence (list or tuple) of hidden-layer widths.
        lr: Learning rate.
        epochs: Number of full-batch updates to perform.
        optimizer: ``"sgd"``, ``"momentum"`` or ``"adam"``.
        momentum: Velocity decay factor; only used when
            ``optimizer == "momentum"``.

    Returns:
        ``(weights, biases)``: lists with one entry per layer.

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """
    # Fail fast: an unrecognised name used to fall through every branch of the
    # update step, so the returned "trained" network was just its random
    # initialisation.
    supported_optimizers = ("sgd", "momentum", "adam")
    if optimizer not in supported_optimizers:
        raise ValueError(
            f"Unknown optimizer {optimizer!r}; expected one of {supported_optimizers}"
        )

    # list(...) lets callers pass a tuple such as (64, 32) as well as a list.
    layer_sizes = [X.shape[1]] + list(hidden_layers) + [y_oh.shape[1]]
    weights, biases = initialize_parameters(layer_sizes)
    n_layers = len(weights)
    n_samples = X.shape[0]

    # Optimizer state, allocated once before the epoch loop.
    vW = [np.zeros_like(w) for w in weights]; vb = [np.zeros_like(b) for b in biases]
    mW = [np.zeros_like(w) for w in weights]; mb = [np.zeros_like(b) for b in biases]
    sW = [np.zeros_like(w) for w in weights]; sb = [np.zeros_like(b) for b in biases]

    for epoch in range(1, epochs + 1):
        # --- Forward pass: ReLU on hidden layers, softmax on the output ---
        activations = [X]
        for i in range(n_layers - 1):
            activations.append(relu(activations[-1] @ weights[i] + biases[i]))
        output = softmax(activations[-1] @ weights[-1] + biases[-1])
        activations.append(output)

        # --- Backward pass ---
        # (output - y_oh) is the cross-entropy gradient w.r.t. the logits of a
        # softmax layer; earlier deltas are back-propagated from it.
        deltas = [None] * n_layers
        deltas[-1] = output - y_oh
        for i in reversed(range(n_layers - 1)):
            deltas[i] = (deltas[i + 1] @ weights[i + 1].T) * relu_derivative(activations[i + 1])

        # --- Parameter update ---
        for i in range(n_layers):
            grad_W = (activations[i].T @ deltas[i]) / n_samples
            grad_b = np.mean(deltas[i], axis=0, keepdims=True)

            if optimizer == "sgd":
                weights[i] -= lr * grad_W
                biases[i] -= lr * grad_b
            elif optimizer == "momentum":
                # Classical (non-Nesterov) momentum.
                vW[i] = momentum * vW[i] - lr * grad_W
                vb[i] = momentum * vb[i] - lr * grad_b
                weights[i] += vW[i]
                biases[i] += vb[i]
            else:  # "adam" (guaranteed by the validation above)
                # Adam with beta1=0.9, beta2=0.999, eps=1e-8; the epoch number
                # is the bias-correction timestep (one update per epoch).
                mW[i] = 0.9 * mW[i] + (1 - 0.9) * grad_W
                mb[i] = 0.9 * mb[i] + (1 - 0.9) * grad_b
                sW[i] = 0.999 * sW[i] + (1 - 0.999) * (grad_W**2)
                sb[i] = 0.999 * sb[i] + (1 - 0.999) * (grad_b**2)
                m_hat = mW[i] / (1 - 0.9**epoch)
                mb_hat = mb[i] / (1 - 0.9**epoch)
                s_hat = sW[i] / (1 - 0.999**epoch)
                sb_hat = sb[i] / (1 - 0.999**epoch)
                weights[i] -= lr * m_hat / (np.sqrt(s_hat) + 1e-8)
                biases[i] -= lr * mb_hat / (np.sqrt(sb_hat) + 1e-8)

    return weights, biases

def predict_custom(X, weights, biases):
    """Return the predicted class index for every row of ``X``.

    Hidden layers use ReLU. The final layer's raw scores are fed straight to
    ``argmax``; softmax is skipped because it does not change the ordering.
    """
    hidden = X
    for idx, layer_weights in enumerate(weights[:-1]):
        hidden = relu(hidden @ layer_weights + biases[idx])
    logits = hidden @ weights[-1] + biases[-1]
    return np.argmax(logits, axis=1)

In [None]:
def load_data_variants(filepath):
    """Load the dataset and prepare two standardised train/test variants.

    Reads columns ``US1`` .. ``US24`` as float32 features and label-encodes the
    ``Class`` column. Two stratified partitions are built with ``RANDOM_STATE``:

    * 80% train / 20% test
    * 70% train / 15% validation / 15% test

    Each variant is standardised with statistics fitted on its own training
    part only.

    Returns:
        ``(bundle8020, bundle7015, num_classes)``. Each bundle is the tuple
        ``(X_train, y_train_one_hot, y_train, X_val, y_val, X_test, y_test)``,
        where the validation entries are ``None`` for the 80/20 variant.
    """
    frame = pd.read_excel(filepath)
    feature_columns = ["US" + str(idx) for idx in range(1, 25)]
    X = frame[feature_columns].values.astype(np.float32)

    encoder = LabelEncoder()
    y = encoder.fit_transform(frame['Class'])
    num_classes = len(encoder.classes_)

    # VARIANT 1: 80% TRAIN, 20% TEST
    split_80_20 = train_test_split(
        X, y, test_size=0.20, stratify=y, random_state=RANDOM_STATE
    )
    X_train80, X_test20, y_train80, y_test20 = split_80_20

    # VARIANT 2: 70% TRAIN, 15% VALIDATION, 15% TEST
    # First carve off 30%, then halve that hold-out into validation and test.
    split_70_30 = train_test_split(
        X, y, test_size=0.30, stratify=y, random_state=RANDOM_STATE
    )
    X_train70, X_temp, y_train70, y_temp = split_70_30
    split_15_15 = train_test_split(
        X_temp, y_temp, test_size=0.50, stratify=y_temp, random_state=RANDOM_STATE
    )
    X_val15, X_test15, y_val15, y_test15 = split_15_15

    scaler = StandardScaler()

    def process_bundle(xt, yt, xv=None, yv=None, xte=None, yte=None):
        # Fit on the training part only, then reuse those statistics for the
        # validation/test parts before the scaler is refitted for another bundle.
        xt_s = scaler.fit_transform(xt)
        y_oh = np.eye(num_classes, dtype=np.float32)[yt]
        xv_s = None if xv is None else scaler.transform(xv)
        xte_s = None if xte is None else scaler.transform(xte)
        return (xt_s, y_oh, yt, xv_s, yv, xte_s, yte)

    bundle8020 = process_bundle(X_train80, y_train80, xte=X_test20, yte=y_test20)
    bundle7015 = process_bundle(
        X_train70, y_train70, xv=X_val15, yv=y_val15, xte=X_test15, yte=y_test15
    )

    return bundle8020, bundle7015, num_classes

def get_all_metrics(y_true, y_pred, prefix):
    """Compute accuracy and macro-averaged precision, recall and F1.

    Returns a dict keyed ``{prefix}_acc``, ``{prefix}_prec``, ``{prefix}_rec``
    and ``{prefix}_f1``. ``zero_division=0`` is passed to the macro-averaged
    scorers.
    """
    macro_options = {"average": 'macro', "zero_division": 0}
    macro_scorers = (
        ("prec", precision_score),
        ("rec", recall_score),
        ("f1", f1_score),
    )

    metrics = {f"{prefix}_acc": accuracy_score(y_true, y_pred)}
    for suffix, scorer in macro_scorers:
        metrics[f"{prefix}_{suffix}"] = scorer(y_true, y_pred, **macro_options)
    return metrics

In [None]:
def run_full_experiment(params):
    """Run one grid-search task: train the custom MLP and a sklearn baseline.

    Args:
        params: Tuple ``(split_name, lr, ep, hid, opt, mom, rep_idx, bundle,
            num_classes)``. ``bundle`` is ``(X_tr, y_tr_oh, y_tr_idx, X_val,
            y_val_idx, X_te, y_te_idx)``; the validation entries may be
            ``None``. ``mom`` is ``None`` for optimizers without a momentum
            setting. ``rep_idx`` must be a valid integer seed.

    Returns:
        Dict with the task's hyper-parameters, the custom network's
        train/test (and, when available, validation) metrics, and the sklearn
        test accuracy under ``"sklearn_test_acc"``.

    Side effects:
        Reseeds NumPy's global RNG with ``rep_idx``.
    """
    (split_name, lr, ep, hid, opt, mom, rep_idx, bundle, _num_classes) = params
    (X_tr, y_tr_oh, y_tr_idx, X_val, y_val_idx, X_te, y_te_idx) = bundle

    # Compare against None so an explicit momentum of 0.0 is not replaced by 0.9.
    custom_momentum = 0.9 if mom is None else mom

    # Seed per repetition. Without this, forked pool workers inherit the same
    # global RNG state and can start different "repetitions" from identical
    # weights, while spawned workers are not reproducible at all. This mirrors
    # random_state=rep_idx used for the sklearn model below.
    np.random.seed(rep_idx)

    # 1. TRAIN THE CUSTOM NETWORK
    w, b = train_custom_mlp(X_tr, y_tr_oh, hid, lr, ep, opt, custom_momentum)

    # Metrics of the custom network on every available subset
    res = {"split": split_name, "lr": lr, "epochs": ep, "hidden": str(hid), "opt": opt, "mom": mom, "rep": rep_idx}
    res.update(get_all_metrics(y_tr_idx, predict_custom(X_tr, w, b), "custom_train"))
    res.update(get_all_metrics(y_te_idx, predict_custom(X_te, w, b), "custom_test"))
    if X_val is not None:
        res.update(get_all_metrics(y_val_idx, predict_custom(X_val, w, b), "custom_val"))

    # 2. OFF-THE-SHELF LIBRARY (SKLEARN)
    sk_opt = 'sgd' if opt in ['sgd', 'momentum'] else 'adam'
    # Keep the baseline comparable with the custom optimizer:
    #   * "sgd" in the custom net is plain gradient descent -> momentum 0.0
    #     (0.9 was passed before, silently turning it into momentum SGD);
    #   * "momentum" in the custom net is classical momentum -> no Nesterov.
    # The momentum arguments only affect sklearn's 'sgd' solver.
    sk_momentum = custom_momentum if opt == 'momentum' else 0.0
    clf = MLPClassifier(hidden_layer_sizes=tuple(hid), learning_rate_init=lr,
                        max_iter=ep, solver=sk_opt, momentum=sk_momentum,
                        nesterovs_momentum=False, random_state=rep_idx, tol=1e-4)
    clf.fit(X_tr, y_tr_idx)
    res.update({"sklearn_test_acc": accuracy_score(y_te_idx, clf.predict(X_te))})

    return res

In [None]:
if __name__ == "__main__":
    # Only this cell needs these, so they are imported locally.
    import multiprocessing
    import sys

    b8020, b7015, n_cls = load_data_variants("sensor_readings_24.xlsx")

    # Hyper-parameter grid (at least 4 values per parameter, as required)
    lrs = [0.001, 0.01, 0.05, 0.1]
    epochs = [100, 250, 500, 1000]
    configs = [[64, 32], [32, 16], [16, 8], [64, 32, 16]]
    opts = ["sgd", "momentum", "adam"]
    moms = [0.7, 0.9]
    repeats = range(5)

    all_tasks = []
    for split_name, bundle in [("80/20", b8020), ("70/15/15", b7015)]:
        for lr, ep, hid, opt in product(lrs, epochs, configs, opts):
            # Momentum values are only swept for the "momentum" optimizer.
            current_moms = moms if opt == "momentum" else [None]
            for mom in current_moms:
                for r in repeats:
                    all_tasks.append((split_name, lr, ep, hid, opt, mom, r, bundle, n_cls))

    print(f"Start: {len(all_tasks)} eksperymentów.")

    # Pick an execution strategy that works where this code is running.
    # Workers started with "spawn"/"forkserver" must re-import __main__ to find
    # run_full_experiment. That is impossible for functions defined in a
    # notebook or interactive session, and the pool then never yields a result.
    # "fork" copies the parent's memory and has no such requirement.
    # NOTE(review): the missing __file__ attribute is used as the marker of an
    # interactive session; confirm for the front-end in use.
    main_is_importable = hasattr(sys.modules["__main__"], "__file__")
    if main_is_importable:
        pool_factory = Pool
    elif sys.platform.startswith("linux") and "fork" in multiprocessing.get_all_start_methods():
        pool_factory = multiprocessing.get_context("fork").Pool
    else:
        pool_factory = None

    if pool_factory is not None:
        with pool_factory(cpu_count()) as pool:
            results = list(tqdm(pool.imap(run_full_experiment, all_tasks), total=len(all_tasks)))
    else:
        # Slow but reliable fallback: run every task in this process.
        results = [run_full_experiment(task) for task in tqdm(all_tasks)]

    # Save every raw result to Excel
    df_results = pd.DataFrame(results)
    df_results.to_excel("wyniki_koncowe_projekt.xlsx", index=False)

    # Aggregate for the report (mean and best scores). Epoch counts, momentum
    # values and repetitions are not grouping keys, so they are pooled here.
    summary = df_results.groupby(['split', 'hidden', 'opt', 'lr']).agg({
        'custom_test_acc': ['mean', 'max'],
        'custom_test_f1': ['mean', 'max'],
        'sklearn_test_acc': ['mean', 'max']
    }).reset_index()
    summary.to_excel("podsumowanie_do_raportu.xlsx")

    print("Zakończono! Pliki 'wyniki_koncowe_projekt.xlsx' i 'podsumowanie_do_raportu.xlsx' są gotowe.")

Start: 2560 eksperymentów.




KeyboardInterrupt: 