In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

nazwa_pliku = 'sensor_readings_24.data'

# Only the file read is guarded. A missing file still produces the friendly
# message, but any other failure (parsing, column mismatch, split, Excel export)
# now propagates with its full traceback instead of being reduced to a one-line
# print by a catch-all handler.
try:
    df = pd.read_csv(nazwa_pliku, sep=',', header=None)
except FileNotFoundError:
    print("Nie znaleziono pliku. Sprawdź czy nazwa i ścieżka są poprawne.")
else:
    # The file is read without a header row: name the 24 feature columns
    # US1..US24 and the last column 'Class'.
    nowe_nazwy = [f"US{i}" for i in range(1, 25)]
    nowe_nazwy.append('Class')
    df.columns = nowe_nazwy

    print("\n--- Informacje o wymiarach ---")
    wiersze, kolumny = df.shape
    print(f"Liczba wierszy: {wiersze}")
    print(f"Liczba kolumn: {kolumny}")

    print("\n--- Analiza kolumny class ---")
    liczba_unikalnych = df.iloc[:, -1].nunique()
    print(f"Liczba unikalnych wartości w ostatniej kolumnie: {liczba_unikalnych}")

    print(f"Te wartości to: {df.iloc[:, -1].unique()}")

    print("\n" + "="*40)
    print("ROZKŁAD KLAS W CAŁYM ZBIORZE:")
    print("="*40)

    # Absolute and percentage class frequencies over the whole data set.
    counts = df['Class'].value_counts()
    percents = df['Class'].value_counts(normalize=True) * 100

    for klasa in counts.index:
        liczba = counts[klasa]
        procent = percents[klasa]
        print(f"-- {klasa}: {liczba} samples ({procent:.2f}%).")

    # 'Set' marks which subset each row belongs to; it is filled in below.
    df['Set'] = ""

    train_indices, test_indices = train_test_split(
        df.index,
        test_size=0.2,  # 80/20 split
        stratify=df['Class'],  # keep the class proportions equal in train and test
        random_state=42  # seed, so the split is reproducible
    )

    df.loc[train_indices, 'Set'] = 'train'
    df.loc[test_indices, 'Set'] = 'test'

    print("\n--- Podgląd po zmianach (pierwsze 5 wierszy) ---")
    print(df.head())

    print("\n" + "="*40)
    print("PODSUMOWANIE PODZIAŁU (TRAIN vs TEST):")
    print("="*40)

    set_counts = df['Set'].value_counts()
    set_percents = df['Set'].value_counts(normalize=True) * 100

    for nazwa_setu in set_counts.index:
        liczba = set_counts[nazwa_setu]
        procent = set_percents[nazwa_setu]
        print(f"Zbiór '{nazwa_setu}': {liczba} wierszy ({procent:.2f}% całości)")

    # Extra check: were the class proportions preserved inside each subset?
    print("\n--- Weryfikacja proporcji klas w poszczególnych zbiorach (%) ---")
    proporcje = pd.crosstab(df['Class'], df['Set'], normalize='columns') * 100
    print(proporcje.round(2))

    # Persist the labelled split so later cells can reload it.
    df.to_excel("sensor_readings_24.xlsx", index=False)
    print("\n--> Zapisano gotowy plik: sensor_readings_24.xlsx")


--- Informacje o wymiarach ---
Liczba wierszy: 5456
Liczba kolumn: 25

--- Analiza kolumny class ---
Liczba unikalnych wartości w ostatniej kolumnie: 4
Te wartości to: ['Slight-Right-Turn' 'Sharp-Right-Turn' 'Move-Forward' 'Slight-Left-Turn']

ROZKŁAD KLAS W CAŁYM ZBIORZE:
-- Move-Forward: 2205 samples (40.41%).
-- Sharp-Right-Turn: 2097 samples (38.43%).
-- Slight-Right-Turn: 826 samples (15.14%).
-- Slight-Left-Turn: 328 samples (6.01%).

--- Podgląd po zmianach (pierwsze 5 wierszy) ---
     US1    US2    US3    US4  US5    US6  US7    US8    US9   US10  ...  \
0  0.438  0.498  3.625  3.645  5.0  2.918  5.0  2.351  2.332  2.643  ...   
1  0.438  0.498  3.625  3.648  5.0  2.918  5.0  2.637  2.332  2.649  ...   
2  0.438  0.498  3.625  3.629  5.0  2.918  5.0  2.637  2.334  2.643  ...   
3  0.437  0.501  3.625  3.626  5.0  2.918  5.0  2.353  2.334  2.642  ...   
4  0.438  0.498  3.626  3.629  5.0  2.918  5.0  2.640  2.334  2.639  ...   

    US17   US18   US19   US20   US21   US22   US

## Nasze wczytanie danych: podział train/test, kodowanie etykiet i standaryzacja cech

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the workbook that already contains the train/test assignment
df = pd.read_excel("sensor_readings_24.xlsx")

features = ["US" + str(i) for i in range(1, 25)]

# Row selectors derived from the 'Set' column
is_train_row = df['Set'] == 'train'
is_test_row = df['Set'] == 'test'

# Feature matrices for both subsets
X_train = df.loc[is_train_row, features].values
X_test = df.loc[is_test_row, features].values

# Raw (string) class labels for both subsets
y_train_labels = df.loc[is_train_row, 'Class'].values
y_test_labels = df.loc[is_test_row, 'Class'].values

# LabelEncoder maps class names to integer ids; it is fitted on the training labels only
le = LabelEncoder()
y_train_int = le.fit_transform(y_train_labels)
y_test_int = le.transform(y_test_labels)

# One-hot targets for the network: row k of the identity matrix encodes class k
num_classes = len(le.classes_)
identity = np.eye(num_classes)
y_train = identity[y_train_int]
y_test = identity[y_test_int]

# Standardise features with statistics fitted on the training subset only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Activation function: sigmoid
def sigmoid(x, derivative=False):
    """Logistic sigmoid, or its derivative expressed through the sigmoid output.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation values when ``derivative`` is False. When ``derivative``
        is True the function returns ``x * (1 - x)``, so ``x`` must already be
        the sigmoid *output* for the result to be the actual derivative.
    derivative : bool, default False
        Select the derivative form instead of the activation itself.

    Returns
    -------
    numpy scalar or ndarray
        Same shape as ``x``.
    """
    if derivative:
        return x * (1 - x)

    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it cannot overflow the way exp(-x) does for
    # large negative x. The two branches below are algebraically the same
    # function, written for x >= 0 and x < 0 respectively.
    z = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d result back into a numpy scalar and leaves
    # real arrays untouched.
    return out[()]

def softmax(x):
    """Row-wise softmax of a 2-D input.

    The maximum of each row is subtracted before exponentiating, so the largest
    exponent in every row is exp(0) = 1. Each output row sums to 1.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_max)
    row_total = np.sum(exps, axis=1, keepdims=True)
    return exps / row_total

def relu(x, derivative=False):
    """Rectified linear unit, or its derivative when ``derivative`` is True.

    The derivative is returned as a float array holding 1.0 where ``x > 0`` and
    0.0 elsewhere (including at exactly 0).
    """
    if not derivative:
        return np.maximum(0, x)
    positive = x > 0
    return positive.astype(float)

# Weight initialisation for a network with any number of hidden layers.
# (An earlier variant drew the weights uniformly from [-1, 1); it was replaced
# by the scaled normal draw below.)
def initialize_weights(input_size, hidden_layers_sizes, output_size):
    """Create one weight matrix per pair of consecutive layers.

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_layers_sizes : iterable of int
        Width of each hidden layer, in order. Any iterable is accepted (list,
        tuple, ...); an empty one yields a single input-to-output matrix.
    output_size : int
        Number of output units.

    Returns
    -------
    list of ndarray
        Matrix ``i`` has shape ``(layer_sizes[i], layer_sizes[i + 1])`` and is
        drawn from a standard normal scaled by ``sqrt(2 / fan_in)``. Draws come
        from NumPy's global random state, in layer order.
    """
    # Unpacking (rather than list concatenation) lets tuples and other
    # iterables through as well as lists.
    layer_sizes = [input_size, *hidden_layers_sizes, output_size]
    return [
        np.random.randn(fan_in, fan_out) * np.sqrt(2 / fan_in)
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])
    ]

# Split the data into training, validation and test (generalisation) subsets
def split_data(data, train_ratio=0.6, validation_ratio=0.2):
    """Shuffle ``data`` along its first axis and cut it into three parts.

    Parameters
    ----------
    data : ndarray or list
        Observations, one per row/element. The caller's object is NOT modified:
        a copy is shuffled instead.
    train_ratio : float, default 0.6
        Fraction of rows assigned to the training part.
    validation_ratio : float, default 0.2
        Fraction of rows assigned to the validation part.

    Returns
    -------
    tuple
        ``(train_data, validation_data, test_data)``; the test part receives
        whatever remains after the first two cuts. Part sizes are truncated
        with ``int()``.
    """
    # np.random.shuffle works in place, so shuffle a copy to leave the caller's
    # data in its original order.
    shuffled = data.copy()
    np.random.shuffle(shuffled)

    train_size = int(len(shuffled) * train_ratio)
    validation_size = int(len(shuffled) * validation_ratio)
    validation_end = train_size + validation_size

    train_data = shuffled[:train_size]                    # first train_size rows
    validation_data = shuffled[train_size:validation_end]  # next validation_size rows
    test_data = shuffled[validation_end:]                  # everything that is left

    return train_data, validation_data, test_data

# Learning-rate adjustment rule
def adjust_learning_rate(learning_rate, mse, previous_mse, learning_rate_adjust, threshold=0.001):
    """Return the learning rate rescaled according to how the error changed.

    The rate grows by 10% when ``mse`` is strictly below ``previous_mse`` and is
    halved otherwise. Independently, when the two errors differ by less than
    ``threshold``, the rate is additionally multiplied by
    ``learning_rate_adjust``.
    """
    improved = mse < previous_mse
    learning_rate *= 1.1 if improved else 0.5

    # A near-constant error is treated as a plateau: apply the extra factor.
    plateau = abs(mse - previous_mse) < threshold
    if plateau:
        learning_rate *= learning_rate_adjust

    return learning_rate

# Training of a network with multiple hidden layers
def train(X, y, learning_rate, learning_rate_adjust, epochs, hidden_layers_sizes):
    """Train the network with full-batch gradient descent and return its weights.

    Hidden layers use ReLU and the output layer uses softmax; the layers are
    plain matrix products without bias terms. Every epoch processes all rows of
    ``X`` at once.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
    y : ndarray, shape (n_samples, n_classes)
        Targets; the output error is taken as ``prediction - y``.
    learning_rate : float
        Step size, constant over all epochs.
    learning_rate_adjust : float
        Accepted for interface compatibility; this function never reads it.
    epochs : int
        Number of passes over the data.
    hidden_layers_sizes : list of int
        Widths of the hidden layers, forwarded to ``initialize_weights``.

    Returns
    -------
    list of ndarray
        One weight matrix per layer transition.
    """
    n_samples = X.shape[0]
    weights = initialize_weights(X.shape[1], hidden_layers_sizes, y.shape[1])
    last = len(weights) - 1

    for _ in range(epochs):
        # Forward pass: keep every layer's activation for the backward pass.
        activations = [X]
        for idx, w in enumerate(weights):
            z = np.dot(activations[-1], w)
            activations.append(softmax(z) if idx == last else relu(z))

        # Backward pass: deltas[k] is the error signal at the output of layer k.
        deltas = [None] * len(weights)
        deltas[last] = activations[-1] - y  # cross-entropy gradient
        for idx in range(last, 0, -1):
            back = deltas[idx].dot(weights[idx].T)
            deltas[idx - 1] = back * relu(activations[idx], derivative=True)

        # Gradient step, averaged over the batch. All deltas were computed
        # above from the pre-update weights.
        for idx, (layer_input, delta) in enumerate(zip(activations, deltas)):
            weights[idx] -= learning_rate * layer_input.T.dot(delta) / n_samples

    return weights


def predict(X, weights):
    """Run a forward pass and return the output of the final layer.

    Every layer is a matrix product with its weight matrix; ReLU follows all
    layers except the last, which is followed by softmax. With an empty
    ``weights`` list ``X`` is returned unchanged.
    """
    last = len(weights) - 1
    signal = X
    for idx, w in enumerate(weights):
        pre_activation = np.dot(signal, w)
        signal = softmax(pre_activation) if idx == last else relu(pre_activation)
    return signal

# Prediction error
def calculate_error(predictions, labels):
    """Return the mean absolute difference between ``predictions`` and ``labels``."""
    abs_diff = np.abs(predictions - labels)
    return np.mean(abs_diff)

def correct(y, predictions):
    """Fraction of rows whose arg-max class agrees between ``y`` and ``predictions``.

    Both arguments are indexed per row with ``argmax(axis=1)``, i.e. they are
    expected to be 2-D (one-hot targets and per-class scores).
    """
    hits = np.argmax(y, axis=1) == np.argmax(predictions, axis=1)
    return np.sum(hits) / len(y)

# Network hyper-parameters. Each list is a grid axis iterated by the
# experiment loop below; `repeat` is the number of training runs per setting.
learning_rates = [0.01]
# Forwarded to train(), whose visible body never reads this value.
learning_rate_adjusts = [0.5]
epochses = [1000]
repeat = 3

# Hidden-layer configurations to compare (one inner list per architecture)
hidden_layers_sizes_list = [
    [24],         
    [24, 24]       
]

# Accuracy, precision, recall and F1 for one set of predictions
def calculate_metrics(y_test_class, y_pred_class):
    """Return ``(accuracy, precision, recall, f1)`` for integer class labels.

    Precision, recall and F1 are macro-averaged. ``zero_division=0`` makes a
    class that is never predicted (or never present) contribute 0 explicitly,
    rather than triggering scikit-learn's undefined-metric warning; this
    matches how the scikit-learn MLP cell in this notebook computes the same
    metrics.

    Parameters
    ----------
    y_test_class : array-like of int
        True class ids.
    y_pred_class : array-like of int
        Predicted class ids.
    """
    accuracy = accuracy_score(y_test_class, y_pred_class)
    precision = precision_score(y_test_class, y_pred_class, average='macro', zero_division=0)
    recall = recall_score(y_test_class, y_pred_class, average='macro', zero_division=0)
    f1 = f1_score(y_test_class, y_pred_class, average='macro', zero_division=0)

    return accuracy, precision, recall, f1

# One summary row per hidden-layer configuration
results = []

# Experiment loop: train and evaluate every architecture `repeat` times
for hidden_layers_sizes in hidden_layers_sizes_list:
    # Metrics collected over all runs of this architecture
    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []

    best_accuracy = -np.inf
    best_precision = -np.inf
    best_recall = -np.inf
    best_f1 = -np.inf

    for r in range(1, repeat + 1):
        for lr in learning_rates:
            for lr_adj in learning_rate_adjusts:
                for epochs in epochses:
                    trained_weights = train(
                        X_train, y_train, lr, lr_adj, epochs, hidden_layers_sizes
                    )
                    predictions_train = predict(X_train, trained_weights)
                    # The former prediction on `X_validation` was removed: no
                    # visible cell defines that array (the data is only split
                    # into train/test), so the line raised NameError on a
                    # fresh kernel, and its result was never used.
                    predictions_test = predict(X_test, trained_weights)

                    # Convert class scores / one-hot targets to class ids
                    y_pred_class = np.argmax(predictions_test, axis=1)
                    y_test_class = np.argmax(y_test, axis=1)

                    # Test-set metrics for this run
                    accuracy, precision, recall, f1 = calculate_metrics(y_test_class, y_pred_class)

                    accuracies.append(accuracy)
                    precisions.append(precision)
                    recalls.append(recall)
                    f1_scores.append(f1)

                    # Track the best value of each metric independently; the
                    # four maxima may come from different runs.
                    best_accuracy = max(best_accuracy, accuracy)
                    best_precision = max(best_precision, precision)
                    best_recall = max(best_recall, recall)
                    best_f1 = max(best_f1, f1)

    # Averages over all runs of this architecture
    avg_accuracy = np.mean(accuracies)
    avg_precision = np.mean(precisions)
    avg_recall = np.mean(recalls)
    avg_f1 = np.mean(f1_scores)

    # Append the summary row
    results.append({
        'hidden_layers': hidden_layers_sizes,
        'avg_accuracy': avg_accuracy,
        'avg_precision': avg_precision,
        'avg_recall': avg_recall,
        'avg_f1': avg_f1,
        'best_accuracy': best_accuracy,
        'best_precision': best_precision,
        'best_recall': best_recall,
        'best_f1': best_f1
    })

# Build the results table
results_df = pd.DataFrame(results)

# Show the results
print(results_df)

  hidden_layers  avg_accuracy  avg_precision  avg_recall    avg_f1  \
0          [24]      0.696276       0.677132    0.630186  0.636177   
1      [24, 24]      0.707265       0.699820    0.657815  0.671055   

   best_accuracy  best_precision  best_recall   best_f1  
0       0.704212        0.703571     0.639479  0.653518  
1       0.722527        0.728770     0.694316  0.705387  


In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# =======================
# Simulation parameters
# =======================
learning_rates = [0.01]
learning_rate_adjusts = [0.5]
epochses = [1000]
repeat = 3
hidden_layers_sizes_list = [[10], [10, 10]]

# =======================
# Data loading
# =======================
# (pandas, StandardScaler and LabelEncoder are already imported at the top of
# this cell, so they are not imported a second time here.)

# Read the workbook that already contains the train/test assignment
df = pd.read_excel("sensor_readings_24.xlsx")

# Feature columns
features = ["US" + str(i) for i in range(1, 25)]

# Split into X and y according to the 'Set' column
in_train = df['Set'] == 'train'
in_test = df['Set'] == 'test'

X_train = df.loc[in_train, features].values
y_train = df.loc[in_train, 'Class'].values

X_test = df.loc[in_test, features].values
y_test = df.loc[in_test, 'Class'].values

# Class names -> integer ids (encoder fitted on the training labels only)
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Standardise features with statistics fitted on the training subset only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

results = []

for lr in learning_rates:
    for lr_adj in learning_rate_adjusts:
        # The adjust factor is folded into the initial learning rate.
        adjusted_lr = lr * lr_adj
        for epochs in epochses:
            for hidden_layers in hidden_layers_sizes_list:
                for r in range(repeat):
                    # The repeat index doubles as the seed of each run.
                    mlp = MLPClassifier(
                        hidden_layer_sizes=hidden_layers,
                        activation='logistic',
                        max_iter=epochs,
                        learning_rate_init=adjusted_lr,
                        random_state=r
                    ).fit(X_train, y_train)

                    y_pred = mlp.predict(X_test)

                    # Macro-averaged test metrics; undefined ratios count as 0
                    macro_kwargs = {'average': 'macro', 'zero_division': 0}
                    accuracy = accuracy_score(y_test, y_pred)
                    precision = precision_score(y_test, y_pred, **macro_kwargs)
                    recall = recall_score(y_test, y_pred, **macro_kwargs)
                    f1 = f1_score(y_test, y_pred, **macro_kwargs)

                    row = {
                        "learning_rate": lr,
                        "lr_adjust": lr_adj,
                        "adjusted_lr": adjusted_lr,
                        "epochs": epochs,
                        "hidden_layers": hidden_layers,
                        "repeat": r+1,
                        "accuracy": accuracy,
                        "precision": precision,
                        "recall": recall,
                        "f1_score": f1
                    }
                    results.append(row)

df_results = pd.DataFrame(results)
print(df_results)

# Optional export
# df_results.to_csv("mlp_results.csv", index=False)

   learning_rate  lr_adjust  adjusted_lr  epochs hidden_layers  repeat  \
0           0.01        0.5        0.005    1000          [10]       1   
1           0.01        0.5        0.005    1000          [10]       2   
2           0.01        0.5        0.005    1000          [10]       3   
3           0.01        0.5        0.005    1000      [10, 10]       1   
4           0.01        0.5        0.005    1000      [10, 10]       2   
5           0.01        0.5        0.005    1000      [10, 10]       3   

   accuracy  precision    recall  f1_score  
0  0.945055   0.937405  0.938283  0.937776  
1  0.906593   0.898534  0.903984  0.901140  
2  0.934066   0.915314  0.929151  0.921961  
3  0.927656   0.914266  0.920790  0.917019  
4  0.941392   0.932171  0.933970  0.933051  
5  0.926740   0.923966  0.909235  0.916296  
