## Klasyfikacja
Własna implementacja wielowarstwowej sieci perceptronowej (MLP)

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder

np.random.seed(42)

# Load the dataset
data = pd.read_csv('star_classification.csv', delimiter=",")

# Selected input features
features = ['alpha','delta','u','g','r','i','z','redshift']

# Outlier removal: keep rows within mean ± 3·std of each feature.
# The filter is applied column by column, so the statistics of later columns
# are computed on the frame already filtered by the earlier ones.
for col in features:
    mean = data[col].mean()
    std = data[col].std()
    data = data[(data[col] >= mean - 3*std) & (data[col] <= mean + 3*std)]

# Take an explicit copy so that adding a column below does not write into a
# filtered slice (avoids pandas' SettingWithCopyWarning).
data = data.copy()

# LabelEncoder numbers classes in sorted label order (inspect le.classes_),
# e.g. GALAXY=0, QSO=1, STAR=2 for these three labels.
le = LabelEncoder()
data['class_encoded'] = le.fit_transform(data['class'])

# Feature matrix, integer labels and their one-hot encoding (3 classes)
X = data[features].values
y_encoded = data['class_encoded'].values
y = np.eye(3)[y_encoded]

# Features and one-hot labels side by side so that shuffling keeps rows aligned
combined = np.concatenate((X, y), axis=1)

# Split the data into training, validation and test subsets
def split_data(data, train_ratio=0.6, validation_ratio=0.2):
    """Randomly split the rows of ``data`` into train / validation / test parts.

    The caller's array is left untouched: rows are selected through a random
    permutation of indices instead of shuffling ``data`` in place.

    Args:
        data: array-like whose first axis enumerates observations.
        train_ratio: fraction of rows assigned to the training part.
        validation_ratio: fraction of rows assigned to the validation part.
            The remaining rows form the test part.

    Returns:
        Tuple ``(train_data, validation_data, test_data)`` of numpy arrays.

    Raises:
        ValueError: if a ratio is negative or the two ratios sum to more than 1.
    """
    if train_ratio < 0 or validation_ratio < 0 or train_ratio + validation_ratio > 1:
        raise ValueError(
            "train_ratio and validation_ratio must be non-negative and sum to at most 1"
        )

    data = np.asarray(data)
    # Index permutation drawn from the global NumPy RNG; indexing with it
    # produces a shuffled copy, so the input keeps its original row order.
    shuffled = data[np.random.permutation(len(data))]

    train_size = int(len(shuffled) * train_ratio)
    validation_size = int(len(shuffled) * validation_ratio)
    validation_end = train_size + validation_size

    train_data = shuffled[:train_size]                    # first train_size rows
    validation_data = shuffled[train_size:validation_end]  # next validation_size rows
    test_data = shuffled[validation_end:]                 # everything that is left

    return train_data, validation_data, test_data

train_data, validation_data, test_data = split_data(combined)

# --- Balance ONLY the training set (undersample every class to the rarest one) ---
onehot_columns = ['c0','c1','c2']
train_df = pd.DataFrame(train_data, columns=features + onehot_columns)
# Recover the integer class index from the one-hot columns
train_df['class'] = train_df[onehot_columns].values.argmax(axis=1)

min_count = train_df['class'].value_counts().min()

balanced_train_df = pd.concat(
    [group.sample(min_count, random_state=42) for _, group in train_df.groupby('class')]
)

# Drop the helper column again so the array is features + one-hot labels
train_data = balanced_train_df.drop(columns='class').values

# Separate features (first len(features) columns) from one-hot labels (the rest)
n_features = len(features)
X_train, y_train = train_data[:, :n_features], train_data[:, n_features:]
X_validation, y_validation = validation_data[:, :n_features], validation_data[:, n_features:]
X_test, y_test = test_data[:, :n_features], test_data[:, n_features:]

# Standardize using statistics fitted on the training set only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_validation = scaler.transform(X_validation)
X_test = scaler.transform(X_test)

# --- Summary ---
print("Rozmiary zbiorów:")
print(f"Train: {X_train.shape}, Validation: {X_validation.shape}, Test: {X_test.shape}")

# Class counts in the (balanced) training set
train_classes = y_train.argmax(axis=1)
unique, counts = np.unique(train_classes, return_counts=True)
print("\nLiczebność klas w zbiorze treningowym:")
print(dict(zip(unique, counts)))


Rozmiary zbiorów:
Train: (30696, 8), Validation: (19562, 8), Test: (19564, 8)

Liczebność klas w zbiorze treningowym:
{0: 10232, 1: 10232, 2: 10232}


In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Activation function: sigmoid
def sigmoid(x, derivative=False):
    """Logistic sigmoid, or its derivative written in terms of the sigmoid output.

    Args:
        x: scalar or array. With ``derivative=True`` the expression
            ``x * (1 - x)`` is returned, which equals the sigmoid derivative
            only when ``x`` already holds sigmoid outputs.
        derivative: select the derivative expression instead of the function.

    Returns:
        ``1 / (1 + exp(-x))`` or ``x * (1 - x)``.
    """
    if not derivative:
        return 1 / (1 + np.exp(-x))
    return x * (1 - x)

def softmax(x):
    """Row-wise softmax of a 2-D array.

    The row maximum is subtracted before exponentiation so that large inputs
    do not overflow; this does not change the result.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    row_totals = np.sum(exps, axis=1, keepdims=True)
    return exps / row_totals

def relu(x, derivative=False):
    """Rectified linear unit, or its 0/1 derivative mask.

    Args:
        x: numpy array of values.
        derivative: when True return ``1.0`` where ``x > 0`` and ``0.0`` elsewhere.

    Returns:
        ``max(0, x)`` element-wise, or the float mask described above.
    """
    if not derivative:
        return np.maximum(0, x)
    positive = x > 0
    return positive.astype(float)

# Weight initialization for a network with any number of hidden layers
def initialize_weights(input_size, hidden_layers_sizes, output_size):
    """Create one weight matrix per pair of consecutive layers.

    Each matrix of shape ``(fan_in, fan_out)`` is drawn from a standard normal
    distribution and scaled by ``sqrt(2 / fan_in)``.

    Args:
        input_size: number of input features.
        hidden_layers_sizes: list with the width of every hidden layer.
        output_size: number of output units.

    Returns:
        List of weight matrices ordered from the input to the output layer.
    """
    layer_sizes = [input_size] + hidden_layers_sizes + [output_size]
    return [
        np.random.randn(fan_in, fan_out) * np.sqrt(2 / fan_in)
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])
    ]

# Split the data into training, validation and test subsets
def split_data(data, train_ratio=0.6, validation_ratio=0.2):
    """Randomly split the rows of ``data`` into train / validation / test parts.

    The caller's array is left untouched: rows are selected through a random
    permutation of indices instead of shuffling ``data`` in place.

    Args:
        data: array-like whose first axis enumerates observations.
        train_ratio: fraction of rows assigned to the training part.
        validation_ratio: fraction of rows assigned to the validation part.
            The remaining rows form the test part.

    Returns:
        Tuple ``(train_data, validation_data, test_data)`` of numpy arrays.

    Raises:
        ValueError: if a ratio is negative or the two ratios sum to more than 1.
    """
    if train_ratio < 0 or validation_ratio < 0 or train_ratio + validation_ratio > 1:
        raise ValueError(
            "train_ratio and validation_ratio must be non-negative and sum to at most 1"
        )

    data = np.asarray(data)
    # Index permutation drawn from the global NumPy RNG; indexing with it
    # produces a shuffled copy, so the input keeps its original row order.
    shuffled = data[np.random.permutation(len(data))]

    train_size = int(len(shuffled) * train_ratio)
    validation_size = int(len(shuffled) * validation_ratio)
    validation_end = train_size + validation_size

    train_data = shuffled[:train_size]                    # first train_size rows
    validation_data = shuffled[train_size:validation_end]  # next validation_size rows
    test_data = shuffled[validation_end:]                 # everything that is left

    return train_data, validation_data, test_data

# Learning-rate adaptation helper
def adjust_learning_rate(learning_rate, mse, previous_mse, learning_rate_adjust, threshold=0.001):
    """Return a learning rate adapted to the latest change of the error.

    Args:
        learning_rate: current learning rate.
        mse: error of the current epoch.
        previous_mse: error of the previous epoch.
        learning_rate_adjust: extra multiplier applied when the error barely changes.
        threshold: absolute error difference below which the extra multiplier is used.

    Returns:
        The rate multiplied by 1.1 when the error dropped, by 0.5 otherwise,
        and additionally by ``learning_rate_adjust`` when
        ``|mse - previous_mse| < threshold``.
    """
    error_decreased = mse < previous_mse
    # Reward progress with a 10% increase, punish lack of progress by halving
    learning_rate *= 1.1 if error_decreased else 0.5

    # Error almost flat: apply the additional adjustment factor
    nearly_flat = np.abs(mse - previous_mse) < threshold
    if nearly_flat:
        learning_rate *= learning_rate_adjust

    return learning_rate

# Training of a network with any number of hidden layers
def train(X, y, learning_rate, learning_rate_adjust, epochs,
          hidden_layers_sizes, optimizer, momentum):
    """Train a bias-free multi-layer perceptron with full-batch gradient descent.

    Hidden layers use ReLU, the output layer uses softmax. The output delta is
    ``predicted - y``, i.e. the gradient of softmax combined with cross-entropy.

    Args:
        X: input matrix, one row per sample.
        y: one-hot target matrix with the same number of rows as ``X``.
        learning_rate: step size of every weight update.
        learning_rate_adjust: accepted for interface compatibility; it is not
            used by the current training loop.
        epochs: number of full passes over ``X``.
        hidden_layers_sizes: list with the width of every hidden layer.
        optimizer: ``'gd'`` for plain gradient descent or ``'momentum'`` for
            gradient descent with a velocity term.
        momentum: velocity decay factor; only used when ``optimizer == 'momentum'``.

    Returns:
        List of trained weight matrices, ordered from input to output.

    Raises:
        ValueError: if ``optimizer`` is not one of the supported names.
            Previously such a value silently skipped every update and returned
            the untrained initial weights.
    """
    if optimizer not in ('gd', 'momentum'):
        raise ValueError(
            f"Unknown optimizer {optimizer!r}; expected 'gd' or 'momentum'"
        )

    input_size = X.shape[1]
    output_size = y.shape[1]
    weights = initialize_weights(input_size, hidden_layers_sizes, output_size)
    velocities = [np.zeros_like(w) for w in weights]
    n_samples = X.shape[0]
    last_layer = len(weights) - 1

    for epoch in range(epochs):
        # Forward pass: keep every layer's activation for backpropagation
        activations = [X]
        for i, w in enumerate(weights):
            z = np.dot(activations[-1], w)
            if i == last_layer:
                activations.append(softmax(z))
            else:
                activations.append(relu(z))
        predicted_output = activations[-1]

        # Backpropagation, from the output layer towards the input
        error = predicted_output - y  # cross-entropy gradient w.r.t. the logits
        deltas = [error]
        for i in range(last_layer, 0, -1):
            # relu(z) > 0 exactly where z > 0, so the post-activation values
            # give the same derivative mask as the pre-activation ones.
            delta = deltas[-1].dot(weights[i].T) * relu(activations[i], derivative=True)
            deltas.append(delta)

        deltas.reverse()  # align deltas[i] with weights[i]

        # Weight update with the gradient averaged over the batch
        for i in range(len(weights)):
            grad = activations[i].T.dot(deltas[i]) / n_samples

            if optimizer == 'gd':
                weights[i] -= learning_rate * grad
            else:  # 'momentum'
                velocities[i] = momentum * velocities[i] - learning_rate * grad
                weights[i] += velocities[i]

    return weights


def predict(X, weights):
    """Run a forward pass through the network and return the output activations.

    Every layer multiplies its input by the layer's weight matrix; all layers
    except the last apply ReLU, the last one applies softmax.

    Args:
        X: input matrix, one row per sample.
        weights: list of weight matrices ordered from input to output.

    Returns:
        Output of the last layer (``X`` itself when ``weights`` is empty).
    """
    final_index = len(weights) - 1
    signal = X
    for index, layer_weights in enumerate(weights):
        pre_activation = np.dot(signal, layer_weights)
        signal = softmax(pre_activation) if index == final_index else relu(pre_activation)
    return signal

# Prediction error helper
def calculate_error(predictions, labels):
    """Return the mean absolute difference between predictions and labels."""
    absolute_differences = np.abs(predictions - labels)
    return np.mean(absolute_differences)

def correct(y, predictions):
    """Return the fraction of rows whose predicted class matches the true class.

    Both arguments are row-wise score matrices (e.g. one-hot labels and softmax
    outputs); the class of a row is the index of its largest entry.
    """
    true_classes = np.argmax(y, axis=1)
    predicted_classes = np.argmax(predictions, axis=1)
    hits = (true_classes == predicted_classes).sum()
    return hits / len(y)

# Hyper-parameter grid for the hand-written network
learning_rates = [0.01]
learning_rate_adjusts = [0.0005]
epochses = [1000]
repeat = 3  # independent training runs per configuration

# Optimizers understood by train():
#   'gd'       -> plain gradient descent
#   'momentum' -> gradient descent with a momentum (velocity) term
optimizers = ['gd', 'momentum']
momentums = [0.0, 0.9]

# Hidden-layer architectures to compare (one entry per network)
hidden_layers_sizes_list = [[10], [10, 10]]

# Accuracy, precision, recall and F1 for a set of class predictions
def calculate_metrics(y_test_class, y_pred_class):
    """Compute accuracy and macro-averaged precision, recall and F1.

    ``zero_division=0`` is passed explicitly, matching the metric calls used
    for the library network in this notebook: a class that is never predicted
    contributes 0 to the macro average without emitting a warning.

    Args:
        y_test_class: true class indices.
        y_pred_class: predicted class indices.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``.
    """
    accuracy = accuracy_score(y_test_class, y_pred_class)
    precision = precision_score(y_test_class, y_pred_class, average='macro', zero_division=0)
    recall = recall_score(y_test_class, y_pred_class, average='macro', zero_division=0)
    f1 = f1_score(y_test_class, y_pred_class, average='macro', zero_division=0)

    return accuracy, precision, recall, f1

# One metrics row per trained configuration
results = []

# The true test classes never change, so decode the one-hot labels once
y_test_class = np.argmax(y_test, axis=1)

# Grid of experiments.
# NOTE: train() ignores `momentum` when optimizer == 'gd', so the rows for
# ('gd', 0.0) and ('gd', 0.9) are both plain gradient-descent runs.
for hidden_layers_sizes in hidden_layers_sizes_list:
    for r in range(1, repeat + 1):
        for lr in learning_rates:
            for lr_adj in learning_rate_adjusts:
                for epochs in epochses:
                    for optimizer in optimizers:
                        for momentum in momentums:
                            trained_weights = train(
                                X_train, y_train, lr, lr_adj, epochs, hidden_layers_sizes, optimizer=optimizer, momentum=momentum
                            )

                            # Only the test-set predictions are evaluated
                            predictions_test = predict(X_test, trained_weights)
                            y_pred_class = np.argmax(predictions_test, axis=1)

                            # Compute the metrics
                            accuracy, precision, recall, f1 = calculate_metrics(y_test_class, y_pred_class)

                            # Store the row
                            results.append({
                                'hidden_layers': str(hidden_layers_sizes),
                                'optimizer': optimizer,
                                'momentum': momentum,
                                'learning_rate': lr,
                                'learning_rate_adjust': lr_adj,
                                'epochs': epochs,
                                'repeat': r,
                                'accuracy': accuracy,
                                'precision': precision,
                                'recall': recall,
                                'f1': f1
                            })

# Build the results table
results_df = pd.DataFrame(results)

# Show the results (printed once; the table is small)
print(results_df)

# Average and best scores per configuration across the repeats
summary = results_df.groupby(
    ['hidden_layers', 'learning_rate', 'optimizer', 'momentum']
).agg(
    avg_accuracy=('accuracy', 'mean'),
    avg_precision=('precision', 'mean'),
    avg_recall=('recall', 'mean'),
    avg_f1=('f1', 'mean'),
    best_accuracy=('accuracy', 'max'),
    best_f1=('f1', 'max')
).reset_index()

print(summary)


   hidden_layers optimizer  momentum  learning_rate  learning_rate_adjust  \
0           [10]        gd       0.0           0.01                0.0005   
1           [10]        gd       0.9           0.01                0.0005   
2           [10]  momentum       0.0           0.01                0.0005   
3           [10]  momentum       0.9           0.01                0.0005   
4           [10]        gd       0.0           0.01                0.0005   
5           [10]        gd       0.9           0.01                0.0005   
6           [10]  momentum       0.0           0.01                0.0005   
7           [10]  momentum       0.9           0.01                0.0005   
8           [10]        gd       0.0           0.01                0.0005   
9           [10]        gd       0.9           0.01                0.0005   
10          [10]  momentum       0.0           0.01                0.0005   
11          [10]  momentum       0.9           0.01                0.0005   

## Sieć z biblioteki scikit-learn (MLPClassifier)

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# -----------------------
# Simulation parameters
# -----------------------
learning_rates = [0.01]
learning_rate_adjusts = [0.5]  # multiplied with the learning rate before training
epochses = [1000]
repeat = 3  # runs per configuration
hidden_layers_sizes_list = [
    [10],
    [10, 10],
]
optimizers = ['gd', 'momentum']
momentums = [0.0, 0.9]

# Container for the metric rows
results = []

# =======================
# Load data
# =======================
data = pd.read_csv('star_classification.csv', delimiter=",")

features = ['alpha','delta','u','g','r','i','z','redshift']

# Outlier removal: keep rows within mean ± 3·std of each feature.
# The filter is applied column by column, so later columns use statistics
# of the already-filtered frame.
for col in features:
    mean = data[col].mean()
    std = data[col].std()
    data = data[(data[col] >= mean - 3*std) & (data[col] <= mean + 3*std)]

# Explicit copy so that adding a column below does not write into a filtered
# slice (avoids pandas' SettingWithCopyWarning).
data = data.copy()

# Encode class labels as integers. LabelEncoder numbers classes in sorted
# label order (inspect le.classes_), e.g. GALAXY=0, QSO=1, STAR=2.
le = LabelEncoder()
data['class_encoded'] = le.fit_transform(data['class'])

# Features and integer labels
X = data[features].values
y = data['class_encoded'].values

# Keep X and y in one matrix so the shuffle inside split_data keeps rows aligned
combined = np.concatenate((X, y.reshape(-1,1)), axis=1)

# Re-seed the global RNG so the split does not depend on how many random
# numbers earlier cells have consumed.
np.random.seed(42)

# NOTE: split_data is not defined in this cell; it must already exist from an
# earlier cell of the notebook.
train_data, validation_data, test_data = split_data(combined)

# Balance only the training set by undersampling every class to the rarest one
train_df = pd.DataFrame(train_data, columns=features + ['class'])
min_count = train_df['class'].value_counts().min()
balanced_train_df = pd.concat([
    df.sample(min_count, random_state=42)
    for _, df in train_df.groupby('class')
])
train_data = balanced_train_df.values

# The last column holds the class label, everything before it the features
X_train = train_data[:, :-1]
y_train = train_data[:, -1].astype(int)
X_validation = validation_data[:, :-1]
y_validation = validation_data[:, -1].astype(int)
X_test = test_data[:, :-1]
y_test = test_data[:, -1].astype(int)

# Standardize with statistics fitted on the training set only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_validation = scaler.transform(X_validation)
X_test = scaler.transform(X_test)

results = []

for lr in learning_rates:
    for lr_adj in learning_rate_adjusts:
        # Effective initial learning rate handed to MLPClassifier
        adjusted_lr = lr * lr_adj
        for epochs in epochses:
            for hidden_layers in hidden_layers_sizes_list:
                for r in range(repeat):
                    for optimizer in optimizers:
                        for momentum in momentums:
                            # Plain 'gd' is emulated by SGD with zero momentum;
                            # any other optimizer passes the momentum through.
                            clf_momentum = 0.0 if optimizer == 'gd' else momentum

                            mlp = MLPClassifier(
                                hidden_layer_sizes=hidden_layers,
                                activation='logistic',
                                max_iter=epochs,
                                solver='sgd',
                                learning_rate_init=adjusted_lr,
                                momentum=clf_momentum,
                                random_state=r
                            )
                            mlp.fit(X_train, y_train)

                            y_pred = mlp.predict(X_test)

                            accuracy = accuracy_score(y_test, y_pred)
                            precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
                            recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
                            f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

                            results.append({
                                # Record the architecture of THIS run. The previous
                                # code used `hidden_layers_sizes`, a variable left
                                # over from another cell, which mislabelled every row.
                                "hidden_layers": str(hidden_layers),
                                "optimizer": optimizer,
                                "momentum": momentum,
                                "learning_rate": lr,
                                "learning_rate_adjust": lr_adj,
                                "adjusted_lr": adjusted_lr,
                                "epochs": epochs,
                                "repeat": r+1,
                                "accuracy": accuracy,
                                "precision": precision,
                                "recall": recall,
                                "f1_score": f1
                            })

df_results = pd.DataFrame(results)
print(df_results)

# Optional: persist the raw results
# df_results.to_csv("mlp_results.csv", index=False)

# Average and best scores per configuration across the repeats
summary = df_results.groupby(
    ['hidden_layers', 'learning_rate', 'optimizer', 'momentum']
).agg(
    avg_accuracy=('accuracy', 'mean'),
    avg_precision=('precision', 'mean'),
    avg_recall=('recall', 'mean'),
    avg_f1=('f1_score', 'mean'),
    best_accuracy=('accuracy', 'max'),
    best_f1=('f1_score', 'max')
).reset_index()

print("\nPodsumowanie wyników:")
print(summary)


   hidden_layers optimizer  momentum  learning_rate  learning_rate_adjust  \
0           [10]        gd       0.0           0.01                   0.5   
1           [10]        gd       0.9           0.01                   0.5   
2           [10]  momentum       0.0           0.01                   0.5   
3           [10]  momentum       0.9           0.01                   0.5   
4           [10]        gd       0.0           0.01                   0.5   
5           [10]        gd       0.9           0.01                   0.5   
6           [10]  momentum       0.0           0.01                   0.5   
7           [10]  momentum       0.9           0.01                   0.5   
8           [10]        gd       0.0           0.01                   0.5   
9           [10]        gd       0.9           0.01                   0.5   
10          [10]  momentum       0.0           0.01                   0.5   
11          [10]  momentum       0.9           0.01                   0.5   

TypeError: unhashable type: 'list'