In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import numpy as np

path = "../../../dane/8CPU_20RAM/3600s/all_merged.csv"

In [2]:
# Załaduj dane
df = pd.read_csv(path)

In [3]:
for col in ['replicaId', 'endpointUrl_methods']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

In [4]:
# Dodajemy skalowanie dla wybranych kolumn
features_to_scale = ['queueSizeForward_methods', 'queueSizeBack_methods',
                     'cpuUsage_stock', 'memoryUsage_stock',
                     'applicationTime_trading', 'databaseTime_trading',
                     'numberOfSellOffers_trading', 'numberOfBuyOffers_trading']
scaler = StandardScaler()
for col in features_to_scale:
    df[col] = scaler.fit_transform(df[col].values.reshape(-1, 1))

In [5]:
# Wybór cech
features = [
            # 'timestamp',
            # 'apiTime_methods',
            # 'applicationTime_methods',
            # 'databaseTime_methods',
            'endpointUrl_methods',
            # 'queueSizeForward_methods',
            # 'queueSizeBack_methods',
            'cpuUsage_stock',
            'memoryUsage_stock',
            # 'applicationTime_trading',
            # 'databaseTime_trading',
            # 'numberOfSellOffers_trading',
            # 'numberOfBuyOffers_trading',
            # 'cpuUsage_traffic',
            # 'memoryUsage_traffic',
            # 'replicaId'
            ]
df_features = df[features]

# Kodowanie pudełek
le = LabelEncoder()
df['test'] = le.fit_transform(df['test'])

# Kopiowanie danych
df_encoded = df_features.copy()
df_encoded['test'] = df['test']

In [6]:
# Stwórz puste listy do przechowywania danych treningowych i testowych
X_train = []
X_test = []
y_train = []
y_test = []

In [7]:
def create_windows(X, y, window_size, step_size):
    X_windows = []
    y_windows = []

    # Przesuń okno po danych
    for i in range(0, len(X) - window_size, step_size):
        # Utwórz okno danych
        X_window = X.iloc[i:i + window_size]
        # Utwórz etykietę dla okna (etykieta ostatniej obserwacji w oknie)
        y_window = y.iloc[i + window_size]

        X_windows.append(X_window.values)
        y_windows.append(y_window)

    return np.array(X_windows), np.array(y_windows)

In [8]:
window_size = 700
step_size = 100
# Dla każdego unikalnego testu
for box in df_encoded['test'].unique():
    # Wybierz tylko rekordy dla tego testu
    box_data = df_encoded[df_encoded['test'] == box]
    
    # Oblicz punkt podziału (80% danych)
    split_point = int(len(box_data) * 0.8)
    
    # Dodaj pierwsze 80% rekordów do danych treningowych
    X_train_box = box_data.drop('test', axis=1).iloc[:split_point]
    y_train_box = box_data['test'].iloc[:split_point]
    
    # Dodaj ostatnie 20% rekordów do danych testowych
    X_test_box = box_data.drop('test', axis=1).iloc[split_point:]
    y_test_box = box_data['test'].iloc[split_point:]
    
    # Stwórz okienka dla danych treningowych
    X_train_windows, y_train_windows = create_windows(X_train_box, y_train_box, window_size, step_size)
    
    # Stwórz okienka dla danych testowych
    X_test_windows, y_test_windows = create_windows(X_test_box, y_test_box, window_size, step_size)
    
    # Dodaj okienka do list
    X_train.append(X_train_windows)
    y_train.append(y_train_windows)
    X_test.append(X_test_windows)
    y_test.append(y_test_windows)

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense
from keras.utils import to_categorical

In [10]:
# Połączenie danych treningowych dla wszystkich testów
X_train_combined = np.concatenate(X_train, axis=0)
y_train_combined = np.concatenate(y_train, axis=0)

In [11]:
# Zakodowanie etykiet w formacie one-hot encoding
y_train_combined_encoded = to_categorical(y_train_combined, num_classes=len(df['test'].unique()))

In [12]:
# Trenowanie modelu na połączonym zestawie danych
model = Sequential()    
model.add(GRU(50, return_sequences=False, input_shape=(window_size, X_train_combined.shape[2])))
model.add(Dense(len(df['test'].unique()), activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train_combined, y_train_combined_encoded, epochs=25, batch_size=32)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x225dbe89f10>

In [13]:
for i in range(len(X_test)):
    y_test_encoded = to_categorical(y_test[i], num_classes=len(df['test'].unique()))
    loss, accuracy = model.evaluate(X_test[i], y_test_encoded)
    print(f"Test {i+1}: Loss = {loss}, Accuracy = {accuracy}")

Test 1: Loss = 1.1584311723709106, Accuracy = 0.6832239627838135
Test 2: Loss = 2.2652127742767334, Accuracy = 0.33176690340042114
Test 3: Loss = 2.2505533695220947, Accuracy = 0.14547304809093475
Test 4: Loss = 3.4733963012695312, Accuracy = 0.027566539123654366
Test 5: Loss = 0.2091863453388214, Accuracy = 0.9334140419960022
Test 6: Loss = 0.8425816893577576, Accuracy = 0.8392664790153503
Test 7: Loss = 0.16061608493328094, Accuracy = 0.9483470916748047
Test 8: Loss = 0.2715919315814972, Accuracy = 0.8960176706314087
Test 9: Loss = 0.020031215623021126, Accuracy = 1.0
Test 10: Loss = 1.1802853345870972, Accuracy = 0.5957926511764526
Test 11: Loss = 3.269630193710327, Accuracy = 0.2142857164144516
Test 12: Loss = 1.402982473373413, Accuracy = 0.625
Test 13: Loss = 0.4548487663269043, Accuracy = 0.9718309640884399
Test 14: Loss = 2.1806986331939697, Accuracy = 0.13946117460727692
Test 15: Loss = 3.0843420028686523, Accuracy = 0.3117647171020508
Test 16: Loss = 2.3961706161499023, Accur