In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import numpy as np

# Relative location of the merged CSV that the rest of the notebook reads.
path = "../../dane/8CPU_20RAM/3600s/all_merged.csv"

In [2]:
# Load the data from the CSV file referenced by `path` into a DataFrame.
df = pd.read_csv(path)

In [3]:
# Replace the values of the two identifier columns with integer codes.
# A fresh encoder is fitted for each column, so the codes are per-column.
categorical_columns = ('replicaId', 'endpointUrl_methods')
for col in categorical_columns:
    le = LabelEncoder()
    le.fit(df[col])
    df[col] = le.transform(df[col])

In [4]:
# Feature selection: the columns passed on to the model.
# Columns that were listed but disabled in the original selection:
# timestamp, apiTime_methods, applicationTime_methods, databaseTime_methods,
# cpuUsage_traffic, memoryUsage_traffic, replicaId.
features = [
    'endpointUrl_methods',
    'queueSizeForward_methods',
    'queueSizeBack_methods',
    'cpuUsage_stock',
    'memoryUsage_stock',
    'applicationTime_trading',
    'databaseTime_trading',
    'numberOfSellOffers_trading',
    'numberOfBuyOffers_trading',
]

# Encode the box identifier (the `test` column) as an integer class label.
le = LabelEncoder()
df['test'] = le.fit_transform(df['test'])

# Working frame: an independent copy of the selected features plus the
# encoded label column.
df_features = df[features]
df_encoded = df_features.assign(test=df['test'])

In [5]:
# Empty containers that will collect the per-box training and test windows.
X_train, X_test, y_train, y_test = [], [], [], []

In [6]:
def create_windows(X, y, window_size, step_size):
    """Cut a sequence of observations into fixed-length sliding windows.

    Parameters
    ----------
    X : pandas.DataFrame (or Series)
        Observations in order; rows are sliced positionally with ``.iloc``.
    y : pandas.Series
        One label per row of ``X``, in the same order.
    window_size : int
        Number of consecutive rows in each window (must be >= 1).
    step_size : int
        Offset between the starting rows of two consecutive windows.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X_windows`` with shape ``(n_windows, window_size, ...)`` and
        ``y_windows`` with shape ``(n_windows,)``. The label of a window is
        the label of the last observation inside that window. When ``X`` is
        shorter than ``window_size`` both arrays are empty, and ``X_windows``
        still has the window dimensions so it can be concatenated with the
        results from other sequences.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    # `+ 1` keeps the final window that ends exactly on the last row.
    starts = range(0, len(X) - window_size + 1, step_size)

    X_windows = [X.iloc[start:start + window_size].values for start in starts]
    # Last row *inside* the window, not the row that follows it.
    y_windows = [y.iloc[start + window_size - 1] for start in starts]

    if not X_windows:
        # Preserve dimensionality and dtypes so the empty result can still be
        # concatenated with non-empty results.
        empty_shape = (0, window_size) + tuple(X.shape[1:])
        return (
            np.empty(empty_shape, dtype=X.iloc[:0].values.dtype),
            np.empty((0,), dtype=y.dtype),
        )

    return np.array(X_windows), np.array(y_windows)

In [7]:
window_size = 500
step_size = 100

# Start from empty containers so that re-running this cell does not append a
# second copy of every box's windows to the train/test lists.
X_train, X_test = [], []
y_train, y_test = [], []

# For every unique box (value of the `test` column)
for box in df_encoded['test'].unique():
    # Keep only the records that belong to this box
    box_data = df_encoded[df_encoded['test'] == box]
    box_features = box_data.drop('test', axis=1)
    box_labels = box_data['test']

    # Split point: the first 80% of the box's rows, in their existing order
    split_point = int(len(box_data) * 0.8)

    # First 80% of the records go to the training data
    X_train_box = box_features.iloc[:split_point]
    y_train_box = box_labels.iloc[:split_point]

    # Remaining 20% of the records go to the test data
    X_test_box = box_features.iloc[split_point:]
    y_test_box = box_labels.iloc[split_point:]

    # Build sliding windows separately for each part, so no window spans
    # the train/test boundary
    X_train_windows, y_train_windows = create_windows(X_train_box, y_train_box, window_size, step_size)
    X_test_windows, y_test_windows = create_windows(X_test_box, y_test_box, window_size, step_size)

    # Collect the windows; each list holds one entry per box
    X_train.append(X_train_windows)
    y_train.append(y_train_windows)
    X_test.append(X_test_windows)
    y_test.append(y_test_windows)

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from keras.utils import to_categorical

In [9]:
# Merge the per-box training windows and labels into single arrays by
# stacking them along the first axis (the default for np.concatenate).
X_train_combined = np.concatenate(X_train)
y_train_combined = np.concatenate(y_train)

In [10]:
# One-hot encode the training labels, with one column per distinct value
# of the encoded `test` column.
num_classes = df['test'].nunique()
y_train_combined_encoded = to_categorical(y_train_combined, num_classes=num_classes)

In [11]:
# Train the model on the combined data set.
# Architecture: one LSTM layer (100 units) over windows of shape
# (window_size, n_features), followed by a softmax layer with one output
# per distinct value of df['test'].
n_features = X_train_combined.shape[2]
n_outputs = len(df['test'].unique())

model = Sequential([
    LSTM(100, input_shape=(window_size, n_features)),
    Dense(n_outputs, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train_combined,
    y_train_combined_encoded,
    epochs=10,
    batch_size=32,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2247fb76410>

In [13]:
# Evaluate the trained model separately on each entry of the test lists and
# report its loss and accuracy. The printed number is the 1-based position
# in the lists, not the encoded label value.
n_label_classes = len(df['test'].unique())
for position, (X_box, y_box) in enumerate(zip(X_test, y_test), start=1):
    y_test_encoded = to_categorical(y_box, num_classes=n_label_classes)
    loss, accuracy = model.evaluate(X_box, y_test_encoded)
    print(f"Test {position}: Loss = {loss}, Accuracy = {accuracy}")

Test 1: Loss = 3.0151193141937256, Accuracy = 0.0
Test 2: Loss = 2.921444892883301, Accuracy = 0.0
Test 3: Loss = 2.831681251525879, Accuracy = 0.0
Test 4: Loss = 2.6976773738861084, Accuracy = 0.0
Test 5: Loss = 3.0119175910949707, Accuracy = 0.0
Test 6: Loss = 2.482677459716797, Accuracy = 0.706135630607605
Test 7: Loss = 3.012112855911255, Accuracy = 0.0
Test 8: Loss = 2.932706117630005, Accuracy = 0.0
Test 9: Loss = 0.1999308317899704, Accuracy = 1.0
Test 10: Loss = 1.1300462484359741, Accuracy = 0.8739684820175171
Test 11: Loss = 2.6482632160186768, Accuracy = 0.0
Test 12: Loss = 2.439598560333252, Accuracy = 1.0
Test 13: Loss = 2.9731979370117188, Accuracy = 0.0
Test 14: Loss = 2.9023663997650146, Accuracy = 0.0
Test 15: Loss = 2.8445773124694824, Accuracy = 0.0
Test 16: Loss = 2.478774309158325, Accuracy = 0.0
Test 17: Loss = 2.1480462551116943, Accuracy = 0.4361269176006317
Test 18: Loss = 2.3695085048675537, Accuracy = 0.20079365372657776
