In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
data = pd.read_csv('C:/Users/abdulssekyanzi/EDA Dataset.csv/100.csv')  # Replace with your actual file path

# Drop unnecessary columns
data = data.drop(columns=['Unnamed: 0', 'time_ms'], errors='ignore')

# Assume the last column is the target label
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Encode the target labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Find unique class and their counts.
unique_classes, class_counts = np.unique(y, return_counts=True)
print("Class Counts:", dict(zip(unique_classes, class_counts)))

# Identify classes with only one sample
single_sample_classes = unique_classes[class_counts == 1]

# Remove rows corresponding to single-sample classes
for cls in single_sample_classes:
    data = data[data.iloc[:, -1] != label_encoder.inverse_transform([cls])[0]]

# Re-extract X and y after removing rows
X = data.iloc[:, :-1].values
y = label_encoder.fit_transform(data.iloc[:, -1].values)

# Feature scaling
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Reshape the data for RNN/LSTM (samples, timesteps, features)
X = X.reshape((X.shape[0], 1, X.shape[1]))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Build the LSTM model
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.3))
model.add(LSTM(32, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(np.unique(y)), activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

# Early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=16,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping]
)

# Predict on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Classification report
print(classification_report(y_test, y_pred_classes, target_names=label_encoder.classes_))

# Plot training history
plt.figure(figsize=(12, 4))

# Plot loss
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Plot accuracy
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

Class Counts: {np.int64(0): np.int64(2), np.int64(1): np.int64(1), np.int64(2): np.int64(1), np.int64(3): np.int64(1), np.int64(4): np.int64(1), np.int64(5): np.int64(1), np.int64(6): np.int64(1), np.int64(7): np.int64(1), np.int64(8): np.int64(1), np.int64(9): np.int64(1), np.int64(10): np.int64(1), np.int64(11): np.int64(1), np.int64(12): np.int64(1), np.int64(13): np.int64(1), np.int64(14): np.int64(1), np.int64(15): np.int64(1), np.int64(16): np.int64(1), np.int64(17): np.int64(1), np.int64(18): np.int64(2), np.int64(19): np.int64(1), np.int64(20): np.int64(3), np.int64(21): np.int64(3), np.int64(22): np.int64(1), np.int64(23): np.int64(2), np.int64(24): np.int64(1), np.int64(25): np.int64(3), np.int64(26): np.int64(3), np.int64(27): np.int64(3), np.int64(28): np.int64(1), np.int64(29): np.int64(1), np.int64(30): np.int64(1), np.int64(31): np.int64(1), np.int64(32): np.int64(2), np.int64(33): np.int64(2), np.int64(34): np.int64(2), np.int64(35): np.int64(4), np.int64(36): np.int64(

  super().__init__(**kwargs)


Epoch 1/10
[1m32498/32498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 6ms/step - accuracy: 0.0275 - loss: 4.3052 - val_accuracy: 0.0289 - val_loss: 4.2043
Epoch 2/10
[1m32498/32498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 6ms/step - accuracy: 0.0295 - loss: 4.2138 - val_accuracy: 0.0299 - val_loss: 4.2030
Epoch 3/10
[1m32498/32498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 7ms/step - accuracy: 0.0296 - loss: 4.2102 - val_accuracy: 0.0294 - val_loss: 4.2026
Epoch 4/10
[1m32498/32498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m274s[0m 8ms/step - accuracy: 0.0297 - loss: 4.2122 - val_accuracy: 0.0302 - val_loss: 4.2022
Epoch 5/10
[1m32498/32498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 8ms/step - accuracy: 0.0303 - loss: 4.2081 - val_accuracy: 0.0301 - val_loss: 4.2027
Epoch 6/10
[1m32498/32498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 8ms/step - accuracy: 0.0295 - loss: 4.2112 - val_accuracy: 0.0297 - val_loss:

ValueError: Number of classes, 429, does not match size of target_names, 440. Try specifying the labels parameter