In [1]:
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
def load_data(parent_folder):
    X = []
    y = []
    phoneme_classes = sorted(os.listdir(parent_folder))
    for phoneme_class in phoneme_classes:
        class_folder = os.path.join(parent_folder, phoneme_class)
        for file in os.listdir(class_folder):
            if file.endswith('.npy'):
                cepstral_coeff = np.load(os.path.join(class_folder, file))
                X.append(cepstral_coeff)
                y.append(phoneme_class)
    return np.array(X), np.array(y)

In [3]:
X, y = load_data('cepstral_coeff')
print(X.shape)
print(y.shape)

(3737, 60)
(3737,)


In [4]:
from sklearn.preprocessing import LabelEncoder

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Convert string labels to integer indices
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [5]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, input_shape=(60, 1), return_sequences=True),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(len(np.unique(y)), activation='softmax')
])

  super().__init__(**kwargs)


In [6]:
model.summary()

In [7]:
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_reshaped, y_train_encoded, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 33ms/step - accuracy: 0.0503 - loss: 3.5274 - val_accuracy: 0.2107 - val_loss: 2.6973
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.3074 - loss: 2.3232 - val_accuracy: 0.5836 - val_loss: 1.5140
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.6557 - loss: 1.3721 - val_accuracy: 0.9231 - val_loss: 0.9243
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.8815 - loss: 0.8939 - val_accuracy: 0.9431 - val_loss: 0.6276
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - accuracy: 0.9575 - loss: 0.6074 - val_accuracy: 0.9799 - val_loss: 0.5055
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - accuracy: 0.9851 - loss: 0.4510 - val_accuracy: 1.0000 - val_loss: 0.2706
Epoch 7/10
[1m75/75[0m [32m━━━━

<keras.src.callbacks.history.History at 0x24d2703e650>

In [8]:
test_loss, test_accuracy = model.evaluate(X_test_reshaped, y_test_encoded)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0813
Test Loss: 0.07845021039247513, Test Accuracy: 1.0
