In [1]:
# ### **1. Set up your environment**

# For this project, we'll be using Python and a few specific libraries. Before we begin, you'll need to set up your environment.

# - First, download and install Python (if it is not already installed).
# - Then install the required libraries:
    ```
!pip install keras
!pip install tensorflow
!pip install librosa
!pip install scipy
!pip install sklearn


Collecting keras
  Using cached keras-2.13.1-py3-none-any.whl (1.7 MB)
Installing collected packages: keras
Successfully installed keras-2.13.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting tensorflow
  Using cached tensorflow-2.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (524.1 MB)
Collecting tensorboard<2.14,>=2.13
  Using cached tensorboard-2.13.0-py3-none-any.whl (5.6 MB)
Collecting google-pasta>=0.1.1
  Using cached google_pasta-0.2.0-py3-none-any.whl (57 kB)
Collecting flatbuffers>=23.1.21
  Using cached flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)
Collecting numpy<=1.24.3,>=1.22
  Using cached numpy-1.24.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
Collecting gast<=0.4.0,>=0.2.1
  Using cached gast-0.4.0-py3-none-any.w

In [1]:
### **2. Data Collection**

# You can use datasets like Google Speech Commands Dataset which contains over 100,000 audio files of spoken words like "Yes", "No", "Stop", "Go", etc. 

# ```bash
# wget http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz
# tar xzf speech_commands_v0.02.tar.gz
# ```


In [2]:
### **3. Data Preprocessing**

# In this step, audio files need to be converted into a numerical form to feed the model. The librosa library provides functionality for this.
import librosa
import numpy as np
def wav2mfcc(file_path, max_len=11):
    """Load an audio file and return its MFCCs as a fixed-width matrix.

    The file is loaded as mono at its native sampling rate, every third
    sample is kept, and MFCCs are computed on the result. The time axis
    (axis 1) is then zero-padded or truncated to exactly ``max_len`` frames.

    Args:
        file_path: Path to the audio file; passed straight to ``librosa.load``.
        max_len: Number of frames (columns) in the returned matrix.

    Returns:
        A 2-D array with ``max_len`` columns and one row per MFCC coefficient
        (librosa's default number of coefficients is used).
    """
    # sr=None keeps the file's native rate; the rate itself is not used below.
    wave, _ = librosa.load(file_path, mono=True, sr=None)

    # Keep every third sample (plain decimation, no anti-alias filtering).
    wave = wave[::3]

    # `y` must be passed by keyword: the arguments of librosa.feature.mfcc are
    # keyword-only in librosa >= 0.10, so the positional form raises TypeError.
    # NOTE(review): sr is fixed at 16000 although the signal was decimated by 3
    # above -- confirm that this mismatch is intended.
    mfcc = librosa.feature.mfcc(y=wave, sr=16000)

    n_frames = mfcc.shape[1]
    if max_len > n_frames:
        # Too short: right-pad the time axis with zeros up to max_len frames.
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, max_len - n_frames)), mode='constant')
    else:
        # Long enough: keep only the first max_len frames.
        mfcc = mfcc[:, :max_len]

    return mfcc


In [None]:
### **4. Load the data**

# Load the processed data and split it into training and test sets, stratifying on the labels so that both sets keep the same class balance.


from sklearn.model_selection import train_test_split

# Materialise the extracted features and their labels as NumPy arrays.
# NOTE(review): `features` and `labels` are not defined in the visible cells --
# confirm where they are built before this cell runs.
X, y = np.array(features), np.array(labels)

# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)



In [None]:
### **5. Building the Model**

# You can use either a CNN or an RNN for this task.

# **CNN:**


from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

# CNN classifier over inputs of shape (20, 11, 1).
# NOTE(review): presumably (MFCC coefficients, frames, channel) as produced by
# wav2mfcc with max_len=11 plus a trailing channel axis -- confirm X is reshaped so.
# NOTE(review): `num_classes` is not defined in the visible cells; it must be
# set to the number of target classes before this cell runs.
model = Sequential([
    # Three stacked 2x2 convolutions with a growing number of filters.
    Conv2D(32, kernel_size=(2, 2), activation='relu', input_shape=(20, 11, 1)),
    Conv2D(48, kernel_size=(2, 2), activation='relu'),
    Conv2D(120, kernel_size=(2, 2), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Dense head with dropout between the fully connected layers.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Dense(64, activation='relu'),
    Dropout(0.4),
    # One softmax output per class.
    Dense(num_classes, activation='softmax'),
])

# **RNN:**

# ```python
# from keras.layers import LSTM

# model = Sequential()
# model.add(LSTM(100, return_sequences=True, input_shape=(20, 11)))
# model.add(LSTM(100))
# model.add(Dropout(0.5))
# model.add(Dense(50, activation='relu'))
# model.add(Dense(num_classes, activation='softmax'))
# ```


In [None]:
### **6. Compile and Train the Model**

# Compile the model using an optimizer and a suitable loss function depending upon the problem. Here we will use Adam optimizer and categorical cross-entropy loss.


# Adam + categorical cross-entropy, as stated in the description of this step.
# String identifiers are used because the bare name `keras` is never bound in
# this notebook (only `from keras.models import ...` / `from keras.layers
# import ...`), so `keras.losses` / `keras.optimizers` would raise NameError.
# NOTE(review): categorical cross-entropy assumes y_train / y_test are one-hot
# encoded -- confirm against the cell that builds `labels`.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train for 200 epochs in mini-batches of 100, reporting loss/accuracy on the
# held-out split after every epoch. The returned History object is kept so the
# training curves can be inspected afterwards.
history = model.fit(
    X_train,
    y_train,
    batch_size=100,
    epochs=200,
    verbose=1,
    validation_data=(X_test, y_test),
)


In [None]:
### **7. Test the Model**

# You can test the model using test data and visualize the results of the model using a confusion matrix or simply measure the accuracy.

# Evaluate on the held-out split. The first two entries of the result are
# reported as the test score (loss) and the test accuracy.
score = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print("Test Score:", test_loss)
print("Test Accuracy:", test_accuracy)
