In [2]:
import numpy as np
import librosa
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
from tqdm import tqdm

In [3]:
csv_path = r"tensorflow-speech-recognition-challenge\train"
train_df = pd.read_csv(f"{csv_path}/train.csv")
test_df = pd.read_csv(f"{csv_path}/test.csv")
label_list = list(train_df['word'].unique())
target_length = 64480

In [29]:
def audio_to_array(file_path):
	audio, _ = librosa.load(file_path, sr=16000, dtype=np.float32)
	padded_audio = np.pad(audio, (0, max(0, target_length - len(audio))), mode='constant')
	return padded_audio

In [9]:
def load_audios(df):
  return [audio_to_array(path) for path in tqdm(df['path'])]

In [30]:
audios_temp = load_audios(train_df)


[A
[A
100%|██████████| 160/160 [00:00<00:00, 599.00it/s]


In [10]:
def compute_mfcc(audio, sr=16000, n_mfcc=13):
	return librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

In [57]:
def mfcc_feature_extracion(df):
  return [compute_mfcc(audio) for audio in tqdm(load_audios(df)[:2])]

In [58]:
train_mfcc_features = mfcc_feature_extracion(train_df)
test_mfcc_features = mfcc_feature_extracion(test_df)


[A
[A
100%|██████████| 160/160 [00:00<00:00, 527.34it/s]

100%|██████████| 2/2 [00:00<00:00, 93.92it/s]

100%|██████████| 41/41 [00:00<00:00, 521.99it/s]

100%|██████████| 2/2 [00:00<00:00, 174.10it/s]


In [70]:
X_train = train_mfcc_features

# Assuming train_df['word'] contains the target labels
y_train = train_df['word'][:2]

In [71]:
from sklearn.ensemble import RandomForestClassifier


# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

ValueError: Found array with dim 3. RandomForestClassifier expected <= 2.

In [49]:
print(len(X_train))  # Print the length of X_train
print(len(y_train))  # Print the length of y_train

# Check a few examples to ensure they match
for i in range(5):  # Print the first 5 examples for inspection
    print(X_train[i].shape, y_train[i])


2080
160
(126,) Fuck
(126,) Shit
(126,) Fuck
(126,) Shit
(126,) Shit


In [None]:
target_length = 50000  # Adjust this based on your dataset and requirements
padded_audio = load_audio('your_audio_file.wav', target_length=target_length)


In [None]:
mfcc_features = compute_mfcc(padded_audio)


In [4]:
import librosa
import numpy as np
import pandas as pd

# Load audio file and corresponding label
def load_audio(file_path):
    audio, sr = librosa.load(file_path, sr=None)  # Load audio file
    return audio, sr

# Load all audio files and labels into a list
audio_files = [...]  # List of file paths
labels = [label for label in train_df['word']]  # List of corresponding labels
audio_data = [(load_audio(file)) for file in train_df['path']]


In [7]:
# Function to extract MFCC features from audio
def extract_mfcc(audio, sr, num_mfcc=13, n_fft=2048, hop_length=512, pad_to=None):
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
    if pad_to:
        pad_width = pad_to - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
    return mfcc.flatten()  # Flatten MFCC matrix

# Extract MFCC features for all audio data
mfcc_features = [extract_mfcc(audio, sr, pad_to=target_length) for (audio, sr) in audio_data]


In [8]:
# Create DataFrame with MFCC features and labels
df = pd.DataFrame(mfcc_features)
df['label'] = labels


In [9]:
from sklearn.ensemble import RandomForestClassifier

# Prepare features and target
X = df.drop('label', axis=1)
y = df['label']

# Initialize and train the random forest model
rf_model = RandomForestClassifier()

In [12]:
rf_model.fit(X, y)

In [14]:
test_audio_files = [...]  # List of file paths
test_labels = [label for label in test_df['word']]  # List of corresponding labels
test_audio_data = [(load_audio(file)) for file in test_df['path']]

test_mfcc_features = [extract_mfcc(audio, sr, pad_to=target_length) for (audio, sr) in test_audio_data]

test_df = pd.DataFrame(test_mfcc_features)
test_df['label'] = test_labels

In [15]:
test_X = test_df.drop('label', axis=1)
test_y = test_df['label']

In [16]:
y_pred = rf_model.predict(test_X)

In [17]:
from sklearn.metrics import classification_report

In [18]:
print(classification_report(test_y, y_pred))

              precision    recall  f1-score   support

        Fuck       0.83      0.90      0.86        21
        Shit       0.89      0.80      0.84        20

    accuracy                           0.85        41
   macro avg       0.86      0.85      0.85        41
weighted avg       0.86      0.85      0.85        41

