In [None]:
from cProfile import label
import librosa
import os
import json
import numpy as np

In [None]:
import tensorflow as tf

# Restrict TensorFlow to the first GPU (when one exists) and let it grow its
# memory allocation on demand instead of reserving the whole card up front.
# Everything that indexes gpus[0] lives behind the `if gpus:` guard so the
# cell also runs cleanly on a CPU-only machine.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Set only the first GPU as visible
        tf.config.set_visible_devices(gpus[0], 'GPU')
        # Allow memory growth to allocate memory dynamically on the GPU
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("GPU configuration successful.")
    except RuntimeError as e:
        # TensorFlow raises RuntimeError when the devices were already
        # initialised (e.g. the cell is re-run after the GPU has been used).
        print(e)
else:
    print("No GPU detected.")

In [None]:
# Number of samples kept per clip (1 sec. of audio); shorter clips are
# zero-padded and longer ones truncated to this length further down.
SAMPLES_TO_CONSIDER = 16_000

# Input: root folder that holds one sub-folder per keyword.
DATASET_PATH = "/home/ec.gpu/Desktop/Soumen/kws/dataset_12/"

# Output: folder that receives the generated .npy files.
save_path = "/home/ec.gpu/Desktop/Soumen/kws/data_npy/"

In [None]:
# Define Hamming window function
def hamming_window(length):
    """Return a symmetric Hamming window with `length` samples.

    The window is ``w[n] = 0.54 - 0.46 * cos(2 * pi * n / (length - 1))`` for
    ``n = 0 .. length - 1``.

    Parameters
    ----------
    length : int
        Number of samples in the window.

    Returns
    -------
    numpy.ndarray
        1-D float array of `length` values. A length of 1 yields ``[1.0]`` and
        a length of 0 or less yields an empty array.
    """
    if length <= 1:
        # The general formula divides by (length - 1); for a single sample
        # that is 0/0 and produces NaN. Return a unit window instead, which
        # is the same convention numpy.hamming uses.
        return np.ones(max(int(length), 0))
    sample_index = np.arange(length)
    return 0.54 - 0.46 * np.cos(2 * np.pi * sample_index / (length - 1))

In [None]:
# Keywords to extract; the position of a keyword in this list is its class
# index in keyword_index_dict. ('silence' is intentionally left out.)
keyword_list = [
    'off',
    'left',
    'down',
    'up',
    'go',
    'on',
    'stop',
    'unknown',
    'no',
    'right',
    'yes',
]

# keyword -> integer class index
keyword_index_dict = {keyword: index for index, keyword in enumerate(keyword_list)}
print(keyword_index_dict)

In [None]:
# Parameters for STFT
# Frame and hop durations in milliseconds, converted to sample counts with
# SAMPLES_TO_CONSIDER standing in for the number of samples per second.
FRAME_DURATION_MS = 32
HOP_DURATION_MS = 16

frame_length = int(SAMPLES_TO_CONSIDER * (FRAME_DURATION_MS / 1000))
hop_length = int(SAMPLES_TO_CONSIDER * (HOP_DURATION_MS / 1000))

# Show both sizes, one per line.
print(frame_length, hop_length, sep="\n")

In [None]:
# Build the log-mel-spectrogram dataset.
#
# Every keyword sub-folder of DATASET_PATH is visited; each clip is loaded,
# forced to exactly SAMPLES_TO_CONSIDER samples, converted to a
# log-mel-spectrogram and stored (transposed, i.e. frames first) together with
# the integer class index of its keyword.

MAX_FILES_PER_LABEL = 1000  # upper bound on clips taken from one keyword folder
n_mels = 40  # Number of Mel bands

# Initialize lists to hold data
labels = []
log_mel_spectrograms = []

# The window only depends on the clip length, so compute it once.
# NOTE(review): this tapers the whole clip, on top of the per-frame window
# that librosa applies inside the STFT - confirm that this is intended.
clip_window = hamming_window(SAMPLES_TO_CONSIDER)

# loop through all sub-dirs
for dirpath, dirnames, filenames in os.walk(DATASET_PATH):
    # Visit sub-folders in a stable order so the saved arrays are reproducible.
    dirnames.sort()

    # ensure we're at sub-folder level (compare by value, not by identity)
    if dirpath == DATASET_PATH:
        continue

    # the sub-folder name is the keyword; skip folders that are not keywords
    label = os.path.basename(dirpath)
    if label not in keyword_index_dict:
        continue

    # Take the class index from keyword_index_dict so that it matches the
    # printed mapping instead of depending on directory traversal order.
    label_index = keyword_index_dict[label]
    n_loaded = 0
    print("\nProcessing: '{}'".format(label))

    # process all audio files in sub-dir and store log-mel-spectrograms;
    # sorted so that the capped subset is the same on every run
    for filename in sorted(filenames):
        if n_loaded >= MAX_FILES_PER_LABEL:
            break

        file_path = os.path.join(dirpath, filename)
        try:
            # Load at the rate the rest of the pipeline assumes. Without sr=,
            # librosa resamples to its own default rate, so
            # SAMPLES_TO_CONSIDER samples would no longer be one second and
            # the mel filterbank below would be built for the wrong rate.
            signal, sample_rate = librosa.load(file_path, sr=SAMPLES_TO_CONSIDER)
        except Exception as e:
            print(f"Error loading file {file_path}: {e}")
            # Skip the file; otherwise the previous clip would be reused.
            continue

        # Force a fixed length: truncate long clips, zero-pad short ones at the end.
        if len(signal) >= SAMPLES_TO_CONSIDER:
            signal = signal[:SAMPLES_TO_CONSIDER]
        else:
            n_missing = SAMPLES_TO_CONSIDER - len(signal)
            signal = np.pad(signal, (0, n_missing), mode='constant', constant_values=0)

        signal = signal.astype(np.float32)
        hamming_window_signal = signal * clip_window  # Apply Hamming window to audio signal

        # Compute the log-mel-spectrogram
        mel_spectrogram = librosa.feature.melspectrogram(
            y=hamming_window_signal,
            sr=SAMPLES_TO_CONSIDER,
            n_mels=n_mels,
            n_fft=frame_length,
            win_length=frame_length,
            hop_length=hop_length,
            center=False,
        )

        # Convert to log scale (dB relative to the clip's own maximum)
        log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

        log_mel_spectrograms.append(log_mel_spectrogram.T)
        labels.append(label_index)
        n_loaded += 1

    # Per-keyword summary: keyword, class index, number of clips kept.
    print("{} : {} = ".format(label, label_index), n_loaded)
    if n_loaded:
        print(log_mel_spectrogram.shape)


# Make sure the output folder exists before writing into it.
os.makedirs(save_path, exist_ok=True)
np.save(os.path.join(save_path, 'y_log_mel_spectrograms.npy'), np.array(labels))
np.save(os.path.join(save_path, 'X_log_mel_spectrograms.npy'), np.array(log_mel_spectrograms))

print("Data saved in .npy format in the specified directory")




In [None]:
from sklearn.model_selection import train_test_split

# Turn the collected labels and features into NumPy arrays before splitting.
labels = np.array(labels)
log_mel_spectrograms = np.array(log_mel_spectrograms)

# First cut: 80% for training, 20% held out; class proportions are preserved.
X_train, X_temp, y_train, y_temp = train_test_split(
    log_mel_spectrograms,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Second cut: halve the held-out part into validation (10%) and test (10%).
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)

# Write every split to save_path as <name>.npy
for split_name, split_array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_val", X_val),
    ("y_val", y_val),
    ("X_test", X_test),
    ("y_test", y_test),
):
    np.save(os.path.join(save_path, split_name + ".npy"), split_array)

print("Data split and saved in .npy format.")
