Making datasets

In [None]:
import librosa
import numpy as np
import soundfile
import os

def manipulate(data, noise_factor):
    """Return a copy of ``data`` with scaled Gaussian noise added.

    Args:
        data: Audio samples as a NumPy array (any array-like is accepted).
        noise_factor: Multiplier applied to standard-normal noise before it
            is added to ``data``.

    Returns:
        A new array with the same shape and dtype as ``data``; the input
        itself is not modified.
    """
    data = np.asarray(data)
    # One noise value per sample. Using the full shape (not just len())
    # keeps this correct for multi-dimensional input as well.
    noise = np.random.standard_normal(data.shape)
    augmented_data = data + noise_factor * noise
    # Cast back to the input dtype. Reading ``data.dtype`` instead of
    # inspecting ``data[0]`` also works when ``data`` is empty.
    return augmented_data.astype(data.dtype)

def create_datasets(source_file, output_folder, label, num_datasets):
    """Write ``num_datasets`` noisy copies of ``source_file`` to ``output_folder``.

    Every copy gets its own noise factor, drawn uniformly between 0.0 and
    0.15, and is saved as ``{label}_{index}_noise_{factor}.wav`` using the
    sampling rate returned by ``librosa.load``.

    Args:
        source_file: Path of the clean recording to augment.
        output_folder: Directory for the generated files; created if missing.
        label: Prefix for the file names and for the progress messages.
        num_datasets: Number of augmented files to write.
    """
    # exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # The source audio is identical for every copy, so decode it once rather
    # than on each iteration. manipulate() builds a new array, so ``data``
    # is never altered between iterations.
    data, sampling_rate = librosa.load(source_file, sr=None)

    for i in range(num_datasets):
        noise_factor = np.random.uniform(0.0, 0.15)
        augmented_data = manipulate(data, noise_factor)
        output_file = os.path.join(output_folder, f"{label}_{i+1}_noise_{noise_factor:.2f}.wav")
        soundfile.write(output_file, augmented_data, sampling_rate)
        print(f"{label.capitalize()} dataset {i+1} created with noise factor: {noise_factor:.2f}")

# Source files for dot and dash
dot_source_file = "dot_new.wav"
dash_source_file = "dash_new.wav"

# Output folders for dot and dash datasets
dot_output_folder = "dot"
dash_output_folder = "dash"

# Number of datasets for dot and dash
num_datasets = 3000

# Seed NumPy's global RNG so the noise (and the noise factors, which are part
# of every output file name) are the same on each run. Without a seed,
# re-running this cell leaves stale files from the previous run next to the
# new ones whenever a sample's noise factor changes, silently growing the
# training folders.
np.random.seed(42)

# Create dot datasets
create_datasets(dot_source_file, dot_output_folder, "dot", num_datasets)

# Create dash datasets
create_datasets(dash_source_file, dash_output_folder, "dash", num_datasets)

training model

In [None]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Function to extract features from audio files
def extract_features(audio_file):
    """Load an audio file and reduce it to a single feature vector.

    The vector is the per-coefficient mean of 13 MFCCs followed by the
    per-bin mean of the chroma STFT, both averaged over the first axis of
    the transposed librosa output.

    Args:
        audio_file: Path of the audio file to load (loaded with ``sr=None``).

    Returns:
        1-D NumPy array: MFCC means followed by chroma means.
    """
    signal, rate = librosa.load(audio_file, sr=None)

    mfcc_frames = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13)
    chroma_frames = librosa.feature.chroma_stft(y=signal, sr=rate)

    mfcc_mean = np.mean(mfcc_frames.T, axis=0)
    chroma_mean = np.mean(chroma_frames.T, axis=0)
    return np.concatenate([mfcc_mean, chroma_mean])

# Path to the directories containing dash and dot audio files
dash_folder = "dash"
dot_folder = "dot"

# Collect features and labels
X = []
y = []

# One pass per class folder; the label is attached to every .wav file found.
# os.listdir() returns names in arbitrary order, so they are sorted: otherwise
# the row order of X can differ between machines or runs, and the seeded
# train/test split further down would not select the same samples.
for folder, label in ((dash_folder, "dash"), (dot_folder, "dot")):
    for file in sorted(os.listdir(folder)):
        if file.endswith(".wav"):
            features = extract_features(os.path.join(folder, file))
            X.append(features)
            y.append(label)

# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree Random Forest and fit it on the training portion
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict on both portions first, then score and report each one
y_pred_train = rf_classifier.predict(X_train)
y_pred_test = rf_classifier.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
print("Training accuracy:", train_accuracy)

test_accuracy = accuracy_score(y_test, y_pred_test)
print("Test accuracy:", test_accuracy)

# Persist the fitted classifier so later cells can reload it without retraining
model_filename = "audio_classifier_model.pkl"
joblib.dump(rf_classifier, model_filename)
print("Model saved to", model_filename)


predicting with audio files

In [2]:
import os
import numpy as np
import librosa
import joblib

# Load the trained model
# (same file name that the training cell passes to joblib.dump).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files you created yourself or otherwise trust.
model_filename = "audio_classifier_model.pkl"
rf_classifier = joblib.load(model_filename)

# Function to extract features from a 1-second audio clip
def extract_features_clip(data, sr):
    """Turn one in-memory audio clip into a single feature vector.

    Args:
        data: Audio samples of the clip.
        sr: Sampling rate of ``data``.

    Returns:
        1-D NumPy array: the means of 13 MFCCs followed by the means of the
        chroma STFT bins, each averaged over the first axis of the
        transposed librosa output.
    """
    mfcc_frames = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=13)
    chroma_frames = librosa.feature.chroma_stft(y=data, sr=sr)
    # Same reduction for both feature matrices, MFCC first, chroma second.
    averaged = [np.mean(frames.T, axis=0) for frames in (mfcc_frames, chroma_frames)]
    return np.concatenate(averaged)

# Function to split an audio file into 1-second clips
def split_audio_file(audio_file):
    """Load an audio file and cut it into consecutive one-second clips.

    Clips do not overlap. A trailing remainder shorter than one second is
    dropped, so every returned clip has exactly ``sr`` samples.

    Args:
        audio_file: Path of the audio file to load (loaded with ``sr=None``).

    Returns:
        Tuple ``(clips, sr)``: a NumPy array built from the list of clips,
        and the sampling rate returned by ``librosa.load``.
    """
    samples, sr = librosa.load(audio_file, sr=None)
    samples_per_clip = sr  # one second of audio

    # Only start offsets whose full window still fits inside the signal.
    last_start = len(samples) - samples_per_clip
    full_clips = [
        samples[start:start + samples_per_clip]
        for start in range(0, last_start + 1, samples_per_clip)
    ]
    return np.array(full_clips), sr

# Function to classify audio clips as dash or dot
def classify_audio_clips(clips, sr):
    """Predict a label for every clip, preserving clip order.

    Each clip is converted to a feature vector with ``extract_features_clip``
    and passed on its own to the module-level ``rf_classifier``.

    Args:
        clips: Iterable of audio clips.
        sr: Sampling rate shared by all clips.

    Returns:
        List with one prediction per clip.
    """
    return [
        rf_classifier.predict([extract_features_clip(clip, sr)])[0]
        for clip in clips
    ]

# Path to the audio file
audio_file = "input1.wav"

# Split the audio file into 1-second clips
# (split_audio_file also returns the file's sampling rate, which the feature
# extraction needs; a trailing part shorter than one second is dropped there)
clips, sr = split_audio_file(audio_file)

# Classify each clip
# (one label per clip, in the same order as the clips)
predictions = classify_audio_clips(clips, sr)

# Print the predictions
# (clips are numbered from 1 in the output)
for i, prediction in enumerate(predictions):
    print(f"Clip {i+1}: {prediction}")


Clip 1: dash
Clip 2: dash
Clip 3: dot
Clip 4: dot
Clip 5: dash
Clip 6: dot
Clip 7: dot


Attempt at real-time predictions

In [1]:
import time

import joblib
import librosa
import numpy as np
import pyaudio

# Load the trained model
# (same file name that the training cell passes to joblib.dump).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files you created yourself or otherwise trust.
model_filename = "audio_classifier_model.pkl"
rf_classifier = joblib.load(model_filename)

# Function to extract features from a 1-second audio clip
def extract_features_clip(data, sr):
    """Turn one in-memory audio clip into a single 1-D feature vector.

    Args:
        data: Floating-point audio samples of the clip.
        sr: Sampling rate of ``data``.

    Returns:
        1-D NumPy array: the means of 13 MFCCs followed by the means of the
        chroma STFT bins.
    """
    mfccs = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=data, sr=sr)
    # Both librosa calls return (n_features, n_frames) matrices, so the
    # former ``len(shape) > 1`` guards were always true. Average over the
    # frames unconditionally to get one value per coefficient / chroma bin.
    return np.concatenate([np.mean(mfccs.T, axis=0), np.mean(chroma.T, axis=0)])

# Function to classify a 1-second audio clip
def classify_clip(data, sr):
    """Predict the label of one clip of integer PCM samples.

    The samples are cast to float32 and divided by 32768.0 before the
    features are extracted and handed to the module-level ``rf_classifier``.

    Args:
        data: NumPy array of samples (the stream callback passes int16).
        sr: Sampling rate of ``data``.

    Returns:
        The classifier's prediction for the clip.
    """
    normalized = data.astype(np.float32) / 32768.0  # Convert to floating-point
    feature_row = extract_features_clip(normalized, sr)
    return rf_classifier.predict([feature_row])[0]


# Callback function for audio stream processing
def audio_callback(in_data, frame_count, time_info, status):
    """PyAudio stream callback: buffer incoming audio and classify each second.

    The incoming bytes are read as int16 samples and appended to the
    module-level ``audio_buffer``. While the buffer holds at least
    ``sampling_rate`` samples, one second is removed from its front,
    classified with ``classify_clip`` and the result is printed. After
    ``max_predictions`` predictions the stream is asked to finish.

    Args:
        in_data: Raw bytes of the recorded frames.
        frame_count: Supplied by PyAudio; unused.
        time_info: Supplied by PyAudio; unused.
        status: Supplied by PyAudio; unused.

    Returns:
        ``(in_data, flag)`` where ``flag`` is ``pyaudio.paContinue`` while
        more predictions are wanted and ``pyaudio.paComplete`` afterwards.
    """
    global predictions_made

    # Nothing left to do once the limit was reached.
    if predictions_made >= max_predictions:
        return (in_data, pyaudio.paComplete)

    audio_data = np.frombuffer(in_data, dtype=np.int16)
    audio_buffer.extend(audio_data)

    # NOTE(review): classification runs on the audio callback thread; if it
    # takes longer than one buffer period, input may be dropped. Consider
    # handing clips to a worker (e.g. via a queue) if predictions go missing.
    while len(audio_buffer) >= sampling_rate:
        clip = np.array(audio_buffer[:sampling_rate])
        del audio_buffer[:sampling_rate]
        prediction = classify_clip(clip, sampling_rate)
        print(f"Prediction {predictions_made + 1}: {prediction}")
        predictions_made += 1
        if predictions_made >= max_predictions:
            print("Maximum predictions reached. Stopping stream.")
            # Signal completion through the return flag. Calling
            # stream.stop_stream() from inside the callback is not allowed
            # by PortAudio and can hang the stream.
            return (in_data, pyaudio.paComplete)
    return (in_data, pyaudio.paContinue)

# Parameters
sampling_rate = 44100
channels = 1
frames_per_buffer = 1024
max_predictions = 10  # Make 10 predictions in 10 seconds

# Initialize the state shared with audio_callback BEFORE opening the stream:
# PyAudio starts a stream as soon as it is opened (start=True is the default),
# so the callback may already run before any statement placed after p.open().
audio_buffer = []  # Initialize audio buffer
predictions_made = 0

# Initialize PyAudio
p = pyaudio.PyAudio()
stream = None

try:
    # Open stream using callback
    stream = p.open(format=pyaudio.paInt16,
                    channels=channels,
                    rate=sampling_rate,
                    input=True,
                    frames_per_buffer=frames_per_buffer,
                    stream_callback=audio_callback)

    print("Listening for audio...")

    # Start stream
    stream.start_stream()

    # Wait for stream to finish. Sleep between polls instead of spinning in a
    # tight loop, which would burn a CPU core and compete with the callback
    # thread that does the actual classification.
    try:
        while stream.is_active():
            time.sleep(0.1)
    except KeyboardInterrupt:
        pass
finally:
    # Always release the audio device, even if opening or running failed.
    if stream is not None:
        # Stop stream
        stream.stop_stream()
        stream.close()

    # Close PyAudio
    p.terminate()


Listening for audio...
Prediction 1: dot
Prediction 2: dot
Prediction 3: dot
