In [32]:
from tensorflow.keras.models import load_model

loaded_model = load_model("pitch_detection_cnn.keras")

In [None]:
import sounddevice as sd
import numpy as np
import librosa
import joblib
import soundfile as sf



fs = 44100
max_frames = 40
chunk_size = int(0.3 * fs)
threshold = 0.005

buffer = np.zeros(chunk_size)
hit_number = 0  # global counter

# Define pitch classes (C, C#, D, ..., B)
pitch_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 
               'F#', 'G', 'G#', 'A', 'A#', 'B']

def process_snippet(snippet):
    print("Processing snippet of length:", len(snippet))

    # Chroma CQT feature
    chroma = librosa.feature.chroma_cqt(y=snippet, sr=fs, bins_per_octave=36)
    chroma = chroma / (np.sum(chroma, axis=0, keepdims=True) + 1e-6)

    # Pad or truncate chroma to fixed length
    if chroma.shape[1] < max_frames:
        pad_width = max_frames - chroma.shape[1]
        chroma = np.pad(chroma, ((0, 0), (0, pad_width)), mode='constant')
    else:
        chroma = chroma[:, :max_frames]

    # Add batch dimension
    chroma_input = np.expand_dims(chroma, axis=0)  # shape: (1, 12, max_frames)

    prediction = loaded_model.predict(chroma_input)
    predicted_class = np.argmax(prediction)

    # Map class index to pitch name
    predicted_pitch = pitch_names[(predicted_class + 9)%12]

    print("Predicted pitch:", predicted_pitch)

    # Play immediately
    import sounddevice as sd
    sd.play(snippet, fs)

ignore_samples = int(0.5 * fs)  # ignore next 0.5 sec after a hit
samples_since_last_hit = ignore_samples  # initialize above threshold

import numpy as np
pre_trigger_sec = 0.03  # 50 ms

def callback(indata, frames, time, status):
    global buffer, hit_number, samples_since_last_hit
    audio_chunk = indata[:, 0]  # mono
    samples_since_last_hit += len(audio_chunk)

    if np.max(np.abs(audio_chunk)) > threshold and samples_since_last_hit >= ignore_samples:
        
        print(f"Hit detected! Total hits: {hit_number}")

        # Find first sample above threshold
        above_thresh = np.where(np.abs(audio_chunk) > threshold)[0]
        if len(above_thresh) == 0:
            return
        start_idx = above_thresh[0]

        # Include pre-trigger
        pre_trigger_samples = int(pre_trigger_sec * fs)
        start_idx = max(0, start_idx - pre_trigger_samples)
        trimmed = audio_chunk[start_idx:]

        # Take exactly 0.25 s
        snippet_length = int(0.25 * fs)
        if len(trimmed) >= snippet_length:
            snippet = trimmed[:snippet_length]
        else:
            pad_width = snippet_length - len(trimmed)
            snippet = np.pad(trimmed, (0, pad_width), mode='constant')

        # Normalize amplitude
        snippet = snippet / (np.max(np.abs(snippet)) + 1e-6)


        filename = f"New Pitches/{pitch_names[hit_number%12]}_{hit_number//12 + 3}_real2.wav"
        sf.write(filename, snippet, fs)

        process_snippet(snippet)
        hit_number += 1
        # Reset cooldown
        samples_since_last_hit = 0

        

    # Update circular buffer
    buffer = np.roll(buffer, -len(audio_chunk))
    buffer[-len(audio_chunk):] = audio_chunk

with sd.InputStream(channels=1, callback=callback, samplerate=fs, blocksize=chunk_size):
    print("Recording... Press Ctrl+C to stop")
    while True:
        sd.sleep(1000)


Recording... Press Ctrl+C to stop
Hit detected! Total hits: 0
Processing snippet of length: 11025
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Predicted pitch: A#




Hit detected! Total hits: 1
Processing snippet of length: 11025
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Predicted pitch: C
Hit detected! Total hits: 2
Processing snippet of length: 11025
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Predicted pitch: C#
Hit detected! Total hits: 3
Processing snippet of length: 11025
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Predicted pitch: D
Hit detected! Total hits: 4
Processing snippet of length: 11025
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Predicted pitch: D
Hit detected! Total hits: 5
Processing snippet of length: 11025
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Predicted pitch: C
Hit detected! Total hits: 6
Processing snippet of length: 11025
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Predicted pitch: C#
Hit detected! Total hits: 7
Processing snippet of length: 11025
[1m

KeyboardInterrupt: 