
# 🥁 DeskDrummer AI — Tap-to-Track (Colab, fixed)

Turn desk taps into a mini song. Upload or record a short audio clip; the notebook detects onsets, estimates the tempo, quantizes the taps to a beat grid, and generates a drums + bass + hi-hat track.

**How to use**
1. Run **Setup**.
2. Upload or record an audio clip.
3. Run **Make Song** to generate and play your track.


## 🛠️ Setup

In [None]:

!pip -q install --upgrade librosa==0.10.2.post1 soundfile==0.12.1 numpy==2.0.2 scipy==1.13.1 pydub==0.25.1
import numpy as np, librosa, soundfile as sf, os, io, base64, math
from IPython.display import Audio, display, Javascript
from scipy.signal import sawtooth
from pathlib import Path
# Folder that receives the rendered mix; no error if it already exists.
Path("results").mkdir(exist_ok=True)
# Sample rate shared by the cells below.
SR = 22050
print("✅ Installed. SR =", SR)


## ☁️ Upload an audio file (optional)

In [None]:

from google.colab import files

# Reset first so a failed or cancelled upload leaves no stale selection.
audio_path = None
# Open the Colab upload widget; iterating the result yields the uploaded names.
uploaded = files.upload()
# Keep only the first uploaded name (None when nothing was uploaded).
audio_path = next(iter(uploaded), None)
if not audio_path:
    print("ℹ️ No file uploaded — you can record in the next cell.")
else:
    print("📁 Using uploaded file:", audio_path)


## 🎙️ Record from microphone (Chrome recommended)

In [None]:

# Defines the browser-side `record(seconds)` helper. It captures microphone
# audio with MediaRecorder and resolves to the raw recording (container format
# chosen by the browser) encoded as a base64 string. Decoding to WAV happens
# on the Python side, in the cell that calls `record(...)`.
from google.colab import output
from IPython.display import Javascript, display
record_js = r'''
// Everything is declared with `var` so evaluating this script more than once
// in the same page does not fail with "identifier already declared".
var sleep = time => new Promise(resolve => setTimeout(resolve, time));

// Base64-encode a Blob through FileReader. Spreading every byte into
// String.fromCharCode(...) can exceed the engine's argument limit on larger
// recordings; the data-URL route has no such limit.
var blobToBase64 = blob => new Promise((resolve, reject) => {
  const reader = new FileReader();
  // The result looks like "data:<mime>;base64,<payload>"; keep the payload only.
  reader.onload = () => resolve((String(reader.result).split(",")[1]) || "");
  reader.onerror = () => reject(reader.error);
  reader.readAsDataURL(blob);
});

// Plain async function (not an async Promise executor) so a rejected
// getUserMedia call, e.g. microphone permission denied, propagates to the caller.
var recordAudio = async () => {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const mediaRecorder = new MediaRecorder(stream);
  const audioChunks = [];
  mediaRecorder.addEventListener("dataavailable", event => { audioChunks.push(event.data); });
  const start = () => mediaRecorder.start();
  const stop = () => new Promise((resolve, reject) => {
    mediaRecorder.addEventListener("stop", () => {
      blobToBase64(new Blob(audioChunks)).then(resolve, reject);
    });
    mediaRecorder.stop();
    // Release the microphone once recording has been stopped.
    stream.getTracks().forEach(t => t.stop());
  });
  return { start, stop };
};

// Record for `s` seconds and resolve to the base64-encoded recording.
var record = async (s=4) => {
  const recorder = await recordAudio();
  recorder.start();
  await sleep(s*1000);
  const audio = await recorder.stop();
  return audio;
};
'''
display(Javascript(record_js))
print("Click allow mic access, then run the next cell to record.")

In [None]:

from google.colab import output
import base64, soundfile as sf, numpy as np, io
from IPython.display import Javascript, display
from pydub import AudioSegment

# Record from the microphone only when no file was uploaded. `globals().get`
# also covers a skipped upload cell, where `audio_path` was never defined.
if globals().get("audio_path") is None:
    # Define the name up front so a failed recording leaves it as None
    # (and the next step can report "no audio") instead of undefined.
    audio_path = None
    duration = 4  # seconds of microphone audio to capture

    # NOTE(review): Colab appears to render each cell's output in its own
    # iframe, so the `record` helper displayed by the previous cell may not be
    # visible to eval_js here. Re-inject the script into this cell's output
    # when it is available — confirm against the Colab output documentation.
    recorder_js = globals().get("record_js")
    if recorder_js is not None:
        display(Javascript(recorder_js))

    # Blocks until the browser has finished recording; errors raised by the
    # browser side (e.g. microphone access denied) propagate from here.
    b64 = output.eval_js(f"record({duration})")
    try:
        binary = base64.b64decode(b64)
        audio_io = io.BytesIO(binary)
        seg = AudioSegment.from_file(audio_io)  # webm/ogg -> decode via ffmpeg
        samples = np.array(seg.get_array_of_samples()).astype(np.float32)
        # Samples are interleaved per channel; average any multi-channel
        # recording (not just stereo) down to mono.
        if seg.channels > 1:
            samples = samples.reshape((-1, seg.channels)).mean(axis=1)
        # Scale signed integer PCM to [-1, 1) based on the sample width in bytes.
        samples = samples / (2**(8*seg.sample_width-1))
        sf.write("tap.wav", samples, seg.frame_rate)
        audio_path = "tap.wav"
        print("🎙️ Recorded to tap.wav")
    except Exception as e:
        # Best effort: report the problem and leave audio_path as None.
        print("Recording decode failed:", e)
else:
    print("Using previously uploaded file:", audio_path)


## 🎼 Make Song from your taps

In [None]:

# Fail with a clear message even when neither the upload nor the record cell
# ever defined `audio_path` (a bare reference would raise NameError instead).
if globals().get("audio_path") is None:
    raise RuntimeError("No audio selected. Upload or record first.")

# Load as mono at the notebook sample rate and peak-normalize.
y, sr = librosa.load(audio_path, sr=SR, mono=True)
if y.size == 0:
    raise ValueError(f"Audio file {audio_path!r} contains no samples.")
y = librosa.util.normalize(y)

# Onset detection and tempo
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, units='time')
# Depending on the librosa version, `tempo` comes back as a float or as a
# 1-element ndarray; an ndarray cannot be formatted with ":.1f", so reduce it
# to a plain Python float once.
tempo = float(np.atleast_1d(tempo)[0])
# units='time' makes these onset times in seconds, not frame indices.
onset_times = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, units='time')

print(f"Estimated tempo: {tempo:.1f} BPM")
print(f"Detected {len(onset_times)} taps")

# Quantize taps to grid
# Clamp the tempo to 60-180 BPM; fall back to 100 BPM when none was found.
bpm = max(60.0, min(180.0, tempo)) if tempo > 0 else 100.0
beat_sec = 60.0 / bpm
# Grid spans up to the last tap plus 4 extra beats, and is at least 8 beats long.
last_tap = onset_times[-1] if len(onset_times) > 0 else 8
length = max(8, int(np.ceil(last_tap / beat_sec) + 4))
grid = np.zeros(length)
# Snap every tap to its nearest beat and mark that grid cell.
tap_steps = np.rint(np.asarray(onset_times) / beat_sec).astype(int)
grid[tap_steps[(tap_steps >= 0) & (tap_steps < length)]] = 1

# Synthesize drums
# Output buffer: the grid plus 2 beats of tail so the final hits fit in it.
dur = int((length+2) * beat_sec * sr)
audio = np.zeros(dur, dtype=np.float32)

def env(n, a=0.005, d=0.2, sr=None):
    """Return an ``n``-sample amplitude envelope: linear attack, exponential decay.

    Args:
        n: Number of samples in the envelope.
        a: Attack time in seconds; the first ``max(1, int(a*sr))`` samples are
            scaled by a linear ramp from 0 to 1.
        d: Time constant in seconds of the ``exp(-t/d)`` decay.
        sr: Sample rate used to convert sample indices to seconds. Defaults to
            the notebook-wide ``SR``.

    Returns:
        A float array of length ``n``.
    """
    if sr is None:
        sr = SR
    t = np.linspace(0, n/sr, n, False)
    e = np.exp(-t/d)
    ramp = np.linspace(0, 1, max(1, int(a*sr)))
    # A sound shorter than the attack only gets the start of the ramp;
    # multiplying by the full ramp would fail with a shape mismatch.
    k = min(n, len(ramp))
    e[:k] *= ramp[:k]
    return e

def add_kick(t0):
    """Mix one kick hit into the global ``audio`` buffer, starting at sample ``t0``.

    The hit lasts 0.25 s: a sine whose frequency term ``100*exp(-10*t)`` falls
    over time, shaped by ``env`` (a=0.002, d=0.15) and scaled by 0.6.
    """
    num = int(0.25*sr)
    times = np.linspace(0, num/sr, num, False)
    sweep = 100*np.exp(-times*10)
    tone = np.sin(2*np.pi*sweep*times)
    hit = 0.6*tone*env(num, a=0.002, d=0.15)
    audio[t0:t0+num] += hit

def add_snare(t0):
    """Mix one snare hit into the global ``audio`` buffer, starting at sample ``t0``.

    The hit is 0.15 s of Gaussian noise shaped by ``env`` (a=0.001, d=0.12)
    and scaled by 0.3.
    """
    num = int(0.15*sr)
    hiss = np.random.randn(num)
    hit = 0.3*hiss*env(num, a=0.001, d=0.12)
    audio[t0:t0+num] += hit

def add_hat(t0):
    """Mix one hi-hat tick into the global ``audio`` buffer, starting at sample ``t0``.

    The tick is 0.05 s of Gaussian noise shaped by ``env`` (a=0.001, d=0.05)
    and scaled by 0.15.
    """
    num = int(0.05*sr)
    hiss = np.random.randn(num)
    tick = 0.15*hiss*env(num, a=0.001, d=0.05)
    audio[t0:t0+num] += tick

# Lay out the drum pattern one beat at a time:
#   kick on every beat that carries a tap, snare on every even-numbered beat,
#   hi-hat on the beat and again half a beat later.
for step, tapped in enumerate(grid):
    start = int(step*beat_sec*sr)
    if tapped > 0:
        add_kick(start)
    if step % 2 == 0:
        add_snare(start)
    add_hat(start)
    add_hat(start + int(0.5*beat_sec*sr))

# Simple bass
# One-beat sine note at the root frequency, shaped by env. The note is the
# same for every step, so it is synthesized once instead of once per iteration.
root_freq = 55
n = int(beat_sec*sr)
t = np.linspace(0, n/sr, n, False)
wave = 0.25*np.sin(2*np.pi*root_freq*t)
bass_note = wave*env(n, a=0.005, d=0.25)
# Play the note on every tapped beat and on every fourth beat.
for i in range(length):
    if grid[i] > 0 or i % 4 == 0:
        t0 = int(i*beat_sec*sr)
        audio[t0:t0+n] += bass_note

# Normalize & export
# Scale the mix so its peak sits at 0.9; the small offset keeps the division
# defined when the mix is completely silent.
out_path = "results/deskdrummer_mix.wav"
peak = np.abs(audio).max() + 1e-6
audio = (audio / peak * 0.9).astype(np.float32)
sf.write(out_path, audio, sr)
print(f"✅ Wrote {out_path}")

# Inline player for the rendered file.
display(Audio(out_path, rate=sr))
