In [1]:
import wave
import array

def wav_to_uint8_list(file_path):
    with wave.open(file_path, 'rb') as wav_file:
        # Get the parameters of the WAV file
        sample_width = wav_file.getsampwidth()
        frame_rate = wav_file.getframerate()

        # Read all frames from the WAV file
        frames = wav_file.readframes(wav_file.getnframes())

        uint8_list = array.array('B', frames)

        return uint8_list

def save_uint8_list_to_file(uint8_list, output_file_path):
    with open(output_file_path, 'wb') as output_file:
        output_file.write(uint8_list)

file_path = 'drumkit/snare-drum.wav'
uint8_list = wav_to_uint8_list(file_path)

output_file_path = 'drumkit/snare-drum.bin'
# save_uint8_list_to_file(uint8_list, output_file_path)


In [2]:
import array

def read_uint8_list_from_file(file_path):
    with open(file_path, 'rb') as file:
        uint8_list = array.array('B', file.read())
    return uint8_list

file_path = 'shaker.bin'
original_sound_byte_array = read_uint8_list_from_file(file_path)


In [55]:
def snap_to_beat(timestamp, beat, half_beat):
    nearest_beat = round(timestamp / beat) * beat
    nearest_half_beat = round(timestamp / half_beat) * half_beat

    return min(nearest_beat, nearest_half_beat, key=lambda x: abs(x - timestamp))

In [102]:
import numpy as np
import time
from math import sqrt

def mix_sounds(timestamps, volumes, byte_array, speed, existing_array = None):
    sound_data = np.frombuffer(byte_array, dtype=np.int32)
    
    beat = speed/60 * 1000
    half_beat = speed/60 * 500
    
    timestamps = [snap_to_beat(timestamp, beat, half_beat) for timestamp in timestamps]
        
    if existing_array is None:
        sound_final = np.empty(0, dtype=np.int32)
        
        for i in range(0, len(timestamps)):
            delay = int(timestamps[i] * 44100 / 1000) - len(sound_final)
            sound_final = np.concatenate([sound_final, np.zeros(delay, dtype=np.int32)])
            volume_factor = pow(2, (sqrt(sqrt(sqrt(volumes[i]))) * 192 - 192)/6)
            scaled_sound_segment = (sound_data * volume_factor).astype(np.int32) 

            sound_final = np.concatenate([sound_final, scaled_sound_segment])

        # add a 0.1 seconds after the audio so that it does not sound cut off
        sound_final = np.concatenate([sound_final, np.zeros(int(100*44100/1000), dtype=np.int32)])

        return sound_final
    
    else:
        existing_data = np.frombuffer(existing_array, dtype=np.int32)
        hasPassedLength = False
        for i in range(len(timestamps)):
            overlap_start_index = int(timestamps[i] * 44100 / 1000)
            overlap_end_index = overlap_start_index + len(sound_data)
            
            volume_factor = pow(2, (sqrt(sqrt(sqrt(volumes[i]))) * 192 - 192)/6)
            scaled_sound_segment = (sound_data * volume_factor).astype(np.int32) 
        
            if overlap_start_index > len(existing_data): 
                # pad with 0s until the start index
                padding = overlap_start_index - len(existing_data)
                existing_data = np.concatenate([existing_data, np.zeros(padding, dtype=np.int32)])
                existing_data = np.concatenate([existing_data, scaled_sound_segment])
            elif overlap_start_index < len(existing_data) and overlap_end_index > len(existing_data):
                front_values = len(existing_data) - overlap_start_index

                existing_data[overlap_start_index:] = np.add(existing_data[overlap_start_index:], scaled_sound_segment[:front_values])
                existing_data = np.concatenate([existing_data, scaled_sound_segment[front_values:]])
            else:
                existing_data[overlap_start_index:overlap_end_index] = np.add(existing_data[overlap_start_index:overlap_end_index], sound_data)
        
        
        # add a 0.1 seconds after the audio so that it does not sound cut off
        existing_data = np.concatenate([existing_data, np.zeros(int(100*44100/1000), dtype=np.int32)])
        
        return existing_data

In [103]:
import wave

def save_as_wav(byte_array, filename, sample_width=4, channels=1, frame_rate=44100):
    with wave.open(filename, 'wb') as wave_file:
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(frame_rate)
        wave_file.setnchannels(channels)
        wave_file.writeframes(byte_array)

# Example usage:
# output_filename = 'looped_sound1.wav'
# save_as_wav(looped_byte_array, output_filename)
# save_as_wav(original_sound_byte_array, "shaker1.wav")


In [104]:
# tesing mixing

guiro_bytes = read_uint8_list_from_file("guiro.bin")
shaker_bytes = read_uint8_list_from_file("shaker.bin")
cymbal_bytes = read_uint8_list_from_file("drumkit/cymbal.bin")

shaker_timestamps = [1027, 2017, 3057, 4083]
guiro_timestamps = [2550, 4670]#[1000, 2000, 3000, 4000] [5000, 6000, 7000, 8000] #[3057, 6000]
cymbal_timestamps = [5056]

shaker_volumes = [1.0, 0.5, 1.0, 0.5]
guiro_volumes = [1.0, 1.0]
cymbal_volumes = [1.0]

metronome_speed = 60

In [105]:
save_as_wav(mix_sounds(shaker_timestamps, shaker_volumes, shaker_bytes, metronome_speed), "mixed_sounds_volume.wav")

In [106]:
mixed_sounds_bytes = wav_to_uint8_list("mixed_sounds_volume.wav")
mixed_guiro_shaker = mix_sounds(guiro_timestamps, guiro_volumes, guiro_bytes, metronome_speed, mixed_sounds_bytes)

In [107]:
save_as_wav(mixed_guiro_shaker, "mixed_guiro_shaker_volume.wav")

In [108]:
save_as_wav(mix_sounds(cymbal_timestamps, cymbal_volumes, cymbal_bytes, metronome_speed, wav_to_uint8_list("mixed_guiro_shaker_volume.wav")), "mixed_guiro_shaker_cymbal_volume.wav")

In [114]:
import random
import os

def generate_percussion_repetition(speed=120):
    beat = speed / 60 * 1000
    half_beat = speed / 60 * 500

    percussion_dict = {}

    for instrument in ["shaker", "tambourine", "guiro", "cabasa", "cymbal", "hi-hat-open", "hi-hat-closed", "medium-tom", "low-tom", "kick-drum", "snare-drum"]:
        # Generate timestamps
        timestamps = [i for i in range(0, 8000, round(random.uniform(beat, beat * 2)))]
        percussion_dict[instrument] = {'timestamps': timestamps}

        # Generate volumes in the range of 0.1 to 2.0
        volumes = [round(random.uniform(0.1, 2.0), 2) for _ in range(len(timestamps))]
        percussion_dict[instrument]['volumes'] = volumes

        # Read bytes from file and add to the dictionary
        file_path = f"{instrument}.bin"
        if os.path.exists(file_path):
            bytes_data = read_uint8_list_from_file(file_path)
            percussion_dict[instrument]['bytes'] = bytes_data

    return percussion_dict

# Example usage with 120 BPM metronome speed
metronome_speed = 120
percussion_repetition = generate_percussion_repetition(metronome_speed)

# Print the percussion repetition information for each instrument
for instrument, data in percussion_repetition.items():
    print(f"{instrument.capitalize()} Information:")
    print(f"Timestamps: {data['timestamps']}")
    print(f"Volumes: {data['volumes']}")
    if 'bytes' in data:
        print("bytes")
    print()


Shaker Information:
Timestamps: [0, 3291, 6582]
Volumes: [1.26, 1.88, 0.98]
bytes

Tambourine Information:
Timestamps: [0, 2479, 4958, 7437]
Volumes: [0.12, 1.02, 0.3, 1.1]
bytes

Guiro Information:
Timestamps: [0, 2423, 4846, 7269]
Volumes: [0.51, 1.78, 1.3, 0.24]
bytes

Cabasa Information:
Timestamps: [0, 2408, 4816, 7224]
Volumes: [0.42, 1.36, 1.98, 0.45]
bytes

Cymbal Information:
Timestamps: [0, 3834, 7668]
Volumes: [0.64, 1.01, 0.24]
bytes

Hi-hat-open Information:
Timestamps: [0, 2584, 5168, 7752]
Volumes: [1.28, 0.35, 1.96, 0.84]
bytes

Hi-hat-closed Information:
Timestamps: [0, 2090, 4180, 6270]
Volumes: [0.25, 0.1, 1.92, 1.32]
bytes

Medium-tom Information:
Timestamps: [0, 3390, 6780]
Volumes: [0.36, 1.98, 0.24]
bytes

Low-tom Information:
Timestamps: [0, 2687, 5374]
Volumes: [0.67, 0.18, 1.34]
bytes

Kick-drum Information:
Timestamps: [0, 3918, 7836]
Volumes: [0.98, 0.69, 1.79]
bytes

Snare-drum Information:
Timestamps: [0, 3564, 7128]
Volumes: [1.05, 0.22, 1.95]
bytes



In [115]:
mixed_sounds_bytes = np.empty(0, dtype=np.int32)

for instrument, data in percussion_repetition.items():
    print(instrument)
    if instrument == "shaker":
        mixed_sounds_bytes = mix_sounds(data['timestamps'], data['volumes'], data['bytes'], 120)
    else:
        mixed_sounds_bytes = mix_sounds(data['timestamps'], data['volumes'], data['bytes'], 120, mixed_sounds_bytes)

shaker
tambourine
guiro
cabasa
cymbal
hi-hat-open
hi-hat-closed
medium-tom
low-tom
kick-drum
snare-drum


  scaled_sound_segment = (sound_data * volume_factor).astype(np.int32)
  scaled_sound_segment = (sound_data * volume_factor).astype(np.int32)


In [116]:
save_as_wav(mixed_sounds_bytes, "random_repetition.wav")