In [None]:
import torchaudio
import torch
import os

# Split the start of a recording into fixed-length WAV chunks.
#
# Loads one audio file, walks through it in CHUNK_SECONDS-long slices, writes
# each slice to `output_dir` as chunk_<n>.wav, and stops once `max_duration`
# seconds have been written or the recording ends, whichever comes first.

# Load the audio file
audio_file = "../data/inputs/original-demo-speech.wav"
waveform, sample_rate = torchaudio.load(audio_file)
num_samples = waveform.shape[1]  # dim 1 is the axis this cell slices along (samples)

# Print some info about the loaded audio
print(f"Sample rate: {sample_rate} Hz")
print(f"Audio duration: {num_samples / sample_rate:.2f} seconds")
print(f"Audio shape: {waveform.shape}")

# Chunk length: defined once in seconds, then converted to a sample count
CHUNK_SECONDS = 0.5
chunk_size = int(CHUNK_SECONDS * sample_rate)
if chunk_size <= 0:
    # A zero-sample chunk would make the chunk-number division below raise
    # ZeroDivisionError; fail early with a message that names the cause.
    raise ValueError(
        f"Chunk length of {CHUNK_SECONDS} s is shorter than one sample at {sample_rate} Hz"
    )

# Create output directory if it doesn't exist
output_dir = "../data/outputs/split_audio"
os.makedirs(output_dir, exist_ok=True)

# Initialize position and total processed
position = 0
total_processed = 0
max_duration = 2.0  # Maximum duration to process in seconds
max_samples = int(max_duration * sample_rate)

# Hard upper bound for slicing: neither past the end of the recording nor past
# the processing budget, so the final chunk is truncated instead of
# overshooting `max_duration` when the budget is not a multiple of the chunk length.
stop_at = min(num_samples, max_samples)

# Process audio in chunks using a while loop
print(f"\nProcessing audio in {CHUNK_SECONDS}-second chunks:")
while position < stop_at:
    # Extract the current chunk (the last one may be shorter than chunk_size)
    end_pos = min(position + chunk_size, stop_at)
    chunk = waveform[:, position:end_pos]

    # Calculate chunk duration
    chunk_duration = chunk.shape[1] / sample_rate

    # 1-based chunk index; every chunk starts on a multiple of chunk_size
    chunk_number = position // chunk_size + 1
    print(f"Chunk {chunk_number}: {chunk_duration:.2f} seconds, samples {position} to {end_pos-1}")

    # Save the chunk to the output directory
    chunk_filename = os.path.join(output_dir, f"chunk_{chunk_number}.wav")
    torchaudio.save(chunk_filename, chunk, sample_rate)
    print(f"  Saved as {chunk_filename}")

    # Update position for next iteration
    position = end_pos
    total_processed += chunk.shape[1]

    # Report when the processing budget (max_duration) is what ended the loop,
    # as opposed to simply running out of audio.
    if total_processed >= max_samples:
        print(f"Reached {max_duration} seconds of processed audio. Stopping.")
        break

print(f"\nFinished processing after {total_processed / sample_rate:.2f} seconds of audio")
print(f"Split audio files saved to: {output_dir}")

Sample rate: 48000 Hz
Audio duration: 145.75 seconds
Audio shape: torch.Size([1, 6996000])

Processing audio in 0.5-second chunks:
Chunk 1: 0.50 seconds, samples 0 to 23999
  Saved as chunk_1.wav
Chunk 2: 0.50 seconds, samples 24000 to 47999
  Saved as chunk_2.wav
Chunk 3: 0.50 seconds, samples 48000 to 71999
  Saved as chunk_3.wav
Chunk 4: 0.50 seconds, samples 72000 to 95999
  Saved as chunk_4.wav
Reached 2.0 seconds of processed audio. Stopping.

Finished processing after 2.00 seconds of audio


In [4]:
# Sanity check: show the loaded tensor's dimensions next to its sampling rate.
(waveform.shape, sample_rate)

(torch.Size([1, 6996000]), 48000)