In [1]:
import os
import pydub
import matplotlib.pyplot as plt
import pydub.silence
from pydub.utils import make_chunks
from spleeter.separator import Separator

# Source separation model: the 2stems model separates vocals and accompaniment.
separator = Separator('spleeter:2stems')

# Working directories:
#   input_path        - raw recordings that are read (and where merged files are written)
#   output_path       - where Spleeter writes its separated stems
#   final_output_path - where the final vocal clips are exported
input_path = "raw_data"
output_path = "spleeter_output"
final_output_path = "final_data"

# Create the two output directories up front; existing directories are left alone.
for _directory in (output_path, final_output_path):
    os.makedirs(_directory, exist_ok=True)

# Collect and merge data
#
# Files named "<name>_<part>.<ext>" are treated as parts of one recording: all
# parts sharing exactly the same <name> are concatenated in part order and
# exported to "<input_path>/<name>.wav". Files without an underscore are used
# as they are. The paths to process end up in `all_data`.


def _part_sort_key(part_file_name):
    """Sort key for part files: numeric suffixes in numeric order, everything else last.

    "x_2.mp3" sorts before "x_10.mp3"; a non-numeric suffix ("x_live.mp3")
    sorts after all numeric ones, ordered by file name.
    """
    suffix = os.path.splitext(part_file_name)[0].partition("_")[2]
    if suffix.isdigit():
        return (0, int(suffix), part_file_name)
    return (1, 0, part_file_name)


# os.listdir returns entries in arbitrary order, so sort for reproducible runs.
# Sub-directories and hidden entries (e.g. ".ipynb_checkpoints") are not
# recordings and are skipped.
files = sorted(
    entry for entry in os.listdir(input_path)
    if not entry.startswith(".") and os.path.isfile(os.path.join(input_path, entry))
)
merged_file = []  # underscore-named files for which no sibling part was found
all_data = []
parts_by_name = {}  # exact "<name>" -> list of its part file names

for file in files:
    if "_" in file:
        first_part_name = file.split("_")[0]  # Split first part of file name
        # Group on the exact name, so "ebi_1" is never paired with "ebibi_2".
        parts_by_name.setdefault(first_part_name, []).append(file)
    else:
        # Add files without an underscore directly to the all_data list
        all_data.append(os.path.join(input_path, file))

for first_part_name, part_files in parts_by_name.items():
    if len(part_files) == 1:
        # Nothing to merge with: process the file on its own instead of
        # silently dropping it.
        lone_file = part_files[0]
        merged_file.append(lone_file)
        all_data.append(os.path.join(input_path, lone_file))
        print(f"No matching part found for {lone_file}; processing it on its own")
        continue

    # Concatenate every part (not just the first two) in part order.
    part_files.sort(key=_part_sort_key)
    result = pydub.AudioSegment.from_file(os.path.join(input_path, part_files[0]))
    for part_file in part_files[1:]:
        result += pydub.AudioSegment.from_file(os.path.join(input_path, part_file))

    # Export the merged file
    merged_file_path = os.path.join(input_path, f"{first_part_name}.wav")
    result.export(merged_file_path, format="wav")

    # On a re-run the previously exported "<name>.wav" is already listed in
    # input_path and therefore already queued; do not process it twice.
    if merged_file_path not in all_data:
        all_data.append(merged_file_path)
    print(f"Merged {' and '.join(part_files)} into {first_part_name}.wav")

# Process all collected data
#
# For each collected file: separate the vocals with Spleeter, convert them to
# 16-bit / 48 kHz / mono, remove long silences, and export the remaining audio
# as 1-second clips to "<final_output_path>/<file name>/voice_<i>.wav".
for data in all_data:
    # Derive the name once with splitext so the Spleeter folder and the final
    # folder always agree, even when the file name contains extra dots
    # (split(".")[0] would truncate "a.b.wav" to "a").
    file_name = os.path.splitext(os.path.basename(data))[0]

    # Use Spleeter to split vocals from the accompaniment
    separator.separate_to_file(data, output_path)
    vocal_file_path = os.path.join(output_path, file_name, 'vocals.wav')

    # Load the separated vocal file and normalise its format
    audio = pydub.AudioSegment.from_file(vocal_file_path)
    audio = audio.set_sample_width(2)    # 2 bytes per sample
    audio = audio.set_frame_rate(48000)
    audio = audio.set_channels(1)

    export_dir = os.path.join(final_output_path, file_name)
    os.makedirs(export_dir, exist_ok=True)

    # Optional: Uncomment if you want to visualize the audio waveform
    # audio_array = audio.get_array_of_samples()
    # plt.figure(figsize=(20, 2))
    # plt.plot(audio_array)
    # plt.show()

    # Split the audio wherever there is at least 3000 ms below -40 dBFS.
    chunks = pydub.silence.split_on_silence(audio, min_silence_len=3000, silence_thresh=-40)

    # Join the non-silent chunks back together; if nothing was detected, fall
    # back to the full vocal track so the file still yields clips.
    result = sum(chunks) if chunks else audio

    parts = make_chunks(result, 1000)
    for i, part in enumerate(parts):
        # make_chunks leaves a shorter remainder at the end; keep only full 1 s clips.
        if len(part) >= 1000:
            part.export(os.path.join(export_dir, f"voice_{i}.wav"), format="wav")
            print(f"Processed and exported voice_{i}.wav")

print("Processing completed! Extracted vocals are saved in the final_data directory.")


Merged ebi_1.mp3 and ebi_2.mp3 into ebi.wav
Merged farzadfarzin_1.mp3 and farzadfarzin_2.mp3 into farzadfarzin.wav
Merged gogosh_1.mp3 and gogosh_2.mp3 into gogosh.wav
Merged marjanfarsad_1.mp3 and marjanfarsad_2.mp3 into marjanfarsad.wav
Merged mohsenchavoshi_1.mp3 and mohsenchavoshi_2.mp3 into mohsenchavoshi.wav
Merged mohsenyeganeh_1.mp3 and mohsenyeganeh_2.mp3 into mohsenyeganeh.wav
Merged shadmehraghili_1.mp3 and shadmehraghili_2.mp3 into shadmehraghili.wav
INFO:tensorflow:Using config: {'_model_dir': 'pretrained_models\\2stems', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': gpu_options {
  per_process_gpu_memory_fraction: 0.7
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': Non

: 