In [1]:
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Downloading sentence_transformers-4.1.0-py3-none-any.whl (345 kB)
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-4.1.0


In [13]:
import os
import shutil
from pathlib import Path
from sentence_transformers import SentenceTransformer, util

# Load a small, efficient sentence transformer
model = SentenceTransformer('all-MiniLM-L6-v2')

# Define categories and their keywords
categories = {
    "Kicks": ["kick"],
    "Snares": ["snare"],
    "Hats": ["hat", "hihat", "hi-hat"],
    "Claps": ["clap"],
    "Percussion": ["perc", "percussion"],
    "Cymbals": ["cymbal", "ride", "crash"],
    "Drum Loops": ["drum loop", "full drum loop"],
    "Top Loops": ["top loop"],
    "Hat Loops": ["hat loop", "hihat loop", "hi-hat loop"],
    "Kick Loops": ["kick loop"],
    "Percussion Loops": ["perc loop", "percussion loop"],
    "Guitar One-shots": ["guitar"],
    "Piano One-shots": ["piano"],
    "Keys/Synth One-shots": ["keys", "synth"],
    "Bass One-shots": ["bass"],
    "Vocal Chops": ["vocal"],
    "Guitar Loops": ["guitar loop"],
    "Piano Loops": ["piano loop"],
    "Synth Loops": ["synth loop", "keys loop"],
    "Bass Loops": ["bass loop"],
    "Vocal Loops": ["vocal loop"],
    "FX": ["fx", "impact", "riser", "sweep", "drop"],
    "Ambience/Textures": ["ambience", "texture", "vinyl", "noise"]
}

# Precompute category embeddings for better matching
category_embeddings = {cat: model.encode(cat) for cat in categories.keys()}

def classify_sample(name):
    name_lower = name.lower()

    # Priority 1: Keyword matching with loop differentiation
    if "loop" in name_lower:
        for category, keywords in categories.items():
            if "loop" in category.lower():
                if any(keyword in name_lower for keyword in keywords):
                    return category
        return "Melodic Loops"  # Default loop category if none matched

    # Priority 2: One-shot/instrument matching
    for category, keywords in categories.items():
        if "loop" not in category.lower():
            if any(keyword in name_lower for keyword in keywords):
                return category

    # Priority 3: Use embedding similarity
    name_embedding = model.encode(name)
    scores = {cat: util.cos_sim(name_embedding, emb).item() for cat, emb in category_embeddings.items()}
    best_match = max(scores, key=scores.get)

    # Threshold to avoid bad matches
    if scores[best_match] > 0.4:
        return best_match
    else:
        return "Uncategorized"

def sort_samples(input_folder, output_folder):
    input_folder = Path(input_folder)
    output_folder = Path(output_folder)

    for root, dirs, files in os.walk(input_folder):
        
        for file in files:
            if file.lower().endswith(('.wav', '.mp3', '.flac', '.aiff', '.ogg')):
                file_path = Path(root) / file
                category = classify_sample(file)
                target_dir = output_folder / category
                target_dir.mkdir(parents=True, exist_ok=True)

                shutil.copy(file_path, target_dir / file)
                print(f"Copied {file} to {target_dir}")

input_samples_folder = Path("~/Splice/sounds/packs").expanduser()
output_sorted_folder = "."

sort_samples(input_samples_folder, output_sorted_folder)


Copied SPINNIN_drum_top_loop_running_128.wav to Melodic Loops
Copied OS_VV3_hihat_record.wav to Hats
Copied OS_VV3_open_hihat_burn.wav to Hats
Copied OS_VV3_snare_pocket.wav to Snares
Copied OS_VV3_rim_smoke.wav to Uncategorized
Copied OS_VV3_kick_kyoto.wav to Kicks
Copied OS_VV3_70_drum_loop_smokin.wav to Melodic Loops
Copied OS_VV3_70_vocal_cuts_sunday_Dm.wav to Vocal Chops
Copied OS_VV3_70_drum_fill_smokin.wav to Drum Loops
Copied CDS_DEAD_SNARE_ONE_SHOT_SMACK.wav to Snares
Copied hk_crash_ricebowl.wav to Cymbals
Copied SNS_SS_90_drum_loop_misfit.wav to Melodic Loops
Copied bb2_90_break_loop_broken.wav to Melodic Loops
Copied SO_ST_100_perc_loop_gold_congas.wav to Melodic Loops
Copied SO_ST_100_drum_loop_gold_roll.wav to Melodic Loops
Copied CPA_JSD_153_drum_loop_connect.wav to Melodic Loops
Copied DD_SJD_snare_one_shot_supra_sidestick_medium.wav to Snares
Copied CO_NE_80_hat_loop_virginia.wav to Melodic Loops
Copied KSHMR_Snap_07.wav to Uncategorized
Copied TL_RG_140_Drum_Loop_Clas

In [5]:
classify_sample("/Users/martinaviles/Splice/sounds/packs/Modern Folk/DrumDrops_Modern_Folk/One_Shots/Crash/DD_Folk_Crash_One_Shot_medium.wav")

'Cymbals'