In [1]:
from google.colab import drive
drive.mount("/content/drive"),

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


(None,)

In [2]:
from pathlib import Path
import pandas as pd
import zipfile

SYNTH_ZIP = Path("/content/drive/MyDrive/deep_learning/sentetik-dataset.zip")
SYNTH_DIR = Path("/content/drive/MyDrive/deep_learning/sentetik-dataset")

OPENMIC_DIR = Path("/content/drive/MyDrive/openmic-2018-2")

print("SYNTH_ZIP exists:", SYNTH_ZIP.exists())
print("OPENMIC_DIR exists:", OPENMIC_DIR.exists())
print("SYNTH_DIR exists:", SYNTH_DIR.exists())


SYNTH_ZIP exists: True
OPENMIC_DIR exists: True
SYNTH_DIR exists: True


In [25]:
# Only extract if not already extracted
if not (SYNTH_DIR / "labels.csv").exists():
    SYNTH_DIR.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(SYNTH_ZIP, "r") as z:
        z.extractall(SYNTH_DIR)
    print("Extracted synthetic zip to:", SYNTH_DIR)
else:
    print("Synthetic already extracted:", SYNTH_DIR)

print("Synthetic dir contents:", [p.name for p in SYNTH_DIR.iterdir()])


Extracted synthetic zip to: /content/drive/MyDrive/deep_learning
Synthetic dir contents: ['sentetik-dataset.zip', 'sentetik-dataset', 'combined_openmic_and_synth.csv', 'combined_openmic_and_synth_clean.csv']


In [3]:
matches = list(SYNTH_DIR.rglob("labels.csv"))
print("Found labels.csv files:")
for m in matches:
    print(m)


Found labels.csv files:
/content/drive/MyDrive/deep_learning/sentetik-dataset/labels.csv


In [4]:
#Load synthetic labels + add audio paths
OPENMIC_TAGS = [
    "accordion","banjo","bass","cello","clarinet","cymbals","drums","flute","guitar",
    "mallet_percussion","mandolin","organ","piano","saxophone","synthesizer",
    "trombone","trumpet","ukulele","violin","voice"
]

synth_df = pd.read_csv(SYNTH_DIR / "labels.csv")
synth_df["source"] = "synthetic"
synth_df["path"] = synth_df["filename"].apply(lambda x: str((SYNTH_DIR / "audio" / x).resolve()))

print("Synthetic rows:", len(synth_df))
synth_df.head(2)


Synthetic rows: 2200


Unnamed: 0,filename,polyphony,chosen_families,y_accordion,m_accordion,y_banjo,m_banjo,y_bass,m_bass,y_cello,...,y_trumpet,m_trumpet,y_ukulele,m_ukulele,y_violin,m_violin,y_voice,m_voice,source,path
0,mix_00000.wav,1,flute,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,synthetic,/content/drive/MyDrive/deep_learning/sentetik-...
1,mix_00001.wav,1,keyboard,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,synthetic,/content/drive/MyDrive/deep_learning/sentetik-...


In [5]:
from pathlib import Path
import pandas as pd
import numpy as np

OPENMIC_TAGS = [
    "accordion","banjo","bass","cello","clarinet","cymbals","drums","flute","guitar",
    "mallet_percussion","mandolin","organ","piano","saxophone","synthesizer",
    "trombone","trumpet","ukulele","violin","voice"
]

OPENMIC_DIR = Path("/content/drive/MyDrive/openmic-2018-2")
openmic_labels_path = OPENMIC_DIR / "openmic-2018-aggregated-labels.csv"
openmic_audio_dir = OPENMIC_DIR / "audio"

df_long = pd.read_csv(openmic_labels_path)
print("Loaded long labels:", df_long.shape)
print("Columns:", df_long.columns.tolist())

# Expect columns: sample_key, instrument, relevance, num_responses
required = {"sample_key", "instrument", "relevance"}
missing_req = required - set(df_long.columns)
if missing_req:
    raise RuntimeError(f"Expected columns missing: {missing_req}. Found: {df_long.columns.tolist()}")

# Keep only instruments we care about (OpenMIC tag set)
df_long["instrument"] = df_long["instrument"].astype(str)
df_long = df_long[df_long["instrument"].isin(OPENMIC_TAGS)].copy()

# Convert relevance -> binary label
# Common: relevance is a probability-like score in [0,1] or mean vote.
# A standard threshold is 0.5; adjust if your course specifies another threshold.
THRESH = 0.5
df_long["y"] = (df_long["relevance"] >= THRESH).astype(int)

# Pivot to wide: one row per sample_key, one column per instrument
open_df = (
    df_long.pivot_table(index="sample_key", columns="instrument", values="y", aggfunc="max", fill_value=0)
    .reset_index()
)

# Rename columns to y_<tag>
open_df = open_df.rename(columns={t: f"y_{t}" for t in OPENMIC_TAGS if t in open_df.columns})

# Ensure all y_ columns exist (fill missing tags with 0)
for t in OPENMIC_TAGS:
    col = f"y_{t}"
    if col not in open_df.columns:
        open_df[col] = 0

# Build filename/path (.ogg)
open_df["filename"] = open_df["sample_key"].astype(str) + ".ogg"
open_df["source"] = "openmic"
open_df["path"] = open_df["filename"].apply(lambda x: str((openmic_audio_dir / x).resolve()))

# Add mask=1 for all OpenMIC tags
for t in OPENMIC_TAGS:
    open_df[f"m_{t}"] = 1

# Add optional cols to match synthetic
open_df["polyphony"] = ""
open_df["chosen_families"] = ""

print("OpenMIC wide labels:", open_df.shape)
open_df.head(2)


Loaded long labels: (41534, 4)
Columns: ['sample_key', 'instrument', 'relevance', 'num_responses']
OpenMIC wide labels: (20000, 46)


instrument,sample_key,y_accordion,y_banjo,y_bass,y_cello,y_clarinet,y_cymbals,y_drums,y_flute,y_guitar,...,m_piano,m_saxophone,m_synthesizer,m_trombone,m_trumpet,m_ukulele,m_violin,m_voice,polyphony,chosen_families
0,000046_3840,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,,
1,000135_483840,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,,


5. Concatenation

In [6]:
keep_cols = ["source","path","filename","polyphony","chosen_families"] \
          + [f"y_{t}" for t in OPENMIC_TAGS] \
          + [f"m_{t}" for t in OPENMIC_TAGS]

combined_df = pd.concat([synth_df[keep_cols], open_df[keep_cols]], ignore_index=True)

out_path = Path("/content/drive/MyDrive/deep_learning/combined_openmic_and_synth.csv")
combined_df.to_csv(out_path, index=False)

print("Saved:", out_path)
print("Rows synthetic:", len(synth_df))
print("Rows openmic:", len(open_df))
print("Rows combined:", len(combined_df))


Saved: /content/drive/MyDrive/deep_learning/combined_openmic_and_synth.csv
Rows synthetic: 2200
Rows openmic: 20000
Rows combined: 22200


In [7]:
# ensure consistent types
combined_df["polyphony"] = pd.to_numeric(combined_df["polyphony"], errors="coerce").astype("Int64")
combined_df["chosen_families"] = combined_df["chosen_families"].fillna("").astype("string")

clean_path = "/content/drive/MyDrive/deep_learning/combined_openmic_and_synth_clean.csv"
combined_df.to_csv(clean_path, index=False)
print("Saved:", clean_path)


Saved: /content/drive/MyDrive/deep_learning/combined_openmic_and_synth_clean.csv


In [8]:
from pathlib import Path

missing = combined_df.sample(200, random_state=0)[~combined_df.sample(200, random_state=0)["path"].apply(lambda p: Path(p).exists())]
print("Missing audio paths in sample:", len(missing))
if len(missing) > 0:
    display(missing[["source","filename","path"]].head(100))


Missing audio paths in sample: 179


Unnamed: 0,source,filename,path
19193,openmic,132269_7680.ogg,/content/drive/MyDrive/openmic-2018-2/audio/13...
15445,openmic,106142_15360.ogg,/content/drive/MyDrive/openmic-2018-2/audio/10...
3814,openmic,013985_0.ogg,/content/drive/MyDrive/openmic-2018-2/audio/01...
5044,openmic,023922_7680.ogg,/content/drive/MyDrive/openmic-2018-2/audio/02...
8035,openmic,046647_725760.ogg,/content/drive/MyDrive/openmic-2018-2/audio/04...
...,...,...,...
14599,openmic,096552_122880.ogg,/content/drive/MyDrive/openmic-2018-2/audio/09...
20318,openmic,140827_295680.ogg,/content/drive/MyDrive/openmic-2018-2/audio/14...
14480,openmic,095263_96000.ogg,/content/drive/MyDrive/openmic-2018-2/audio/09...
7419,openmic,041684_57600.ogg,/content/drive/MyDrive/openmic-2018-2/audio/04...


Cell A — Inspect OpenMIC audio folder structure

In [9]:
from pathlib import Path

OPENMIC_DIR = Path("/content/drive/MyDrive/openmic-2018-2")
audio_dir = OPENMIC_DIR / "audio"

print("audio_dir exists:", audio_dir.exists())
print("Top-level items (first 30):")
items = sorted(list(audio_dir.iterdir()))
for p in items[:30]:
    print(" -", p.name, "(dir)" if p.is_dir() else "(file)")


audio_dir exists: True
Top-level items (first 30):
 - 000 (dir)
 - 001 (dir)
 - 002 (dir)
 - 003 (dir)
 - 004 (dir)
 - 005 (dir)
 - 006 (dir)
 - 007 (dir)
 - 008 (dir)
 - 009 (dir)
 - 010 (dir)
 - 011 (dir)
 - 012 (dir)
 - 013 (dir)
 - 014 (dir)
 - 015 (dir)
 - 016 (dir)
 - 017 (dir)
 - 018 (dir)
 - 019 (dir)
 - 020 (dir)
 - 021 (dir)
 - 022 (dir)
 - 023 (dir)
 - 024 (dir)
 - 025 (dir)
 - 026 (dir)
 - 027 (dir)
 - 028 (dir)
 - 029 (dir)


In [10]:
from pathlib import Path

audio_dir = Path("/content/drive/MyDrive/openmic-2018-2/audio")

ogg_paths = list(audio_dir.rglob("*.ogg"))
print("Found .ogg files:", len(ogg_paths))

# Build lookup: "132269_7680.ogg" -> "/content/drive/.../audio/13/132269_7680.ogg"
ogg_map = {p.name: str(p.resolve()) for p in ogg_paths}

# Quick sanity check
some = list(ogg_map.items())[:5]
some


Found .ogg files: 20000


[('001069_42240.ogg',
  '/content/drive/MyDrive/openmic-2018-2/audio/001/001069_42240.ogg'),
 ('001096_61440.ogg',
  '/content/drive/MyDrive/openmic-2018-2/audio/001/001096_61440.ogg'),
 ('001035_637440.ogg',
  '/content/drive/MyDrive/openmic-2018-2/audio/001/001035_637440.ogg'),
 ('001023_272640.ogg',
  '/content/drive/MyDrive/openmic-2018-2/audio/001/001023_272640.ogg'),
 ('001044_241920.ogg',
  '/content/drive/MyDrive/openmic-2018-2/audio/001/001044_241920.ogg')]

In [11]:
# open_df must already exist from your pivot cell
open_df["path"] = open_df["filename"].map(ogg_map)

missing = open_df["path"].isna().sum()
print("OpenMIC missing paths after mapping:", missing)

# Show a few missing filenames if any remain
if missing > 0:
    print(open_df.loc[open_df["path"].isna(), "filename"].head(20).tolist())


OpenMIC missing paths after mapping: 0


In [12]:
import pandas as pd
from pathlib import Path

# Load synthetic (adjust if your SYNTH_DIR differs)
SYNTH_DIR = Path("/content/drive/MyDrive/deep_learning/sentetik-dataset")
synth_df = pd.read_csv(SYNTH_DIR / "labels.csv")
synth_df["source"] = "synthetic"
synth_df["path"] = synth_df["filename"].apply(lambda x: str((SYNTH_DIR / "audio" / x).resolve()))

OPENMIC_TAGS = [
    "accordion","banjo","bass","cello","clarinet","cymbals","drums","flute","guitar",
    "mallet_percussion","mandolin","organ","piano","saxophone","synthesizer",
    "trombone","trumpet","ukulele","violin","voice"
]

keep_cols = ["source","path","filename","polyphony","chosen_families"] \
          + [f"y_{t}" for t in OPENMIC_TAGS] \
          + [f"m_{t}" for t in OPENMIC_TAGS]

combined_df = pd.concat([synth_df[keep_cols], open_df[keep_cols]], ignore_index=True)

out_path = Path("/content/drive/MyDrive/deep_learning/combined_openmic_and_synth.csv")
combined_df.to_csv(out_path, index=False)

print("Saved:", out_path)
print("Rows combined:", len(combined_df))


Saved: /content/drive/MyDrive/deep_learning/combined_openmic_and_synth.csv
Rows combined: 22200


In [13]:
from pathlib import Path
import pandas as pd

df = pd.read_csv("/content/drive/MyDrive/deep_learning/combined_openmic_and_synth.csv", low_memory=False)

sample = df.sample(200, random_state=0)
missing = sample[~sample["path"].apply(lambda p: Path(p).exists())]

print("Missing audio paths in sample:", len(missing))
if len(missing) > 0:
    display(missing[["source","filename","path"]].head(20))


Missing audio paths in sample: 0


# LAST CHECK

In [14]:
import pandas as pd

df = pd.read_csv("/content/drive/MyDrive/deep_learning/combined_openmic_and_synth.csv", low_memory=False)
print(df["source"].value_counts())

OPENMIC_TAGS = [
    "accordion","banjo","bass","cello","clarinet","cymbals","drums","flute","guitar",
    "mallet_percussion","mandolin","organ","piano","saxophone","synthesizer",
    "trombone","trumpet","ukulele","violin","voice"
]
y_cols = [f"y_{t}" for t in OPENMIC_TAGS]

print("\nTag prevalence (overall positives):")
print(df[y_cols].mean().sort_values(ascending=False).head(10))


source
openmic      20000
synthetic     2200
Name: count, dtype: int64

Tag prevalence (overall positives):
y_piano                0.108378
y_guitar               0.106261
y_synthesizer          0.103694
y_voice                0.100090
y_mallet_percussion    0.088874
y_flute                0.085991
y_organ                0.081937
y_bass                 0.080405
y_violin               0.052838
y_trumpet              0.051622
dtype: float64


In [16]:
!pip -q install torchcodec


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/2.1 MB[0m [31m8.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m2.0/2.1 MB[0m [31m28.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [18]:
import torchaudio
from pathlib import Path

p = Path("/content/drive/MyDrive/openmic-2018-2/audio/155/155311_453120.ogg")  # adjust to any real file
wav, sr = torchaudio.load(str(p))
print(sr, wav.shape)


44100 torch.Size([2, 441088])


In [19]:
import torchaudio
import torch
from pathlib import Path

p = Path("/content/drive/MyDrive/openmic-2018-2/audio/155/155311_453120.ogg")
wav, sr = torchaudio.load(str(p))     # [C, T] at 44100
wav = wav.mean(dim=0)                 # [T] mono

target_sr = 16000
wav = torchaudio.functional.resample(wav, sr, target_sr)

target_len = target_sr * 10
wav = wav[:target_len] if wav.numel() >= target_len else torch.nn.functional.pad(wav, (0, target_len - wav.numel()))

print("after:", target_sr, wav.shape)


after: 16000 torch.Size([160000])
