-
Notifications
You must be signed in to change notification settings - Fork 0
/
modify_org_csv.py
executable file
·93 lines (79 loc) · 2.17 KB
/
modify_org_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import numpy as np
import pandas as pd
import os
import sys
import random
import glob
import math
import json
from tqdm import tqdm
import argparse
import yaml
import librosa
import itertools
from utils import \
FSD50K, ESC50, sv_csv_dir
# Create the output root plus its 'bg/' and 'ev/' sub-directories
# (no error if they already exist).
for sub_dir in ('', 'bg/', 'ev/'):
    os.makedirs(sv_csv_dir + sub_dir, exist_ok=True)
## make bg csv
# Background labels and dataset splits; their cartesian product selects the
# per-label, per-split CSV files to read.
bg_lbls = [
    'rain',
    'car_passing',
]
splits = [
    'dev',
    'eval',
]
def add_ext(fn):
    """Return ``fn`` converted to ``str`` with a ``.wav`` suffix appended."""
    return f'{fn!s}.wav'
# Concatenate every available (label, split) CSV into a single table
dfs = []
for lbl, split in itertools.product(bg_lbls, splits):
    csv_path = f'org_audio_csv/background/fsd50k/{lbl}/{split}.csv'
    try:
        part = pd.read_csv(csv_path)
    except FileNotFoundError:
        # A missing CSV for a (label, split) pair is tolerated and reported.
        # Any other failure (parse error, Ctrl-C, ...) is no longer swallowed.
        print(f'No such csv file of condition: {lbl}_{split}')
        continue
    # Remember which split each row came from; it is needed to build paths.
    part['split'] = split
    dfs.append(part)
if not dfs:
    # pd.concat([]) would raise a ValueError anyway; fail with a clearer message.
    raise ValueError(
        'No background csv files found under org_audio_csv/background/fsd50k/'
    )
df = pd.concat(dfs).reset_index(drop=True)
# Turn the file identifiers into actual file names
df['fname'] = df['fname'].apply(add_ext)
# Measure the number of samples in each background clip
lens = []
for split_name, file_name in zip(df['split'], df['fname']):
    wav_path = FSD50K + f'/FSD50K.{split_name}_audio/{file_name}'
    # sr=None keeps the file's native sampling rate, so the length is the
    # sample count as stored on disk.
    waveform, _ = librosa.load(wav_path, sr=None)
    lens.append(len(waveform))
df['length'] = lens
df.to_csv(sv_csv_dir + 'bg/fsd50k.csv')
## make ev csv
# Event labels; one CSV per label is read from org_audio_csv/event/esc50/.
ev_lbls = [
    'thunder',
    'dog',
    'footsteps',
    'chirping_birds',
    'car_horn',
    'church_bell',
]
def normalize_amp(data):
    """Scale ``data`` so that its largest absolute value becomes 1.

    Args:
        data: array-like of samples.

    Returns:
        ``data`` divided by its peak absolute value. An all-zero input is
        returned as zeros instead of NaNs from a 0/0 division.

    Raises:
        ValueError: if ``data`` is empty (there is no maximum to take).
    """
    peak = np.abs(data).max()
    if peak == 0:
        # Silent signal: divide by 1 instead of 0. ``peak + 1`` keeps the
        # scalar's dtype so the result dtype matches the normal path.
        peak = peak + 1
    return data / peak
def detect_nonactive_section(data, th=0.1):
    """Return the first and last index where the smoothed amplitude is active.

    The rectified signal is smoothed with a 512-point Hann window, passed
    through ``normalize_amp``, and compared against ``th`` times the maximum
    of the resulting envelope. Despite the function's name, the returned
    indices delimit the *active* region.

    Args:
        data: 1-D sequence of audio samples (``np.convolve`` needs 1-D input).
        th: fraction of the envelope maximum a sample must exceed to be active.

    Returns:
        Tuple ``(first, last)`` of indices into the envelope. With
        ``mode='same'`` the envelope has length ``max(len(data), 512)``.

    Raises:
        IndexError: if no sample of the envelope exceeds the threshold.
    """
    window = np.hanning(512)
    envelope = normalize_amp(np.convolve(np.abs(data), window, mode='same'))
    threshold = np.max(envelope) * th
    above = np.flatnonzero(envelope > threshold)
    return above[0], above[-1]
# Drop the noisy clips, then concatenate all event labels
dfs = []
for lbl in ev_lbls:
    label_df = pd.read_csv(f'org_audio_csv/event/esc50/{lbl}.csv')
    # Keep only rows flagged clean; the annotation columns are not needed
    # once the filtering is done.
    is_clean = label_df['clean'] == True
    dfs.append(label_df.loc[is_clean].drop(columns=['clean', 'noise desc']))
df = pd.concat(dfs).reset_index(drop=True)
# For every event clip, find its active span and assign it to a split
sts = []
eds = []
split = []
# itertuples() has no length, so pass total explicitly; otherwise tqdm can
# only show a running count with no percentage or ETA.
for row in tqdm(df.itertuples(), total=len(df)):
    fn = ESC50 + f'/{row.fname}'
    # sr=None keeps the native sampling rate so indices refer to file samples.
    s, fs = librosa.load(fn, sr=None)
    st, ed = detect_nonactive_section(s, th=0.1)
    sts.append(st)
    eds.append(ed)
    # File names starting with '5' go to 'eval', everything else to 'dev'.
    # NOTE(review): presumably the leading digit is the ESC-50 fold - confirm.
    split.append('eval' if row.fname[0] == '5' else 'dev')
df['split'] = split
df['st'] = sts
df['ed'] = eds
df.to_csv(sv_csv_dir + 'ev/esc50.csv')