In [1]:
# -*- coding: utf-8 -*-

import csv
import os
import subprocess

import librosa
import numpy as np
from scipy.io import wavfile

In [2]:
# Root directory of the data set; countWavefile() appends the person ID to it
# to build one sub-directory per person.
dirpath = "../data/LearningData/magiarecord/"
# CSV with the split points (minutes, seconds, person ID) read by loadDivideLine().
csvpath = "../data/LearningData/magiarecord/csvdata/main_1-3.csv"
# Source recording that divideWavefile() cuts into clips.
wavpath = "../data/LearningData/magiarecord/main/main_1-3.wav"

In [3]:
def loadDivideLine(filepath):
    """Load the split points of a recording from a CSV file.

    Every non-blank CSV row must start with three numeric fields:
    ``minutes, seconds, person ID``.

    Parameters
    ----------
    filepath : str
        Path of the CSV file.

    Returns
    -------
    divide_sec : numpy.ndarray of float64
        Split times in seconds (``60 * minutes + seconds``), one per row.
    peoplelist : numpy.ndarray of int32
        Person ID of each row.

    Raises
    ------
    FileNotFoundError
        If ``filepath`` does not exist.
    """
    if not os.path.exists(filepath):
        # Fail here with a clear message instead of warning and then
        # crashing inside open() a few lines later.
        raise FileNotFoundError('divide_time data is nothing: ' + str(filepath))

    # newline='' is the mode the csv module asks for.
    with open(filepath, 'r', newline='') as f:
        # csv.reader yields [] for blank lines; keeping them would make the
        # row list ragged and break the float conversion below.
        rows = [row for row in csv.reader(f) if row]

    if not rows:
        # Empty file: nothing to split.
        return np.array([], dtype='float64'), np.array([], dtype='int32')

    divideline = np.array(rows, dtype='float64')

    divide_sec = 60 * divideline[:, 0] + divideline[:, 1]
    peoplelist = np.array(divideline[:, 2], dtype='int32')

    # Split times must not go backwards. With a single row there is no
    # difference to inspect, so guard against the empty case.
    diff = np.diff(divide_sec)
    if diff.size and diff.min() < 0:
        print('divide_time value is wrong')
        print('o・∇・o')

    return divide_sec, peoplelist

#divide_sec,peoplelist = loadDivideLine(csvpath)

In [4]:
def countWavefile(dirpath,peoplelist):
    """Count the entries already stored for each person, creating directories as needed.

    Parameters
    ----------
    dirpath : str
        Prefix the person ID is appended to (``dirpath + str(id)``), so it
        should end with a path separator.
    peoplelist : iterable of int
        Person IDs; duplicates are collapsed and ID 0 is skipped.

    Returns
    -------
    number_of_data : dict[int, int]
        Number of directory entries currently in each person's directory.
    directory_path : dict[int, str]
        Directory of each person, with a trailing ``'/'``.
    """
    number_of_data = {}
    directory_path = {}
    for people in set(peoplelist):
        people = int(people)
        if people == 0:
            continue
        person_dir = dirpath + str(people)
        # os.makedirs instead of spawning `mkdir`: a command string passed to
        # subprocess.call without shell=True is looked up as a single
        # executable name and fails on POSIX.
        os.makedirs(person_dir, exist_ok=True)
        number_of_data[people] = len(os.listdir(person_dir))
        directory_path[people] = person_dir + '/'

    return number_of_data, directory_path

#countWavefile(dirpath,peoplelist)

In [5]:
def loadWavefile(filename,rate=44100,mono=True):
    """Load an audio file through ``librosa.load``.

    Parameters
    ----------
    filename : str
        Path of the audio file.
    rate : int, optional
        Passed to ``librosa.load`` as ``sr``.
    mono : bool, optional
        Passed to ``librosa.load`` as ``mono``.

    Returns
    -------
    y, sr
        The pair returned by ``librosa.load``.

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        # Stop before handing a missing path to the decoder, which would
        # otherwise fail with a less direct error after the warning.
        raise FileNotFoundError('wav data is nothing: ' + str(filename))
    y, sr = librosa.load(filename, sr=rate, mono=mono)
    return y, sr

def outputWavefile(filepath,y,rate=44100):
    """Write audio samples to a wav file.

    ``librosa.output.write_wav`` no longer exists in librosa 0.8.0 and later,
    so the samples are written with ``scipy.io.wavfile.write`` instead.

    Parameters
    ----------
    filepath : str
        Destination path. An existing file is overwritten after a warning
        is printed.
    y : array-like
        Audio samples; the array's dtype decides the sample format written.
    rate : int, optional
        Sample rate stored in the file header.
    """
    if os.path.exists(filepath):
        print('wav data is already there')
        print('o・∇・o')

    samples = np.asarray(y)
    # librosa keeps stereo as (2, n) while scipy expects (n, channels).
    if samples.ndim > 1 and samples.shape[0] == 2:
        samples = samples.T
    wavfile.write(filepath, rate, samples)

In [6]:
def divideWavefile(csvpath,wavpath,dirpath):
    """Cut one recording into clips and store each clip under its person's directory.

    Every CSV row closes a segment: the audio between the previous split time
    (0.0 for the first row) and the row's own time is written to the
    directory of the row's person ID. Rows with ID 0 write nothing and only
    move the start of the next segment forward.

    Parameters
    ----------
    csvpath : str
        CSV of split points, read with ``loadDivideLine``.
    wavpath : str
        Recording to split, read with ``loadWavefile``.
    dirpath : str
        Root directory handed to ``countWavefile``.
    """
    # Load the split times and the person ID of each segment.
    divide_sec, peoplelist = loadDivideLine(csvpath)

    # Count the clips already present (directories are created if missing);
    # the count is used as the file name of the next clip.
    number_of_data, directory_path = countWavefile(dirpath, peoplelist)

    # Load the recording.
    print('Loading wav data')
    wave, fs = loadWavefile(wavpath)

    # Split the recording.
    last_time = 0.0
    for time, people in zip(divide_sec, peoplelist):
        if people != 0:
            s = int(last_time * fs)
            e = int(time * fs)
            split_data_path = directory_path[people] + str(number_of_data[people]) + '.wav'
            print(last_time, people, split_data_path)
            # Forward the rate the audio was loaded at, so the clip header
            # always matches the samples instead of relying on two
            # independent defaults happening to agree.
            outputWavefile(split_data_path, wave[s:e], rate=fs)
            number_of_data[people] += 1
        last_time = time

In [7]:
#divideWavefile(csvpath,wavpath,dirpath)

Loading wav data
4.0 1 ../data/LearningData/magiarecord/1/101.wav
6.5 12 ../data/LearningData/magiarecord/12/39.wav
9.0 2 ../data/LearningData/magiarecord/2/26.wav
18.0 1 ../data/LearningData/magiarecord/1/102.wav
19.5 2 ../data/LearningData/magiarecord/2/27.wav
30.5 1 ../data/LearningData/magiarecord/1/103.wav
32.0 12 ../data/LearningData/magiarecord/12/40.wav
39.1 2 ../data/LearningData/magiarecord/2/28.wav
47.0 1 ../data/LearningData/magiarecord/1/104.wav
64.0 2 ../data/LearningData/magiarecord/2/29.wav
66.7 12 ../data/LearningData/magiarecord/12/41.wav
74.8 12 ../data/LearningData/magiarecord/12/42.wav
78.7 1 ../data/LearningData/magiarecord/1/105.wav
82.0 12 ../data/LearningData/magiarecord/12/43.wav
93.8 2 ../data/LearningData/magiarecord/2/30.wav
96.0 12 ../data/LearningData/magiarecord/12/44.wav
101.4 1 ../data/LearningData/magiarecord/1/106.wav
104.6 12 ../data/LearningData/magiarecord/12/45.wav
109.0 1 ../data/LearningData/magiarecord/1/107.wav
110.8 12 ../data/LearningData/m

1049.1 12 ../data/LearningData/magiarecord/12/83.wav
1051.2 14 ../data/LearningData/magiarecord/14/31.wav
1053.0 12 ../data/LearningData/magiarecord/12/84.wav
1061.8 1 ../data/LearningData/magiarecord/1/172.wav
1064.8 14 ../data/LearningData/magiarecord/14/32.wav
1067.5 1 ../data/LearningData/magiarecord/1/173.wav
1069.0 14 ../data/LearningData/magiarecord/14/33.wav
1073.0 1 ../data/LearningData/magiarecord/1/174.wav
1074.5 14 ../data/LearningData/magiarecord/14/34.wav
1084.5 1 ../data/LearningData/magiarecord/1/175.wav
1088.4 14 ../data/LearningData/magiarecord/14/35.wav
1094.7 1 ../data/LearningData/magiarecord/1/176.wav
1100.2 14 ../data/LearningData/magiarecord/14/36.wav
1104.5 1 ../data/LearningData/magiarecord/1/177.wav
1110.9 1 ../data/LearningData/magiarecord/1/178.wav
1113.0 14 ../data/LearningData/magiarecord/14/37.wav
1116.3 41 ../data/LearningData/magiarecord/41/14.wav
1118.3 12 ../data/LearningData/magiarecord/12/85.wav
1121.0 1 ../data/LearningData/magiarecord/1/179.wav
1

In [8]:
# Report how much data each character directory holds on disk.

path = "../data/LearningData/magiarecord/"

allsize = 0      # running total, in MB
sizedata = []    # not filled by this cell
for a in range(1,101):
    sizesum = 0
    filepath = path + str(a)
    if os.path.exists(filepath):
        filelist = os.listdir(path+str(a)+'/')
        sizesum = sum(os.path.getsize(filepath+'/'+f) for f in filelist)
        # Directories holding less than 1000 bytes are left out of the report.
        if sizesum >= 1000:
            sizesum = sizesum / 1024 / 1024   # bytes -> MB
            if sizesum <= 1024:
                print(a,round(sizesum,3),'MB')
            else:
                print(a,round(sizesum/1024,3),'GB')
            allsize += sizesum
print('\nall',round(allsize/1024,3),'GB')

1 334.162 MB
2 150.041 MB
3 88.551 MB
4 84.414 MB
5 67.074 MB
11 80.439 MB
12 123.911 MB
13 87.449 MB
14 93.53 MB
21 123.116 MB
22 119.165 MB
23 67.46 MB
31 5.855 MB
36 44.137 MB
37 42.305 MB
41 4.661 MB
42 4.593 MB

all 1.485 GB
