In [1]:
# -*- coding: utf-8 -*-

import os
import csv
import numpy as np
import subprocess
import librosa

In [2]:
# Root directory of the training data. Ends with '/' so that a speaker ID
# can be appended directly to form that speaker's sub-directory.
dirpath = "../data/LearningData/magiarecord/"
# CSV listing the split points (minutes, seconds, speaker ID) of one recording.
csvpath = "../data/LearningData/magiarecord/csvdata/main_5-7.csv"
# WAV recording to be cut at the split points above.
wavpath = "../data/LearningData/magiarecord/main/main_5-7.wav"

In [3]:
def loadDivideLine(filepath):
    """Load the split points of a recording from a CSV file.

    Every non-blank CSV row is ``minutes, seconds, speaker_id``; the time is
    converted to seconds as ``60 * minutes + seconds``.

    Args:
        filepath: Path of the CSV file.

    Returns:
        ``(divide_sec, peoplelist)`` where ``divide_sec`` is a float64 array
        of split times in seconds and ``peoplelist`` is an int32 array of the
        speaker IDs, one per row. Both arrays are empty when the file holds
        no data rows. If the times are not in non-decreasing order, a message
        is printed and ``(-1, -1)`` is returned.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
    """
    if not os.path.exists(filepath):
        print('divide_time data is nothing')
        print('o・∇・o')
        # Fail here with a clear error rather than a few lines later in open().
        raise FileNotFoundError(filepath)

    # newline='' is what the csv module expects for files it reads.
    with open(filepath, 'r', newline='') as f:
        # Blank lines come back as empty (or whitespace-only) rows and would
        # make the array ragged, so drop them.
        rows = [row for row in csv.reader(f)
                if any(cell.strip() for cell in row)]

    if not rows:
        return np.array([], dtype='float64'), np.array([], dtype='int32')

    divideline = np.array(rows, dtype='float64')

    divide_sec = 60 * divideline[:, 0] + divideline[:, 1]
    peoplelist = divideline[:, 2].astype('int32')

    # np.diff of a single split point is empty, so one-row files are valid.
    if np.any(np.diff(divide_sec) < 0):
        print('divide_time value is wrong')
        print('o・∇・o')
        return -1,-1

    return divide_sec,peoplelist

#divide_sec,peoplelist = loadDivideLine(csvpath)

In [4]:
def countWavefile(dirpath,peoplelist):
    """Count the files already stored for each speaker, creating directories.

    Each speaker's files live in the sub-directory of ``dirpath`` named after
    the speaker ID. Missing sub-directories are created. Speaker ID 0 is
    skipped.

    Args:
        dirpath: Root directory that holds the per-speaker sub-directories.
        peoplelist: Iterable of speaker IDs (duplicates are allowed).

    Returns:
        ``(number_of_data, directory_path)``: two dicts keyed by the speaker
        ID as ``int``. ``number_of_data`` holds the number of entries
        currently in the speaker's directory; ``directory_path`` holds that
        directory's path with a trailing ``'/'``.
    """
    number_of_data = {}
    directory_path = {}
    for people in set(peoplelist):
        # Normalise numpy integers to plain ints for use as dict keys.
        people = int(people)
        if people == 0:
            continue
        people_dir = os.path.join(dirpath, str(people))
        # Pure-Python replacement for shelling out to `mkdir`: portable, and
        # a no-op when the directory already exists.
        os.makedirs(people_dir, exist_ok=True)
        number_of_data[people] = len(os.listdir(people_dir))
        directory_path[people] = people_dir + '/'

    return number_of_data,directory_path

#countWavefile(dirpath,peoplelist)

In [5]:
def loadWavefile(filename,rate=44100,mono=True):
    """Load an audio file with librosa.

    Args:
        filename: Path of the audio file.
        rate: Passed to ``librosa.load`` as ``sr``.
        mono: Passed to ``librosa.load`` as ``mono``.

    Returns:
        ``(y, sr)`` exactly as returned by ``librosa.load``.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        print('wav data is nothing')
        print('o・∇・o')
        # Stop here instead of falling through into the decoder.
        raise FileNotFoundError(filename)
    y,sr = librosa.load(filename,sr=rate,mono=mono)
    return y,sr

def outputWavefile(filepath,y,rate=44100):
    """Write audio samples to a WAV file.

    An existing file at ``filepath`` is reported on stdout and then
    overwritten.

    Args:
        filepath: Destination path of the WAV file.
        y: Audio samples (array-like).
        rate: Sampling rate written to the file header.
    """
    if os.path.exists(filepath):
        print('wav data is already there')
        print('o・∇・o')

    # librosa.output was removed in librosa 0.8; use it only where it exists.
    legacy_writer = getattr(getattr(librosa, 'output', None), 'write_wav', None)
    if legacy_writer is not None:
        legacy_writer(filepath,y,rate)
        return

    # Fallback: scipy's WAV writer (scipy is a dependency of librosa).
    from scipy.io import wavfile

    wav = np.asarray(y)
    # Two-channel audio arranged as (2, n) has to be written as (n, 2).
    if wav.ndim > 1 and wav.shape[0] == 2:
        wav = wav.T
    wavfile.write(filepath, rate, wav)

In [6]:
def divideWavefile(csvpath,wavpath,dirpath):
    """Cut a recording into per-speaker clips at the times listed in a CSV.

    Each CSV row marks the end of a segment and names its speaker; the
    segment starts at the previous row's time (0.0 for the first row).
    Segments of speaker ID 0 are skipped. Every other segment is written to
    ``<speaker directory>/<n>.wav``, where ``n`` starts at the number of
    entries already in that directory and increases by one per clip.

    Args:
        csvpath: CSV file of split points, read with ``loadDivideLine``.
        wavpath: Recording to split, read with ``loadWavefile``.
        dirpath: Root directory holding the per-speaker sub-directories.

    Returns:
        None. Nothing is written when ``loadDivideLine`` reports invalid
        split times.
    """
    # Load the split times.
    divide_sec,peoplelist = loadDivideLine(csvpath)

    # loadDivideLine returns the scalar pair (-1, -1) for invalid data (it
    # has already printed the reason); there is nothing to split then.
    if np.ndim(divide_sec) == 0:
        return

    # Count the existing clips per speaker (creates missing directories).
    number_of_data,directory_path = countWavefile(dirpath,peoplelist)

    # Load the recording.
    print('Loading wav data')
    wave, fs = loadWavefile(wavpath)

    # Cut the recording.
    last_time = 0.0
    for time,people in zip(divide_sec,peoplelist):
        if people != 0:
            # Convert the segment boundaries from seconds to sample indices.
            s = int(last_time * fs)
            e = int(time * fs)
            split_data_path = directory_path[people]+str(number_of_data[people])+'.wav'
            print(last_time,people,split_data_path)
            # Write with the rate the audio was loaded at, not a default.
            outputWavefile(split_data_path,wave[s:e],fs)
            number_of_data[people] += 1
        # The end of this segment is the start of the next one.
        last_time = time

In [7]:
#divideWavefile(csvpath,wavpath,dirpath)

Loading wav data
9.0 12 ../data/LearningData/magiarecord/12/297.wav
17.45 1 ../data/LearningData/magiarecord/1/1626.wav
20.878 13 ../data/LearningData/magiarecord/13/136.wav
25.588 1 ../data/LearningData/magiarecord/1/1627.wav
30.603 12 ../data/LearningData/magiarecord/12/298.wav
36.139 11 ../data/LearningData/magiarecord/11/122.wav
38.771 12 ../data/LearningData/magiarecord/12/299.wav
42.274 11 ../data/LearningData/magiarecord/11/123.wav
47.687 4 ../data/LearningData/magiarecord/4/400.wav
53.162 3 ../data/LearningData/magiarecord/3/382.wav
57.177 2 ../data/LearningData/magiarecord/2/890.wav
60.442 1 ../data/LearningData/magiarecord/1/1628.wav
63.103 11 ../data/LearningData/magiarecord/11/124.wav
67.131 11 ../data/LearningData/magiarecord/11/125.wav
76.5 13 ../data/LearningData/magiarecord/13/137.wav
80.0 12 ../data/LearningData/magiarecord/12/300.wav
84.26 1 ../data/LearningData/magiarecord/1/1629.wav
96.351 1 ../data/LearningData/magiarecord/1/1630.wav
98.685 1 ../data/LearningData/m

990.539 1 ../data/LearningData/magiarecord/1/1664.wav
1000.222 5 ../data/LearningData/magiarecord/5/144.wav
1003.243 1 ../data/LearningData/magiarecord/1/1665.wav
1006.0 5 ../data/LearningData/magiarecord/5/145.wav
1008.32 1 ../data/LearningData/magiarecord/1/1666.wav
1013.026 5 ../data/LearningData/magiarecord/5/146.wav
1018.02 1 ../data/LearningData/magiarecord/1/1667.wav
1020.291 5 ../data/LearningData/magiarecord/5/147.wav
1022.62 1 ../data/LearningData/magiarecord/1/1668.wav
1025.628 5 ../data/LearningData/magiarecord/5/148.wav
1029.211 1 ../data/LearningData/magiarecord/1/1669.wav
1035.512 5 ../data/LearningData/magiarecord/5/149.wav
1049.991 2 ../data/LearningData/magiarecord/2/909.wav
1052.964 1 ../data/LearningData/magiarecord/1/1670.wav
1060.451 5 ../data/LearningData/magiarecord/5/150.wav
1065.368 1 ../data/LearningData/magiarecord/1/1671.wav
1071.996 5 ../data/LearningData/magiarecord/5/151.wav
1073.936 3 ../data/LearningData/magiarecord/3/393.wav
1081.412 5 ../data/Learnin

In [8]:
# Report how much data is stored for each character.

path = "../data/LearningData/magiarecord/"
charcsvpath = "../data/LearningData/magiarecord/csvdata/character_import.csv"

# Character ID -> character name, taken from the first two CSV columns.
char = {}
with open(charcsvpath,'r')as f:
    data = csv.reader(f)
    for low in data:
        # Skip blank lines instead of failing on low[0].
        if not low:
            continue
        char[int(low[0])] = str(low[1])


allsize = 0
sizedata = []
for a in range(1,51):
    filepath = path + str(a)
    if not os.path.exists(filepath):
        continue
    # Total size in bytes of the entries directly inside this directory.
    sizesum = sum(os.path.getsize(filepath+'/'+name)
                  for name in os.listdir(filepath+'/'))
    # bytes -> MiB
    sizesum = sizesum / 1024 / 1024
    # A directory may exist for an ID that has no row in the character CSV;
    # show a placeholder instead of raising KeyError.
    print(a,char.get(a,'(unknown)'),round(sizesum,3),'MB')
    allsize += sizesum
print('\nall',round(allsize/1024,3),'GB')

1 環いろは 1590.763 MB
2 七海やちよ 882.288 MB
3 由比鶴乃 388.09 MB
4 深月フェリシア 355.503 MB
5 二葉さな 232.382 MB
6 ウワサの鶴乃 52.41 MB
11 秋野かえで 169.762 MB
12 十咎ももこ 333.866 MB
13 水波レナ 231.02 MB
14 八雲みたま 137.971 MB
15 雪野かなえ 53.688 MB
16 安名メル 47.195 MB
17 和泉十七夜（仮置） 0.0 MB
21 鹿目まどか（振袖含） 188.977 MB
22 暁美ほむら 180.973 MB
23 巴マミ（突然失礼含） 84.547 MB
24 佐倉杏子 197.568 MB
25 美樹さやか 0.0 MB
31 環うい 22.74 MB
32 里見灯火 22.238 MB
33 柊ねむ 17.538 MB
34 アリナグレイ 95.435 MB
35 梓みふゆ 58.925 MB
36 天音月咲 80.222 MB
37 天音月夜 131.232 MB
38 黒羽根（ミザリー） 22.405 MB
39 黒羽根（アイ） 1.307 MB
41 もきゅ 13.642 MB
42 キュゥべえ 4.593 MB
43 ウワサ 27.736 MB
44 給水おじさん 6.362 MB
45 アイ 53.289 MB

all 5.551 GB
