## tjaファイルのパース

In [12]:
import soundfile as sf
import matplotlib.pyplot as plt
import numpy as np
import os
from glob import glob
from scipy import signal
from scipy.fftpack import fft
from librosa.filters import mel
from librosa.display import specshow
from librosa import stft
from librosa.effects import pitch_shift
import pickle
import sys
from numba import jit, prange
from sklearn.preprocessing import normalize

In [45]:
# Note digits that are collapsed to token 1 ("don") / token 2 ("ka") when diff=True.
_TJA_DON_NOTES = (1, 3, 5, 6, 7)
_TJA_KA_NOTES = (2, 4)


def _decode_tja_line(raw):
    """Decode one raw line of a .tja file into clean text.

    Tries UTF-8 first and falls back to Shift_JIS, then removes a leading
    byte-order mark, any trailing ``//`` comment, and surrounding
    whitespace (including ``\\r\\n`` or ``\\n`` line endings).
    """
    try:
        text = raw.decode('utf-8')
    except UnicodeDecodeError:
        text = raw.decode('sjis')

    text = text.lstrip('\ufeff')
    # str.find returns the index of the first match, or -1 when absent.
    comment_at = text.find('//')
    if comment_at != -1:
        text = text[:comment_at]
    return text.strip()


def _measure_timestamps(sound, now, bpm, beats_per_measure, diff):
    """Return ``[time, token]`` rows for the notes of one finished measure.

    ``sound`` holds one character per equally spaced slot of the measure.
    Times are truncated to 1/100 s.
    """
    rows = []
    beat = len(sound)
    for i, ch in enumerate(sound):
        if not '0' <= ch <= '9':
            # Unknown symbols keep their slot (so spacing is unchanged) but emit nothing.
            continue
        note = int(ch)
        if diff:
            if note in _TJA_DON_NOTES:
                token = 1
            elif note in _TJA_KA_NOTES:
                token = 2
            else:
                continue
        elif note != 0:
            token = note
        else:
            continue
        rows.append(
            [int(100 * (now + i * 60 * beats_per_measure / bpm / beat)) / 100, token])
    return rows


def import_tja(filename, diff=False):
    """Parse a .tja chart into an array of note timestamps.

    Only the first ``#START`` ... ``#END`` section of the file is read.

    Args:
        filename: Path of the .tja file. Lines are decoded as UTF-8 with a
            Shift_JIS fallback; both CRLF and LF line endings are accepted.
        diff: If true, notes 1/3/5/6/7 are emitted as token 1 (don), notes
            2/4 as token 2 (ka) and every other note is dropped. If false,
            every non-zero note digit is emitted unchanged.

    Returns:
        ``np.array`` of ``[time_in_seconds, token]`` rows (an empty array when
        the chart has no notes). Times are truncated to 1/100 s.

    Notes:
        ``#BPMCHANGE``, ``#MEASURE`` and ``#DELAY`` are applied with measure
        granularity: the values in effect when a measure's closing comma is
        read are used for that whole measure.
    """
    timestamp = []
    bpm = 100
    now = 0  # running time in seconds; initialised from OFFSET
    # Length of a bar in quarter-note beats: 4 * numerator / denominator.
    beats_per_measure = 4.0
    sound = []  # note characters collected for the measure in progress
    started = False

    with open(filename, "rb") as f:
        # Iterating the file stops cleanly at EOF, even without #START/#END.
        for raw in f:
            line = _decode_tja_line(raw)
            if not line:
                continue

            if not started:
                # Header section: only BPM and OFFSET affect the timing.
                if line.startswith('BPM:'):
                    value = line[4:].strip()
                    if value:
                        bpm = float(value)
                elif line.startswith('OFFSET'):
                    value = line.partition(':')[2].strip()
                    if value:
                        now = -float(value)
                elif line.startswith('#START'):
                    started = True
                continue

            first = line[0]
            if first == ',' or '0' <= first <= '9':
                notes, comma, _ = line.partition(',')
                sound += notes.strip()  # extends the list one character at a time
                if not comma:
                    # Measure continues on the next line.
                    continue
                timestamp.extend(
                    _measure_timestamps(sound, now, bpm, beats_per_measure, diff))
                now += 60 / bpm * beats_per_measure  # end of measure: advance by its duration
                sound = []
            elif line.startswith('#BPMCHANGE'):
                bpm = float(line[len('#BPMCHANGE'):])
            elif line.startswith('#MEASURE'):
                numerator, _, denominator = line[len('#MEASURE'):].partition('/')
                beats_per_measure = 4 * float(numerator) / float(denominator)
            elif line.startswith('#DELAY'):
                now += float(line[len('#DELAY'):])
            elif line.startswith('#END'):
                break

    return np.array(timestamp)

In [52]:
# Load the song's samples and sample rate; always_2d=True asks for a 2-D sample array.
data, samplerate = sf.read('taitatsudata_24karats TRIBE OF GOLD_24karats TRIBE OF GOLD.ogg', always_2d=True)

In [47]:
# Parse the matching chart with the default diff=False, keeping the raw note digits.
timestamp = import_tja('taitatsudata_24karats TRIBE OF GOLD_24karats TRIBE OF GOLD.tja')

In [50]:
# Show the first ten [time, note] rows of the parsed chart.
timestamp[:10]

array([[6.  , 1.  ],
       [6.29, 2.  ],
       [6.43, 1.  ],
       [6.72, 2.  ],
       [6.87, 1.  ],
       [7.16, 2.  ],
       [7.31, 1.  ],
       [7.45, 2.  ],
       [7.6 , 2.  ],
       [7.74, 1.  ]])

In [60]:
# Note types that trigger the don / ka sample when diff is True.
_SYNTH_DON_TYPES = (1, 3, 5, 6, 7)
_SYNTH_KA_TYPES = (2, 4)


def _stamp_seconds(stamp):
    """Return the time of a timestamp entry given as ``[time, type]`` or as a bare time."""
    return stamp[0] if np.ndim(stamp) > 0 else stamp


def _overlay_hit(buffer, rate, seconds, sound):
    """Add ``sound`` into ``buffer`` starting at ``seconds``; skip hits that do not fit."""
    start = int(seconds * rate)  # convert seconds to a sample index
    stop = start + len(sound)
    if start < 0 or stop > len(buffer):
        # A negative index would wrap around to the end of the track and an
        # overrun would not line up with the sample, so drop the hit.
        return
    if buffer.ndim == 2 and sound.ndim == 1:
        # Mono sample into a (frames, channels) buffer: add it to every channel.
        sound = sound[:, np.newaxis]
    buffer[start:stop] += sound


def synthesize(diff=True, don='../data/data_don.wav', ka='../data/data_ka.wav'):
    """Mix drum hit samples into the module-level ``data`` at each entry of ``timestamp``.

    Reads the module-level ``timestamp``, ``data`` and ``samplerate`` and
    modifies ``data`` in place; nothing is returned.

    Args:
        diff: Selects how hits are rendered.
            ``True``: entries are ``[time, type]``; types 1/3/5/6/7 play the
            don sample, types 2/4 play the ka sample, others are ignored.
            ``'don'``: the don sample is played at every entry.
            ``'ka'``: the ka sample is played at every entry.
            Any other value leaves ``data`` untouched.
            For ``'don'`` and ``'ka'`` an entry may be ``[time, type]`` or a
            bare time.
        don: Path of the don sample, read with ``sf.read``.
        ka: Path of the ka sample, read with ``sf.read``.

    Raises:
        ValueError: If a sample's channel layout cannot be broadcast onto
            ``data`` (previously this was silently ignored, producing no hits).

    Hits that would start before the beginning or run past the end of
    ``data`` are skipped.
    """
    donsound = sf.read(don)[0]
    kasound = sf.read(ka)[0]

    if diff is True:
        for stamp in timestamp:
            if stamp[1] in _SYNTH_DON_TYPES:
                _overlay_hit(data, samplerate, stamp[0], donsound)
            elif stamp[1] in _SYNTH_KA_TYPES:
                _overlay_hit(data, samplerate, stamp[0], kasound)

    elif diff == 'don':
        for stamp in timestamp:
            _overlay_hit(data, samplerate, _stamp_seconds(stamp), donsound)

    elif diff == 'ka':
        for stamp in timestamp:
            _overlay_hit(data, samplerate, _stamp_seconds(stamp), kasound)
        

In [70]:
# Run with the defaults (diff=True and the default don/ka sample paths).
synthesize()

- とりあえずドンとカの音をタイムスタンプ化し，音を原曲に加えることに成功．

In [72]:
# Write the current contents of data to new.wav using the sample rate read from the source audio.
sf.write('new.wav', data, samplerate)