In [1]:
import pydub
import numpy as np
from python_speech_features import mfcc, delta, logfbank
import librosa
import torch
from os import listdir

In [2]:
path2mp3 = '../data/dev/1KFUSfbcwfw50002.mp3'
path2txt = '../data/dev/1KFUSfbcwfw50002.txt'

In [3]:
def readMP3(f, normalized=False):
    """MP3 to numpy array"""
    a = pydub.AudioSegment.from_mp3(f)
    y = np.array(a.get_array_of_samples())
    if a.channels == 2:
        y = y.reshape((-1, 2))
    if normalized:
        return a.frame_rate, np.float32(y) / 2**15
    else:
        return a.frame_rate, y

In [4]:
rate, sig = readMP3(path2mp3)

In [5]:
sig.shape

(90112,)

In [6]:
mfcc_feat = mfcc(sig,rate)

In [7]:
mfcc_feat.shape

(562, 13)

In [8]:
d_mfcc_feat = delta(mfcc_feat, 2)

In [9]:
d_mfcc_feat.shape

(562, 13)

In [10]:
fbank_feat = logfbank(sig,rate)

In [11]:
fbank_feat.shape

(562, 26)

In [12]:
mfcc2 = zip(*mfcc(sig,rate)) #mfcc(sig,rate) #

In [13]:
mfcc2 = np.stack([np.array(i) for i in mfcc2])

In [14]:
mfcc2.shape

(13, 562)

In [15]:
cc = np.expand_dims(np.expand_dims(mfcc2,axis=0),axis=0)

In [16]:
cc.shape

(1, 1, 13, 562)

In [17]:
cct = torch.autograd.Variable(torch.from_numpy(cc.astype(float)).float())

In [18]:
cct.shape

torch.Size([1, 1, 13, 562])

In [240]:
def get_audio_feat_librosa(input_file, dim=13, window_size=25, stride=10, 
                      feature='mfcc', cmvn=False, delta=False, delta_delta=False, save_feature=None):
    y, sr = librosa.load(input_file,sr=None)
    ws = int(sr*0.001*window_size)
    st = int(sr*0.001*stride)
    if feature == 'fbank': # log-scaled
        feat = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=dim,
                                    n_fft=ws, hop_length=st)
        feat = np.log(feat+1e-6)
    elif feature == 'mfcc':
        feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=dim, n_mels=26,
                                    n_fft=ws, hop_length=st)
        feat[0] = librosa.feature.rmse(y, hop_length=st, frame_length=ws) 
        
    else:
        raise ValueError('Unsupported Acoustic Feature: '+feature)

    feat = [feat]
    if delta:
        feat.append(librosa.feature.delta(feat[0]))

    if delta_delta:
        feat.append(librosa.feature.delta(feat[0],order=2))
    feat = np.concatenate(feat,axis=0)
    if cmvn:
        feat = (feat - feat.mean(axis=1)[:,np.newaxis]) / (feat.std(axis=1)+1e-16)[:,np.newaxis]
    if save_feature is not None:
        tmp = np.swapaxes(feat,0,1).astype('float32')
        np.save(save_feature,tmp)
        return len(tmp)
    else:
        return np.swapaxes(feat,0,1).astype('float32')

In [20]:
a1 = get_audio_feat_librosa(path2mp3)
a1

array([[ 5.3055843e-05,  1.3473907e+01, -1.5743690e+00, ...,
        -4.4256482e+00,  2.1285338e+00,  5.8058966e-02],
       [ 5.2968004e-05,  1.0580042e+01, -3.9050567e+00, ...,
         2.5809073e-01,  2.3016870e+00, -5.0932455e+00],
       [ 6.6001980e-05,  6.7694855e+00, -1.1243533e+01, ...,
         1.2006351e+00,  8.2259731e+00, -9.6584007e-02],
       ...,
       [ 3.1970714e-05,  0.0000000e+00, -6.2602315e-14, ...,
        -7.4604954e-14,  0.0000000e+00,  6.7853838e-14],
       [ 0.0000000e+00,  0.0000000e+00, -6.2602315e-14, ...,
        -7.4604954e-14,  0.0000000e+00,  6.7853838e-14],
       [ 0.0000000e+00,  0.0000000e+00, -6.2602315e-14, ...,
        -7.4604954e-14,  0.0000000e+00,  6.7853838e-14]], dtype=float32)

In [21]:
a1.shape

(573, 13)

In [241]:
def get_audio_feat_psf(input_file, dim=13, window_size=25, stride=10):
    sig, rate = librosa.load(input_file, sr=None)
    feat = mfcc(sig, samplerate=rate, numcep=dim, winlen=window_size/1000, winstep=stride/1000)
    return feat

In [242]:
def get_audio_feat(input_file, dim=13, window_size=25, stride=10, method='psf'):
    if method == 'psf':
        feat = get_audio_feat_psf(input_file, dim, window_size, stride)
    else:
        feat = get_audio_feat_librosa(input_file, dim, window_size, stride, cmvn=True)
    mfcc = zip(*feat)
    mfcc = np.stack([np.array(i) for i in mfcc])
    cc = np.expand_dims(mfcc,axis=0)
    #cc = np.expand_dims(np.expand_dims(mfcc, axis=0),axis=0)
    cct = torch.autograd.Variable(torch.from_numpy(cc.astype(float)).float())
    return cct

In [231]:
get_audio_feat(path2mp3, method='psf')



tensor([[[-14.0856, -10.7070, -10.4366,  ...,  -7.9894, -10.8911, -15.0372],
         [-23.4145, -10.6640,  -9.3113,  ..., -15.3147, -16.6253, -15.8784],
         [-23.0852, -26.4813, -24.0595,  ..., -36.7484, -28.5153, -26.2736],
         ...,
         [-15.4084, -40.7633, -35.0773,  ...,  -6.2392,  -2.6497,   1.2092],
         [  6.4542,   4.2020,   3.6008,  ..., -28.5293, -19.0213, -21.8093],
         [ -1.5756,  -2.9960,  -0.5695,  ...,  -6.0823,  -0.3115,   2.0276]]])

In [25]:
torch.flip(get_audio_feat(path2mp3, method='psf'), [1])

tensor([[-3.6044e+01, -1.8148e+01, -1.5037e+01,  ..., -1.1098e+01,
         -1.3980e+01, -1.4588e+01],
        [ 0.0000e+00, -1.1806e+01, -1.5878e+01,  ..., -1.0463e+01,
         -2.4009e+01, -2.2678e+01],
        [-3.2076e-14, -1.3636e+01, -2.6274e+01,  ..., -2.7164e+01,
         -2.5822e+01, -2.1032e+01],
        ...,
        [-3.2777e-13,  2.0018e+00,  1.2092e+00,  ..., -2.9953e+01,
         -2.0820e+01, -1.7548e+01],
        [ 0.0000e+00,  9.2892e+00, -2.1809e+01,  ...,  1.1812e+01,
         -1.8519e+00,  6.6752e+00],
        [ 3.5948e-13, -1.3227e+01,  2.0276e+00,  ...,  7.3629e-01,
         -4.6350e+00, -5.0158e+00]])

In [243]:
get_audio_feat(path2mp3, method='librosa').shape

torch.Size([1, 13, 573])

In [27]:
y, sr = librosa.load(path2mp3,sr=None)
y

array([9.1552734e-05, 6.1035156e-05, 6.1035156e-05, ..., 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00], dtype=float32)

In [28]:
rate, sig = readMP3(path2mp3)
sig

array([-10, -24, -35, ...,  -5,   7,  13], dtype=int16)

In [29]:
np.array_equal(get_audio_feat(path2mp3, method='psf'), get_audio_feat(path2mp3, method='librosa'))

False

In [32]:
get_audio_feat_librosa(path2mp3, feature='fbank', dim=26, cmvn=False)

array([[-13.773326 , -13.73917  , -13.736883 , ..., -13.813982 ,
        -13.81537  , -13.815215 ],
       [-13.795713 , -13.773858 , -13.7577505, ..., -13.813632 ,
        -13.815297 , -13.815288 ],
       [-13.785141 , -13.799453 , -13.808878 , ..., -13.807434 ,
        -13.815107 , -13.815192 ],
       ...,
       [-13.815437 , -13.815411 , -13.815408 , ..., -13.81551  ,
        -13.815511 , -13.815511 ],
       [-13.815511 , -13.815511 , -13.815511 , ..., -13.815511 ,
        -13.815511 , -13.815511 ],
       [-13.815511 , -13.815511 , -13.815511 , ..., -13.815511 ,
        -13.815511 , -13.815511 ]], dtype=float32)

In [33]:
fbank_feat

array([[2.94606475, 5.30000278, 5.43647196, ..., 6.61245851, 2.72615739,
        2.67667794],
       [2.59755188, 5.60831071, 5.4310289 , ..., 6.6993696 , 3.16617381,
        2.85343087],
       [2.1105837 , 5.67809607, 5.26407824, ..., 6.28173124, 1.76820534,
        1.90300327],
       ...,
       [6.26139481, 8.2433599 , 9.6849295 , ..., 6.53663859, 5.04233776,
        4.64273055],
       [4.93133168, 5.24287202, 5.33836367, ..., 6.36564206, 5.8763119 ,
        5.88703613],
       [1.23639138, 2.64273511, 4.17202093, ..., 5.00450562, 3.48012979,
        3.4426068 ]])

In [34]:
with open(path2txt, 'r') as f:
    content = f.readline()
k = np.array([ord(c) - 32 for c in content.replace('Text:', '').strip()])
k = torch.autograd.Variable(torch.from_numpy(k.astype(int)).int())

In [35]:
k

tensor([41,  0, 35, 33, 46,  7, 52,  0, 36, 37, 51, 35, 50, 41, 34, 37,  0, 52,
        47,  0, 57, 47, 53,  0, 40, 47, 55,  0, 49, 53, 41, 35, 43, 44, 57,  0,
        41,  0, 55, 33, 46, 52, 37, 36,  0, 52, 47,  0, 36, 41, 54, 37,  0, 41,
        46, 52, 47,  0, 46, 53, 45, 34, 46, 37, 51, 51,  0, 39, 37, 52,  0, 33,
        55, 33, 57,  0, 38, 50, 47, 45,  0, 52, 40, 37], dtype=torch.int32)

In [36]:
torch.flip(k, [0])

tensor([37, 40, 52,  0, 45, 47, 50, 38,  0, 57, 33, 55, 33,  0, 52, 37, 39,  0,
        51, 51, 37, 46, 34, 45, 53, 46,  0, 47, 52, 46, 41,  0, 37, 54, 41, 36,
         0, 47, 52,  0, 36, 37, 52, 46, 33, 55,  0, 41,  0, 57, 44, 43, 35, 41,
        53, 49,  0, 55, 47, 40,  0, 53, 47, 57,  0, 47, 52,  0, 37, 34, 41, 50,
        35, 51, 37, 36,  0, 52,  7, 46, 33, 35,  0, 41], dtype=torch.int32)

In [37]:
np.array(list(map(lambda x: chr(x + 32), k)))

array(['I', ' ', 'C', 'A', 'N', "'", 'T', ' ', 'D', 'E', 'S', 'C', 'R',
       'I', 'B', 'E', ' ', 'T', 'O', ' ', 'Y', 'O', 'U', ' ', 'H', 'O',
       'W', ' ', 'Q', 'U', 'I', 'C', 'K', 'L', 'Y', ' ', 'I', ' ', 'W',
       'A', 'N', 'T', 'E', 'D', ' ', 'T', 'O', ' ', 'D', 'I', 'V', 'E',
       ' ', 'I', 'N', 'T', 'O', ' ', 'N', 'U', 'M', 'B', 'N', 'E', 'S',
       'S', ' ', 'G', 'E', 'T', ' ', 'A', 'W', 'A', 'Y', ' ', 'F', 'R',
       'O', 'M', ' ', 'T', 'H', 'E'], dtype='<U1')

In [219]:
def get_txt_as_tensor(txt_file):
    with open(txt_file, 'r') as f:
         content = f.readline()
    ascii = np.array([ord(c) - 32 for c in content.replace('Text:', '').strip()])
    ascii = np.append(ascii, [-2])
    ascii = np.insert(ascii, 0, [-1])
    ascii = np.expand_dims(ascii,axis=0)
    ascii = torch.autograd.Variable(torch.from_numpy(ascii.astype(float)).float())
    return ascii

In [220]:
get_txt_as_tensor(path2txt)

tensor([[-1., 41.,  0., 35., 33., 46.,  7., 52.,  0., 36., 37., 51., 35., 50.,
         41., 34., 37.,  0., 52., 47.,  0., 57., 47., 53.,  0., 40., 47., 55.,
          0., 49., 53., 41., 35., 43., 44., 57.,  0., 41.,  0., 55., 33., 46.,
         52., 37., 36.,  0., 52., 47.,  0., 36., 41., 54., 37.,  0., 41., 46.,
         52., 47.,  0., 46., 53., 45., 34., 46., 37., 51., 51.,  0., 39., 37.,
         52.,  0., 33., 55., 33., 57.,  0., 38., 50., 47., 45.,  0., 52., 40.,
         37., -2.]])

In [39]:
def list_files(directory, extension):
    return np.array(list((f for f in listdir(directory) if f.endswith('.' + extension))))

In [40]:
mp3files = list_files('../data/dev/', 'mp3')
txtfiles = list_files('../data/dev/', 'txt')

In [41]:
def pad_tensor(vec, pad, dim):
    """
    args:
        vec - tensor to pad
        pad - the size to pad to
        dim - dimension to pad

    return:
        a new tensor padded to 'pad' in dimension 'dim'
    """
    pad_size = list(vec.shape)
    pad_size[dim] = pad - vec.size(dim)
    return torch.cat([vec, torch.zeros(*pad_size)], dim=dim)

In [244]:
f = 0
for mp3file in mp3files:
    mp3tensor = get_audio_feat('../data/dev/' + mp3file)
    print(mp3tensor.shape)
    if (f > 0):
        if (mp3tensor.shape[2] > mp3tensors.shape[2]):
            mp3tensors = pad_tensor(mp3tensors, mp3tensor.shape[2], 2)
        elif (mp3tensor.shape[2] < mp3tensors.shape[2]):
            mp3tensor = pad_tensor(mp3tensor, mp3tensors.shape[2], 2)
        mp3tensors = torch.cat((mp3tensors,  mp3tensor), 0)
        print("\t%s, %s, %s" % (mp3tensors.shape[0], mp3tensors.shape[1], mp3tensors.shape[2]))
    else:
        mp3tensors = mp3tensor
    f = f + 1
    #if f == 5:
    #    break

torch.Size([1, 13, 615])
torch.Size([1, 13, 604])
	2, 13, 615
torch.Size([1, 13, 265])
	3, 13, 615
torch.Size([1, 13, 489])
	4, 13, 615
torch.Size([1, 13, 179])
	5, 13, 615
torch.Size([1, 13, 226])
	6, 13, 615
torch.Size([1, 13, 615])
	7, 13, 615
torch.Size([1, 13, 179])
	8, 13, 615
torch.Size([1, 13, 442])
	9, 13, 615
torch.Size([1, 13, 276])
	10, 13, 615
torch.Size([1, 13, 564])
	11, 13, 615
torch.Size([1, 13, 136])
	12, 13, 615
torch.Size([1, 13, 579])
	13, 13, 615
torch.Size([1, 13, 316])
	14, 13, 615
torch.Size([1, 13, 269])
	15, 13, 615
torch.Size([1, 13, 615])
	16, 13, 615
torch.Size([1, 13, 175])
	17, 13, 615
torch.Size([1, 13, 201])
	18, 13, 615
torch.Size([1, 13, 366])
	19, 13, 615
torch.Size([1, 13, 103])
	20, 13, 615
torch.Size([1, 13, 201])
	21, 13, 615
torch.Size([1, 13, 294])
	22, 13, 615
torch.Size([1, 13, 327])
	23, 13, 615
torch.Size([1, 13, 525])
	24, 13, 615
torch.Size([1, 13, 597])
	25, 13, 615
torch.Size([1, 13, 323])
	26, 13, 615
torch.Size([1, 13, 226])
	27, 13,

In [61]:
mp3tensors.shape

torch.Size([108, 13, 629])

In [283]:
f = 0
for txtfile in txtfiles:
    txttensor = get_txt_as_tensor('../data/dev/' + txtfile)
    print(txttensor.shape)
    if (f > 0):
        if (txttensor.shape[1] > txttensors.shape[1]):
            txttensors = pad_tensor(txttensors, txttensor.shape[1], 1)
        elif (txttensor.shape[1] < txttensors.shape[1]):
            txttensor = pad_tensor(txttensor, txttensors.shape[1], 1)
        txttensors = torch.cat((txttensors,  txttensor), 0)
        print("\t%s, %s" % (txttensors.shape[0], txttensors.shape[1]))
    else:
        txttensors = txttensor
    f = f + 1
    #if f == 5:
    #    break

torch.Size([1, 28])
torch.Size([1, 34])
	2, 34
torch.Size([1, 21])
	3, 34
torch.Size([1, 58])
	4, 58
torch.Size([1, 120])
	5, 120
torch.Size([1, 50])
	6, 120
torch.Size([1, 53])
	7, 120
torch.Size([1, 28])
	8, 120
torch.Size([1, 79])
	9, 120
torch.Size([1, 114])
	10, 120
torch.Size([1, 44])
	11, 120
torch.Size([1, 21])
	12, 120
torch.Size([1, 74])
	13, 120
torch.Size([1, 72])
	14, 120
torch.Size([1, 97])
	15, 120
torch.Size([1, 30])
	16, 120
torch.Size([1, 82])
	17, 120
torch.Size([1, 42])
	18, 120
torch.Size([1, 40])
	19, 120
torch.Size([1, 45])
	20, 120
torch.Size([1, 20])
	21, 120
torch.Size([1, 86])
	22, 120
torch.Size([1, 106])
	23, 120
torch.Size([1, 20])
	24, 120
torch.Size([1, 90])
	25, 120
torch.Size([1, 19])
	26, 120
torch.Size([1, 101])
	27, 120
torch.Size([1, 122])
	28, 122
torch.Size([1, 26])
	29, 122
torch.Size([1, 108])
	30, 122
torch.Size([1, 24])
	31, 122
torch.Size([1, 64])
	32, 122
torch.Size([1, 32])
	33, 122
torch.Size([1, 114])
	34, 122
torch.Size([1, 52])
	35, 12

In [222]:
txttensors.shape

torch.Size([108, 139])

In [69]:
from torch import nn
from torch.nn import functional as F
from torch.utils.checkpoint import checkpoint_sequential

In [260]:
def get_lstm_model(input_size=13, hidden_size=256, nb_layers=3, batch_first=True):

    rnns = []
    #batch_norm = SequenceWise(nn.BatchNorm1d(input_size)) if batch_norm else None
    rnn = nn.LSTM(input_size, hidden_size, nb_layers, batch_first) #hidden_size=768, bidirectional=False, bias=True
    #rnn = nn.LSTM(input_size=13, hidden_size=768, bidirectional=False, bias=True)
    #rnns.append(('0', rnn))
    #for x in range(nb_layers - 1):
    #    rnn = nn.LSTM(input_size=13, hidden_size=768, bidirectional=False, bias=True)
    #    rnns.append(('%d' % (x + 1), rnn))
    #rnns = nn.Sequential(OrderedDict(rnns))

    fully_connected = nn.Sequential(
        nn.LSTM(input_size, hidden_size, nb_layers, batch_first),
        nn.BatchNorm1d(256),
        nn.Linear(256, 108, bias=False)
        #,nn.Softmax(dim=1)
    )

    #for x in rnns:
    #    x.flatten_parameters()

    #return rnns
    return rnn

In [261]:
rnn = nn.LSTM(input_size=13, hidden_size=256, num_layers=3, batch_first=True)

In [291]:
fully_connected = nn.Sequential(
        nn.BatchNorm1d(161024),
        nn.Linear(161024, 139, bias=False),
        nn.Softmax(dim=1)
    )
fully_connected2 = nn.Sequential(
        nn.BatchNorm1d(629),
        nn.Linear(256, 1, bias=False),
        nn.Softmax(dim=1)
    )

In [270]:
model = get_lstm_model()

In [271]:
model

LSTM(13, 256, num_layers=3)

In [250]:
learning_rate = 0.005
num_epochs = 200

In [251]:
loss_fn = torch.nn.CrossEntropyLoss(size_average=False)
param = list(rnn.parameters()) + list(fully_connected.parameters())
optimiser = torch.optim.Adam(param, lr=learning_rate, amsgrad=True)

In [286]:
mp3tensorsp = mp3tensors.permute(0, 2, 1)
mp3tensorsp.size()

torch.Size([108, 629, 13])

In [268]:
mp3tensorsp = mp3tensorsp[:, 0:139, :]
mp3tensorsp.size()

torch.Size([108, 139, 13])

In [284]:
txttensorsp = txttensors
txttensorsp.unsqueeze_(-1)
txttensorsp = txttensorsp.expand(108, 139, 1)
txttensorsp.size()

torch.Size([108, 139, 1])

In [257]:
h0 = torch.rand(3, 108, 256)
c0 = torch.rand(3, 108, 256)

In [294]:
txttensors = txttensors.view(108, 139).long()

In [295]:
txttensors.size()

torch.Size([108, 139])

In [297]:
txttensors[9]

tensor([-1, 34, 53, 52,  0, 41, 38,  0, 57, 47, 53,  0, 52, 47, 47, 43,  0, 52,
        40, 41, 51,  0, 19, 16,  0, 48, 37, 50, 35, 37, 46, 52,  0, 52, 40, 37,
        51, 37,  0, 19, 16, 16,  0, 48, 37, 47, 48, 44, 37,  0, 33, 46, 36,  0,
        40, 33, 36,  0, 52, 40, 37, 45,  0, 46, 47, 45, 41, 46, 33, 52, 37,  0,
        52, 40, 37, 41, 50,  0, 38, 50, 41, 37, 46, 36, 51,  0, 33, 46, 36,  0,
        52, 47, 47, 43,  0, 52, 40, 37,  0, 51, 33, 45, 37,  0, 46, 53, 45, 34,
        37, 50,  0, 47, 38, -2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])

In [296]:
#####################
# Train model
#####################

hist = np.zeros(num_epochs)

for t in range(num_epochs):
    # Clear stored gradient
    model.zero_grad()
    
    # Initialise hidden state
    # Don't do this if you want your LSTM to be stateful
    #model.hidden = model.init_hidden()
    
    # Forward pass
    test = mp3tensorsp[9].view(1, *mp3tensorsp[9].size())
#     print(mp3tensorsp.size())
    
    rnn_out, hidden = rnn(mp3tensorsp, (h0, c0))
    rnn_out = rnn_out.contiguous().view(rnn_out.size(0),-1)
    print(rnn_out.size())
#     print(rnn_out)
#     print('RNN _OUT ')
#     print(rnn_out.size())
    fc_out = fully_connected(rnn_out)
    print(fc_out.size())
    #fc_out = fc_out.permute(0, 2, 1)
#     print(fc_out.size())
    
#     y_pred = model(test)
    #print(txttensorsp.size())
#     print(y_pred[0].size())
    print(txttensors.size())
    #assert False
    loss = loss_fn(fc_out, txttensors)
    #assert False
    if t % 20 == 0:
        print("Epoch ", t, "MSE: ", loss.item())
    hist[t] = loss.item()

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()

torch.Size([108, 161024])
torch.Size([108, 139])
torch.Size([108, 139])


RuntimeError: multi-target not supported at /Users/soumith/mc3build/conda-bld/pytorch_1549593514549/work/aten/src/THNN/generic/ClassNLLCriterion.c:21

In [None]:
fully_connected = nn.Sequential(
        nn.BatchNorm1d(629),
        nn.Linear(256, 1, bias=False),
        nn.Linear(256, 1, bias=False),
        nn.Softmax(dim=1)
    )

In [189]:
mp3tensorsp[0][9]

tensor([ -3.7671, -18.7749, -35.1914,   8.1286, -26.1182,  30.0746, -29.1513,
         16.6291,  -8.0482,  13.3221,  -8.8879,   0.8081,  -1.6713])

In [188]:
torch.topk(fc_out[0][9], k=1)

(tensor([0.0127], grad_fn=<TopkBackward>), tensor([75]))

In [110]:
hist

array([10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 10473842., 10473842.,
       10473842., 10473842., 10473842., 10473842., 

In [300]:
def _init_index_dict():
    index_list = [i for i in range(len(__char_list))]
    __index_of_str = dict(zip(__char_list, index_list))
    __char_of_index = dict(zip(index_list, __char_list))
    __total_num = index_list[-1] + 1
    return __index_of_str, __char_of_index, __total_num

def get_index_of(character):
    return __index_of_str[character]

def get_char_of(index):
    return __char_of_index[index]

def get_total_num():
    return __total_num

In [301]:
__char_list = (
    'A',
    'B',
    'C',
    'D',
    'E',
    'F',
    'G',
    'H',
    'I',
    'J',
    'K',
    'L',
    'M',
    'N',
    'O',
    'P',
    'Q',
    'R',
    'S',
    'T',
    'U',
    'V',
    'W',
    'X',
    'Y',
    'Z',
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '9',
    '0',
    '<sos>',
    '<eos>',
    '<pad>',
    '\'',
)
__index_of_str, __char_of_index, __total_num = _init_index_dict()

In [302]:
__index_of_str

{'A': 0,
 'B': 1,
 'C': 2,
 'D': 3,
 'E': 4,
 'F': 5,
 'G': 6,
 'H': 7,
 'I': 8,
 'J': 9,
 'K': 10,
 'L': 11,
 'M': 12,
 'N': 13,
 'O': 14,
 'P': 15,
 'Q': 16,
 'R': 17,
 'S': 18,
 'T': 19,
 'U': 20,
 'V': 21,
 'W': 22,
 'X': 23,
 'Y': 24,
 'Z': 25,
 '1': 26,
 '2': 27,
 '3': 28,
 '4': 29,
 '5': 30,
 '6': 31,
 '7': 32,
 '8': 33,
 '9': 34,
 '0': 35,
 '<sos>': 36,
 '<eos>': 37,
 '<pad>': 38,
 "'": 39}

In [303]:
__char_of_index

{0: 'A',
 1: 'B',
 2: 'C',
 3: 'D',
 4: 'E',
 5: 'F',
 6: 'G',
 7: 'H',
 8: 'I',
 9: 'J',
 10: 'K',
 11: 'L',
 12: 'M',
 13: 'N',
 14: 'O',
 15: 'P',
 16: 'Q',
 17: 'R',
 18: 'S',
 19: 'T',
 20: 'U',
 21: 'V',
 22: 'W',
 23: 'X',
 24: 'Y',
 25: 'Z',
 26: '1',
 27: '2',
 28: '3',
 29: '4',
 30: '5',
 31: '6',
 32: '7',
 33: '8',
 34: '9',
 35: '0',
 36: '<sos>',
 37: '<eos>',
 38: '<pad>',
 39: "'"}

In [304]:
__total_num

40