In [14]:
import os
import keras
from keras import layers as ls
from keras import models as md
import numpy as np
from google.colab import files

# **Training Model on data**

## **Create Model architecture**

In [2]:
def trainingModel(uniques,batchSize,seqSize):
    model = md.Sequential()
    model.add(ls.Embedding(uniques, 512, batch_input_shape=(batchSize, seqSize)))

    model.add(ls.LSTM(256, return_sequences=True, stateful=True))
    model.add(ls.Dropout(0.2))
    model.add(ls.LSTM(256, return_sequences=True, stateful=True))
    model.add(ls.Dropout(0.2))
    model.add(ls.LSTM(256, return_sequences=True, stateful=True))
    model.add(ls.Dropout(0.2))
    
    model.add(ls.TimeDistributed(ls.Dense(uniques))) 
    model.add(ls.Activation('softmax'))
    return model

## **Model Checkpoint Generator**

In [3]:
def save(cp, model):
    model.save_weights(f'{cp}.h5')

## **Batch Division and Training Function with SaveBestModel Implemented**

In [4]:
def batcheRead(T, uniques,batchSize,seqSize):
    length = T.shape[0];
    bChr = int(length / batchSize);

    for start in range(0, bChr - seqSize, seqSize):
        X = np.zeros((batchSize, seqSize))
        Y = np.zeros((batchSize, seqSize, uniques))
        for bIdx in range(0, batchSize): 
            for i in range(0, seqSize): 
                X[bIdx, i] = T[bChr * bIdx + start + i]  
                Y[bIdx, i, T[bChr * bIdx + start + i + 1]] = 1
        yield X, Y

def train(model,epochs,cp,charIndex,data,batchSize,seqSize):
    model.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    trainData = np.asarray([charIndex[c] for c in data],dtype=np.int32)
    steps_per_epoch=int((len(trainData)/batchSize-1)/seqSize)
    epochNum, lossNum, accNum = [], [float('inf')], [float('-inf')]

    for epoch in range(epochs):
        epochNum.append(epoch+1)
        losses, accs = [], []

        for i, (X, Y) in enumerate(batcheRead(trainData, len(charIndex),batchSize,seqSize)):
            loss, acc = model.train_on_batch(X, Y)
            losses.append(loss)
            accs.append(acc)
        
        print(f'Epoch {epoch+1}/{epochs}: loss = {loss}, acc = {acc}')

        if  accNum[-1]<acc and loss<lossNum[-1]:
            save(cp, model)
            print(f'Saved to checkpoint {cp}.h5 accuracy increased from {accNum[-1]} to {acc}')
        lossNum.append(loss)
        accNum.append(acc)
    return {"loss":lossNum,"accuracy":accNum}

# **Generative Model Based on Previous model Training**

## **Model Architecture**

In [5]:
def createSeqModel(ch):
    model = md.Sequential()
    
    model.add(ls.Embedding(input_dim = ch, output_dim = 512, batch_input_shape = (1, 1))) 
  
    model.add(ls.LSTM(256, return_sequences = True, stateful = True))
    model.add(ls.Dropout(0.2))
    
    model.add(ls.LSTM(256, return_sequences = True, stateful = True))
    model.add(ls.Dropout(0.2))
    
    model.add(ls.LSTM(256, stateful = True)) 
    model.add(ls.Dropout(0.2))
    
    model.add((ls.Dense(ch)))
    model.add(ls.Activation("softmax"))
    
    return model

## **Generator Function**

In [6]:
def generateSeq(seqSize, modelName,indexChar):
    ch = len(indexChar)
    model = createSeqModel(ch)
    model.load_weights(f'{modelName}.h5')
     
    ind = [0]
    
    for _ in range(seqSize):
        batch = np.zeros((1, 1))
        batch[0, 0] = ind[-1]
        
        predictedProbs = model.predict_on_batch(batch).ravel()
        sample = np.random.choice(range(ch), size = 1, p = predictedProbs)
        
        ind.append(sample[0])
    
    seq = ''.join(indexChar[c] for c in ind)
    
    cnt = 0
    for i in seq:
        cnt += 1
        if i == "\n":
            break
    seq = seq[cnt:]
    cnt = 0
    for i in seq:
        cnt += 1
        if i == "\n" and seq[cnt] == "\n":
            break
    seq = seq[:cnt]
    return seq

In [7]:
if not os.path.exists(r'/content/data'):
    os.makedirs(r'/content/data')
%cd /content/data
files.upload()
file=open(r'/content/data/input.txt','r')
data=file.read()
file.close()
charIndex = {ch: i for (i, ch) in enumerate(sorted(list(set(data))))}
indexChar = {v:k for (k,v) in charIndex.items()}
%cd /content

/content/data


Saving input.txt to input.txt
/content


In [8]:
BATCH_SIZE=16
SEQ_SIZE=64
model=trainingModel(uniques=len(charIndex),batchSize=BATCH_SIZE,seqSize=SEQ_SIZE)
history = train(model=model,epochs=50,cp='modelV1',charIndex=charIndex,data=data,batchSize=BATCH_SIZE,seqSize=SEQ_SIZE)

Epoch 1/50: loss = 2.644301652908325, acc = 0.30078125
Saved to checkpoint modelV1.h5 accuracy increased from -inf to 0.30078125
Epoch 2/50: loss = 1.1079487800598145, acc = 0.6533203125
Saved to checkpoint modelV1.h5 accuracy increased from 0.30078125 to 0.6533203125
Epoch 3/50: loss = 0.6866281032562256, acc = 0.7666015625
Saved to checkpoint modelV1.h5 accuracy increased from 0.6533203125 to 0.7666015625
Epoch 4/50: loss = 0.5945200324058533, acc = 0.791015625
Saved to checkpoint modelV1.h5 accuracy increased from 0.7666015625 to 0.791015625
Epoch 5/50: loss = 0.5301041007041931, acc = 0.8115234375
Saved to checkpoint modelV1.h5 accuracy increased from 0.791015625 to 0.8115234375
Epoch 6/50: loss = 0.48777133226394653, acc = 0.8212890625
Saved to checkpoint modelV1.h5 accuracy increased from 0.8115234375 to 0.8212890625
Epoch 7/50: loss = 0.4526677131652832, acc = 0.837890625
Saved to checkpoint modelV1.h5 accuracy increased from 0.8212890625 to 0.837890625
Epoch 8/50: loss = 0.4440

In [12]:
l=[]
while len(l)<=65: l+=generateSeq(65*9,"modelV1",indexChar).split(' | ')[1:]
print(" | ".join(l))

Na | Ti [0.25] | Soft Ra | Ki | Ta | Tin [1.00] | Na [0.50] | Na [0.25] | Na | Kat [1.00]+ | Na | Dha [0.50] | Ge | Ti [0.25] | Soft Ra | Ki | Ta | Dhin [0.50]+ | Na | Dhin1+ | Dha | Ti [0.25] | Ra | Ki | Ta | Dha [0.50]+ | Ti | Ge | Na | Dha | Tin | Dha | Ge | Tin | Na | Ge | Na
 | Na | Dha [0.50] | Ge | Dha | Ti [0.25] | Soft Ra | Ki | Ta | Dhin [1.00] | Dhin | Dha [0.50]+ | Ge | Ti [0.25] | fa | Ki | Ti | Dhin [1.00] | Na | Dha [0.20]++ | Ge | Ti | Ta | Na | Ge | Ti | Te | Na | Ge | Ti | Ta | Ge+ | Ti | Te | Dha | Dha | Ti | Te | Dha | Dha | Ti | Te | Dha | Dha1 | Dha1+ | Dhin1 | Dha [0.50]+ | Ge | Dhin1 | Na

