In [0]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
! ls "/content/gdrive/My Drive/Colab Notebooks/Generate Music/char-rnn-keras-master.zip (Unzipped Files)/char-rnn-keras-master/"

data   model.py   __pycache__  Solution.ipynb	Visualization.ipynb
logs   model.pyc  README.md    train.py
model  output	  sample.py    Untitled0.ipynb


In [0]:
import warnings
warnings.filterwarnings('ignore')

import os
import json
import numpy as np

from keras.models import Sequential, model_from_json, Model
from keras.layers import LSTM, Dropout, Dense, Activation, Embedding, TimeDistributed, Input

Using TensorFlow backend.


In [0]:
DIR = "/content/gdrive/My Drive/Colab Notebooks/Generate Music/char-rnn-keras-master.zip (Unzipped Files)/char-rnn-keras-master/"
DATA_DIR = os.path.join(DIR, 'data')
INPUT_FILE = os.path.join(DATA_DIR, 'Christmas_input.txt')
MODEL_DIR = os.path.join(DIR, 'model')

print(DATA_DIR, "\n", INPUT_FILE, "\n", MODEL_DIR)

BATCHES = 16
SEQ_LENGTH = 128

/content/gdrive/My Drive/Colab Notebooks/Generate Music/char-rnn-keras-master.zip (Unzipped Files)/char-rnn-keras-master/data 
 /content/gdrive/My Drive/Colab Notebooks/Generate Music/char-rnn-keras-master.zip (Unzipped Files)/char-rnn-keras-master/data/Christmas_input.txt 
 /content/gdrive/My Drive/Colab Notebooks/Generate Music/char-rnn-keras-master.zip (Unzipped Files)/char-rnn-keras-master/model


## Logic: 
> Divide the entire data into chunks (batches), then at each iteration generate a batch of data such that for every batch select the sequence_length # of chars.

__Corpus:__ "Quick Brown Fox jumped over the lazy dog"<br>
length = 40; batches = 4; sequence_length = 5; batch_chars = 10;<br>

<font color=green>__Batch Chars:__</font><br>
Quick Brow<br>
n Fox jump<br>
ed over th<br>
e lazy dog<br>

<font color=green>__Batch_Seq-1 data:__</font><br>
Quick<br>
n Fox<br>
ed ov<br>
e laz<br>

<font color=green>__Batch_Seq-2 data:__</font><br>
 Brow<br>
 jump<br>
er th<br>
y dog<br>

In [0]:
def read_batches(data, vocab_size):
    '''Generator Function to generate data in batches.'''
    data_len = len(data)    # This is the Total # of characters in the corpus
    batch_chars = int(data_len / BATCHES)    # This denotes the max # of characters present in each batch
    
    # Sequence_length denotes the # of chars to be fed to the ML model (Input --> # rows = batches, #cols = sequence_length)
    # Following loop will run for all the chars in the corpus with a step of sequence_length
    for start in range(0, batch_chars - SEQ_LENGTH, SEQ_LENGTH):
        X = np.zeros((BATCHES, SEQ_LENGTH))    # This will initialize a np_array with zeros of size [16x64]
        Y = np.zeros((BATCHES, SEQ_LENGTH, vocab_size))    # This will initialize a np_array with zeros of size [16x64x86]
        # Essentially Y holds the values of the next sequence character in one-hot encoded form in the Corpus (Target Variable)
        
        # Following nested loop will run thru all the batches for every char in each sequence
        for batch in range(BATCHES):
            for i in range(SEQ_LENGTH):
                X[batch, i] = data[batch * batch_chars + start + i]
#                 try:
                Y[batch, i, data[batch * batch_chars + start + i + 1]] = 1
#                 except:
#                     Y[batch, i, data[0]] = 1    # For the last character, the Target will point to the 1st character

        yield (X, Y)

In [0]:
def get_model(batch_size, seq_len, vocab_size):
#     model = Sequential()
    input1 = Input(batch_shape=(batch_size, seq_len))
    embeddings = Embedding(vocab_size, 512)(input1)
    
    lstm_out, state_h, state_c = LSTM(512, return_sequences=True, return_state=True)(embeddings)
    lstm_out, state_h, state_c = LSTM(512, return_sequences=True, return_state=True)(lstm_out)
    lstm_out = LSTM(512, return_sequences=True, activation='tanh')(lstm_out)
    lstm_out = Dropout(0.5)(lstm_out)
    
    lstm_out, state_h, state_c = LSTM(512, return_sequences=True, return_state=True)(lstm_out)
    lstm_out, state_h, state_c = LSTM(512, return_sequences=True, return_state=True)(lstm_out)
    lstm_out = LSTM(512, return_sequences=True, activation='tanh')(lstm_out)
    lstm_out = Dropout(0.5)(lstm_out)
    
    lstm_out, state_h, state_c = LSTM(512, return_sequences=True, return_state=True)(lstm_out)
    lstm_out, state_h, state_c = LSTM(512, return_sequences=True, return_state=True)(lstm_out)
    lstm_out = LSTM(512, return_sequences=True, activation='tanh')(lstm_out)
    lstm_out = Dropout(0.5)(lstm_out)
    
    output1 = TimeDistributed(Dense(vocab_size, activation='softmax'))(lstm_out)
    model = Model(inputs=input1, outputs=output1)
    
#     model.add(Embedding(vocab_size, 512, batch_input_shape=(batch_size, seq_len)))
    
#     for i in range(3):
#         model.add(LSTM(256, return_sequences=(i != 2), return_state=True))
#         model.add(LSTM(256, return_sequences=True, return_state=False))
#         model.add(Dropout(0.3))
#     model.add(LSTM(256, return_sequences=True, return_state=True))
#     model.add(Dropout(0.3))
#     model.add(LSTM(256, return_sequences=True, return_state=True))
#     model.add(Dropout(0.3))
#     model.add(LSTM(256))
    
#     model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))
    return model

In [0]:
def save_weights(epoch, model):
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)
    model.save_weights(os.path.join(MODEL_DIR, f'weights.{epoch}.h5'))
    print(f"Saved checkpoint to 'weights.{epoch}.h5'")

In [0]:
%%time
def train_data(text, epochs=2000, save_freq=50):
    char_to_idx = { ch: i for (i, ch) in enumerate(sorted(set(text))) }
    
    with open(os.path.join(DATA_DIR, 'char_to_idx.json'), 'w') as file:
        json.dump(char_to_idx, file)
    
    vocab_size = len(char_to_idx)
    print(f"Number of unique characters: {vocab_size}")

    indexed_data = np.array([char_to_idx[i] for i in text], dtype=np.int32)
    print(f"Total Characters in File-'{INPUT_FILE}' ==> {indexed_data.size}")

    with open('progress.csv', 'w') as file:
        file.write("epoch,loss,accuracy\n")

    model = get_model(BATCHES, SEQ_LENGTH, vocab_size)
    print(model.summary())
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    if os.path.exists(os.path.join(MODEL_DIR, 'weights.final.h5')):
        model.load_weights(os.path.join(MODEL_DIR, 'weights.final.h5'))

    for epoch in range(1, epochs+1):
        losses, accs = [], []
        for i, (x, y) in enumerate(read_batches(indexed_data, vocab_size), start=1):
            loss, acc = model.train_on_batch(x, y)
#             print("I AM HERE ===>", i)
            if i % 10 == 0:
                print(f"Epoch: {epoch} =====> Batch: {i}, Loss: {loss}, Accuracy: {acc}")
            losses.append(loss)
            accs.append(acc)
            
        if epoch % 10 == 0:
                print(f"Epoch: {epoch} =====> Batch: {i}, Loss: {loss}, Accuracy: {acc}")

        with open('progress.csv', 'a') as file:
            file.write(f"{epoch},{np.average(losses)},{np.average(accs)}\n")
        
        if epoch % save_freq == 0:
            save_weights(epoch, model)
    
    with open(os.path.join(MODEL_DIR, 'model.json'), 'w') as file:
        file.write(model.to_json())
    save_weights("final", model)

text = open(os.path.join(DATA_DIR, INPUT_FILE)).read()
train_data(text)

Number of unique characters: 73
Total Characters in File-'/content/gdrive/My Drive/Colab Notebooks/Generate Music/char-rnn-keras-master.zip (Unzipped Files)/char-rnn-keras-master/data/Christmas_input.txt' ==> 3839
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_23 (InputLayer)        (16, 128)                 0         
_________________________________________________________________
embedding_23 (Embedding)     (16, 128, 512)            37376     
_________________________________________________________________
lstm_199 (LSTM)              [(16, 128, 512), (16, 512 2099200   
_________________________________________________________________
lstm_200 (LSTM)              [(16, 128, 512), (16, 512 2099200   
_________________________________________________________________
lstm_201 (LSTM)              (16, 128, 512)            2099200   
____________________________________________________________

In [0]:
# def build_generating 

def generate_data(epoch='final', header='', max_chars=2048):
    
    with open(os.path.join(DATA_DIR, 'char_to_idx.json')) as f:
        char_to_idx = json.load(f)
    vocab_size = len(char_to_idx)
    
    idx_to_char = { i: ch for (ch, i) in char_to_idx.items() }
    indexed_data = [char_to_idx[i] for i in header]
    
#     model = open(os.path.join(MODEL_DIR, 'model.json'), 'r').read()
#     model = model_from_json(model)
    model = get_model(1, 1, vocab_size)
    model.load_weights(os.path.join(MODEL_DIR, f'weights.{epoch}.h5'))
    
    for i in range(max_chars):
        batch = np.zeros((1, 1))
        batch[0, 0] = np.random.randint(vocab_size)
        
        result = model.predict_on_batch(batch).ravel()
        indexed_data.append(np.random.choice(range(vocab_size), p=result))
    
    return ''.join(idx_to_char[i] for i in indexed_data)

print(generate_data())

XA7G"m "g"XmmmF"| m" 
XAa ":  "m"G X"ABm  "D"A "" ASn |  n" gmA"mmm"nnCA"" |G XA%N" G   mAD" m" n""|X" |""m"| "NCXgXNAX mG"" "mX """NgGm"NXXX
|A" ""%n""BG" 
N"X m"N:|M|N  mi
nX"""
"ng NDm o 
N"|   "AX:A"Ao "X||mN"NNg"gNmmX" m"N   m"mG"g  | |NXNg""|"
mNAm ngA n  Bgm"An"mmAGmgNC " A """mm "CGG 
"gM "mNnDm"mmgg"" " m
:XAA6X
mN g""
m XE
"n Cn"G"gm"mh n mG""|| n|"G"Gmg" ||  n""""m" "gm MG ""m"|A Gm gNtm""gXA:A "m"m n nW m
mAt"""AA"""NN"|Gm e:Ammnm" Gg"N"GD" "
X|mAm "mA mA |"imG"mNm"N"A"nA n X"XmAm Am  |"""X X N"gmm "GG
 "  AX
m  g "" " | Mm  g"|gAm  tm"NDmAG m "" |mmG"G A g"G "ngGuN mngg
C"m"m"m ""m""mDA"

"noA Gm/ mgm m"  N:
" """"Nm|Ang"A
gND :"Gm|n """m"G|"""aX"g"SNDX n" " m|"g GAm|m|"| gm ngDNA """ m | n1" "  m| X AmG""|n  "|GAl ""n 
 Am" |  mGT mmGB| n:mg nmGm" gm    D"| ""nGA"X"|"m    "mG"D:\m|Nmmn"X"mXXmgA"n" D"mNm"m " Xg  ""Ag nGNmm 
m m""" """ XA :F Xg n NA"ng G ""mA A
"NnnAN FM|"mn" A"A"   X  N|AAMAmX"""m
XmXAX"
 nN: ngNtm "nmX"XM|oGg AXnA mnNNA g" m|
""  Am nXn A"Xgm"XDX"AAm"mnmG

In [0]:
# np.array([0.1, 0.2, 0.3]).reshape((1,3,1))