In [6]:
######################################################################################
'''Copyright (c) 2004- 2024 , Prof. Radhamadhab Dalai Odisha, India
Author's email address :  rmdi115@gmail.com'''
###################################################################################
import numpy as np

# Activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of activation function
def sigmoid_derivative(x):
    return x * (1 - x)

class RNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        # Weight matrices
        self.U = np.random.randn(hidden_size, input_size)
        self.W = np.random.randn(hidden_size, hidden_size)
        self.V = np.random.randn(output_size, hidden_size)
        
        # Biases
        self.b_hidden = np.zeros((hidden_size, 1))
        self.b_output = np.zeros((output_size, 1))
        
    def forward(self, x):
        T = x.shape[1]
        h = np.zeros((self.hidden_size, T))
        y = np.zeros((self.output_size, T))
        
        for t in range(T):
            xt = x[:, t].reshape(-1, 1)
            if t == 0:
                h[:, t] = sigmoid(np.dot(self.U, xt) + self.b_hidden).flatten()
            else:
                h[:, t] = sigmoid(np.dot(self.U, xt) + np.dot(self.W, h[:, t-1].reshape(-1, 1)) + self.b_hidden).flatten()
            
            y[:, t] = sigmoid(np.dot(self.V, h[:, t].reshape(-1, 1)) + self.b_output).flatten()
        
        return h, y
    
    def backward(self, x, y_true, h, y_pred, lr):
        T = x.shape[1]
        dU = np.zeros_like(self.U)
        dW = np.zeros_like(self.W)
        dV = np.zeros_like(self.V)
        db_hidden = np.zeros_like(self.b_hidden)
        db_output = np.zeros_like(self.b_output)
        
        delta_hidden_next = np.zeros((self.hidden_size, 1))
        
        for t in range(T-1, -1, -1):
            yt = y_true[:, t].reshape(-1, 1)
            yp = y_pred[:, t].reshape(-1, 1)
            delta_output = (yp - yt) * sigmoid_derivative(yp)
            dV += np.dot(delta_output, h[:, t].reshape(1, -1))
            db_output += delta_output
            
            delta_hidden = np.dot(self.V.T, delta_output) * sigmoid_derivative(h[:, t].reshape(-1, 1))
            if t > 0:
                delta_hidden += np.dot(self.W.T, delta_hidden_next)
            
            xt = x[:, t].reshape(-1, 1)
            dU += np.dot(delta_hidden, xt.T)
            db_hidden += delta_hidden
            
            if t > 0:
                dW += np.dot(delta_hidden, h[:, t-1].reshape(1, -1))
            
            delta_hidden_next = delta_hidden
        
        # Update weights and biases
        self.U -= lr * dU
        self.W -= lr * dW
        self.V -= lr * dV
        self.b_hidden -= lr * db_hidden
        self.b_output -= lr * db_output

# Example usage
input_size = 100  # Assuming input matrix size is 100xT
hidden_size = 64
output_size = 100

# Create an RNN instance
rnn = RNN(input_size, hidden_size, output_size)

# Generate random input matrix (assuming it contains Sanskrit characters)
x = np.random.randn(input_size, 10)  # Assuming 10 time steps

# Forward pass
h, y_pred = rnn.forward(x)

# Backward pass (assuming y_pred is the target output)
lr = 0.01
rnn.backward(x, y_pred, h, y_pred, lr)

# Character mappings (for illustration)
sanskrit_to_odia = {
    'अ': 'ଅ', 'आ': 'ଆ', 'इ': 'ଇ', 'ई': 'ଈ', 'उ': 'ଉ', 
    'ऊ': 'ଊ', 'ऋ': 'ଋ', 'ए': 'ଏ', 'ऐ': 'ଐ', 'ओ': 'ଓ', 'औ': 'ଔ'
}

# Convert characters to integer representation
def char_to_int(char, vocab):
    return vocab[char] if char in vocab else vocab['UNK']

def int_to_char(index, inv_vocab):
    return inv_vocab[index]

# Build vocabularies
sanskrit_chars = sorted(list(sanskrit_to_odia.keys()))
odia_chars = sorted(list(set(sanskrit_to_odia.values())))

sanskrit_vocab = {char: idx for idx, char in enumerate(sanskrit_chars)}
odia_vocab = {char: idx for idx, char in enumerate(odia_chars)}

# Add a special 'UNK' token for unknown characters
sanskrit_vocab['UNK'] = len(sanskrit_vocab)
odia_vocab['UNK'] = len(odia_vocab)

inv_odia_vocab = {idx: char for char, idx in odia_vocab.items()}

# Prepare data
def prepare_data(text, mapping, src_vocab, tgt_vocab):
    input_data = []
    target_data = []
    for char in text:
        input_char = char_to_int(char, src_vocab)
        input_data.append(input_char)
        
        if char in mapping:
            target_char = mapping[char]
        else:
            target_char = 'UNK'
        target_data.append(char_to_int(target_char, tgt_vocab))
    
    return np.array(input_data).reshape(-1, 1), np.array(target_data).reshape(-1, 1)

# Sample input text in Sanskrit
sanskrit_text = "अआइईउऊऋएऐओऔ"

# Prepare training data
input_data, target_data = prepare_data(sanskrit_text, sanskrit_to_odia, sanskrit_vocab, odia_vocab)

# One-hot encoding for the input and target data
def one_hot_encode(data, vocab_size):
    one_hot = np.zeros((data.size, vocab_size))
    one_hot[np.arange(data.size), data.flatten()] = 1
    return one_hot

# Training
lr = 0.01
num_epochs = 1000

for epoch in range(num_epochs):
    # One-hot encode input and target data
    x_encoded = one_hot_encode(input_data, input_size)
    y_encoded = one_hot_encode(target_data, output_size)
    
    # Forward pass
    h, y_pred = rnn.forward(x_encoded.T)
    
    # Backward pass
    rnn.backward(x_encoded.T, y_encoded.T, h, y_pred, lr)
    
    if epoch % 100 == 0:
        loss = np.mean((y_pred - y_encoded.T) ** 2)
        print(f"Epoch {epoch}, Loss: {loss}")

# Inference
def predict(rnn, text, src_vocab, tgt_vocab, inv_tgt_vocab):
    input_data, _ = prepare_data(text, {}, src_vocab, tgt_vocab)
    x_encoded = one_hot_encode(input_data, len(src_vocab))
    
    _, y_pred = rnn.forward(x_encoded.T)
    
    y_pred_indices = np.argmax(y_pred, axis=0)
    output_text = ''.join([int_to_char(idx, inv_tgt_vocab) for idx in y_pred_indices])
    
    return output_text

# Sample input text in Sanskrit for prediction
sample_text = "अआइईउऊऋएऐओऔ"

# Predict Odia text
predicted_text = predict(rnn, sample_text, sanskrit_vocab, odia_vocab, inv_odia_vocab)
print("Predicted Odia Text:", predicted_text)


Epoch 0, Loss: 0.4681524318815702


  """


Epoch 100, Loss: 0.18758802808359074
Epoch 200, Loss: 0.17770087723250533
Epoch 300, Loss: 0.17636825992241711
Epoch 400, Loss: 0.15753630605875954
Epoch 500, Loss: 0.14626931173766103
Epoch 600, Loss: 0.13753374779123764
Epoch 700, Loss: 0.13749546064546003
Epoch 800, Loss: 0.13746613754177991
Epoch 900, Loss: 0.13742716125599438


ValueError: shapes (64,100) and (12,1) not aligned: 100 (dim 1) != 12 (dim 0)

In [8]:
import numpy as np

# Activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of activation function
def sigmoid_derivative(x):
    return x * (1 - x)

class RNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        # Weight matrices
        self.U = np.random.randn(hidden_size, input_size)
        self.W = np.random.randn(hidden_size, hidden_size)
        self.V = np.random.randn(output_size, hidden_size)
        
        # Biases
        self.b_hidden = np.zeros((hidden_size, 1))
        self.b_output = np.zeros((output_size, 1))
        
    def forward(self, x):
        T = x.shape[1]
        h = np.zeros((self.hidden_size, T))
        y = np.zeros((self.output_size, T))
        
        for t in range(T):
            xt = x[:, t].reshape(-1, 1)
            if t == 0:
                h[:, t] = sigmoid(np.dot(self.U, xt) + self.b_hidden).flatten()
            else:
                h[:, t] = sigmoid(np.dot(self.U, xt) + np.dot(self.W, h[:, t-1].reshape(-1, 1)) + self.b_hidden).flatten()
            
            y[:, t] = sigmoid(np.dot(self.V, h[:, t].reshape(-1, 1)) + self.b_output).flatten()
        
        return h, y
    
    def backward(self, x, y_true, y_pred, h, lr):
        T = x.shape[1]
        dU = np.zeros_like(self.U)
        dW = np.zeros_like(self.W)
        dV = np.zeros_like(self.V)
        db_hidden = np.zeros_like(self.b_hidden)
        db_output = np.zeros_like(self.b_output)
        
        delta_hidden_next = np.zeros((self.hidden_size, 1))
        
        for t in range(T-1, -1, -1):
            yt = y_true[:, t].reshape(-1, 1)
            yp = y_pred[:, t].reshape(-1, 1)
            delta_output = (yp - yt) * sigmoid_derivative(yp)
            dV += np.dot(delta_output, h[:, t].reshape(1, -1))
            db_output += delta_output
            
            delta_hidden = np.dot(self.V.T, delta_output) * sigmoid_derivative(h[:, t].reshape(-1, 1))
            if t > 0:
                delta_hidden += np.dot(self.W.T, delta_hidden_next)
            
            xt = x[:, t].reshape(-1, 1)
            dU += np.dot(delta_hidden, xt.T)
            db_hidden += delta_hidden
            
            if t > 0:
                dW += np.dot(delta_hidden, h[:, t-1].reshape(1, -1))
            
            delta_hidden_next = delta_hidden
        
        # Update weights and biases
        self.U -= lr * dU
        self.W -= lr * dW
        self.V -= lr * dV
        self.b_hidden -= lr * db_hidden
        self.b_output -= lr * db_output

# Example usage
input_size = 12  # Assuming 12 unique Sanskrit characters
hidden_size = 64
output_size = 12  # Assuming 12 unique Odia characters

# Create an RNN instance
rnn = RNN(input_size, hidden_size, output_size)

# Character mappings (for illustration)
sanskrit_to_odia = {
    'अ': 'ଅ', 'आ': 'ଆ', 'इ': 'ଇ', 'ई': 'ଈ', 'उ': 'ଉ', 
    'ऊ': 'ଊ', 'ऋ': 'ଋ', 'ए': 'ଏ', 'ऐ': 'ଐ', 'ओ': 'ଓ', 'औ': 'ଔ'
}

# Convert characters to integer representation
def char_to_int(char, vocab):
    return vocab[char] if char in vocab else vocab['UNK']

def int_to_char(index, inv_vocab):
    return inv_vocab[index]

# Build vocabularies
sanskrit_chars = sorted(list(sanskrit_to_odia.keys()))
odia_chars = sorted(list(set(sanskrit_to_odia.values())))

sanskrit_vocab = {char: idx for idx, char in enumerate(sanskrit_chars)}
odia_vocab = {char: idx for idx, char in enumerate(odia_chars)}

# Add a special 'UNK' token for unknown characters
sanskrit_vocab['UNK'] = len(sanskrit_vocab)
odia_vocab['UNK'] = len(odia_vocab)

inv_odia_vocab = {idx: char for char, idx in odia_vocab.items()}

# Prepare data
def prepare_data(text, mapping, src_vocab, tgt_vocab):
    input_data = []
    target_data = []
    for char in text:
        input_char = char_to_int(char, src_vocab)
        input_data.append(input_char)
        
        if char in mapping:
            target_char = mapping[char]
        else:
            target_char = 'UNK'
        target_data.append(char_to_int(target_char, tgt_vocab))
    
    return np.array(input_data).reshape(-1, 1), np.array(target_data).reshape(-1, 1)

# Sample input text in Sanskrit
sanskrit_text = "अआइईउऊऋएऐओऔ"

# Prepare training data
input_data, target_data = prepare_data(sanskrit_text, sanskrit_to_odia, sanskrit_vocab, odia_vocab)

# One-hot encoding for the input and target data
def one_hot_encode(data, vocab_size):
    one_hot = np.zeros((data.size, vocab_size))
    one_hot[np.arange(data.size), data.flatten()] = 1
    return one_hot

# Training
lr = 0.01
num_epochs = 1000

for epoch in range(num_epochs):
    # One-hot encode input and target data
    x_encoded = one_hot_encode(input_data, input_size)
    y_encoded = one_hot_encode(target_data, output_size)
    
    # Forward pass
    h, y_pred = rnn.forward(x_encoded.T)
    
    # Backward pass
    rnn.backward(x_encoded.T, y_encoded.T, y_pred, h, lr)
    
    if epoch % 100 == 0:
        loss = np.mean((y_pred - y_encoded.T) ** 2)
        print(f"Epoch {epoch}, Loss: {loss}")

# Inference
def predict(rnn, text, src_vocab, tgt_vocab, inv_tgt_vocab):
    input_data, _ = prepare_data(text, {}, src_vocab, tgt_vocab)
    x_encoded = one_hot_encode(input_data, len(src_vocab))
    
    _, y_pred = rnn.forward(x_encoded.T)
    
    y_pred_indices = np.argmax(y_pred, axis=0)
    output_text = ''.join([int_to_char(idx, inv_tgt_vocab) for idx in y_pred_indices])
    
    return output_text

# Sample input text in Sanskrit for prediction
sample_text = "अआइईउऊऋएऐओऔ"

# Predict Odia text
predicted_text = predict(rnn, sample_text, sanskrit_vocab, odia_vocab, inv_odia_vocab)
print("Predicted Odia Text:", predicted_text)


Epoch 0, Loss: 0.6088008579235998


  """


Epoch 100, Loss: 0.3516792618456343
Epoch 200, Loss: 0.28307130440126715
Epoch 300, Loss: 0.21461898024752077
Epoch 400, Loss: 0.21455239094185952
Epoch 500, Loss: 0.21449701802055263
Epoch 600, Loss: 0.21444115099812724
Epoch 700, Loss: 0.21438266757522778
Epoch 800, Loss: 0.21431990304529225
Epoch 900, Loss: 0.21425115980838186
Predicted Odia Text: ଋଋଏଋଋଋଋଋଋଋଋ


In [10]:
import numpy as np

# Activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of activation function
def sigmoid_derivative(x):
    return x * (1 - x)

class RNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        # Weight matrices
        self.U = np.random.randn(hidden_size, input_size)
        self.W = np.random.randn(hidden_size, hidden_size)
        self.V = np.random.randn(output_size, hidden_size)
        
        # Biases
        self.b_hidden = np.zeros((hidden_size, 1))
        self.b_output = np.zeros((output_size, 1))
        
    def forward(self, x):
        T = x.shape[1]
        h = np.zeros((self.hidden_size, T))
        y = np.zeros((self.output_size, T))
        
        for t in range(T):
            xt = x[:, t].reshape(-1, 1)
            if t == 0:
                h[:, t] = sigmoid(np.dot(self.U, xt) + self.b_hidden).flatten()
            else:
                h[:, t] = sigmoid(np.dot(self.U, xt) + np.dot(self.W, h[:, t-1].reshape(-1, 1)) + self.b_hidden).flatten()
            
            y[:, t] = sigmoid(np.dot(self.V, h[:, t].reshape(-1, 1)) + self.b_output).flatten()
        
        return h, y
    
    def backward(self, x, y_true, y_pred, h, lr):
        T = x.shape[1]
        dU = np.zeros_like(self.U)
        dW = np.zeros_like(self.W)
        dV = np.zeros_like(self.V)
        db_hidden = np.zeros_like(self.b_hidden)
        db_output = np.zeros_like(self.b_output)
        
        delta_hidden_next = np.zeros((self.hidden_size, 1))
        
        for t in range(T-1, -1, -1):
            yt = y_true[:, t].reshape(-1, 1)
            yp = y_pred[:, t].reshape(-1, 1)
            delta_output = (yp - yt) * sigmoid_derivative(yp)
            dV += np.dot(delta_output, h[:, t].reshape(1, -1))
            db_output += delta_output
            
            delta_hidden = np.dot(self.V.T, delta_output) * sigmoid_derivative(h[:, t].reshape(-1, 1))
            if t > 0:
                delta_hidden += np.dot(self.W.T, delta_hidden_next)
            
            xt = x[:, t].reshape(-1, 1)
            dU += np.dot(delta_hidden, xt.T)
            db_hidden += delta_hidden
            
            if t > 0:
                dW += np.dot(delta_hidden, h[:, t-1].reshape(1, -1))
            
            delta_hidden_next = delta_hidden
        
        # Update weights and biases
        self.U -= lr * dU
        self.W -= lr * dW
        self.V -= lr * dV
        self.b_hidden -= lr * db_hidden
        self.b_output -= lr * db_output

# Example usage
input_size = 12  # Assuming 12 unique Sanskrit characters
hidden_size = 64
output_size = 12  # Assuming 12 unique Odia characters

# Create an RNN instance
rnn = RNN(input_size, hidden_size, output_size)

# Character mappings (for illustration)
sanskrit_to_odia = {
    'अ': 'ଅ', 'आ': 'ଆ', 'इ': 'ଇ', 'ई': 'ଈ', 'उ': 'ଉ', 
    'ऊ': 'ଊ', 'ऋ': 'ଋ', 'ए': 'ଏ', 'ऐ': 'ଐ', 'ओ': 'ଓ', 'औ': 'ଔ'
}

# Convert characters to integer representation
def char_to_int(char, vocab):
    return vocab[char] if char in vocab else vocab['UNK']

def int_to_char(index, inv_vocab):
    return inv_vocab[index]

# Build vocabularies
sanskrit_chars = sorted(list(sanskrit_to_odia.keys()))
odia_chars = sorted(list(set(sanskrit_to_odia.values())))

sanskrit_vocab = {char: idx for idx, char in enumerate(sanskrit_chars)}
odia_vocab = {char: idx for idx, char in enumerate(odia_chars)}

# Add a special 'UNK' token for unknown characters
sanskrit_vocab['UNK'] = len(sanskrit_vocab)
odia_vocab['UNK'] = len(odia_vocab)

inv_odia_vocab = {idx: char for char, idx in odia_vocab.items()}

# Prepare data
def prepare_data(text, mapping, src_vocab, tgt_vocab):
    input_data = []
    target_data = []
    for char in text:
        input_char = char_to_int(char, src_vocab)
        input_data.append(input_char)
        
        if char in mapping:
            target_char = mapping[char]
        else:
            target_char = 'UNK'
        target_data.append(char_to_int(target_char, tgt_vocab))
    
    return np.array(input_data).reshape(-1, 1), np.array(target_data).reshape(-1, 1)

# Sample input text in Sanskrit
sanskrit_text = "अआइईउऊऋएऐओऔ"

# Prepare training data
input_data, target_data = prepare_data(sanskrit_text, sanskrit_to_odia, sanskrit_vocab, odia_vocab)

# One-hot encoding for the input and target data
def one_hot_encode(data, vocab_size):
    one_hot = np.zeros((data.size, vocab_size))
    one_hot[np.arange(data.size), data.flatten()] = 1
    return one_hot

# Training
lr = 0.01
num_epochs = 10000

for epoch in range(num_epochs):
    # One-hot encode input and target data
    x_encoded = one_hot_encode(input_data, input_size)
    y_encoded = one_hot_encode(target_data, output_size)
    
    # Forward pass
    h, y_pred = rnn.forward(x_encoded.T)
    
    # Backward pass
    rnn.backward(x_encoded.T, y_encoded.T, y_pred, h, lr)
    
    if epoch % 100 == 0:
        loss = np.mean((y_pred - y_encoded.T) ** 2)
        print(f"Epoch {epoch}, Loss: {loss}")

# Inference
def predict(rnn, text, src_vocab, tgt_vocab, inv_tgt_vocab):
    input_data, _ = prepare_data(text, {}, src_vocab, tgt_vocab)
    x_encoded = one_hot_encode(input_data, len(src_vocab))
    
    _, y_pred = rnn.forward(x_encoded.T)
    
    y_pred_indices = np.argmax(y_pred, axis=0)
    output_text = ''.join([int_to_char(idx, inv_tgt_vocab) for idx in y_pred_indices])
    
    return output_text

# Sample input text in Sanskrit for prediction
sample_text = "अआइईउऊऋएऐओऔ"

# Predict Odia text
predicted_text = predict(rnn, sample_text, sanskrit_vocab, odia_vocab, inv_odia_vocab)
print("Predicted Odia Text:", predicted_text)


Epoch 0, Loss: 0.35199852570175844
Epoch 100, Loss: 0.1477246497675128


  """


Epoch 200, Loss: 0.14748312401594962
Epoch 300, Loss: 0.1474274050494749
Epoch 400, Loss: 0.14739803739380355
Epoch 500, Loss: 0.14737796245611307
Epoch 600, Loss: 0.14736223148259667
Epoch 700, Loss: 0.14734887960350132
Epoch 800, Loss: 0.147336975393266
Epoch 900, Loss: 0.14732602047762164
Epoch 1000, Loss: 0.14731572275999827
Epoch 1100, Loss: 0.14730589791008714
Epoch 1200, Loss: 0.14729642189946487
Epoch 1300, Loss: 0.14728720620188854
Epoch 1400, Loss: 0.14727818394086534
Epoch 1500, Loss: 0.14726930168568542
Epoch 1600, Loss: 0.1472605143145074
Epoch 1700, Loss: 0.1472517816039789
Epoch 1800, Loss: 0.1472430658068355
Epoch 1900, Loss: 0.147234329784445
Epoch 2000, Loss: 0.14722553542007946
Epoch 2100, Loss: 0.14721664211956337
Epoch 2200, Loss: 0.14720760524111828
Epoch 2300, Loss: 0.14719837429989374
Epoch 2400, Loss: 0.147188890768692
Epoch 2500, Loss: 0.1471790852406456
Epoch 2600, Loss: 0.14716887361908013
Epoch 2700, Loss: 0.14715815182797043
Epoch 2800, Loss: 0.14714678824

In [None]:
#Try to improve further