# Assignment - 2 on BPTT

### Importing Libraries

In [8]:
import json
import numpy as np
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
from tabulate import tabulate
from sklearn.model_selection import KFold

### Loading Dataset

In [9]:
np.random.seed(1)

# Load data from JSONL file
def load_data(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            example = json.loads(line)
            data.append(example)
    return data

# Load training and test data
train_data = load_data('/content/train.jsonl')

# Print a few rows of the training data
print("Train data")
for i in range(5):
    print(train_data[i])

# Load training and test data
test_data = load_data('/content/test.jsonl')

# Print a few rows of the test data
print("Test data")
for i in range(5):
    print(test_data[i])

Train data
{'tokens': ['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.'], 'chunk_tags': [1, 1, 1, 0, 1, 1, 1, 0, 1], 'pos_tags': [1, 4, 3, 1, 4, 4, 3, 1, 4]}
{'tokens': ['Peter', 'Blackburn'], 'chunk_tags': [1, 0], 'pos_tags': [1, 1]}
{'tokens': ['BRUSSELS', '1996-08-22'], 'chunk_tags': [1, 0], 'pos_tags': [1, 4]}
{'tokens': ['The', 'European', 'Commission', 'said', 'on', 'Thursday', 'it', 'disagreed', 'with', 'German', 'advice', 'to', 'consumers', 'to', 'shun', 'British', 'lamb', 'until', 'scientists', 'determine', 'whether', 'mad', 'cow', 'disease', 'can', 'be', 'transmitted', 'to', 'sheep', '.'], 'chunk_tags': [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1], 'pos_tags': [2, 1, 1, 4, 4, 1, 4, 4, 4, 3, 1, 4, 1, 4, 4, 3, 1, 4, 1, 4, 4, 3, 1, 1, 4, 4, 4, 4, 1, 4]}
{'tokens': ['Germany', "'s", 'representative', 'to', 'the', 'European', 'Union', "'s", 'veterinary', 'committee', 'Werner', 'Zwingmann', 'said', 'on', 'Wedne

### Preprocessing

In [10]:
# Function to one-hot encode POS tags with size 5
def one_hot_encode_prev(pos_tag):
    one_hot = [0, 0, 0, 0, 0]
    one_hot[pos_tag] = 1
    return one_hot

# Function to one-hot encode POS tags with size 4
def one_hot_encode_curr(pos_tag):
    one_hot = [0, 0, 0, 0]
    one_hot[pos_tag - 1] = 1
    return one_hot

# Function to preprocess the data
def preprocess_data(data):
    x = []
    y = []
    for sample in data:
        tokens = sample['tokens']
        pos_tags = sample['pos_tags']
        chunk_tags = sample['chunk_tags']

        input_seq = []
        target_seq = []

        for i in range(len(tokens)):
            # One-hot encode previous and current words
            prev_word = one_hot_encode_prev(pos_tags[i - 1]) if i > 0 else [1, 0, 0, 0, 0]
            current_word = one_hot_encode_curr(pos_tags[i])

            # Concatenate previous and current word one-hot encodings
            input_seq.append(prev_word + current_word)

            # Append chunk tag to target sequence
            target_seq.append(chunk_tags[i])

        x.append(input_seq)
        y.append(target_seq)

    return x, y

### Single Recurrent Perceptron

In [11]:
class SingleRecurrentPerceptron:
    def __init__(self, input_size=9, learning_rate=0.0005):
        self.input_size = input_size
        self.learning_rate = learning_rate

        # Initialize weights and biases
        self.W_in = np.random.randn(1, input_size)
        self.W_rec = np.random.randn(1, 1)
        self.b = np.zeros((1, 1))

    def forward(self, x):
        self.prev_hidden = 0  # Initialize previous hidden state
        self.hidden_states = []  # Store hidden states
        self.hidden_states.append(0)
        self.outputs = []  # Store outputs

        # Forward pass through the SRP
        for t in range(len(x)):
            net_input = np.dot(self.W_in, x[t]) + np.dot(self.W_rec, self.prev_hidden) + self.b
            hidden_state = self.sigmoid(net_input)
            self.hidden_states.append(hidden_state)
            self.outputs.append(hidden_state)
            self.prev_hidden = hidden_state
        return self.outputs

    def backward(self, x, y):
        # Backpropagation through time
        delta_out = [target - output for output, target in zip(self.outputs, y)]

        dW_in = np.zeros_like(self.W_in)
        dW_rec = np.zeros_like(self.W_rec)
        db = np.zeros_like(self.b)

        for t in range(len(x)):
          delta_hidden = delta_out[t]
          dW_rec += delta_hidden * self.hidden_states[t]
          dW_in += delta_hidden * x[t].reshape(1, -1)
          db += delta_hidden
          for i in range(t-1,-1,-1):
            delta_hidden *= self.W_rec * self.outputs[i] * (1-self.outputs[i])
            dW_rec += delta_hidden * self.hidden_states[i]
            dW_in +=  delta_hidden * x[i].reshape(1, -1)
            db += delta_hidden

        # Update weights
        self.W_in += self.learning_rate * dW_in/len(x)
        self.W_rec += self.learning_rate * dW_rec/len(x)
        self.b += self.learning_rate * db/len(x)


    def sigmoid(self, x):
        # Avoid overflow by clipping large values
        x = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-x))


In [12]:
# Function to calculate metrics
def calculate_metrics(ground_truth, predictions):
    accuracy = accuracy_score(ground_truth, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(ground_truth, predictions, average='weighted',zero_division=1)
    class_wise_precision, class_wise_recall, class_wise_f1, _ = precision_recall_fscore_support(ground_truth, predictions, average=None, zero_division=1)

    # Compute class-wise accuracy
    class_wise_accuracy = []
    for i in range(len(class_wise_precision)):
        class_wise_accuracy.append(class_wise_precision[i] * class_wise_recall[i] / class_wise_f1[i] if class_wise_f1[i] != 0 else 0)

    return accuracy, precision, recall, f1, class_wise_precision, class_wise_recall, class_wise_f1, class_wise_accuracy

def flatten(list_of_lists):
    return [item for sublist in list_of_lists for item in sublist]

In [13]:
# Preprocess training data
train_input, train_target = preprocess_data(train_data)

# Preprocess test data
test_input, test_target = preprocess_data(test_data)

# 5-fold cross validation

In [14]:
# Initialize lists to store performance metrics across all folds
accuracies = []
precisions = []
recalls = []
f1_scores = []

# Define number of folds for cross-validation
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=1)

# Perform cross-validation
for fold_number, (train_index, val_index) in enumerate(kf.split(train_input), 1):
    print(f"Fold {fold_number}:")
    # Initialize the SingleRecurrentPerceptron for this fold
    model = SingleRecurrentPerceptron()

    # Extract training and validation data for this fold
    X_train_fold = [train_input[i] for i in train_index]
    X_val_fold = [train_input[i] for i in val_index]
    y_train_fold = [train_target[i] for i in train_index]
    y_val_fold = [train_target[i] for i in val_index]

    # Train the model on the current training fold
    num_epochs = 10
    prev_loss = 10000     # Arbitrary large
    for epoch in range(num_epochs):
      total_loss = 0
      for i in range(len(X_train_fold)):
        x = np.array(X_train_fold[i])
        y_true = np.array(y_train_fold[i])

        y_pred = np.array(model.forward(x))

        # Compute loss
        loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        total_loss += loss

        model.backward(x, y_true)

      avg_loss = total_loss / len(X_train_fold)
      if (avg_loss > prev_loss):
        break;
      prev_loss = avg_loss
      print(f"Epoch {epoch+1}, Average Loss: {avg_loss}")

    # Validate the model on the current validation fold
    ground_truth = flatten(y_val_fold)
    predictions = []
    threshold = 0.5
    for i in range(len(X_val_fold )):
        x = np.array(X_val_fold [i])
        y_pred = model.forward(x)
        y_pred = [1 if value >= threshold else 0 for value in y_pred]
        predictions.append(y_pred)

    predictions = flatten(predictions)

    accuracy, precision, recall, f1, class_wise_precision, class_wise_recall, class_wise_f1, class_wise_accuracy = calculate_metrics(ground_truth, predictions)

    # Store performance metrics for this fold
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

# Calculate mean performance metrics across all folds
mean_accuracy = np.mean(accuracies)
mean_precision = np.mean(precisions)
mean_recall = np.mean(recalls)
mean_f1_score = np.mean(f1_scores)

print("Mean Accuracy:", mean_accuracy)
print("Mean Precision:", mean_precision)
print("Mean Recall:", mean_recall)
print("Mean F1 Score:", mean_f1_score)

print("Accuracies: ", accuracies)

Fold 1:
Epoch 1, Average Loss: 0.8995608251799676
Epoch 2, Average Loss: 0.8448879515362022
Epoch 3, Average Loss: 0.8333566233142405
Epoch 4, Average Loss: 0.8309525240551755
Fold 2:
Epoch 1, Average Loss: 0.878939359458931
Epoch 2, Average Loss: 0.8143528293044621
Epoch 3, Average Loss: 0.7976440532361837
Epoch 4, Average Loss: 0.7933155284051989
Fold 3:
Epoch 1, Average Loss: 0.7301313131952497
Epoch 2, Average Loss: 0.694963467785395
Epoch 3, Average Loss: 0.6834476006187388
Fold 4:
Epoch 1, Average Loss: 0.7438824153888468
Epoch 2, Average Loss: 0.7232081934816477
Epoch 3, Average Loss: 0.7185729777843893
Fold 5:
Epoch 1, Average Loss: 0.7268785642741791
Epoch 2, Average Loss: 0.7135362978143811
Mean Accuracy: 0.8253095372343171
Mean Precision: 0.8300079772121418
Mean Recall: 0.8253095372343171
Mean F1 Score: 0.8173119285176884
Accuracies:  [0.8120929766556457, 0.8672761508349438, 0.8672835078659207, 0.8084552685848964, 0.7714397822301787]


### Training the model

In [None]:
best_model = SingleRecurrentPerceptron()
num_epochs = 10
prev_loss = 10000     # Arbitrary large
for epoch in range(num_epochs):
  total_loss = 0
  for i in range(len(train_input)):
    x = np.array(train_input[i])
    y_true = np.array(train_target[i])

    y_pred = np.array(best_model.forward(x))

    # Compute loss
    loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    total_loss += loss

    best_model.backward(x, y_true)

  avg_loss = total_loss / len(train_input)
  if (avg_loss > prev_loss):
    break;
  prev_loss = avg_loss
  print(f"Epoch {epoch+1}, Average Loss: {avg_loss}")

# Get ground truth from training data
ground_truth = flatten(train_target)
predictions = []
threshold = 0.5
for i in range(len(train_input )):
    x = np.array(train_input[i])
    y_pred = best_model.forward(x)
    y_pred = [1 if value >= threshold else 0 for value in y_pred]
    predictions.append(y_pred)

predictions = flatten(predictions)

accuracy, precision, recall, f1, class_wise_precision, class_wise_recall, class_wise_f1, class_wise_accuracy = calculate_metrics(ground_truth, predictions)

In [16]:
print("Train Accuracy:", accuracy)
print("Train Precision:", precision)
print("Train Recall:", recall)
print("Train F1 Score:", f1)
print("Train Class-wise Accuracy:", class_wise_accuracy)
print("Train Class-wise Precision:", class_wise_precision)
print("Train Class-wise Recall:", class_wise_recall)
print("Train Class-wise F1 Score:", class_wise_f1)

Train Accuracy: 0.862632046792816
Train Precision: 0.8785432111369627
Train Recall: 0.862632046792816
Train F1 Score: 0.8524650495490824
Train Class-wise Accuracy: [0.7836333523196484, 0.9128948051638008]
Train Class-wise Precision: [0.9653911 0.8362878]
Train Class-wise Recall: [0.60187561 0.98950181]
Train Class-wise F1 Score: [0.74147604 0.90646621]


In [17]:
# Print final weights
print("Final Weights:")
print("W_in:", best_model.W_in)
print("W_rec:", best_model.W_rec)
print("b:", best_model.b)

Final Weights:
W_in: [[ 0.84893976 -0.62494005 -1.33371963 -0.57746627 -0.04825618  0.00992997
   0.95409663  0.79556792  0.90009379]]
W_rec: [[0.43053134]]
b: [[0.01739229]]


# Testing the model performance

In [18]:
ground_truth = flatten(test_target)
predictions = []
threshold = 0.5
for i in range(len(test_input)):
        x = test_input[i]
        y_pred = best_model.forward(x)
        y_pred = [1 if value > threshold else 0 for value in y_pred]
        predictions.append(y_pred)

predictions = flatten(predictions)

# Calculate metrics for test data
test_accuracy, test_precision, test_recall, test_f1, test_class_precision, test_class_recall, test_class_f1,class_wise_accuracy = calculate_metrics(ground_truth, predictions)
print("Test Accuracy:", test_accuracy)
print("Test Precision:", test_precision)
print("Test Recall:", test_recall)
print("Test F1 Score:", test_f1)
print("Test Class-wise Accuracy:", class_wise_accuracy)
print("Test Class-wise Precision:", test_class_precision)
print("Test Class-wise Recall:", test_class_recall)
print("Test Class-wise F1 Score:", test_class_f1)

Test Accuracy: 0.8377516959190267
Test Precision: 0.861607769021668
Test Recall: 0.8377516959190267
Test F1 Score: 0.8240459771818623
Test Class-wise Accuracy: [0.7599091399147977, 0.897674275261382]
Test Class-wise Precision: [0.96613095 0.80572597]
Test Class-wise Recall: [0.55368733 0.98962258]
Test Class-wise F1 Score: [0.7039453  0.88825606]


### Language constraints inequalities

In [19]:
W_in = np.array(best_model.W_in)
W_rec = np.array(best_model.W_rec)
b = np.array(best_model.b)

# Extracting values from arrays
V_cap = W_in[0,0]
VNN = W_in[0,1]
VDT = W_in[0,2]
VJJ = W_in[0,3]
VOT = W_in[0,4]
WNN = W_in[0,5]
WDT = W_in[0,6]
WJJ = W_in[0,7]
WOT = W_in[0,8]
W_rec = W_rec[0,0]
b = -b[0,0]

inequalities = [
    ("V_cap + WDT > b", V_cap + WDT > b, V_cap + WDT - b),
    ("V_cap + WJJ > b", V_cap + WJJ > b, V_cap + WJJ - b),
    ("V_cap + WNN > b", V_cap + WNN > b, V_cap + WNN - b),
    ("V_cap + WOT > b", V_cap + WOT > b, V_cap + WOT - b),
    ("W_rec + VDT + WJJ < b", W_rec + VDT + WJJ < b, (W_rec + VDT + WJJ) - b),
    ("W_rec + VDT + WNN < b", W_rec + VDT + WNN < b, (W_rec + VDT + WNN) - b),
    ("VJJ + WJJ < b", VJJ + WJJ < b, VJJ + WJJ - b),
    ("VJJ + WNN < b", VJJ + WNN < b, VJJ + WNN - b),
    ("W_rec + VJJ + WJJ < b", W_rec + VJJ + WJJ < b, (W_rec + VJJ + WJJ) - b),
    ("W_rec + VJJ + WNN < b", W_rec + VJJ + WNN < b, (W_rec + VJJ + WNN) - b),
    ("VNN + WOT > b", VNN + WOT > b, VNN + WOT - b),
    ("W_rec + VNN + WOT > b", W_rec + VNN + WOT > b, (W_rec + VNN + WOT) - b),
    ("W_rec + VOT + WDT > b", W_rec + VOT + WDT > b, (W_rec + VOT + WDT) - b),
    ("W_rec + VOT + WJJ > b", W_rec + VOT + WJJ > b, (W_rec + VOT + WJJ) - b),
    ("W_rec + VOT + WNN > b", W_rec + VOT + WNN > b, (W_rec + VOT + WNN) - b),
    ("W_rec + VOT + WOT > b", W_rec + VOT + WOT > b, (W_rec + VOT + WOT) - b)
]

# Extracting expressions, boolean values, and results
expressions = [inequality[0] for inequality in inequalities]
boolean_values = [inequality[1] for inequality in inequalities]
results = [inequality[2] for inequality in inequalities]

# Creating a table
table = zip(expressions, boolean_values, results)
print(tabulate(table, headers=["Expression", "Boolean Value", "Difference"]))


Expression               Boolean Value    Difference
---------------------  ---------------  ------------
V_cap + WDT > b                      1     1.82043
V_cap + WJJ > b                      1     1.6619
V_cap + WNN > b                      1     0.876262
V_cap + WOT > b                      1     1.76643
W_rec + VDT + WJJ < b                1    -0.0902281
W_rec + VDT + WNN < b                1    -0.875866
VJJ + WJJ < b                        0     0.235494
VJJ + WNN < b                        1    -0.550144
W_rec + VJJ + WJJ < b                0     0.666025
W_rec + VJJ + WNN < b                1    -0.119613
VNN + WOT > b                        1     0.292546
W_rec + VNN + WOT > b                1     0.723077
W_rec + VOT + WDT > b                1     1.35376
W_rec + VOT + WJJ > b                1     1.19524
W_rec + VOT + WNN > b                1     0.409597
W_rec + VOT + WOT > b                1     1.29976
