In [11]:
# Put the local model folders on the module search path (relative to the
# current working directory).
# NOTE(review): these folder names contain spaces, while the import and the
# checkpoint path used later in this notebook spell them with underscores
# ("Model_Scripts", "Model_Weights") - confirm which names exist on disk.
import sys
sys.path.extend(["Model Scripts", "Model Weights"])


# Import the necessary libraries
import torch
import random
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt


In [12]:
# Bring the model factory into scope
from Model_Scripts.ArithmeticTransformer import create_arithmetic_transformer

# Option 1: build the transformer with the factory's default hyperparameters
model = create_arithmetic_transformer()

# Option 2: build it with explicit hyperparameters. This rebinds `model`,
# so the default-parameter instance created above is discarded.
model = create_arithmetic_transformer(
    vocab_size=14, embed_size=128, num_heads=4,
    ff_dim=512, num_layers=3, max_length=64,
)

In [20]:
import torch

def load_model(model_path, device=None):
    """Load a saved arithmetic-transformer checkpoint and prepare it for inference.

    The checkpoint must be a dict readable by ``torch.load`` that contains
    ``'model_config'`` (hyperparameters forwarded to
    ``create_arithmetic_transformer``) and ``'model_state_dict'``. The entries
    ``'vocab'``, ``'inv_vocab'`` and ``'accuracy'`` are optional, as is the
    ``'dropout'`` hyperparameter inside the config.

    Args:
        model_path: Path to the checkpoint file.
        device: Device to load the weights onto. When ``None``, CUDA is used
            if available, otherwise CPU.

    Returns:
        Tuple ``(model, vocab, inv_vocab, device, config)``. ``vocab`` and
        ``inv_vocab`` are ``None`` when the checkpoint does not store them.
        The model is returned in eval mode on ``device``.

    Raises:
        KeyError: If the checkpoint lacks ``'model_config'`` or
            ``'model_state_dict'``, or the config lacks a required
            hyperparameter.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print(f"Loading model on {device}")
    # NOTE(security): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    checkpoint = torch.load(model_path, map_location=device)

    config = checkpoint['model_config']
    print("Model configuration:", config)

    # These hyperparameters determine the weight shapes / layout, so they are
    # required; a missing one raises KeyError here rather than a confusing
    # state-dict mismatch later.
    required_keys = (
        'vocab_size', 'embed_size', 'num_heads',
        'ff_dim', 'num_layers', 'max_length',
    )
    model_kwargs = {key: config[key] for key in required_keys}
    # Forward 'dropout' only when the checkpoint recorded it, so checkpoints
    # without it fall back to the factory's default instead of crashing.
    if 'dropout' in config:
        model_kwargs['dropout'] = config['dropout']

    model = create_arithmetic_transformer(**model_kwargs)
    model.load_state_dict(checkpoint['model_state_dict'])

    # Inference setup: move to the target device and switch to eval mode.
    model = model.to(device)
    model.eval()

    vocab = checkpoint.get('vocab')
    inv_vocab = checkpoint.get('inv_vocab')

    # The accuracy is informational only; do not fail a successful load
    # because the checkpoint did not record it.
    accuracy = checkpoint.get('accuracy')
    if accuracy is None:
        print("Model loaded successfully!")
    else:
        print(f"Model loaded successfully! Best accuracy: {accuracy:.4f}")

    return model, vocab, inv_vocab, device, config

# Load the checkpoint and unpack everything the inference helpers need.
# Note that this rebinds the notebook-level `model`.
model_path = "./Model_Weights/medium_addition_model.pth"
model, vocab, inv_vocab, device, config = load_model(model_path)

Loading model on cpu
Model configuration: {'vocab_size': 14, 'embed_size': 256, 'num_heads': 4, 'ff_dim': 1024, 'num_layers': 4, 'max_length': 42, 'dropout': 0.1}
Model loaded successfully! Best accuracy: 0.9987


In [21]:
# If you need to use the model for inference, you'll want these helper functions:
def preprocess_input(input_str, max_length, vocab):
    """Turn an expression string into a padded batch of token ids.

    The string is read right-to-left, each character is mapped through
    ``vocab``, and the sequence is right-padded with ``vocab['<PAD>']`` up to
    ``max_length``.

    Characters that are not in ``vocab`` are silently dropped. Sequences that
    are already longer than ``max_length`` are returned as-is (no truncation).

    Args:
        input_str: Expression text, e.g. ``"12+3="``.
        max_length: Target sequence length after padding.
        vocab: Mapping from character/token to integer id; must contain
            ``'<PAD>'``.

    Returns:
        Tensor of token ids with a leading batch dimension of size 1.
    """
    # Walk the characters back-to-front, keeping only those the vocab knows
    token_ids = [vocab[ch] for ch in reversed(input_str) if ch in vocab]

    # A non-positive count yields an empty list, so long inputs stay unpadded
    pad_count = max_length - len(token_ids)
    token_ids.extend([vocab['<PAD>']] * pad_count)

    # Wrapping in an outer list adds the batch dimension
    return torch.tensor([token_ids])

def decode_output(output_tensor, inv_vocab):
    """Decode model output scores into a result string.

    Takes the highest-scoring index along dimension 2 at every position, reads
    the first entry along dimension 0, stops at the first ``'<EOS>'`` token,
    skips ``'<PAD>'`` tokens, and reverses the collected characters.
    (Presumably ``output_tensor`` is laid out as (batch, sequence, vocab) -
    confirm against the model.)

    The ids of the special tokens are looked up in ``inv_vocab`` itself, so
    the function does not depend on a notebook-global ``vocab`` being defined.

    Args:
        output_tensor: Tensor with at least three dimensions.
        inv_vocab: Mapping from integer token id to token string.

    Returns:
        The decoded string, reversed relative to the order the tokens appear
        in the model output.

    Raises:
        KeyError: If a predicted id (other than the special tokens) is not in
            ``inv_vocab``.
    """
    # Recover the special-token ids from the id -> token mapping we were given.
    special_ids = {
        token: idx for idx, token in inv_vocab.items()
        if token in ('<EOS>', '<PAD>')
    }
    eos_id = special_ids.get('<EOS>')
    pad_id = special_ids.get('<PAD>')

    _, predicted = output_tensor.max(2)

    decoded = []
    # tolist() converts once instead of calling .item() per token
    for token_val in predicted[0].tolist():
        if token_val == eos_id:
            break
        if token_val != pad_id:
            decoded.append(inv_vocab[token_val])
    return ''.join(decoded)[::-1]  # Reverse at the end

# Example usage:
def test_addition(num1, num2, model, vocab, inv_vocab, max_length):
    input_str = f"{num1}+{num2}="
    input_tensor = preprocess_input(input_str, max_length, vocab)
    with torch.no_grad():
        output = model(input_tensor)
        result = decode_output(output, inv_vocab)
    print(f"{num1} + {num2} = {result}")
    print(f"Correct result: {num1 + num2}")
    print(f"Model's prediction is {'correct' if int(result) == num1 + num2 else 'incorrect'}")

In [19]:
# Single sanity check on one addition.
# NOTE(review): this cell's execution count (19) is lower than that of the
# cells that load the checkpoint (20) and define the helpers (21), so the
# output saved with it is likely stale - re-run after those cells.
test_addition(123, 456, model, vocab, inv_vocab, config['max_length'])

# Batch check: run the same helper over several operand pairs
test_cases = [(5, 7), (42, 58), (123, 456), (1234, 5678)]

for num1, num2 in test_cases:
    test_addition(num1, num2, model, vocab, inv_vocab, config['max_length'])

123 + 456 = 589
Correct result: 579
Model's prediction is incorrect
5 + 7 = 12
Correct result: 12
Model's prediction is correct
42 + 58 = 00
Correct result: 100
Model's prediction is incorrect
123 + 456 = 589
Correct result: 579
Model's prediction is incorrect
1234 + 5678 = 7912
Correct result: 6912
Model's prediction is incorrect


In [25]:
  # Test a simple addition
test_addition(123, 456, model, vocab, inv_vocab, config['max_length'])

# Batch check, extended with longer operands of unequal length in both orders
test_cases = [
    (5, 7), (42, 58), (123, 456), (1234, 5678),
    (10304923, 123123123), (123123123, 10304923),
]

for num1, num2 in test_cases:
    test_addition(num1, num2, model, vocab, inv_vocab, config['max_length'])

123 + 456 = 579
Correct result: 579
Model's prediction is correct
5 + 7 = 12
Correct result: 12
Model's prediction is correct
42 + 58 = 100
Correct result: 100
Model's prediction is correct
123 + 456 = 579
Correct result: 579
Model's prediction is correct
1234 + 5678 = 6912
Correct result: 6912
Model's prediction is correct
10304923 + 123123123 = 133428046
Correct result: 133428046
Model's prediction is correct
123123123 + 10304923 = 133428046
Correct result: 133428046
Model's prediction is correct
