In [35]:
import torch
import random
from tqdm import tqdm
from model import addGPT  
from model import encode, decode  ,num_digits,val_data
# Prefer the GPU when one is available. Both names are defined on purpose:
# initialize_model() defaults to `device1`, evaluate_model_val() reads `device`.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device1 = device

# File the trained weights are loaded from.
saved_model = 'addition_weights.pth'

In [36]:
def initialize_model(model_path=saved_model, device=device1):
    """Build an ``addGPT`` model, load saved weights into it and put it in eval mode.

    Args:
        model_path: Path of the state-dict file handed to ``torch.load``.
        device: Device the model is moved to and the stored tensors are mapped onto.

    Returns:
        The model in eval mode, or ``None`` when the weights could not be
        loaded (the error is printed rather than raised).
    """
    model = addGPT(train=False)
    model.to(device)

    try:
        # map_location remaps the stored tensors, so a checkpoint written on a
        # GPU can still be loaded on a CPU-only machine.
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict)
    except Exception as e:
        print(f"Error loading weights: {e}")
        return None

    print(f"\nSuccessfully loaded weights from {model_path}")
    model.eval()
    return model

def _prompt_number(prompt):
    """Ask for one operand until the input is valid or the user quits.

    Returns the operand left-padded with zeros to ``num_digits`` characters,
    or ``None`` when the user typed 'q' or 'quit'.
    """
    while True:
        raw = input(prompt).strip()
        if raw.lower() in ('q', 'quit'):
            return None
        # Accept plain ASCII digits only: str.isdigit() is also true for
        # characters such as '²', which int() later refuses to parse.
        if raw and len(raw) <= num_digits and all(ch in '0123456789' for ch in raw):
            return raw.zfill(num_digits)
        print(f'Please enter a valid number up to {num_digits} digits.')


def run_inference():
    """Interactive loop: read two numbers, let the model add them, report the result.

    Loads the model through ``initialize_model`` and returns immediately if
    that fails. The loop ends when the user types 'q'/'quit' at an operand
    prompt or answers anything other than 'y' to the continue prompt.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Load the model onto the same device the input tensors are sent to.
    model = initialize_model(device=device)
    if model is None:
        return
    print("Enter 'q' or 'quit' to exit")

    while True:
        num1 = _prompt_number("\nEnter first number: ")
        if num1 is None:
            return
        num2 = _prompt_number("Enter second number: ")
        if num2 is None:
            return

        # The prompt is the two zero-padded operands concatenated.
        problem = num1 + num2
        problem_tokens = encode(problem)
        problem_tensor = torch.tensor(problem_tokens, dtype=torch.long).unsqueeze(0).to(device)

        with torch.no_grad():
            generated_tokens = model.generate(
                problem_tensor,
                max_new_tokens=num_digits + 1
            )

        # Keep the last num_digits+1 decoded characters and reverse them
        # (presumably the model emits the least-significant digit first -
        # confirm against the training data format).
        predicted_answer = decode(generated_tokens[0].tolist())[-(num_digits + 1):]
        predicted_answer = predicted_answer[::-1]

        actual_value = int(num1) + int(num2)

        # Guard the conversion: if the decoded text is not purely numeric the
        # prediction is reported as-is and counted as wrong instead of crashing.
        try:
            predicted_value = int(predicted_answer)
        except ValueError:
            predicted_value = None

        print("\nProblem:", f"{int(num1)} + {int(num2)}")
        print("Model's Answer:", predicted_answer if predicted_value is None else predicted_value)
        print("Actual Answer:", actual_value)

        if predicted_value == actual_value:
            print("Model predicted correctly")
        else:
            print("Model prediction was incorrect")

        cont = input("\nTry another problem? (y/n): ").strip().lower()
        if cont != 'y':
            break

# Start the interactive session when this code is executed directly
# (as a script, or as a notebook cell where __name__ is "__main__").
if __name__ == "__main__":
    run_inference()


Successfully loaded weights from addition_weights.pth
Enter 'q' or 'quit' to exit



Enter first number:  9548357757
Enter second number:  5890543975



Problem: 9548357757 + 5890543975
Model's Answer: 15438901732
Actual Answer: 15438901732
Model predicted correctly



Try another problem? (y/n):  y

Enter first number:  2939
Enter second number:  546



Problem: 2939 + 546
Model's Answer: 3485
Actual Answer: 3485
Model predicted correctly



Try another problem? (y/n):  0


In [27]:
def evaluate_model_val(model, num_samples=1000):
    """Measure exact-match accuracy of ``model`` on a random sample of ``val_data``.

    Each sample string is split at ``2 * num_digits``: the prefix is fed to
    ``model.generate`` as the problem and the remainder is the expected answer.
    The first few mismatches are printed, followed by a summary.

    Args:
        model: Model exposing ``eval()``, ``train()`` and ``generate()``.
        num_samples: Upper bound on the number of validation samples drawn
            (capped at ``len(val_data)``).
    """
    # Remember the incoming mode so a model loaded for inference is not left
    # in training mode afterwards. Falls back to the previous behaviour
    # (always call train()) if the object exposes no `training` flag.
    was_training = getattr(model, 'training', True)
    model.eval()
    correct_predictions = 0
    incorrect_predictions = 0
    evaluation_samples = random.sample(val_data, min(num_samples, len(val_data)))
    progress_bar = tqdm(evaluation_samples,
                        desc="Evaluating model",
                        unit="sample",
                        total=len(evaluation_samples))
    try:
        # No gradients are needed for evaluation; skipping them saves memory.
        with torch.no_grad():
            for eqn in progress_bar:
                # The first 2*num_digits characters are the two operands.
                problem_tokens = encode(eqn[:(2 * num_digits)])
                problem_tensor = torch.tensor(problem_tokens, dtype=torch.long).unsqueeze(0).to(device)

                generated_tokens = model.generate(
                    problem_tensor,
                    max_new_tokens=num_digits + 1
                )

                # Both answers are reversed before comparison and display
                # (presumably the data stores the least-significant digit
                # first - confirm against the dataset builder).
                predicted_answer = decode(generated_tokens[0].tolist())[-(num_digits + 1):]
                predicted_answer = predicted_answer[::-1]
                actual_answer = eqn[(2 * num_digits):]
                actual_answer = actual_answer[::-1]

                if predicted_answer == actual_answer:
                    correct_predictions += 1
                else:
                    incorrect_predictions += 1
                    if incorrect_predictions < 10:
                        # tqdm.write prints without breaking the progress bar.
                        tqdm.write(f'Incorrect Prediction : {eqn[:num_digits]} + {eqn[num_digits:2*num_digits]} = {predicted_answer} | {actual_answer}')

                # Running accuracy, updated per sample; at least one sample
                # has been counted here, so the division is safe.
                current_accuracy = (correct_predictions / (correct_predictions + incorrect_predictions)) * 100
                progress_bar.set_description(f"Evaluating model (Accuracy: {current_accuracy:.2f}%)")

        total_samples = correct_predictions + incorrect_predictions
        accuracy = (correct_predictions / total_samples) * 100 if total_samples > 0 else 0

        print(f"Evaluation Summary:")
        print(f"  Total Samples: {total_samples}")
        print(f"  Correct Predictions: {correct_predictions}")
        print(f"  Incorrect Predictions: {incorrect_predictions}")
        print(f"  Accuracy: {accuracy:.4f}%")
    finally:
        # Restore the mode the model had on entry, even if generation failed.
        if was_training:
            model.train()

In [34]:

model = initialize_model()
# initialize_model() returns None (after printing the error) when the weights
# cannot be loaded; skip evaluation instead of failing on None.
if model is not None:
    evaluate_model_val(model)



Successfully loaded weights from addition_weights.pth


Evaluating model:   4%|██▎                                                        | 39/1000 [00:05<02:00,  7.98sample/s]

5450279056 + 6910234450 = 12360513406 | 12360513506


Evaluating model:   8%|████▍                                                      | 76/1000 [00:10<02:12,  6.97sample/s]

9834372352 + 8718111147 = 18552493499 | 18552483499


Evaluating model:   9%|█████▌                                                     | 94/1000 [00:12<01:57,  7.71sample/s]

4666137419 + 3844376482 = 08510513911 | 08510513901


Evaluating model:  11%|██████▍                                                   | 111/1000 [00:14<02:00,  7.40sample/s]

7221870788 + 6521662839 = 13743433627 | 13743533627


Evaluating model:  15%|████████▍                                                 | 146/1000 [00:19<01:36,  8.85sample/s]

2979641338 + 2303721220 = 06283362558 | 05283362558


Evaluating model:  20%|███████████▋                                              | 201/1000 [00:26<01:42,  7.76sample/s]

8841017378 + 9010116415 = 17951133793 | 17851133793


Evaluating model:  23%|█████████████▏                                            | 228/1000 [00:30<01:55,  6.67sample/s]

5612620385 + 3981924009 = 08594544394 | 09594544394


Evaluating model:  30%|█████████████████▎                                        | 298/1000 [00:40<01:26,  8.10sample/s]

6370535540 + 1995663414 = 08365198954 | 08366198954


Evaluating model:  33%|███████████████████                                       | 329/1000 [00:44<01:22,  8.14sample/s]

9579555043 + 5031974464 = 14611529407 | 14611529507


Evaluating model: 100%|█████████████████████████████████████████████████████████| 1000/1000 [02:13<00:00,  7.50sample/s]

Evaluation Summary:
  Total Samples: 1000
  Correct Predictions: 977
  Incorrect Predictions: 23
  Accuracy: 97.7000%



