In [1]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from transformers import BartTokenizer
import torch
import copy

## Splitting data into Train and Val

In [2]:
df = pd.read_excel("ArithOpsTrain.xlsx", header=1)  # header=1 indicates that the second row should be used as the column names

# Drop the first column
df.drop(df.columns[0], axis=1, inplace=True)

df = df.sample(frac=1)

df.head()

Unnamed: 0,Description,Question,Equation,Input Numbers,Output
283,marian also baked oatmeal cookies for her clas...,how many trays will she need to prepare number...,/ number1 number0,12 276,23.0
270,in haley 's class number0 are boys who love to...,how many will each of the boys receive ?,/ number1 number0,5 35,7.0
428,jane can arrange number0 vases of flowers in a...,how many days are needed for her to finish all...,/ number1 number0,16 248,15.5
139,because of the decision sofia asked the studen...,how many students participated in the suggesti...,+ number0 number1,279 234,513.0
583,a fast food restaurant had number0 hotdogs . a...,what 's the difference between the number of h...,- number0 number1,91 20,71.0


In [3]:
def fun(equation):
    return equation.count('+') + equation.count('-') + equation.count('/') + equation.count('*') + 1
df['numbers'] = df['Equation'].apply(fun)
df.head()

Unnamed: 0,Description,Question,Equation,Input Numbers,Output,numbers
283,marian also baked oatmeal cookies for her clas...,how many trays will she need to prepare number...,/ number1 number0,12 276,23.0,2
270,in haley 's class number0 are boys who love to...,how many will each of the boys receive ?,/ number1 number0,5 35,7.0,2
428,jane can arrange number0 vases of flowers in a...,how many days are needed for her to finish all...,/ number1 number0,16 248,15.5,2
139,because of the decision sofia asked the studen...,how many students participated in the suggesti...,+ number0 number1,279 234,513.0,2
583,a fast food restaurant had number0 hotdogs . a...,what 's the difference between the number of h...,- number0 number1,91 20,71.0,2


In [4]:
df['numbers'].value_counts()

2    754
3    225
Name: numbers, dtype: int64

In [5]:
# # Split the DataFrame into a training set (70%) and a testing set (30%)
# train_df = df.sample(frac=0.7, random_state=1)  # You can change the random_state for reproducibility
# val_df = df.drop(train_df.index)

# # Save the training set to a CSV file
# train_df.to_csv('training_data.csv', index=False)

# # Save the testing set to a CSV file
# val_df.to_csv('val_data.csv', index=False)

In [8]:
train_df = pd.read_csv("training_data.csv")
val_df = pd.read_csv('val_data.csv')

train_df.head()

Unnamed: 0,Description,Question,Equation,Input Numbers,Output,numbers
0,allen shiela 's brother likes to play with blo...,how many colors did shiela use ?,/ number0 number1,49 7,7.0,2
1,isabel received number0 dollars for her birthd...,how many of the toys could she buy ?,/ number0 number1,14 2,7.0,2
2,nick saved $ number0 . if nick saved $ number1...,how much did lee save ?,- number0 number1,68.50 25.43,43.07,2
3,if lewis earns $ number0 every week during the...,how much money does he earn during harvest sea...,* number0 number1,1367.00 5,6835.0,2
4,amy uploaded number0 pics to facebook . if she...,how many photos were in each album ?,/ number0 number1,180 9,20.0,2


## Model training

In [12]:
train_descriptions = list(train_df['Description'].values)
train_questions = list(train_df['Question'].values)
train_equations = list(train_df['Equation'].values)

val_descriptions = list(val_df['Description'].values)
val_questions = list(val_df['Question'].values)
val_equations = list(val_df['Equation'].values)

In [3]:
# Initialize tokenizer
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")

In [7]:
# # Max description+question token length
# source_max_len = 0
# for i in range(len(descriptions)):
#     text = descriptions[i]+" "+questions[i]
#     tokens = tokenizer(text)['input_ids']
#     source_max_len = max(source_max_len,len(list(tokens)))
# source_max_len
    

94

In [8]:
# # Max equation token length
# target_max_len = 0
# for i in range(len(descriptions)):
#     text = equations[i]
#     tokens = tokenizer(text)['input_ids']
#     target_max_len = max(target_max_len,len(list(tokens)))
# target_max_len
    

10

In [2]:
class MathEquationDataset(Dataset):
    def __init__(self, descriptions, questions, equations, tokenizer, src_max_length=100,tgt_max_len = 15):
        self.descriptions = descriptions
        self.questions = questions
        self.equations = equations
        self.tokenizer = tokenizer
        self.src_max_length = src_max_length
        self.tgt_max_length = tgt_max_len

    def __len__(self):
        return len(self.descriptions)

    def __getitem__(self, idx):
        src_text = self.descriptions[idx] + " " + self.questions[idx]
        tgt_text = self.equations[idx]
        
        encoding = self.tokenizer(src_text, return_tensors='pt', max_length=self.src_max_length, padding='max_length', truncation=True)
        target = self.tokenizer(tgt_text, return_tensors='pt', max_length=self.tgt_max_length, padding='max_length', truncation=True)
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': target['input_ids'].flatten()
        }


In [15]:
BATCH_SIZE = 32
source_max_len = 100
target_max_len = 10

# Initialize the Dataset
train_dataset = MathEquationDataset(train_descriptions, train_questions, train_equations, tokenizer,src_max_length=source_max_len,tgt_max_len=target_max_len)
val_dataset = MathEquationDataset(val_descriptions, val_questions, val_equations, tokenizer,src_max_length=source_max_len,tgt_max_len=target_max_len)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("Train_size == ",len(train_dataset))
print("Val_size == ",len(val_dataset))

Train_size ==  685
Val_size ==  294


In [16]:
train_iter = iter(train_loader)
batch = next(train_iter)
batch

{'input_ids': tensor([[    0, 20235,   816,  ...,     1,     1,     1],
         [    0, 27816,   927,  ...,     1,     1,     1],
         [    0, 15796, 28459,  ...,     1,     1,     1],
         ...,
         [    0,  6709,  7876,  ...,     1,     1,     1],
         [    0,   627,  2793,  ...,     1,     1,     1],
         [    0,  7456,    21,  ...,     1,     1,     1]]),
 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]]),
 'labels': tensor([[   0,   12,  346,  288,  346,  134,    2,    1,    1,    1],
         [   0, 2744, 2055,  346,  134,  346,  176,  346,  246,    2],
         [   0, 3226,  346,  134,  346,  176,    2,    1,    1,    1],
         [   0, 2744, 2055,  346,  288,  346,  134,  346,  176,    2],
         [   0,   12,  346,  288,  346,  134,    2,    1,    1,    1],
        

In [17]:
ids = tokenizer('robyn and lucy are members of their village s girl scout troop . during weekends and some weekdays they go around selling cookies in the neighborhood . they have a week before the month ends and they are doing their best to get a badge from selling cookies . working overtime robyn sold number0 packs of cookies while lucy sold number1 how	many packs of cookies were they able to sell that day ?',max_length=100, padding='max_length', truncation=True)['input_ids']
tokenizer.decode(ids)

'<s>robyn and lucy are members of their village s girl scout troop. during weekends and some weekdays they go around selling cookies in the neighborhood. they have a week before the month ends and they are doing their best to get a badge from selling cookies. working overtime robyn sold number0 packs of cookies while lucy sold number1 how\tmany packs of cookies were they able to sell that day?</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>'

In [18]:
tokenizer.decode(batch['input_ids'][3])

'<s>the following week they decided to go to lake huron and lake michigan. during their stay there they caught a total of number0 pikes number1 sturgeons and number2 herrings. how many fishes did they catch from the number3 lakes?</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>'

In [19]:
tokenizer.decode(batch['labels'][3])

'<s>+ + number0 number1 number2</s>'

In [20]:
device = 'cuda:0'

In [16]:
from transformers import BartForConditionalGeneration, AdamW

model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")
model.to('cuda:0')  # or 'cpu' if you are not using a GPU


In [17]:
output = model(input_ids=batch['input_ids'].to(device), attention_mask=batch['attention_mask'].to(device),labels = batch['labels'].to(device))

In [18]:
output['logits'].shape

torch.Size([32, 10, 50265])

In [19]:
output['loss']

tensor(8.9398, device='cuda:0', grad_fn=<NllLossBackward0>)

In [4]:
def eval(model,val_loader,val_dataset):
    model.eval()
    total_val_loss = 0
    total_true = []
    total_pred = []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to('cuda')
            attention_mask = batch['attention_mask'].to('cuda')
            labels = batch['labels'].to('cuda')


            pred_tokens = model.generate(input_ids)
            pred_output = [tokenizer.decode(output_id, skip_special_tokens=True) for output_id in pred_tokens]
            true_output = [tokenizer.decode(output_id, skip_special_tokens=True) for output_id in labels]

            total_true.extend(true_output)
            total_pred.extend(pred_output)

            # print("True == ",true_output)
            # print("Predicted == ",pred_output)
            # print("\n\n")
            
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            val_loss = outputs.loss
            total_val_loss += val_loss.item()
        
        avg_val_loss = total_val_loss / len(val_loader)

        matching_strings = [a for a, b in zip(total_true, total_pred) if a == b]

        # Get the count of matching strings
        acc = len(matching_strings)/len(val_dataset)
        # print(f"Validation Loss: {avg_val_loss:.4f}, Accuracy: {acc:.4f}")
        
    return avg_val_loss,acc,total_true,total_pred


In [22]:
optimizer = AdamW(model.parameters(), lr=5e-5)
train_loss_stat = []
val_loss_stat = []
best_val_loss = 0.5
best_model_wts = copy.deepcopy(model.state_dict())
best_accuracy = 0.0
best_epoch = 0



In [27]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    print(f"Epoch {epoch+1}/{num_epochs}")
    total_loss = 0
    
    for i,batch in enumerate(train_loader):
        input_ids = batch['input_ids'].to('cuda')
        attention_mask = batch['attention_mask'].to('cuda')
        labels = batch['labels'].to('cuda')
        
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i% 10== 0:
            model.eval()
            avg_val_loss,val_acc,total_true,total_pred = eval(model,val_loader,val_dataset)
            model.train()
            
            avg_train_loss = total_loss / len(train_loader)

            if val_acc > best_accuracy:
                best_model_wts = copy.deepcopy(model.state_dict())
                best_accuracy = val_acc
                best_epoch = epoch+1
                print("--------- Accuracy Noted ----------")
            train_loss_stat.append(avg_train_loss)
            val_loss_stat.append(avg_val_loss)

            print(f"Iter : {i}, Training Loss: {avg_train_loss:.4f}, Validation Loss : {avg_val_loss:.4f}, Val_Accuracy : {val_acc:.4f}")
    
    model.eval()
    _,train_acc,_,_ = eval(model,train_loader,train_dataset)
    print(f"Train_Accuracy : {train_acc:.4f}")    
    model.train()       


print(f"Best_acc : {best_accuracy:.4f}, Best_epoch : {best_epoch}")
model.load_state_dict(best_model_wts)

Epoch 1/10
--------- Accuracy Noted ----------
Iter : 0, Training Loss: 0.0081, Validation Loss : 0.3209, Val_Accuracy : 0.5986
Iter : 10, Training Loss: 0.0838, Validation Loss : 0.3140, Val_Accuracy : 0.5578
--------- Accuracy Noted ----------
Iter : 20, Training Loss: 0.1665, Validation Loss : 0.2997, Val_Accuracy : 0.6054
Train_Accuracy : 0.6876
Epoch 2/10
Iter : 0, Training Loss: 0.0067, Validation Loss : 0.3127, Val_Accuracy : 0.5578
Iter : 10, Training Loss: 0.0733, Validation Loss : 0.3333, Val_Accuracy : 0.5442
--------- Accuracy Noted ----------
Iter : 20, Training Loss: 0.1564, Validation Loss : 0.3456, Val_Accuracy : 0.6088
Train_Accuracy : 0.7255
Epoch 3/10
Iter : 0, Training Loss: 0.0060, Validation Loss : 0.4428, Val_Accuracy : 0.5816
--------- Accuracy Noted ----------
Iter : 10, Training Loss: 0.0740, Validation Loss : 0.2871, Val_Accuracy : 0.6429
--------- Accuracy Noted ----------
Iter : 20, Training Loss: 0.1576, Validation Loss : 0.2882, Val_Accuracy : 0.6531
Trai

<All keys matched successfully>

In [28]:
avg_val_loss,acc,total_true,total_pred = eval(model,val_loader,val_dataset)
print(f"Validation Loss: {avg_val_loss:.4f}, Val_Accuracy: {acc:.4f}")


Validation Loss: 0.3767, Val_Accuracy: 0.7313


In [35]:
unmatched_true = []
unmatched_pred = []
for true,pred in zip(total_true,total_pred):
    if true != pred:
        unmatched_true.append(true)
        unmatched_pred.append(pred)
model_df = pd.DataFrame({'True': unmatched_true, 'Pred': unmatched_pred})
model_df

Unnamed: 0,True,Pred
0,- number1 number0,+ number1 number0
1,- + number2 number0 number1,- - number0 number1 number2
2,/ number1 number0,* number1 number0
3,- number0 * number1 number2,- - number0 number1 number2
4,- number0 + number1 number2,- + number0 number1 number2
...,...,...
85,- number1 number0,- number0 number1
86,- number1 number0,+ number1 number0
87,- number0 number1,+ number0 number1
88,- number1 number0,+ number1 number0


In [29]:
torch.save(model.state_dict(), f'checkpoint_{20}_temp_acc_0.77')

In [23]:
val_iter = iter(val_loader)
batch = next(val_iter)
pred_tokens = model.generate(batch['labels'].to(device))
pred_output = [tokenizer.decode(output_id, skip_special_tokens=True) for output_id in pred_tokens]
true_output = [tokenizer.decode(output_id, skip_special_tokens=True) for output_id in batch['labels']]
print("True == ",true_output)
print("Pred == ",pred_output)

True ==  ['* number0 number1', '- number0 number1', '- number0 number1', '- number0 * number1 number2', '+ number0 number1', '- number0 number1', '+ number0 number1', '+ number0 number1', '+ number0 number1', '/ + number0 number1 number2', '- number0 number1', '- number0 number1', '+ number0 number1', '* number0 number1', '* number0 number1', '- number0 number1', '* number0 number1', '- number0 number1', '* number0 number1', '- number0 number1', '- number0 number1', '+ number0 number1', '+ number0 number1', '* number1 number2', '+ + number0 number1 number2', '/ number1 number0', '* number0 number1', '+ number1 number2', '+ number0 number1', '+ number0 number1', '- number0 number1', '- number0 number1', '+ + number1 number2 number3', '- number0 number1', '+ number5 number1', '+ * number0 number1 number2', '- + number0 number1 number2', '- - number0 number1 number2', '/ number1 number0', '- number0 number1', '* / number1 number0 number2', '* / number1 number0 number2', '- + number0 numbe

## Loading the saved model

In [5]:
from transformers import BartForConditionalGeneration, AdamW
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")
model.load_state_dict(torch.load('checkpoint_20_acc_0.77'))
model.eval()
model.to('cuda:0')  # or 'cpu' if you are not using a GPU

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50265, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50265, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0): BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_norm): LayerNorm((102

In [23]:
avg_val_loss,acc,total_true,total_pred = eval(model,train_loader,train_dataset)
print(f"Train Loss: {avg_val_loss:.4f}, Train_Accuracy: {acc:.4f}")




Train Loss: 0.1961, Train_Accuracy: 0.9416


In [24]:
avg_val_loss,acc,total_true,total_pred = eval(model,val_loader,val_dataset)
print(f"Val Loss: {avg_val_loss:.4f}, Val_Accuracy: {acc:.4f}")


Val Loss: 0.2062, Val_Accuracy: 0.9252


In [25]:
unmatched_true = []
unmatched_pred = []
for true,pred in zip(total_true,total_pred):
    if true != pred:
        unmatched_true.append(true)
        unmatched_pred.append(pred)
model_df = pd.DataFrame({'True': unmatched_true, 'Pred': unmatched_pred})
model_df

Unnamed: 0,True,Pred
0,- number0 number2,- number0 * number2 number1
1,- number1 number0,- number0 number1
2,* + number0 number1 number2,/ + number0 number1 number2
3,+ + number0 number2 number1,+ + number0 number1 number2
4,- number0 number1,- - number0 number1 number2
5,+ number1 number0,+ number0 number1
6,+ + number0 number1 number2,- - number0 number1 number2
7,+ + number0 number2 number1,+ + number0 number1 number2
8,/ number1 number2,/ number1 number2 number0
9,- - number0 number1 number2,+ - number0 number1 number2


In [13]:
def generate_equation(model,sentence):
    model.eval()
    inputs = tokenizer(sentence, return_tensors="pt", max_length=100, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    outputs = model.generate(**inputs)
    decoded_output = [tokenizer.decode(output_id, skip_special_tokens=True) for output_id in outputs]
    return outputs,decoded_output


In [27]:
description = "the kids from oakwood elementary school are visiting a bird zoo for their field trip . to get to the bird zoo from the school the kids have to ride some buses . if there are number0 buses and each bus has number1 adult supervisors to guide the children	how many supervisors are there in total ?"
outputs,predicted_equation = generate_equation(model,description)
print(predicted_equation)


['* number0 number1']




In [28]:
print(outputs)
print(tokenizer.decode(outputs[0]))

tensor([[   2,    0, 3226,  346,  288,  346,  134,    2]], device='cuda:0')
</s><s>* number0 number1</s>


In [29]:
tgt = "* number0 number1"
ids = tokenizer(tgt)['input_ids']
print(ids)
print(tokenizer.decode(ids))

[0, 3226, 346, 288, 346, 134, 2]
<s>* number0 number1</s>


In [30]:
tokenizer.decode(outputs[0])

'</s><s>* number0 number1</s>'

## Test set results

In [10]:
def evaluate_prefix(expression, operands):
    # Split the expression into tokens
    tokens = expression.split()

    # Define a stack to store operands
    stack = []

    # Iterate through the tokens in reverse order (as it's a prefix expression)
    for token in reversed(tokens):
        try:
            if token.startswith('number'):
                # If the token starts with 'number', use it to index into the 'operands' list
                operand_index = int(token[6:])  # Extract the index from the token
                if 0 <= operand_index < len(operands):
                    stack.append(float(operands[operand_index]))
                else:
                    raise ValueError("Invalid operand index: " + str(operand_index))
            elif token in '+-*/':
                # If the token is an operator, pop two operands from the stack and apply the operator
                operand1 = stack.pop()
                operand2 = stack.pop()
                if token == '+':
                    stack.append(operand1 + operand2)
                elif token == '-':
                    stack.append(operand1 - operand2)
                elif token == '*':
                    stack.append(operand1 * operand2)
                elif token == '/':
                    if operand2 == 0:
                        raise ValueError("Division by zero")
                    stack.append(operand1 / operand2)
            else:
                raise ValueError("Invalid token: " + token)
        except (ValueError, IndexError):
            return None  # Handle the error and return None

    if len(stack) != 1:
        return None  # Return None for any errors

    return stack[0]

# Example usage:
expression = '+ * number0 number1 number2'
operands = [1, 3, 5]
result = evaluate_prefix(expression, operands)

if result is not None:
    print("Result:", result)
else:
    print("Error: Invalid expression")


Result: 8.0


In [6]:
test_df = pd.read_excel('ArithOpsTestDataWithoutOutput.xlsx')
test_df.head()

Unnamed: 0,Description,Question,Input Numbers
0,number0 red apples and number1 green apples ar...,how many apples are in the basket ?,7 2
1,ellen has number0 more balls than marin . mari...,how many balls does ellen have ?,6 9
2,janet has number0 oranges and sharon has numbe...,how many oranges do janet and sharon have toge...,9 7
3,allan brought number0 balloons and jake brough...,how many balloons did allan and jake have in t...,2 4
4,adam has number0 more apples than jackie . jac...,how many apples does adam have ?,5 9


In [14]:
# true = list(test_df['Output'].values)
pred = []
ind = 0
for index, row in test_df.iterrows():
    ind+=1
    if ind%50 == 0:
        print(ind)
    input = row['Description']+" "+row['Question']
    _,predicted_equation = generate_equation(model,input)
    predicted_equation = predicted_equation[0]
    pred_output = evaluate_prefix(predicted_equation, row['Input Numbers'].split())
    pred.append(pred_output)
     



50
100
150
200


In [22]:
df = pd.DataFrame(pred)
df.to_excel('output.xlsx', index=False, header=False)
df.head()

Unnamed: 0,0
0,Bandla Manikanta
1,9.0
2,15.0
3,2.0
4,6.0


In [26]:
df = pd.read_excel('ArithOpsTestDataOnlyOutput.xlsx')
true = list(df['Output'].values)
df.head()

Unnamed: 0,Output
0,9.0
1,15.0
2,16.0
3,6.0
4,14.0


In [27]:
correct = 0
for a,b in zip(true,pred):
    if a==b:
        correct+=1
correct/len(true)

0.7563025210084033

In [28]:
correct

180