In [1]:
import pandas as pd
import numpy as np
import re
from unidecode import unidecode
import nltk
import spacy
nlp = spacy.load('en_core_web_sm')

In [2]:
class WordEncoder:
    def __init__(self):
        self.encoding_map = {}
        self.decoding_map = {}
        self.current_index = 43
    
    def encode_word(self, word):
        if word not in self.encoding_map:
            self.encoding_map[word] = self.current_index
            self.decoding_map[self.current_index] = word
            self.current_index += 1
        return self.encoding_map[word]
    
    def decode_word(self, encoded_word):
        if encoded_word in self.decoding_map:
            return self.decoding_map[encoded_word]
        else:
            #if encoded_word==0:
                
            print(f"Invalid encoded word: {self,encoded_word}")
            raise ValueError("Invalid encoded word.")

In [3]:
encoder = WordEncoder()

In [4]:
wordnet_lemmatizer = nltk.stem.WordNetLemmatizer()


In [5]:
df = pd.read_csv('final_1.csv').reset_index()[['Question','Equation','Type']].fillna("General")
df

Unnamed: 0,Question,Equation,Type
0,gino has number0 popsicle sticks . i have numb...,+ number0 number1,General
1,lino picked up number0 shells at the seashore ...,+ number0 number1,General
2,there were number0 parents in the program and ...,+ number0 number1,General
3,last saturday marie sold number0 magazines and...,+ number0 number1,General
4,there are number0 birds on the fence . number1...,+ number0 number1,General
...,...,...,...
6526,an industrial machine made number0 shirts yest...,/ + number0 number1 number2,General
6527,an industrial machine made number0 shirts yest...,/ number1 number2,General
6528,an industrial machine can make number0 shirts ...,/ + number1 number2 number0,General
6529,an industrial machine can make number0 shirts ...,* number0 + number1 number2,General


In [6]:
df.Type.unique()

array(['General', 'Common', 'LCM_HCF', 'Physics', 'Time'], dtype=object)

In [7]:
values_to_replace = ['Addition', 'Subtraction', 'Sum', 'TVQ-Change',
                     'Multiplication', 'Common-Division', 'TVQ-Final',
                     'TVQ-Initial', 'Ceil-Division', 'Floor-Division', 'Difference']

# Replace the values in the 'Type' column with 'General'
df['Type'] = df['Type'].replace(values_to_replace, 'General')

In [8]:
df.Type.unique()

array(['General', 'Common', 'LCM_HCF', 'Physics', 'Time'], dtype=object)

In [9]:
level1DataFrame = df[['Question','Type']]
level1DataFrame

Unnamed: 0,Question,Type
0,gino has number0 popsicle sticks . i have numb...,General
1,lino picked up number0 shells at the seashore ...,General
2,there were number0 parents in the program and ...,General
3,last saturday marie sold number0 magazines and...,General
4,there are number0 birds on the fence . number1...,General
...,...,...
6526,an industrial machine made number0 shirts yest...,General
6527,an industrial machine made number0 shirts yest...,General
6528,an industrial machine can make number0 shirts ...,General
6529,an industrial machine can make number0 shirts ...,General


In [10]:
def l1PreProcessingX(text):
        text = text.replace(".", " ")
        text = text.replace(",", "")
        doc = nlp(text.lower())
        tagged_words = np.array(list(map(lambda x: (x.text, x.pos_), doc)))

        verbs_raw = tagged_words[tagged_words[:, 1] == 'VERB'][:, 0]
        verbs_raw = list(dict.fromkeys([wordnet_lemmatizer.lemmatize(i,pos='v') for i in verbs_raw]))
        encoded_verbs = [encoder.encode_word(i) for i in verbs_raw]
        if len(encoded_verbs) < 10:
                encoded_verbs = np.pad(encoded_verbs, (0, 10 - len(encoded_verbs)))

        
        
        adj_raw = tagged_words[tagged_words[:, 1] == 'ADJ'][:, 0]
        encoded_adjs = [encoder.encode_word(i) for i in adj_raw]
        if len(encoded_adjs) < 5:
                encoded_adjs = np.pad(encoded_adjs, (0, 5 - len(encoded_adjs)))

        adv_raw = tagged_words[tagged_words[:, 1] == 'ADV'][:, 0]
        encoded_advs = [encoder.encode_word(i) for i in adv_raw]
        if len(encoded_advs) < 5:
                encoded_advs = np.pad(encoded_advs, (0, 5 - len(encoded_advs)))

        aux_raw = tagged_words[tagged_words[:, 1] == 'AUX'][:, 0]
        encoded_auxs = [encoder.encode_word(i) for i in aux_raw]
        if len(encoded_auxs) < 5:
                encoded_auxs = np.pad(encoded_auxs, (0, 5 - len(encoded_auxs)))


        return np.array(list(encoded_verbs[:10]) + list(encoded_adjs[:5]) + list(encoded_advs[:5]) + list(encoded_auxs[:5]))


In [11]:
l1X = [l1PreProcessingX(i) for i in list(level1DataFrame['Question'].values)]

In [12]:
l1Y = [encoder.encode_word(i) for i in list(level1DataFrame['Type'].values)]

In [13]:
from sklearn.ensemble import RandomForestClassifier
l1model = RandomForestClassifier()
l1model.fit(l1X,l1Y)

In [14]:
def predictType(text) :
    return(encoder.decode_word(l1model.predict([l1PreProcessingX(text)])[0]))

In [15]:
df.drop_duplicates(subset=["Question"], keep="first", inplace=True)
df

Unnamed: 0,Question,Equation,Type
0,gino has number0 popsicle sticks . i have numb...,+ number0 number1,General
1,lino picked up number0 shells at the seashore ...,+ number0 number1,General
2,there were number0 parents in the program and ...,+ number0 number1,General
3,last saturday marie sold number0 magazines and...,+ number0 number1,General
4,there are number0 birds on the fence . number1...,+ number0 number1,General
...,...,...,...
6526,an industrial machine made number0 shirts yest...,/ + number0 number1 number2,General
6527,an industrial machine made number0 shirts yest...,/ number1 number2,General
6528,an industrial machine can make number0 shirts ...,/ + number1 number2 number0,General
6529,an industrial machine can make number0 shirts ...,* number0 + number1 number2,General


In [16]:
dfGeneral = df[df.Type=="General"][["Question","Equation"]]
"""dfAddition = df[df.Type=="Addition"][["Question","Equation"]]
dfSubtraction = df[df.Type=="Subtraction"][["Question","Equation"]]
dfSum = df[df.Type=="Sum"][["Question","Equation"]]
dfTVQChange = df[df.Type=="TVQ-Change"][["Question","Equation"]]
dfCommonDivision = df[df.Type=="Multiplication"][["Question","Equation"]]
dfCommonDivision = df[df.Type=="Common-Division"][["Question","Equation"]]
dfTVQFinal = df[df.Type=="TVQ-Final"][["Question","Equation"]]
dfTVQInitial = df[df.Type=="TVQ-Initial"][["Question","Equation"]]
dfCeilDivision = df[df.Type=="Ceil-Division"][["Question","Equation"]]
dfFloorDivision = df[df.Type=="Floor-Division"][["Question","Equation"]]
dfDifference = df[df.Type=="Difference"][["Question","Equation"]]"""
dfCommon = df[df.Type=="Common"][["Question","Equation"]]
dfPhysics = df[df.Type=="Physics"][["Question","Equation"]]
dfTime = df[df.Type=="Time"][["Question","Equation"]]
dfLcm_Hcf = df[df.Type=="LCM_HCF"][["Question","Equation"]]



In [17]:
dfPhysics


Unnamed: 0,Question,Equation
4852,a fill pipe can fill number0 / number1 of cist...,/ number2 number0
4853,what is the distance covered by a train if it ...,* number0 number1
4854,one pipe can fill a tank four times as fast as...,* + 1 4 number0
4855,the speed of a boat in still water is number0 ...,- number0 number1
4856,a space shuttle orbits the earth at about numb...,* number0 3600
...,...,...
5020,a boat can travel with a speed of number0 km /...,/ number1 + number0 number0
5021,a boat running up stram takes number0 hours to...,- number1 number0
5022,a tank is filled in number0 hours by number1 p...,/ number0 / 2 + number1 4
5023,a man goes from a to b at a speed of number0 k...,/ + number0 number0 2


In [18]:
import time
delay=5
def l2PreProcessingX(text) :
    text = text.replace(".", "")
    text = text.replace(",", "")
    doc = nlp(text.lower())
    tagged_words = np.array(list(map(lambda x: (x.text, x.pos_), doc)))
    #print("Tagged_words",tagged_words)
    #time.sleep(delay)
    verbs = []
    advs = []
    adjs = []
    nouns = []
    nums = []
    auxs = []
    for i in tagged_words :
        
        if re.search('^number', i[0]) :
            nums.append(i[0])

        else :
            if i[1]== "VERB":
                verbs.append(i[0])
            if i[1]== "ADV":
                advs.append(i[0])
            if i[1]== "ADJ":
                adjs.append(i[0])
            if i[1]== "AUX":
                auxs.append(i[0])
            if i[1]== "NOUN":
                nouns.append(i[0])



    verbs = list(dict.fromkeys([wordnet_lemmatizer.lemmatize(i,pos='v') for i in verbs]))

    #print("Verbs",verbs,"\nAdv",advs,"\nAdj",adjs,"\nAux",auxs,"\nNum",nums,"\nNoun",nouns)
    #time.sleep(delay)
    encoded_verbs = [encoder.encode_word(i) for i in verbs]
    if len(encoded_verbs) < 5:
            encoded_verbs = np.pad(encoded_verbs, (0, 5 - len(encoded_verbs)))

    encoded_adjs = [encoder.encode_word(i) for i in adjs]
    if len(encoded_adjs) < 5:
            encoded_adjs = np.pad(encoded_adjs, (0, 5 - len(encoded_adjs)))

    encoded_advs = [encoder.encode_word(i) for i in advs]
    if len(encoded_advs) < 5:
            encoded_advs = np.pad(encoded_advs, (0, 5 - len(encoded_advs)))

    encoded_auxs = [encoder.encode_word(i) for i in auxs]
    if len(encoded_auxs) < 5:
            encoded_auxs = np.pad(encoded_auxs, (0, 5 - len(encoded_auxs)))

    encoded_nums = [encoder.encode_word(i) for i in nums]
    if len(encoded_nums) < 5:
            encoded_nums = np.pad(encoded_nums, (0, 5 - len(encoded_nums)))

    encoded_nouns = [encoder.encode_word(i) for i in nouns]
    if len(encoded_nouns) < 10:
            encoded_nouns = np.pad(encoded_nouns, (0, 10 - len(encoded_nouns)))
    encoded_nouns = []
    return np.hstack((encoded_verbs[:5] , encoded_advs[:5] , encoded_adjs[:5] , encoded_auxs[:5] , encoded_nums[:5] , encoded_nouns[:10]))

In [19]:
def l2PreProcessingY(text) :
    formula = text.split(" ")
    encoded_formula = [encoder.encode_word(i) for i in formula]
    if len(encoded_formula) < 10:
            encoded_formula = np.pad(encoded_formula, (0, 10 - len(encoded_formula)))
    return np.array(list(encoded_formula[:10]))

In [20]:
l2generalX = [l2PreProcessingX(i) for i in dfGeneral.Question.values]
l2generalY = [l2PreProcessingY(i) for i in dfGeneral.Equation.values]

In [21]:
l2GeneralModel = RandomForestClassifier()
l2GeneralModel.fit(l2generalX,l2generalY)

"""from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Assuming you have l2generalX and l2generalY as your features and labels, respectively

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(l2generalX, l2generalY, test_size=0.2, random_state=42)

# Create and train the model
l2GeneralModel = RandomForestClassifier()
l2GeneralModel.fit(X_train, y_train)"""

'from sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestClassifier\n\n# Assuming you have l2generalX and l2generalY as your features and labels, respectively\n\n# Split the data into training and testing sets\nX_train, X_test, y_train, y_test = train_test_split(l2generalX, l2generalY, test_size=0.2, random_state=42)\n\n# Create and train the model\nl2GeneralModel = RandomForestClassifier()\nl2GeneralModel.fit(X_train, y_train)'

In [22]:
#y_train

In [23]:
"""l2AdditionX = [l2PreProcessingX(i) for i in dfAddition.Question.values]
l2AdditionY = [l2PreProcessingY(i) for i in dfAddition.Equation.values]
l2AdditionModel = RandomForestClassifier()
l2AdditionModel.fit(l2AdditionX,l2AdditionY)"""

'l2AdditionX = [l2PreProcessingX(i) for i in dfAddition.Question.values]\nl2AdditionY = [l2PreProcessingY(i) for i in dfAddition.Equation.values]\nl2AdditionModel = RandomForestClassifier()\nl2AdditionModel.fit(l2AdditionX,l2AdditionY)'

In [24]:
"""l2SubtractionX = [l2PreProcessingX(i) for i in dfSubtraction.Question.values]
l2SubtractionY = [l2PreProcessingY(i) for i in dfSubtraction.Equation.values]
l2SubtractionModel = RandomForestClassifier()
l2SubtractionModel.fit(l2SubtractionX,l2SubtractionY)"""

'l2SubtractionX = [l2PreProcessingX(i) for i in dfSubtraction.Question.values]\nl2SubtractionY = [l2PreProcessingY(i) for i in dfSubtraction.Equation.values]\nl2SubtractionModel = RandomForestClassifier()\nl2SubtractionModel.fit(l2SubtractionX,l2SubtractionY)'

In [25]:
"""l2MultiplicationX = [l2PreProcessingX(i) for i in dfMultiplication.Question.values]
l2MultiplicationY = [l2PreProcessingY(i) for i in dfMultiplication.Equation.values]
l2MultiplicationModel = RandomForestClassifier()
l2MultiplicationModel.fit(l2MultiplicationX,l2MultiplicationY)"""

'l2MultiplicationX = [l2PreProcessingX(i) for i in dfMultiplication.Question.values]\nl2MultiplicationY = [l2PreProcessingY(i) for i in dfMultiplication.Equation.values]\nl2MultiplicationModel = RandomForestClassifier()\nl2MultiplicationModel.fit(l2MultiplicationX,l2MultiplicationY)'

In [26]:
"""l2CommonDivisionX = [l2PreProcessingX(i) for i in dfCommonDivision.Question.values]
l2CommonDivisionY = [l2PreProcessingY(i) for i in dfCommonDivision.Equation.values]
l2CommonDivisionModel = RandomForestClassifier()
l2CommonDivisionModel.fit(l2CommonDivisionX,l2CommonDivisionY)"""

'l2CommonDivisionX = [l2PreProcessingX(i) for i in dfCommonDivision.Question.values]\nl2CommonDivisionY = [l2PreProcessingY(i) for i in dfCommonDivision.Equation.values]\nl2CommonDivisionModel = RandomForestClassifier()\nl2CommonDivisionModel.fit(l2CommonDivisionX,l2CommonDivisionY)'

In [27]:
l2PhysicsX = [l2PreProcessingX(i) for i in dfPhysics.Question.values]
l2PhysicsY = [l2PreProcessingY(i) for i in dfPhysics.Equation.values]
l2PhysicsModel = RandomForestClassifier()
l2PhysicsModel.fit(l2PhysicsX,l2PhysicsY)

In [28]:
def l2PreProcessingXCommon(text) :
    text = text.replace(".", "")
    text = text.replace(",", "")
    doc = nlp(text.lower())
    tagged_words = np.array(list(map(lambda x: (x.text, x.pos_), doc)))
    verbs = []
    advs = []
    adjs = []
    nouns = []
    nums = []
    auxs = []
    for i in tagged_words :
        
        if re.search('^number', i[0]) :
            nums.append(i[0])

        else :
            if i[1]== "VERB":
                verbs.append(i[0])
            if i[1]== "ADV":
                advs.append(i[0])
            if i[1]== "ADJ":
                adjs.append(i[0])
            if i[1]== "AUX":
                auxs.append(i[0])
            if i[1]== "NOUN":
                nouns.append(i[0])



    verbs = list(dict.fromkeys([wordnet_lemmatizer.lemmatize(i,pos='v') for i in verbs]))

    print(verbs,advs,adjs,auxs,nums,nouns)

    encoded_verbs = [encoder.encode_word(i) for i in verbs]
    if len(encoded_verbs) < 5:
            encoded_verbs = np.pad(encoded_verbs, (0, 5 - len(encoded_verbs)))

    encoded_adjs = [encoder.encode_word(i) for i in adjs]
    if len(encoded_adjs) < 5:
            encoded_adjs = np.pad(encoded_adjs, (0, 5 - len(encoded_adjs)))

    encoded_advs = [encoder.encode_word(i) for i in advs]
    if len(encoded_advs) < 5:
            encoded_advs = np.pad(encoded_advs, (0, 5 - len(encoded_advs)))

    encoded_auxs = [encoder.encode_word(i) for i in auxs]
    if len(encoded_auxs) < 5:
            encoded_auxs = np.pad(encoded_auxs, (0, 5 - len(encoded_auxs)))

    encoded_nums = [encoder.encode_word(i) for i in nums]
    if len(encoded_nums) < 5:
            encoded_nums = np.pad(encoded_nums, (0, 5 - len(encoded_nums)))

    encoded_nouns = [encoder.encode_word(i) for i in nouns]
    if len(encoded_nouns) < 10:
            encoded_nouns = np.pad(encoded_nouns, (0, 10 - len(encoded_nouns)))
    encoded_verbs = []
    return np.hstack((encoded_verbs[:5] , encoded_advs[:5] , encoded_adjs[:5] , encoded_auxs[:5] , encoded_nums[:5] , encoded_nouns[:5]))

In [59]:
dfGeneral

Unnamed: 0,Question,Equation
0,gino has number0 popsicle sticks . i have numb...,+ number0 number1
1,lino picked up number0 shells at the seashore ...,+ number0 number1
2,there were number0 parents in the program and ...,+ number0 number1
3,last saturday marie sold number0 magazines and...,+ number0 number1
4,there are number0 birds on the fence . number1...,+ number0 number1
...,...,...
6526,an industrial machine made number0 shirts yest...,/ + number0 number1 number2
6527,an industrial machine made number0 shirts yest...,/ number1 number2
6528,an industrial machine can make number0 shirts ...,/ + number1 number2 number0
6529,an industrial machine can make number0 shirts ...,* number0 + number1 number2


In [29]:
def predictionPreprocessingCommon(text) :
    text = text.replace(".", "")
    text = text.replace(",", "")
    doc = nlp(text.lower())
    tagged_words = np.array(list(map(lambda x: (x.text, x.pos_), doc)))
    advs = []
    adjs = []
    nouns = []
    nums = []
    auxs = []
    count=0
    for i in tagged_words :
        if i[1]== "ADV":
            advs.append(i[0])
        if i[1]== "ADJ":
            adjs.append(i[0])
        if i[1]== "AUX":
            auxs.append(i[0])
        if i[1]== "NOUN":
            nouns.append(i[0])
        if i[1]== "NUM" and i[0].isnumeric() :
              nums.append("number{}".format(count))
              count+=1

    encoded_adjs = [encoder.encode_word(i) for i in adjs]
    if len(encoded_adjs) < 5:
            encoded_adjs = np.pad(encoded_adjs, (0, 5 - len(encoded_adjs)))

    encoded_advs = [encoder.encode_word(i) for i in advs]
    if len(encoded_advs) < 5:
            encoded_advs = np.pad(encoded_advs, (0, 5 - len(encoded_advs)))

    encoded_auxs = [encoder.encode_word(i) for i in auxs]
    if len(encoded_auxs) < 5:
            encoded_auxs = np.pad(encoded_auxs, (0, 5 - len(encoded_auxs)))

    encoded_nums = [encoder.encode_word(i) for i in nums]
    if len(encoded_nums) < 5:
            encoded_nums = np.pad(encoded_nums, (0, 5 - len(encoded_nums)))

    encoded_nouns = [encoder.encode_word(i) for i in nouns]
    if len(encoded_nouns) < 10:
            encoded_nouns = np.pad(encoded_nouns, (0, 10 - len(encoded_nouns)))
    return np.hstack((encoded_advs[:5] , encoded_adjs[:5] , encoded_auxs[:5] , encoded_nums[:5] , encoded_nouns[:5]))

In [30]:
l2CommonX = [l2PreProcessingXCommon(i) for i in dfCommon.Question.values]
l2CommonY = [l2PreProcessingY(i) for i in dfCommon.Equation.values]
l2CommonModel = RandomForestClassifier()
l2CommonModel.fit(l2CommonX,l2CommonY)

[] ['there'] ['many'] ['are'] ['number0'] ['seconds', 'hours']
[] ['there'] ['many'] ['are'] ['number0'] ['hours', 'seconds']
[] ['there'] ['many'] ['are'] ['number0'] ['seconds', 'minutes']
[] ['there'] ['many'] ['are'] ['number0'] ['minutes', 'seconds']
[] ['there'] ['many'] ['are'] ['number0'] ['minutes', 'hours']
[] ['there'] ['many'] ['are'] ['number0'] ['hours', 'minutes']


In [31]:
def predictionPreprocessing(text) :
    text = text.replace(".", "")
    text = text.replace(",", "")
    doc = nlp(text.lower())
    tagged_words = np.array(list(map(lambda x: (x.text, x.pos_), doc)))
    verbs = []
    advs = []
    adjs = []
    nouns = []
    nums = []
    auxs = []
    count=0
    for i in tagged_words :
        if i[1]== "VERB":
            verbs.append(i[0])
        if i[1]== "ADV":
            advs.append(i[0])
        if i[1]== "ADJ":
            adjs.append(i[0])
        if i[1]== "AUX":
            auxs.append(i[0])
        if i[1]== "NOUN":
            nouns.append(i[0])
        if i[1]== "NUM" and i[0].isnumeric() :
              nums.append("number{}".format(count))
              count+=1



    verbs = list(dict.fromkeys([wordnet_lemmatizer.lemmatize(i,pos='v') for i in verbs]))


    encoded_verbs = [encoder.encode_word(i) for i in verbs]
    if len(encoded_verbs) < 5:
            encoded_verbs = np.pad(encoded_verbs, (0, 5 - len(encoded_verbs)))

    encoded_adjs = [encoder.encode_word(i) for i in adjs]
    if len(encoded_adjs) < 5:
            encoded_adjs = np.pad(encoded_adjs, (0, 5 - len(encoded_adjs)))

    encoded_advs = [encoder.encode_word(i) for i in advs]
    if len(encoded_advs) < 5:
            encoded_advs = np.pad(encoded_advs, (0, 5 - len(encoded_advs)))

    encoded_auxs = [encoder.encode_word(i) for i in auxs]
    if len(encoded_auxs) < 5:
            encoded_auxs = np.pad(encoded_auxs, (0, 5 - len(encoded_auxs)))

    encoded_nums = [encoder.encode_word(i) for i in nums]
    if len(encoded_nums) < 5:
            encoded_nums = np.pad(encoded_nums, (0, 5 - len(encoded_nums)))

    encoded_nouns = [encoder.encode_word(i) for i in nouns]
    if len(encoded_nouns) < 10:
            encoded_nouns = np.pad(encoded_nouns, (0, 10 - len(encoded_nouns)))
    encoded_nouns = []
    return np.hstack((encoded_verbs[:5] , encoded_advs[:5] , encoded_adjs[:5] , encoded_auxs[:5] , encoded_nums[:5] , encoded_nouns[:10]))

In [32]:
def predictFormula(text,model,predalgo) :
    formula = model.predict([predalgo(text)])
    #print (formula)
    formula = list(formula[0])
#print ("hjgjh",formula)
    L = []
    for i in formula :
        if i != 0  :
            L.append(encoder.decode_word(i))
        else :
            break
    print("L: ",L)
    nums = dict()
    doc = nlp(text.lower())
    tagged_words = np.array(list(map(lambda x: (x.text, x.pos_), doc)))
    count = 0
    for i in tagged_words :
        if i[1]== "NUM" and i[0].isnumeric() :
              nums["number{}".format(count)] = i[0]
              
              count+=1
    #print(nums)
    final = [nums[i] if re.search('^number',i) else i for i in L]
    return(final)

In [33]:
import math
def evalPrediction(text,model,predAlgo) :
    final = predictFormula(text,model,predAlgo)
    #print('final in eval:',final)
    operators = {'+': lambda x, y: x + y,
                 '-': lambda x, y: x - y,
                 '*': lambda x, y: x * y,
                 '/': lambda x, y: x / y,
                 '√': lambda x:math.sqrt(x),
                 '^': lambda x,y: x**y}

    stack = []
    for token in reversed(final):
        if token.isnumeric():
            stack.append(int(token))
        elif token in operators:
            op1 = stack.pop()
            op2 = stack.pop()
            result = operators[token](op1, op2)
            stack.append(result)
    answer = stack[-1]
    if answer < 0 :
        answer = -answer
    if answer < 1 and answer > 0 :
        answer = 1/answer   
    return answer

    

In [34]:
import time
def generateText(text) :
    t = predictType(text)
    delay = 0.02
    text1 = "Predicted Type of the question is {}.".format(t)
    predalgo = predictionPreprocessing
    if t=="General" :
        model = l2GeneralModel
    """if t=="Addition" :
        model = l2AdditionModel
    if t=="Subtraction" :
        model = l2SubtractionModel
    if t=="Multiplication" :
        model = l2MultiplicationModel"""
    if t=="Common" :
        model = l2CommonModel
        predalgo = predictionPreprocessingCommon
    if t=='Physics':
        model = l2PhysicsModel
        #predalgo = predictionPreprocessingCommon

    text2 = "The Equation generated for the above question is \"{}\".".format(" ".join(predictFormula(text,model,predalgo)))
    text3 = "The result predicted for the above question is {}.".format(evalPrediction(text,model,predalgo))
    for char in text1 :
        print(char,end="",flush=True)
        time.sleep(delay)
    print("\n")
    print("Using {} Model ...\n".format(t))
    for char in text2 :
        print(char,end="",flush=True)
        time.sleep(delay)
    print("\n")
    for char in text3 :
        print(char,end="",flush=True)
        time.sleep(delay)
    print("\n")


In [52]:
txt = "Alex bought a toy for $18 and a puzzle for $7. How much money did he spend in total?"
generateText(txt)


L:  ['+', 'number0', 'number1']
L:  ['+', 'number0', 'number1']
Predicted Type of the question is General.

Using General Model ...

The Equation generated for the above question is "+ 18 7".

The result predicted for the above question is 25.



In [117]:
txt = "Susan has 15 stickers. If she wants to distribute them equally among her 5 friends, how many stickers will each friend get?"
generateText(txt)

L:  ['/', 'number0', 'number1']
L:  ['/', 'number0', 'number1']
Predicted Type of the question is General.

Using General Model ...

The Equation generated for the above question is "/ 15 5".

The result predicted for the above question is 3.0.



In [119]:

txt = "A carpenter cuts a wooden board into 6 equal pieces. If the board's length is 48 inches, what is the length of each piece?"
generateText(txt)


L:  ['/', 'number0', 'number1']
L:  ['/', 'number0', 'number1']
Predicted Type of the question is General.

Using General Model ...

The Equation generated for the above question is "/ 6 48".

The result predicted for the above question is 8.0.



In [43]:
text = ''
t = predictType(text)
generateText(text)
text1 = "Predicted Type of the question is {}.".format(t)
print(text1)

L:  ['*', 'number0', 'number0', 'number1', 'number2']
L:  ['*', 'number0', 'number0', 'number1', 'number2']
Predicted Type of the question is General.

Using General Model ...

The Equation generated for the above question is "* 4 4 2 6".

The result predicted for the above question is 16.

Predicted Type of the question is General.


In [108]:
text = 'A monkey climbs 4 steps above and falls down 2 steps so how many steps is he taking'
t = predictType(text)
generateText(text)
text1 = "Predicted Type of the question is {}.".format(t)
print(text1)

L:  ['/', '√', 'number0', '', '√', '']
L:  ['/', '√', 'number0', '', '√', '']


IndexError: pop from empty list

In [40]:
"""# Step 1: Decode y_train equations
decoded_y_train = [["".join(encoder.decode_word(i) for i in equation)] for equation in y_train]

# Step 2: Solve y_train equations to get true answers
true_answers = [evalPrediction(equation[0], model=None, predAlgo=None) for equation in decoded_y_train]

# Step 3: Predict y_pred equations using the trained model
y_pred = l2GeneralModel.predict(X_test)

# Step 4: Decode y_pred equations and solve them to get predicted answers
decoded_y_pred = [["".join(encoder.decode_word(i) for i in equation)] for equation in y_pred]
predicted_answers = [evalPrediction(equation[0], model=None, predAlgo=None) for equation in decoded_y_pred]

# Step 5: Compare true_answers and predicted_answers to calculate accuracy
def calculate_accuracy(true_answers, predicted_answers):
    correct_count = sum(1 for true, pred in zip(true_answers, predicted_answers) if abs(true - pred) < 1e-6)
    total_count = len(true_answers)
    return correct_count / total_count

accuracy = calculate_accuracy(true_answers, predicted_answers)
print("Accuracy:", accuracy)
"""

'# Step 1: Decode y_train equations\ndecoded_y_train = [["".join(encoder.decode_word(i) for i in equation)] for equation in y_train]\n\n# Step 2: Solve y_train equations to get true answers\ntrue_answers = [evalPrediction(equation[0], model=None, predAlgo=None) for equation in decoded_y_train]\n\n# Step 3: Predict y_pred equations using the trained model\ny_pred = l2GeneralModel.predict(X_test)\n\n# Step 4: Decode y_pred equations and solve them to get predicted answers\ndecoded_y_pred = [["".join(encoder.decode_word(i) for i in equation)] for equation in y_pred]\npredicted_answers = [evalPrediction(equation[0], model=None, predAlgo=None) for equation in decoded_y_pred]\n\n# Step 5: Compare true_answers and predicted_answers to calculate accuracy\ndef calculate_accuracy(true_answers, predicted_answers):\n    correct_count = sum(1 for true, pred in zip(true_answers, predicted_answers) if abs(true - pred) < 1e-6)\n    total_count = len(true_answers)\n    return correct_count / total_coun

In [41]:
"""from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Assuming you have l2generalX and l2generalY as your features and labels, respectively

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(l2generalX, l2generalY, test_size=0.2, random_state=42)

# Create and train the model
l2GeneralModel = RandomForestClassifier()
l2GeneralModel.fit(X_train, y_train)"""


'from sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestClassifier\n\n# Assuming you have l2generalX and l2generalY as your features and labels, respectively\n\n# Split the data into training and testing sets\nX_train, X_test, y_train, y_test = train_test_split(l2generalX, l2generalY, test_size=0.2, random_state=42)\n\n# Create and train the model\nl2GeneralModel = RandomForestClassifier()\nl2GeneralModel.fit(X_train, y_train)'

In [42]:
df_test=pd.read_csv("/Users/gayathrinettem/Desktop/Project/final_test_1.csv")
df_test

FileNotFoundError: [Errno 2] No such file or directory: '/Users/gayathrinettem/Desktop/Project/final_test_1.csv'

In [None]:
dfGeneral_t = df_test[df_test.Type=="General"][["Question","Equation"]]
dfCommon_t = df_test[df_test.Type=="Common"][["Question","Equation"]]
dfPhysics_t = df_test[df_test.Type=="Physics"][["Question","Equation"]]
dfTime_t = df_test[df_test.Type=="Time"][["Question","Equation"]]
dfLcm_Hcf_t = df_test[df_test.Type=="LCM_HCF"][["Question","Equation"]]

In [None]:
l2generalX_t = [l2PreProcessingX(i) for i in dfGeneral_t.Question.values]
l2generalY_t = [l2PreProcessingY(i) for i in dfGeneral_t.Equation.values]

In [None]:
y_test = l2generalY_t

In [None]:
y_test = [list(equation) for equation in y_test]
y_test

[[5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [5551, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0,

In [None]:
y_pred = l2GeneralModel.predict(l2generalX_t)
y_pred = [list(equation) for equation in y_pred]
y_pred

[[5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [5551, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0],
 [1966, 140, 55, 0, 0, 0,

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# Convert y_test and y_pred into NumPy arrays
y_test_array = np.array(y_test)
y_pred_array = np.array(y_pred)

# Flatten the arrays, as accuracy_score expects 1D arrays
y_test_flat = y_test_array.flatten()
y_pred_flat = y_pred_array.flatten()

# Calculate the accuracy
accuracy = accuracy_score(y_test_flat, y_pred_flat)

print("Accuracy:", accuracy*100)


Accuracy: 91.92406508706823


In [None]:
def predict_Formula(formula) :
    
    #formula = model.predict([predalgo(text)])
    print (formula)
    #formula = list(formula[0])
    print ("hjgjh",formula)
    L = []
    for i in formula :
        if i != 0  :
            L.append(encoder.decode_word(i))
        else :
            break
    nums = dict()
    doc = nlp(text.lower())
    tagged_words = np.array(list(map(lambda x: (x.text, x.pos_), doc)))
    count = 0
    for i in tagged_words :
        if i[1]== "NUM" and i[0].isnumeric() :
              nums["number{}".format(count)] = i[0]
              
              count+=1
    print(nums)
    final = ["10000" if i == "number0" else "1000" if i == "number1" else "100" if i == "number2" else "10" if i == "number3" else "2" if i == "number4" else "5" if i == "number5" else i for i in L]
    return final

In [None]:
y_pred = [predict_Formula(equation) for equation in y_pred]
y_test = [predict_Formula(equation) for equation in y_test]

[5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
hjgjh [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
{'number0': '4', 'number1': '2', 'number2': '6'}
[5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
hjgjh [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
{'number0': '4', 'number1': '2', 'number2': '6'}
[5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
hjgjh [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
{'number0': '4', 'number1': '2', 'number2': '6'}
[5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
hjgjh [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
{'number0': '4', 'number1': '2', 'number2': '6'}
[5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
hjgjh [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
{'number0': '4', 'number1': '2', 'number2': '6'}
[5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
hjgjh [5551, 140, 55, 0, 0, 0, 0, 0, 0, 0]
{'number0': '4', 'number1': '2', 'number2': '6'}
[5551, 55, 140, 0, 0, 0, 0, 0, 0, 0]
hjgjh [5551, 55, 140, 0, 0, 0, 0, 0, 0, 0]
{'number0': '4', 'number1': '2', 'number2': '6'}
[1966, 55, 140, 0, 0, 0, 0, 0, 0, 0]
hjgjh [1966, 55, 140, 0, 0, 0, 0, 0, 0, 0]
{'number0': '4', 

In [None]:
y_pred

[['+', '10000', '1000'],
 ['+', '10000', '1000'],
 ['+', '10000', '1000'],
 ['+', '10000', '1000'],
 ['+', '10000', '1000'],
 ['+', '10000', '1000'],
 ['+', '1000', '10000'],
 ['-', '1000', '10000'],
 ['+', '1000', '10000'],
 ['+', '10000', '1000'],
 ['-', '10000', '1000'],
 ['+', '1000', '10000'],
 ['+', '10000', '1000'],
 ['-', '10000', '1000'],
 ['+', '10000', '1000'],
 ['-', '1000', '10000'],
 ['-', '1000', '10000'],
 ['-', '1000', '10000'],
 ['-', '1000', '10000'],
 ['+', '10000', '1000'],
 ['-', '10000', '1000'],
 ['-', '1000', '10000'],
 ['-', '10000', '1000'],
 ['-', '10000', '1000'],
 ['+', '10000', '1000'],
 ['-', '10000', '1000'],
 ['+', '10000', '1000'],
 ['+', '+', '10000', '1000', '100'],
 ['+', '10000', '1000'],
 ['+', '1000', '100'],
 ['-', '10000', '1000'],
 ['-', '10000', '10000', '1000', '100'],
 ['-', '10000', '1000'],
 ['+', '+', '10000', '1000', '100'],
 ['-', '10000', '1000'],
 ['-', '10000', '1000'],
 ['-', '10000', '1000'],
 ['+', '10000', '1000'],
 ['+', '1000

In [None]:
import math

def evalPrediction_1(final) :
    operators = {'+': lambda x, y: x + y,
                 '-': lambda x, y: x - y,
                 '*': lambda x, y: x * y,
                 '/': lambda x, y: x / y,
                 '√': lambda x: math.sqrt(x),
                 '^': lambda x, y: x**y}

    stack = []
    for token in reversed(final):
        if token.isnumeric():
            stack.append(int(token))
        elif token in operators:
            if len(stack) >= 2:  # Check if there are at least two operands in the stack
                op1 = stack.pop()
                op2 = stack.pop()
                result = operators[token](op1, op2)
                stack.append(result)
            else:
                return 0  # Return 0 if there's an IndexError

    if len(stack) == 1:
        answer = stack[-1]
        if answer < 0:
            answer = -answer
        if 0 < answer < 1:
            answer = 1 / answer   
        return answer
    else:
        return 0  # Return 0 if there are multiple elements in the stack

# Example usage
"""y_pred = [['/', '20', '4'], ['-', '20', '20'], ['+', '+', '20', '5', '3']]
y_pred_result = [evalPrediction(equation) for equation in y_pred]
print(y_pred_result)  # Output: [5.0, 0, 0]"""


"y_pred = [['/', '20', '4'], ['-', '20', '20'], ['+', '+', '20', '5', '3']]\ny_pred_result = [evalPrediction(equation) for equation in y_pred]\nprint(y_pred_result)  # Output: [5.0, 0, 0]"

In [None]:
y_pred = [evalPrediction_1(equation) for equation in y_pred]
y_test = [evalPrediction_1(equation) for equation in y_test]

In [None]:
# Assuming y_pred is a list of predicted values
zero_count = len([value for value in y_pred if value == 0])
print("Number of zeros in y_pred:", zero_count)
print(len(y_test))

Number of zeros in y_pred: 552
7006


In [None]:
def calculate_accuracy(y_pred, y_test):
    
    if len(y_pred) != len(y_test):
        raise ValueError("y_pred and y_test must have the same length.")

    correct_predictions = 0
    total_predictions = len(y_pred)
    cr=0
    for pred, true_val in zip(y_pred, y_test):
        if pred == true_val:
            correct_predictions += 1
        if pred-true_val==0:
            cr+=1
        
            
    acc=cr/total_predictions
    print("Acc:",cr)
    accuracy = correct_predictions / total_predictions
    return accuracy

# Assuming y_pred and y_test are lists of predicted and actual values, respectively
accuracy = calculate_accuracy(y_pred, y_test)
print("Accuracy:", accuracy*100 )


Acc: 4686
Accuracy: 66.88552669140736
