In [16]:
def getKeywords(filename: str):
    keywords = {}
    with open(filename, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.replace('\n', '')
            key, val = line.split(':')
            keywords[key] = val
    return keywords

keywords = getKeywords('alfabet.csv')
print(keywords)

{'ID': '0', 'CONST': '1', '(': '2', ')': '3', ',': '4', 'int': '5', 'float': '6', 'string': '7', '{': '8', '}': '9', ';': '10', '<-': '11', 'scrie': '12', 'citeste': '13', '+': '14', '-': '15', '*': '16', '/': '17', '%': '18', 'while': '19', '!=': '20', '==': '21', '<': '22', '>': '23', '<=': '24', '>=': '25', 'if': '26', '': '27', '"': '28', 'main': '29', '=': '30'}


In [17]:
class Node:
    def __init__(self, key):
        self.key = key
        self.left = None
        self.right = None

class BinarySearchTree:
    def __init__(self):
        self.root = None

    def insert(self, key):
        self.root = self._insert_recursive(self.root, key)

    def _insert_recursive(self, root, key):
        if root is None:
            return Node(key)
        if key < root.key:
            root.left = self._insert_recursive(root.left, key)
        else:
            root.right = self._insert_recursive(root.right, key)
        return root

    def get_index(self, key):
        return self._get_index_recursive(self.root, key, 0)

    def _get_index_recursive(self, root, key, index):
        if root is None:
            return None
        if key == root.key:
            return index + self._count_nodes(root.left)
        if key < root.key:
            return self._get_index_recursive(root.left, key, index)
        return self._get_index_recursive(root.right, key, index + self._count_nodes(root.left) + 1)

    def _count_nodes(self, root):
        if root is None:
            return 0
        return 1 + self._count_nodes(root.left) + self._count_nodes(root.right)

    def inorder_traversal_print(self):
        return self._inorder_recursive_print_(self.root)

    def _inorder_recursive_print_(self, root):
        if root is not None:
            self._inorder_recursive_print_(root.left)
            print(root.key + " : " + str(self.get_index(root.key)))
            self._inorder_recursive_print_(root.right)

In [18]:
import re
from collections import OrderedDict

def getFIP(cod: str, keywords: dict, ts):
    token_specification = [
        ('NUMBER',   r'\d+(\.\d*)?'),
        ('ASSIGN',   r'<-'),
        ('NEQ',      r'!='),
        ('END',      r';'),
        ('ID',       r'[A-Za-z_]\w*'),
        ('OP',       r'[+\-*/%]'),
        ('LPAREN',   r'\('),
        ('RPAREN',   r'\)'),
        ('LBRACE',   r'\{'),
        ('RBRACE',   r'\}'),
        ('NEWLINE',  r'\n'),
        ('SKIP',     r'[ \t]+'),
        ('MISMATCH', r'.'),
    ]
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    get_token = re.compile(tok_regex).match
    line = cod
    pos = 0
    linie = 0
    fip = OrderedDict()
    exception = None
    mo = get_token(line)
    while mo is not None:
        kind = mo.lastgroup
        value = mo.group(kind)
        if kind == 'NUMBER':
            fip[value] = str(keywords['CONST']) + ' | ' + str(ts.get_index(value))
        elif kind == 'ID':
            if value in keywords:
                fip[value] = keywords[value]
            else:
                fip[value] = str(keywords['ID']) + ' | ' + str(ts.get_index(value))
        elif kind == 'ASSIGN':
            fip[value] = keywords['<-']
        elif kind == 'NEQ':
            fip[value] = keywords['!=']
        elif kind == 'OP':
            fip[value] = keywords[value]
        elif kind == 'LPAREN':
            fip[value] = keywords['(']
        elif kind == 'RPAREN':
            fip[value] = keywords[')']
        elif kind == 'LBRACE':
            fip[value] = keywords['{']
        elif kind == 'RBRACE':
            fip[value] = keywords['}']
        elif kind == 'NEWLINE' or kind == 'SKIP':
            linie += 1
            pass
        elif kind == 'MISMATCH':
            exception = Exception(f'Unexpected character {value!r}')
            break
        pos = mo.end()
        mo = get_token(line, pos)
    if pos != len(line):
        exception = Exception(f'Unexpected character {line[pos]!r} at position {pos} at line {pos//10}')
    return fip, exception


In [19]:
import re

def getTS(cod: str, keywords: dict):

    ts = BinarySearchTree()
    exception = None

    pattern_variables = r'\b(float|int|string)\s+(\w+)\b'
    variables = re.findall(pattern_variables, cod)
    variables = [var[1] for var in variables]

    filtered_variables = []
    for var in variables:
        if var in keywords:
            continue
        elif len(var) > 8:
            exception = Exception(f"A variable must have less than 9 characters: {var}")
            continue
        filtered_variables.append(var)
        ts.insert(var)
    variables = filtered_variables

    pattern_constants = r'\b-?\d+(?:\.\d+)?\b'
    constants = re.findall(pattern_constants, cod)

    pattern_strings = r'"([^"]*)"'
    strings = re.findall(pattern_strings, cod)

    # Combine all constants (numbers and strings)
    constants_full = constants + strings

    # Remove duplicates and exclude variables and keywords
    constants_set = set(constants_full)
    constants_set = {c for c in constants_set if c not in variables and c not in keywords}

    # Insert constants into the symbol table
    for c in constants_set:
        ts.insert(c)

    return ts, exception


In [20]:
keywords = getKeywords('alfabet.csv')

filename = "code.txt"
#filename = "wrong.txt"
with open(filename, 'r', encoding='utf-8') as f:
    cod = f.read()

ts, exception = getTS(cod, keywords)

print('TS')
print('--------------------')
ts.inorder_traversal_print()
print('--------------------')
print()


fip, exception2 = getFIP(cod, keywords, ts)

print('FIP')
print('--------------------')
for key in fip.keys():
    print(key + " : " + fip[key])
print('--------------------')

if exception != None:
    raise exception

if exception2 != None:
    raise exception2

TS
--------------------
0 : 0
1 : 1
a : 2
b : 3
r : 4
--------------------

FIP
--------------------
int : 5
main : 29
( : 2
) : 3
{ : 8
a : 0 | 2
b : 0 | 3
citeste : 13
r : 0 | 4
<- : 11
1 : 1 | 1
while : 19
!= : 20
0 : 1 | 0
% : 18
} : 9
--------------------
