In [103]:
# Read the entire file into `text`, decoded as UTF-8, so the later cells can
# tokenize and parse it from an in-memory string.
with open("os2-070-128u.json", "r", encoding="utf-8") as file:
    text = file.read()

In [116]:
# Single-character punctuation, mapped to the token type it produces.
SYMBOLS = {
    "{": "LBRACE",
    "}": "RBRACE",
    "[": "LBRACKET",
    "]": "RBRACKET",
    ",": "COMMA",
    ":": "COLON",
}
# Literal names, mapped to the complete (type, value) token they produce.
KEYWORDS = {
    "true": ("BOOLEAN", True),
    "false": ("BOOLEAN", False),
    "null": ("NULL", None),
}


class Lexer:
    """Split JSON text into ``(type, value)`` tokens, one per call.

    Token types are the values of ``SYMBOLS`` plus ``"STRING"``,
    ``"NUMBER"``, ``"BOOLEAN"`` and ``"NULL"``.  ``pos`` is the index of the
    next unread character of ``text``.
    """

    # ASCII-only digit sets: ``str.isdigit`` also accepts characters such as
    # superscripts that ``int``/``float`` cannot convert.
    _DIGITS = frozenset("0123456789")
    _HEX_DIGITS = frozenset("0123456789abcdefABCDEF")
    # Character following a backslash -> the character it stands for.
    _ESCAPES = {
        '"': '"',
        "\\": "\\",
        "/": "/",
        "b": "\b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
    }

    def __init__(self, text):
        self.text = text
        self.pos = 0
        self.length = len(text)

    def next_token(self):
        """Return the next token, or ``None`` once the input is exhausted.

        Raises:
            ValueError: if the next character cannot start any token, or the
                token it starts is malformed.
        """
        while self.pos < self.length and self.text[self.pos].isspace():
            self.pos += 1

        if self.pos >= self.length:
            return None

        char = self.text[self.pos]

        if char in SYMBOLS:
            self.pos += 1
            return (SYMBOLS[char], char)
        if char == '"':
            return self.expect_string()
        if char.isdigit() or char == "-":
            return self.expect_number()
        if char.isalpha():
            return self.expect_keyword()
        raise ValueError(f"Invalid char: {char}")

    def expect_string(self):
        """Read a double-quoted string starting at ``pos``.

        Backslash escapes (``\\"``, ``\\\\``, ``\\/``, ``\\b``, ``\\f``,
        ``\\n``, ``\\r``, ``\\t`` and ``\\uXXXX``) are decoded, so an escaped
        quote no longer terminates the string.

        Returns:
            ``("STRING", decoded_text)``.

        Raises:
            ValueError: if the closing quote is missing or an escape sequence
                is invalid.
        """
        self.pos += 1  # Skip opening '"'
        parts = []
        chunk_start = self.pos  # start of the current run of plain characters

        while self.pos < self.length:
            char = self.text[self.pos]
            if char == '"':
                parts.append(self.text[chunk_start:self.pos])
                self.pos += 1  # Skip closing '"'
                return ("STRING", "".join(parts))
            if char == "\\":
                parts.append(self.text[chunk_start:self.pos])
                parts.append(self._read_escape())
                chunk_start = self.pos
            else:
                self.pos += 1

        raise ValueError("Symbol '\"' doesn't match.")

    def _read_escape(self):
        """Decode the escape sequence whose backslash is at ``pos``."""
        self.pos += 1  # Skip the backslash
        if self.pos >= self.length:
            raise ValueError("Symbol '\"' doesn't match.")

        code = self.text[self.pos]
        self.pos += 1

        if code == "u":
            point = self._read_hex4()
            # A high surrogate directly followed by an escaped low surrogate
            # encodes one character outside the Basic Multilingual Plane.
            if 0xD800 <= point <= 0xDBFF and self.text.startswith("\\u", self.pos):
                resume = self.pos
                self.pos += 2
                low = self._read_hex4()
                if 0xDC00 <= low <= 0xDFFF:
                    point = 0x10000 + ((point - 0xD800) << 10) + (low - 0xDC00)
                else:
                    # Not a pair: leave the second escape for the caller's loop.
                    self.pos = resume
            return chr(point)

        if code not in self._ESCAPES:
            raise ValueError(f"Invalid escape: \\{code}")
        return self._ESCAPES[code]

    def _read_hex4(self):
        """Consume exactly four hex digits at ``pos`` and return their value."""
        digits = self.text[self.pos:self.pos + 4]
        if len(digits) != 4 or any(d not in self._HEX_DIGITS for d in digits):
            raise ValueError(f"Invalid unicode escape: \\u{digits}")
        self.pos += 4
        return int(digits, 16)

    def _peek(self):
        """Return the character at ``pos``, or ``""`` at end of input."""
        return self.text[self.pos] if self.pos < self.length else ""

    def _skip_digits(self):
        """Advance past a run of ASCII digits and return how many were read."""
        start = self.pos
        while self.pos < self.length and self.text[self.pos] in self._DIGITS:
            self.pos += 1
        return self.pos - start

    def expect_number(self):
        """Read a number: optional ``-``, digits, optional fraction/exponent.

        The value is a ``float`` when a fraction or an exponent is present
        and an ``int`` otherwise.  Forms the previous implementation accepted
        (leading zeros, ``1.``, ``-.5``) are still accepted.

        Returns:
            ``("NUMBER", value)``.

        Raises:
            ValueError: if no digits are found or the exponent has no digits.
        """
        start = self.pos
        is_float = False

        if self._peek() == "-":
            self.pos += 1

        mantissa_digits = self._skip_digits()
        if self._peek() == ".":
            self.pos += 1
            mantissa_digits += self._skip_digits()
            is_float = True
        if mantissa_digits == 0:
            raise ValueError(f"Invalid number: {self.text[start:self.pos]}")

        if self._peek() in ("e", "E"):
            self.pos += 1
            if self._peek() in ("+", "-"):
                self.pos += 1
            if self._skip_digits() == 0:
                raise ValueError(f"Invalid number: {self.text[start:self.pos]}")
            is_float = True

        value = self.text[start:self.pos]
        return ("NUMBER", float(value) if is_float else int(value))

    def expect_keyword(self):
        """Read a run of letters and return the matching ``KEYWORDS`` token.

        Raises:
            ValueError: if the word is not ``true``, ``false`` or ``null``.
        """
        start = self.pos
        while self.pos < self.length and self.text[self.pos].isalpha():
            self.pos += 1
        value = self.text[start:self.pos]

        if value not in KEYWORDS:
            raise ValueError(f"Invalid Keyword: {value}")
        return KEYWORDS[value]

In [124]:
# Token types whose token value is already the finished Python value.
normal_cases = ["STRING", "NUMBER", "BOOLEAN", "NULL"]


class Parser:
    """Recursive-descent parser over a token source.

    ``lexer`` is any object whose ``next_token()`` returns ``(type, value)``
    tuples and ``None`` at end of input.  ``current_token`` always holds the
    next unconsumed token.

    Structural decisions are made on the token *type*, never on its value,
    so a string whose text is ``"]"``, ``"}"``, ``","`` or ``":"`` is treated
    as data rather than punctuation.
    """

    def __init__(self, lexer):
        self.lexer = lexer
        self.current_token = self.lexer.next_token()

    def parse_value(self):
        """Parse one value starting at the current token and return it.

        Returns:
            The token value for scalars, a ``dict`` for objects, a ``list``
            for arrays.

        Raises:
            ValueError: on end of input or a token that cannot start a value.
        """
        if self.current_token is None:
            raise ValueError("Unexpected end of input")

        t_type, t_value = self.current_token

        if t_type in normal_cases:
            self.forward()
            return t_value
        if t_type == "LBRACE":
            return self.parse_object()
        if t_type == "LBRACKET":
            return self.parse_array()
        raise ValueError(f"Invalid value: {t_value}")

    def parse_object(self):
        """Parse ``{ "key": value, ... }`` and return it as a ``dict``.

        A comma directly before the closing brace is tolerated, as it was
        before.

        Raises:
            ValueError: if a key is not a string, or ``:`` or the closing
                ``}`` is missing.
        """
        obj = {}
        self.forward()  # Skip '{'

        while self._current_type() not in (None, "RBRACE"):
            if self._current_type() != "STRING":
                raise ValueError(
                    f"Object key must be a string: {self.current_token[1]}"
                )
            key = self.current_token[1]
            self.forward()

            self._expect("COLON", ":")
            obj[key] = self.parse_value()

            if self._current_type() != "COMMA":
                break
            self.forward()  # Other members exist

        self._expect("RBRACE", "}")
        return obj

    def parse_array(self):
        """Parse ``[ value, ... ]`` and return it as a ``list``.

        A comma directly before the closing bracket is tolerated, as it was
        before.

        Raises:
            ValueError: if the closing ``]`` is missing.
        """
        arr = []
        self.forward()  # Skip '['

        while self._current_type() not in (None, "RBRACKET"):
            arr.append(self.parse_value())
            if self._current_type() != "COMMA":
                break
            self.forward()  # Other values exist

        self._expect("RBRACKET", "]")
        return arr

    def forward(self):
        """Advance ``current_token`` to the next token from the lexer."""
        self.current_token = self.lexer.next_token()

    def _current_type(self):
        """Return the current token's type, or ``None`` at end of input."""
        if self.current_token is None:
            return None
        return self.current_token[0]

    def _expect(self, t_type, symbol):
        """Consume a token of type ``t_type`` or raise naming ``symbol``."""
        if self._current_type() != t_type:
            raise ValueError(f"Lack of symbol '{symbol}'")
        self.forward()

In [125]:
def test_lexer(text):
    """Tokenize *text* with ``Lexer`` and print every token, one per line.

    All tokens are collected before anything is printed, so a lexing error
    raises before any output is produced.
    """
    # The lexer class defined in this file is named ``Lexer``.
    lexer = Lexer(text)
    # ``next_token`` returns None at end of input, which ends the iteration.
    tokens = list(iter(lexer.next_token, None))

    for t in tokens:
        print(t)

# Print the token stream of the document loaded into `text`.
test_lexer(text)

('LBRACE', '{')
('STRING', '.params')
('COLON', ':')
('LBRACE', '{')
('STRING', 'cut_angle')
('COLON', ':')
('STRING', '0')
('COMMA', ',')
('STRING', 'host')
('COLON', ':')
('STRING', '10.0.0.5')
('COMMA', ',')
('STRING', 'hw_time')
('COLON', ':')
('STRING', 'no')
('COMMA', ',')
('STRING', 'hw_time_offset')
('COLON', ':')
('STRING', '0')
('COMMA', ',')
('STRING', 'mode')
('COLON', ':')
('STRING', '2048x10')
('COMMA', ',')
('STRING', 'multiplier')
('COLON', ':')
('STRING', '1')
('COMMA', ',')
('STRING', 'name')
('COLON', ':')
('STRING', 'os2')
('COMMA', ',')
('STRING', 'profile')
('COLON', ':')
('STRING', 'single')
('COMMA', ',')
('STRING', 'range')
('COLON', ':')
('STRING', '[0.5,1000]')
('COMMA', ',')
('STRING', 'window')
('COLON', ':')
('STRING', '[0,360000]')
('RBRACE', '}')
('COMMA', ',')
('STRING', 'beam_intrinsics')
('COLON', ':')
('LBRACE', '{')
('STRING', 'beam_altitude_angles')
('COLON', ':')
('LBRACKET', '[')
('NUMBER', 10.84)
('COMMA', ',')
('NUMBER', 10.67)
('COMMA', ',')
(

In [111]:
# Parse the loaded document: the parser pulls tokens from the lexer on demand
# and parse_value() returns the top-level value.
lexer = Lexer(text)
parser = Parser(lexer)
parsed_data = parser.parse_value()

In [112]:
# Print each top-level (key, value) pair of the parsed document.
for item in parsed_data.items():
    print(item)

('.params', {'cut_angle': '0', 'host': '10.0.0.5', 'hw_time': 'no', 'hw_time_offset': '0', 'mode': '2048x10', 'multiplier': '1', 'name': 'os2', 'profile': 'single', 'range': '[0.5,1000]', 'window': '[0,360000]'})
('beam_intrinsics', {'beam_altitude_angles': [10.84, 10.67, 10.51, 10.34, 10.2, 10.01, 9.85, 9.67, 9.51, 9.33, 9.17, 8.99, 8.83, 8.66, 8.49, 8.3, 8.14, 7.97, 7.79, 7.61, 7.45, 7.28, 7.11, 6.92, 6.76, 6.57, 6.41, 6.23, 6.06, 5.89, 5.72, 5.53, 5.37, 5.2, 5.02, 4.83, 4.68, 4.5, 4.32, 4.13, 3.98, 3.8, 3.62, 3.43, 3.28, 3.1, 2.92, 2.73, 2.58, 2.39, 2.22, 2.03, 1.86, 1.69, 1.51, 1.33, 1.17, 0.99, 0.82, 0.62, 0.46, 0.29, 0.11, -0.08, -0.24, -0.41, -0.6, -0.79, -0.95, -1.12, -1.3, -1.48, -1.66, -1.82, -2, -2.17, -2.35, -2.53, -2.71, -2.89, -3.05, -3.23, -3.41, -3.58, -3.75, -3.92, -4.11, -4.29, -4.45, -4.63, -4.8, -4.99, -5.14, -5.33, -5.5, -5.68, -5.83, -6.03, -6.21, -6.38, -6.53, -6.72, -6.91, -7.07, -7.23, -7.42, -7.59, -7.76, -7.92, -8.1, -8.28, -8.46, -8.61, -8.8, -8.97, -9.14, -