In [106]:
import re
from dataclasses import dataclass
from typing import Any

@dataclass
class Token:
    """A single lexical unit: a category label plus an optional payload.

    `value` defaults to None when it is not supplied.
    """
    # Token kind label, e.g. 'STRING', 'NUMBER' or 'OPEN_BRACE'.
    category: str
    # Payload carried by the token. Annotated with typing.Any because the
    # builtin function `any` is not a type.
    value: Any = None

# Categories that are pure punctuation and therefore carry no payload.
_PUNCTUATION_CATEGORIES = frozenset({
    'OPEN_BRACE', 'CLOSE_BRACE', 'OPEN_BRACKET',
    'CLOSE_BRACKET', 'COLON', 'COMMA',
})

# Single-character escapes defined by JSON, mapped to the character they denote.
_SIMPLE_ESCAPES = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}

# One escape sequence, tried in this order: a UTF-16 surrogate pair written as
# two \uXXXX escapes, a single \uXXXX escape, or a backslash plus one character.
_ESCAPE_RE = re.compile(
    r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
    r'|\\u([0-9a-fA-F]{4})'
    r'|\\(.)',
    re.DOTALL,
)


def _decode_escape(match):
    """Translate one matched escape sequence into the character it stands for."""
    high, low, single, simple = match.groups()
    if high is not None:
        # Combine the surrogate halves into one code point above U+FFFF.
        return chr(0x10000 + ((int(high, 16) - 0xD800) << 10) + (int(low, 16) - 0xDC00))
    if single is not None:
        return chr(int(single, 16))
    if simple in _SIMPLE_ESCAPES:
        return _SIMPLE_ESCAPES[simple]
    raise ValueError(f"Invalid escape sequence in string: \\{simple}")


def process_token_value(category, value):
    """Convert the raw text of a token into the Python value it represents.

    Args:
        category: Token category name (e.g. 'STRING', 'NUMBER', 'COMMA').
        value: The matched source text for the token.

    Returns:
        None for punctuation categories; an int or float for 'NUMBER'
        (float when the text has a fraction or exponent); a bool for
        'BOOLEAN'; the decoded text for 'STRING'; the text unchanged for any
        other category.

    Raises:
        ValueError: If a string contains an unknown backslash escape, or a
            number's text cannot be converted.
    """
    if category in _PUNCTUATION_CATEGORIES:
        return None

    if category == 'NUMBER':
        # A fraction or an exponent makes the literal a float; int() would
        # reject exponent notation.
        is_float = any(marker in value for marker in '.eE')
        return float(value) if is_float else int(value)

    if category == 'BOOLEAN':
        return value == 'true'

    if category == 'STRING':
        # Remove exactly one delimiter from each end. str.strip('"') would also
        # eat a quote that belongs to the content, e.g. the text "\"".
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        if '\\' not in value:
            return value
        return _ESCAPE_RE.sub(_decode_escape, value)

    return value


def lexer(json_string):
    """Lazily split `json_string` into Token objects.

    At each position the rules are tried in the order listed and the first
    one that matches wins. Whitespace is consumed without producing a token;
    every other match is yielded as `Token(category, payload)`, where the
    payload comes from `process_token_value`.

    Note: the rule table has no entry for the `null` literal, and the NUMBER
    pattern has no exponent part.

    Raises:
        ValueError: While iterating, when no rule matches at the current
            position.
    """
    rules = (
        ('STRING', re.compile(r'"(?:[^"\\]|\\.)*"')),
        ('NUMBER', re.compile(r'-?\b\d+(\.\d+)?\b')),
        ('BOOLEAN', re.compile(r'\b(true|false)\b')),
        ('OPEN_BRACE', re.compile(r'\{')),
        ('CLOSE_BRACE', re.compile(r'\}')),
        ('OPEN_BRACKET', re.compile(r'\[')),
        ('CLOSE_BRACKET', re.compile(r'\]')),
        ('COLON', re.compile(r':')),
        ('COMMA', re.compile(r',')),
        ('WHITESPACE', re.compile(r'\s+')),
    )

    cursor = 0
    end = len(json_string)
    while cursor < end:
        for kind, pattern in rules:
            hit = pattern.match(json_string, cursor)
            if hit is None:
                continue
            if kind != 'WHITESPACE':
                yield Token(kind, process_token_value(kind, hit.group(0)))
            cursor = hit.end()
            break
        else:
            # The loop finished without a break: nothing matched here.
            raise ValueError(f"Unexpected character at position {cursor}: {json_string[cursor]}")
        


In [107]:
def parse_json_obj(token_gen):
    """Parse the members of a JSON object whose opening '{' was already consumed.

    Reads tokens from `token_gen` up to and including the matching
    CLOSE_BRACE token and returns the members as a dict. When a key is
    repeated, the last value is kept.

    Args:
        token_gen: Iterator of tokens exposing `.category` and `.value`.

    Returns:
        dict mapping each key string to its parsed value.

    Raises:
        ValueError: If the tokens run out before the object is closed, a key
            is not a STRING token, the ':' is missing, a value token is not
            STRING/NUMBER/BOOLEAN/'{'/'[', a member is not followed by ',' or
            '}', or a ',' is not followed by another key.
    """
    def advance():
        # Report truncated input as ValueError rather than leaking StopIteration.
        token = next(token_gen, None)
        if token is None:
            raise ValueError("Unexpected end of input while parsing object")
        return token

    res = {}

    key_token = advance()
    if key_token.category == "CLOSE_BRACE":
        return res  # empty object

    while True:
        if key_token.category != "STRING":
            raise ValueError(f"Expected STRING for key, but got {key_token.category}")
        key = key_token.value

        colon_token = advance()
        if colon_token.category != "COLON":
            raise ValueError("Expected ':' after key")

        value_token = advance()
        if value_token.category in ("STRING", "NUMBER", "BOOLEAN"):
            res[key] = value_token.value
        elif value_token.category == "OPEN_BRACE":
            res[key] = parse_json_obj(token_gen)
        elif value_token.category == "OPEN_BRACKET":
            res[key] = parse_array(token_gen)
        else:
            raise ValueError(f"Unexpected token for value: {value_token}")

        next_token = advance()
        if next_token.category == "CLOSE_BRACE":
            return res
        if next_token.category != "COMMA":
            raise ValueError(f"Expected ',' or '}}' but got {next_token.category}")

        # A comma must introduce another member, so '}' right after ',' is
        # rejected by the key check at the top of the loop.
        key_token = advance()
        
        

def parse_array(token_gen):
    """Parse the elements of a JSON array whose opening '[' was already consumed.

    Reads tokens from `token_gen` up to and including the matching
    CLOSE_BRACKET token and returns the elements as a list.

    Args:
        token_gen: Iterator of tokens exposing `.category` and `.value`.

    Returns:
        list of parsed elements, in source order.

    Raises:
        ValueError: If the tokens run out before the array is closed, an
            element token is not STRING/NUMBER/BOOLEAN/'{'/'[', or an element
            is not followed by ',' or ']'. Leading, doubled and trailing
            commas are rejected for that reason.
    """
    def advance():
        # Report truncated input as ValueError rather than leaking StopIteration.
        token = next(token_gen, None)
        if token is None:
            raise ValueError("Unexpected end of input while parsing array")
        return token

    res = []

    token = advance()
    if token.category == "CLOSE_BRACKET":
        return res  # empty array

    while True:
        if token.category in ("STRING", "NUMBER", "BOOLEAN"):
            res.append(token.value)
        elif token.category == "OPEN_BRACE":
            res.append(parse_json_obj(token_gen))  # Nested object
        elif token.category == "OPEN_BRACKET":
            res.append(parse_array(token_gen))  # Nested array
        else:
            raise ValueError(f"Invalid token in array: {token}")

        # Every element must be followed by a separator or the closing bracket;
        # skipping commas wherever they appear would accept input like [1 2].
        separator = advance()
        if separator.category == "CLOSE_BRACKET":
            return res
        if separator.category != "COMMA":
            raise ValueError(f"Expected ',' or ']' but got {separator.category}")

        token = advance()
        



In [108]:

def parser(json_stirng):
    """Parse a JSON document whose top-level value is an object or an array.

    Args:
        json_stirng: The JSON text. The parameter name is misspelled in the
            original signature and is kept as-is so keyword callers still work.

    Returns:
        dict for a top-level object, list for a top-level array.

    Raises:
        ValueError: If the input holds no tokens, does not start with '{' or
            '[', or has further tokens after the top-level value. Errors from
            the lexer and the nested parsers propagate.
    """
    token_gen = lexer(json_stirng)

    # A default avoids leaking StopIteration on empty or whitespace-only input.
    first_token = next(token_gen, None)
    if first_token is None:
        raise ValueError("Empty input: JSON must start with '{' or '['")

    if first_token.category == "OPEN_BRACE":
        result = parse_json_obj(token_gen)
    elif first_token.category == "OPEN_BRACKET":
        result = parse_array(token_gen)
    else:
        raise ValueError("JSON must start with '{' or '['")

    # The lexer is lazy, so anything after the top-level value is only examined
    # if the token stream is pulled once more here.
    trailing_token = next(token_gen, None)
    if trailing_token is not None:
        raise ValueError(f"Unexpected token after top-level value: {trailing_token}")

    return result




# Sample document for the demo: nested objects and arrays holding strings,
# integers, floats and a boolean.
json_string_object = '''
{
  "name": "John",
  "age": 30,
  "isStudent": true,
  "address": {
    "street": "123 Main St",
    "city": "New York",
    "zipcode": 10001,
    "coordinates": [40.7128, -74.0060]
  },
  "grades": [85, 90, 88],
  "courses": [
    {
      "title": "Math",
      "instructor": "Dr. Smith",
      "schedule": ["Monday", "Wednesday", "Friday"]
    },
    {
      "title": "Science",
      "instructor": "Dr. Brown",
      "schedule": ["Tuesday", "Thursday"]
    }
  ]
}
'''

# Parse the sample, then print the resulting Python structure and its type.
json_obj = parser(json_string_object)
print(json_obj)
print(type(json_obj))


{'name': 'John', 'age': 30, 'isStudent': True, 'address': {'street': '123 Main St', 'city': 'New York', 'zipcode': 10001, 'coordinates': [40.7128, -74.006]}, 'grades': [85, 90, 88], 'courses': [{'title': 'Math', 'instructor': 'Dr. Smith', 'schedule': ['Monday', 'Wednesday', 'Friday']}, {'title': 'Science', 'instructor': 'Dr. Brown', 'schedule': ['Tuesday', 'Thursday']}]}
<class 'dict'>
