In [1]:
import re
import json

class CSVCompiler:
    def __init__(self):
        self.token_specification = [
            ('COMMA', r','),                # Comma
            ('NEWLINE', r'\n'),             # Line endings
            ('STRING', r'"(.*?)"'),         # Quoted string
            ('NUMBER', r'\b\d+(\.\d+)?\b'), # Integer or decimal number
            ('NAME', r'\b\w+\b'),           # Name
            ('WS', r'\s+'),                 # Whitespace
            ('MISMATCH', r'.'),             # Any other character
        ]

    def tokenize(self, code):
        tokens = []
        token_re = '|'.join(f'(?P<{pair[0]}>{pair[1]})' for pair in self.token_specification)
        line_num = 1
        line_start = 0
        for mo in re.finditer(token_re, code):
            kind = mo.lastgroup
            value = mo.group(kind)
            column = mo.start() - line_start
            if kind == 'NEWLINE':
                line_start = mo.end()
                line_num += 1
            elif kind == 'WS':
                continue
            elif kind == 'MISMATCH':
                raise RuntimeError(f'{value!r} unexpected on line {line_num}')
            tokens.append((kind, value, line_num, column))
        tokens.append(('EOF', '', line_num, column))  # Append EOF token to mark end of input
        return tokens

    def parse(self, tokens):
        self.tokens = tokens
        self.pos = 0
        self.current_token = self.tokens[self.pos]
        return self.csv()

    def csv(self):
        rows = []
        while self.pos < len(self.tokens) and self.current_token[0] != 'EOF':
            row = self.row()
            rows.append(row)
            self.match('NEWLINE') if self.current_token[0] == 'NEWLINE' else None
        return rows

    def row(self):
        row = []
        while self.current_token[0] not in ('NEWLINE', 'EOF'):
            value = self.value()
            row.append(value)
            if self.current_token[0] == 'COMMA':
                self.match('COMMA')
        return row

    def value(self):
        if self.current_token[0] == 'NUMBER':
            return float(self.match('NUMBER'))
        elif self.current_token[0] == 'STRING':
            return self.match('STRING').strip('"')
        elif self.current_token[0] == 'NAME':
            return self.match('NAME')
        else:
            raise RuntimeError(f'Unexpected value {self.current_token}')

    def match(self, token_type):
        if self.current_token[0] == token_type:
            value = self.current_token[1]
            self.pos += 1
            if self.pos < len(self.tokens):
                self.current_token = self.tokens[self.pos]
            return value
        else:
            raise RuntimeError(f'Expected {token_type} but got {self.current_token}')

def main():
    print("Please enter your CSV data \n NOTE: \n-Each line should be separated by a newline \n-Please enter double enter when your data is complete \n-Do not enter empty values like this (name, , ) Use \"null\" instead of empty values like (name,null,null)")
    input_lines = []
    while True:
        line = input()
        if line == "":
            break
        input_lines.append(line)
    input_code = "\n".join(input_lines)

    try:
        compiler = CSVCompiler()
        tokens = compiler.tokenize(input_code)

        print("Tokens:")
        for token in tokens:
            print(token)

        parsed_csv = compiler.parse(tokens)
        print("\nParsed CSV:")
        for row in parsed_csv:
            print(row)

    except RuntimeError as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    main()


Please enter your CSV data 
 NOTE: 
-Each line should be separated by a newline 
-Please enter double enter when your data is complete 
-Do not enter empty values like this (name, , ) Use "null" instead of empty values like (name,null,null)
Tokens:
('NAME', 'abc', 1, 0)
('COMMA', ',', 1, 3)
('NAME', 'zyz', 1, 4)
('EOF', '', 1, 4)

Parsed CSV:
['abc', 'zyz']

JSON Output:
[]
