In [81]:
from tree_sitter import Language, Parser

# Available grammars: grammar name -> (compiled library path, grammar source directory).
# Switch grammars by changing LANGUAGE_NAME instead of commenting lines in and out.
GRAMMARS = {
    "python": ("/tmp/python.so", "../tree-sitter-python"),
    "javascript": ("/tmp/javascript.so", "../tree-sitter-javascript"),
    "tsx": ("/tmp/typescript.so", "../tree-sitter-typescript/tsx"),
    "c_sharp": ("/tmp/c-sharp.so", "../tree-sitter-c-sharp"),
}

# The grammar must match the language of the file parsed in the next cell
# (example_code/query_builder.tsx). This cell previously activated "c_sharp",
# which only produced sensible output because of out-of-order cell execution.
LANGUAGE_NAME = "tsx"

library_path, grammar_dir = GRAMMARS[LANGUAGE_NAME]
# Compile the grammar into a shared library, then load it by name.
Language.build_library(library_path, [grammar_dir])
language = Language(library_path, LANGUAGE_NAME)
parser = Parser()
parser.set_language(language)

In [55]:
# python_example = open("example_code/chroma_fastapi.py").read()
# Despite its name, `python_example` holds whichever example file is active
# (currently a TSX file); the name is kept because the chunking cell uses it.
# The file is read with an explicit encoding and closed deterministically, and
# the text is re-encoded to UTF-8 bytes because the parser consumes bytes.
with open("example_code/query_builder.tsx", encoding="utf-8") as example_file:
    python_example = example_file.read().encode("utf-8")
tree = parser.parse(python_example)

In [77]:
from dataclasses import dataclass

@dataclass
class Span:
    """A ``start``/``end`` pair of integer positions.

    ``extract`` interprets the pair as line indexes into a string; ``len``
    is simply ``end - start``.
    """
    start: int
    end: int

    def extract(self, s: str) -> str:
        """Return the lines ``s.splitlines()[start:end]`` joined with newlines."""
        selected_lines = s.splitlines()[self.start:self.end]
        return "\n".join(selected_lines)

    def __add__(self, other):
        """Combine with another span, or shift by an integer.

        ``Span + Span`` keeps this span's start and takes the other's end.
        ``Span + int`` moves both ends by that amount.
        Any other operand raises ``NotImplementedError``.
        """
        if isinstance(other, Span):
            return Span(self.start, other.end)
        if isinstance(other, int):
            return Span(self.start + other, self.end + other)
        raise NotImplementedError()

    def __len__(self):
        """Distance between the two ends (``end - start``)."""
        return self.end - self.start

def get_line_number(index: int, source_code: "str | bytes") -> int:
    """Return the 0-based number of the line that contains offset ``index``.

    Works on ``str`` (character offsets) and on ``bytes`` (byte offsets); line
    terminators are counted as part of the line they end.

    Args:
        index: Non-negative offset into ``source_code``.
        source_code: The text, or its encoded bytes, that ``index`` refers to.

    Returns:
        The line index containing ``index``. An offset at or beyond the end of
        the input has no containing line and returns the total number of
        lines, which lets it serve as an exclusive end-of-file line bound.
        Previously this case raised ``IndexError``.
    """
    lines = source_code.splitlines(keepends=True)
    consumed = 0
    for line_number, line in enumerate(lines):
        consumed += len(line)
        # `consumed` is the offset just past this line; the first line whose
        # end lies beyond `index` is the one that contains it.
        if consumed > index:
            return line_number
    return len(lines)

def chunker(source_code_bytes, max_chunk_size = 512 * 2.5, coalesce = 50):
    """Split source code into line-based chunks that follow the syntax tree.

    The input is parsed with the module-level ``parser``. Sibling nodes are
    packed greedily into byte spans of at most ``max_chunk_size`` bytes; a node
    larger than that is split by recursing into its children. Gaps between
    consecutive spans are then closed, short spans are merged forward, and the
    byte spans are converted to line spans.

    Args:
        source_code_bytes: The source code as ``bytes``.
        max_chunk_size: Maximum size, in bytes, of a packed span.
        coalesce: A span is only emitted once a chunk longer than this many
            bytes has been merged into it and it contains a newline.

    Returns:
        A list of ``Span`` objects holding 0-based line indexes, suitable for
        ``Span.extract``. The final span runs to the end of the input; the
        trailing chunk used to be dropped.
    """
    tree = parser.parse(source_code_bytes)
    total_bytes = len(source_code_bytes)
    total_lines = len(source_code_bytes.splitlines())

    # Recursively form chunks with a maximum chunk size of max_chunk_size
    def chunker_helper(node, start_position=0):
        chunks = []
        current_chunk = Span(start_position, start_position)
        for child in node.children:
            child_span = Span(child.start_byte, child.end_byte)
            if len(child_span) > max_chunk_size:
                # Too big to pack as a unit: flush and split the child itself.
                chunks.append(current_chunk)
                chunks.extend(chunker_helper(child, child.start_byte))
                current_chunk = Span(child.end_byte, child.end_byte)
            elif len(current_chunk) + len(child_span) > max_chunk_size:
                chunks.append(current_chunk)
                current_chunk = child_span
            else:
                current_chunk += child_span
        # Flush whatever was accumulated after the last split; without this
        # the tail of every node (and of the whole file) is lost.
        if len(current_chunk) > 0:
            chunks.append(current_chunk)
        return chunks

    chunks = chunker_helper(tree.root_node)
    if not chunks:
        return []

    # removing gaps: each chunk runs up to the start of the next one, and the
    # last chunk runs to the end of the input.
    for prev, curr in zip(chunks[:-1], chunks[1:]):
        prev.end = curr.start
    chunks[-1].end = total_bytes

    # combining small chunks with bigger ones
    new_chunks = []
    current_chunk = Span(0, 0)
    for chunk in chunks:
        current_chunk += chunk
        # Checking for the newline byte directly avoids decoding every slice.
        if len(chunk) > coalesce \
            and b"\n" in source_code_bytes[current_chunk.start:current_chunk.end]:
            new_chunks.append(current_chunk)
            current_chunk = Span(chunk.end, chunk.end)
    # Emit trailing small chunks that never reached the coalesce threshold.
    if len(current_chunk) > 0:
        new_chunks.append(current_chunk)

    def to_line(offset):
        # An offset at end-of-input has no containing line; map it to the line
        # count so the exclusive end of the last span includes the final line.
        if offset >= total_bytes:
            return total_lines
        return get_line_number(offset, source_code=source_code_bytes)

    return [Span(to_line(chunk.start), to_line(chunk.end)) for chunk in new_chunks]

# Print every chunk of the example file followed by a visual separator.
separator = "\n======================\n"
for span in chunker(python_example):
    snippet = span.extract(python_example.decode("utf-8"))
    print(snippet + separator)

import { useAuthContext } from '@authentication/AuthContext'
import { Button } from '@components/Button'
import InfoTooltip from '@components/InfoTooltip/InfoTooltip'
import { KeyboardShortcut } from '@components/KeyboardShortcut/KeyboardShortcut'
import Popover from '@components/Popover/Popover'
import { GetHistogramBucketSize } from '@components/SearchResultsHistogram/SearchResultsHistogram'
import { Skeleton } from '@components/Skeleton/Skeleton'
import TextHighlighter from '@components/TextHighlighter/TextHighlighter'
import { default as OldTooltip } from '@components/Tooltip/Tooltip'
import {
	BackendSearchQuery,
	BaseSearchContext,
	normalizeParams,
} from '@context/BaseSearchContext'
import {
	useEditErrorSegmentMutation,
	useGetAppVersionsQuery,
	useGetErrorSegmentsQuery,
} from '@graph/hooks'
import { GetFieldTypesQuery, namedOperations } from '@graph/operations'
import {
	ErrorSearchParamsInput,
	ErrorSegment,
	ErrorState,
	Exact,
	Field,
	SearchParamsInput,
} from '@graph/sc