Skip to content

Commit

Permalink
support c and c++
Browse files Browse the repository at this point in the history
  • Loading branch information
krlvi committed Nov 17, 2022
1 parent 21d2224 commit b5ec890
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ You can experiment with your own sentence transformer models with the `--model`
## Bugs and limitations

- Currently, the `.embeddings` index is not updated when repository files change. As a temporary workaround, `sem embed` can be re-run occasionally.
- Supported languages: `{ 'python', 'javascript', 'typescript', 'ruby', 'go', 'rust', 'java' }`
- Supported languages: `{ 'python', 'javascript', 'typescript', 'ruby', 'go', 'rust', 'java', 'c', 'c++' }`
- Supported text editors for opening results in: `{ 'vscode', 'vim' }`

## License
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name='semantic-code-search',
version='0.1.3',
version='0.2.0',
author='Kiril Videlov',
author_email='kiril@codeball.ai',
description='Search your codebase with natural language.',
Expand Down
10 changes: 8 additions & 2 deletions src/semantic_code_search/embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np
from tree_sitter import Tree
from tree_sitter_languages import get_parser
from tqdm import tqdm


def _supported_file_extensions():
Expand All @@ -17,7 +18,11 @@ def _supported_file_extensions():
'.java': 'java',
'.js': 'javascript',
'.ts': 'typescript',
'.py': 'python'
'.py': 'python',
'.c': 'c',
'.h': 'c',
'.cpp': 'cpp',
'.hpp': 'cpp',
}


Expand Down Expand Up @@ -52,7 +57,8 @@ def _extract_functions(nodes, fp, file_content, relevant_node_types):

def _get_repo_functions(root, supported_file_extensions, relevant_node_types):
functions = []
for fp in [root + '/' + f for f in os.popen('git -C {} ls-files'.format(root)).read().split('\n')]:
print('Extracting functions from {}'.format(root))
for fp in tqdm([root + '/' + f for f in os.popen('git -C {} ls-files'.format(root)).read().split('\n')]):
if not os.path.isfile(fp):
continue
with open(fp, 'r') as f:
Expand Down
5 changes: 5 additions & 0 deletions src/semantic_code_search/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from prompt_toolkit.lexers import PygmentsLexer
from prompt_toolkit.widgets.base import Frame
from pygments.lexers.c_cpp import CLexer
from pygments.lexers.c_cpp import CppLexer
from pygments.lexers.go import GoLexer
from pygments.lexers.javascript import JavascriptLexer, TypeScriptLexer
from pygments.lexers.jvm import JavaLexer, KotlinLexer
Expand Down Expand Up @@ -53,6 +54,10 @@ def _syntax_highlighting(text, file):
lexer = PhpLexer
elif file.endswith('rs'):
lexer = RustLexer
elif file.endswith('c') or file.endswith('h'):
lexer = CLexer
elif file.endswith('cpp') or file.endswith('hpp'):
lexer = CppLexer

pigment = PygmentsLexer(lexer, sync_from_start=True)
lex_func = pigment.lex_document(Document(text.replace('\t', ' ')))
Expand Down

0 comments on commit b5ec890

Please sign in to comment.