diff --git a/README.md b/README.md index 91d104b..8532e95 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ Alternatively, you can install Code2Prompt using pipx, a tool for installing and 2. Install Code2Prompt using pipx: ``` - pipx install git+https://github.com/raphael.mansuy/code2prompt.git + pipx install git+https://github.com/raphaelmansuy/code2prompt.git ``` This command will clone the Code2Prompt repository and install it in an isolated environment managed by pipx. diff --git a/TEMPLATE.md b/TEMPLATE.md index 623db33..b7a54ca 100644 --- a/TEMPLATE.md +++ b/TEMPLATE.md @@ -116,8 +116,3 @@ This template groups files by language and creates separate sections for Python 3. Create user-defined variables for dynamic content that you want to input at runtime. 4. Use the `files` list to iterate over all processed files and access their properties. 5. Remember that the `content` of each file is already processed according to the command-line options (e.g., comments stripped if `--suppress-comments` was used). - -By leveraging these templating capabilities, you can create highly customized outputs tailored to your specific needs for code analysis and documentation. - -Citations: -[1] https://ppl-ai-file-upload.s3.amazonaws.com/web/direct-files/585370/cbd51bc7-ed1f-4321-9a7b-b756920ca298/paste.txt \ No newline at end of file diff --git a/code2prompt/count_tokens.py b/code2prompt/count_tokens.py new file mode 100644 index 0000000..420d000 --- /dev/null +++ b/code2prompt/count_tokens.py @@ -0,0 +1,21 @@ +import click +import tiktoken + + +def count_tokens(text: str, encoding: str) -> int: + """ + Count the number of tokens in the given text using the specified encoding. + + Args: + text (str): The text to tokenize and count. + encoding (str): The encoding to use for tokenization. + + Returns: + int: The number of tokens in the text. + """ + try: + encoder = tiktoken.get_encoding(encoding) + return len(encoder.encode(text)) + except Exception as e: + click.echo(f"Error counting tokens: {str(e)}", err=True) + return 0 \ No newline at end of file diff --git a/code2prompt/create_template_directory.py b/code2prompt/create_template_directory.py new file mode 100644 index 0000000..d04add8 --- /dev/null +++ b/code2prompt/create_template_directory.py @@ -0,0 +1,77 @@ +from pathlib import Path + + +def create_templates_directory(): + """ + Create a 'templates' directory in the current working directory and + populate it with example template files. + """ + # Define the path for the templates directory + templates_dir = Path.cwd() / "templates" + + # Create the templates directory if it doesn't exist + templates_dir.mkdir(exist_ok=True) + + # Define example templates + example_templates = { + "basic.j2": """# Code Summary + +{% for file in files %} +## {{ file.path }} + +```{{ file.language }} +{{ file.content }} +``` + +{% endfor %} +""", + "detailed.j2": """# Project Code Analysis + +{% for file in files %} +## File: {{ file.path }} + +- **Language**: {{ file.language }} +- **Size**: {{ file.size }} bytes +- **Last Modified**: {{ file.modified }} + +### Code: + +```{{ file.language }} +{{ file.content }} +``` + +### Analysis: +[Your analysis for {{ file.path }} goes here] + +{% endfor %} +""", + "custom.md": """# {{ project_name }} + +{{ project_description }} + +{% for file in files %} +## {{ file.path }} + +{{ file_purpose }} + +```{{ file.language }} +{{ file.content }} +``` + +{% endfor %} + +## Next Steps: +{{ next_steps }} +""", + } + + # Write example templates to files + for filename, content in example_templates.items(): + file_path = templates_dir / filename + with file_path.open("w") as f: + f.write(content) + + print(f"Templates directory created at: {templates_dir}") + print("Example templates added:") + for filename, _ in example_templates.items(): + print(f"- {filename}") diff --git a/code2prompt/generate_content.py b/code2prompt/generate_content.py new file mode 100644 index 0000000..dc4eba2 --- /dev/null +++ b/code2prompt/generate_content.py @@ -0,0 +1,26 @@ +from code2prompt.template_processor import get_user_inputs, load_template, process_template +from code2prompt.utils.generate_markdown_content import generate_markdown_content + + +def generate_content(files_data, options): + """ + Generate content based on the provided files data and options. + + This function either processes a Jinja2 template with the given files data and user inputs + or generates markdown content directly from the files data, depending on whether a + template option is provided. + + Args: + files_data (list): A list of dictionaries containing processed file data. + options (dict): A dictionary containing options such as template path and whether + to wrap code inside markdown code blocks. + + Returns: + str: The generated content as a string, either from processing a template or + directly generating markdown content. + """ + if options['template']: + template_content = load_template(options['template']) + user_inputs = get_user_inputs(template_content) + return process_template(template_content, files_data, user_inputs) + return generate_markdown_content(files_data, options['no_codeblock']) \ No newline at end of file diff --git a/code2prompt/get_gitignore_patterns.py b/code2prompt/get_gitignore_patterns.py new file mode 100644 index 0000000..59d2809 --- /dev/null +++ b/code2prompt/get_gitignore_patterns.py @@ -0,0 +1,27 @@ +from code2prompt.utils.parse_gitignore import parse_gitignore +from pathlib import Path + +def get_gitignore_patterns(path, gitignore): + """ + Retrieve gitignore patterns from a specified path or a default .gitignore file. + + This function reads the .gitignore file located at the specified path or uses + the default .gitignore file in the project root if no specific path is provided. + It then parses the file to extract ignore patterns and adds a default pattern + to ignore the .git directory itself. + + Args: + path (Path): The root path of the project where the default .gitignore file is located. + gitignore (Optional[str]): An optional path to a specific .gitignore file to use instead of the default. + + Returns: + Set[str]: A set of gitignore patterns extracted from the .gitignore file. + """ + if gitignore: + gitignore_path = Path(gitignore) + else: + gitignore_path = Path(path) / ".gitignore" + + patterns = parse_gitignore(gitignore_path) + patterns.add(".git") + return patterns \ No newline at end of file diff --git a/code2prompt/main.py b/code2prompt/main.py index 96c4624..6dda253 100644 --- a/code2prompt/main.py +++ b/code2prompt/main.py @@ -1,163 +1,121 @@ import click from pathlib import Path -import tiktoken -from code2prompt.utils.is_binary import is_binary -from code2prompt.utils.generate_markdown_content import generate_markdown_content -from code2prompt.utils.is_filtered import is_filtered -from code2prompt.utils.is_ignored import is_ignored -from code2prompt.utils.parse_gitignore import parse_gitignore -from code2prompt.process_file import process_file +from code2prompt.count_tokens import count_tokens +from code2prompt.generate_content import generate_content +from code2prompt.process_files import process_files from code2prompt.write_output import write_output -from code2prompt.template_processor import load_template, process_template, get_user_inputs +from code2prompt.create_template_directory import create_templates_directory + +VERSION = "0.6.0" # Define the version of the CLI tool @click.command() -@click.option("--path", "-p", type=click.Path(exists=True), required=True, help="Path to the directory to navigate.") -@click.option("--output", "-o", type=click.Path(), help="Name of the output Markdown file.") -@click.option("--gitignore", "-g", type=click.Path(exists=True), help="Path to the .gitignore file.") -@click.option("--filter", "-f", type=str, help='Comma-separated filter patterns to include files (e.g., "*.py,*.js").') -@click.option("--exclude", "-e", type=str, help='Comma-separated patterns to exclude files (e.g., "*.txt,*.md").') -@click.option("--case-sensitive", is_flag=True, help="Perform case-sensitive pattern matching.") -@click.option("--suppress-comments", "-s", is_flag=True, help="Strip comments from the code files.", default=False) -@click.option("--line-number", "-ln", is_flag=True, help="Add line numbers to source code blocks.", default=False) -@click.option("--no-codeblock", is_flag=True, help="Disable wrapping code inside markdown code blocks.") -@click.option("--template", "-t", type=click.Path(exists=True), help="Path to a Jinja2 template file for custom prompt generation.") -@click.option("--tokens", is_flag=True, help="Display the token count of the generated prompt.") -@click.option("--encoding", type=click.Choice(['cl100k_base', 'p50k_base', 'p50k_edit', 'r50k_base']), - default='cl100k_base', help="Specify the tokenizer encoding to use.") +@click.version_option( + VERSION, "-v", "--version", message="code2prompt version %(version)s" +) +@click.option( + "--path", "-p", + type=click.Path(exists=True), + required=True, + multiple=True, # Allow multiple paths + help="Path(s) to the directory or file to process.", +) +@click.option( + "--output", "-o", + type=click.Path(), + help="Name of the output Markdown file." +) +@click.option( + "--gitignore", "-g", + type=click.Path(exists=True), + help="Path to the .gitignore file.", +) +@click.option( + "--filter", "-f", + type=str, + help='Comma-separated filter patterns to include files (e.g., "*.py,*.js").', +) +@click.option( + "--exclude", "-e", + type=str, + help='Comma-separated patterns to exclude files (e.g., "*.txt,*.md").', +) +@click.option( + "--case-sensitive", + is_flag=True, + help="Perform case-sensitive pattern matching." +) +@click.option( + "--suppress-comments", "-s", + is_flag=True, + help="Strip comments from the code files.", + default=False, +) +@click.option( + "--line-number", "-ln", + is_flag=True, + help="Add line numbers to source code blocks.", + default=False, +) +@click.option( + "--no-codeblock", + is_flag=True, + help="Disable wrapping code inside markdown code blocks.", +) +@click.option( + "--template", "-t", + type=click.Path(exists=True), + help="Path to a Jinja2 template file for custom prompt generation.", +) +@click.option( + "--tokens", + is_flag=True, + help="Display the token count of the generated prompt." +) +@click.option( + "--encoding", + type=click.Choice(["cl100k_base", "p50k_base", "p50k_edit", "r50k_base"]), + default="cl100k_base", + help="Specify the tokenizer encoding to use.", +) +@click.option( + "--create-templates", + is_flag=True, + help="Create a templates directory with example templates.", +) def create_markdown_file(**options): """ Creates a Markdown file based on the provided options. - This function orchestrates the process of reading files from the specified path, + This function orchestrates the process of reading files from the specified paths, processing them according to the given options (such as filtering, excluding certain files, handling comments, etc.), and then generating a Markdown file with the processed content. The output file name and location can be customized through the options. Args: - **options (dict): Key-value pairs of options to customize the behavior of the function. - Possible keys include 'path', 'output', 'gitignore', 'filter', 'exclude', - 'case_sensitive', 'suppress_comments', 'line_number', 'no_codeblock', - 'template', 'tokens', and 'encoding'. - - Returns: - None - """ - files_data = process_files(options) - content = generate_content(files_data, options) - - if options['tokens']: - token_count = count_tokens(content, options['encoding']) - click.echo(f"Token count: {token_count}") - - write_output(content, options['output']) - -def process_files(options): - """ - Processes files within a specified directory, applying filters and transformations - based on the provided options. - - Args: - options (dict): A dictionary containing options such as path, gitignore patterns, - and flags for processing files. + **options (dict): Key-value pairs of options to customize the behavior of the function. + Possible keys include 'path', 'output', 'gitignore', 'filter', 'exclude', 'case_sensitive', + 'suppress_comments', 'line_number', 'no_codeblock', 'template', 'tokens', 'encoding', + and 'create_templates'. Returns: - list: A list of dictionaries containing processed file data. + None """ - path = Path(options['path']) - gitignore_patterns = get_gitignore_patterns(path, options['gitignore']) - files_data = [] - for file_path in path.rglob("*"): - if should_process_file(file_path, gitignore_patterns, path, options): - result = process_file(file_path, options['suppress_comments'], options['line_number'], options['no_codeblock']) - if result: - files_data.append(result) - return files_data + if options["create_templates"]: + create_templates_directory() + return -def get_gitignore_patterns(path, gitignore): - """ - Retrieve gitignore patterns from a specified path or a default .gitignore file. + all_files_data = [] + for path in options['path']: + files_data = process_files({**options, 'path': path}) + all_files_data.extend(files_data) - This function reads the .gitignore file located at the specified path or uses - the default .gitignore file in the project root if no specific path is provided. - It then parses the file to extract ignore patterns and adds a default pattern - to ignore the .git directory itself. + content = generate_content(all_files_data, options) - Args: - path (Path): The root path of the project where the default .gitignore file is located. - gitignore (Optional[str]): An optional path to a specific .gitignore file to use instead of the default. - - Returns: - Set[str]: A set of gitignore patterns extracted from the .gitignore file. - """ - gitignore_path = Path(gitignore) if gitignore else path / ".gitignore" - patterns = parse_gitignore(gitignore_path) - patterns.add(".git") - return patterns - -def should_process_file(file_path, gitignore_patterns, root_path, options): - """ - Determine whether a file should be processed based on several criteria. - - Checks if the file is indeed a file, not ignored according to gitignore patterns, - matches the filter criteria, is not excluded, is case sensitive if specified, - and is not a binary file. - - Args: - file_path (Path): The path to the file being considered. - gitignore_patterns (set): A set of patterns to ignore files. - root_path (Path): The root path of the project for relative comparisons. - options (dict): A dictionary of options including filter, exclude, and case sensitivity settings. - - Returns: - bool: True if the file should be processed, False otherwise. - """ - return ( - file_path.is_file() - and not is_ignored(file_path, gitignore_patterns, root_path) - and is_filtered(file_path, options['filter'], options['exclude'], options['case_sensitive']) - and not is_binary(file_path) - ) - -def generate_content(files_data, options): - """ - Generate content based on the provided files data and options. - - This function either processes a Jinja2 template with the given files data and user inputs - or generates markdown content directly from the files data, depending on whether a - template option is provided. - - Args: - files_data (list): A list of dictionaries containing processed file data. - options (dict): A dictionary containing options such as template path and whether - to wrap code inside markdown code blocks. - - Returns: - str: The generated content as a string, either from processing a template or - directly generating markdown content. - """ - if options['template']: - template_content = load_template(options['template']) - user_inputs = get_user_inputs(template_content) - return process_template(template_content, files_data, user_inputs) - return generate_markdown_content(files_data, options['no_codeblock']) - -def count_tokens(text: str, encoding: str) -> int: - """ - Count the number of tokens in the given text using the specified encoding. - - Args: - text (str): The text to tokenize and count. - encoding (str): The encoding to use for tokenization. + if options["tokens"]: + token_count = count_tokens(content, options["encoding"]) + click.echo(f"Token count: {token_count}") - Returns: - int: The number of tokens in the text. - """ - try: - encoder = tiktoken.get_encoding(encoding) - return len(encoder.encode(text)) - except Exception as e: - click.echo(f"Error counting tokens: {str(e)}", err=True) - return 0 + write_output(content, options["output"]) if __name__ == "__main__": # pylint: disable=no-value-for-parameter diff --git a/code2prompt/process_files.py b/code2prompt/process_files.py new file mode 100644 index 0000000..88615aa --- /dev/null +++ b/code2prompt/process_files.py @@ -0,0 +1,55 @@ +from pathlib import Path +from code2prompt.get_gitignore_patterns import get_gitignore_patterns +from code2prompt.process_file import process_file +from code2prompt.should_process_file import should_process_file + +def process_files(options): + """ + Processes files or directories based on the provided paths. + + Args: + options (dict): A dictionary containing options such as paths, gitignore patterns, + and flags for processing files. + + Returns: + list: A list of dictionaries containing processed file data. + """ + files_data = [] + + # Ensure 'path' is always a list for consistent processing + paths = options['path'] if isinstance(options['path'], list) else [options['path']] + + for path in paths: + path = Path(path) + + # Get gitignore patterns for the current path + gitignore_patterns = get_gitignore_patterns( + path.parent if path.is_file() else path, + options['gitignore'] + ) + + if path.is_file(): + # Process single file + if should_process_file(path, gitignore_patterns, path.parent, options): + result = process_file( + path, + options['suppress_comments'], + options['line_number'], + options['no_codeblock'] + ) + if result: + files_data.append(result) + else: + # Process directory + for file_path in path.rglob("*"): + if should_process_file(file_path, gitignore_patterns, path, options): + result = process_file( + file_path, + options['suppress_comments'], + options['line_number'], + options['no_codeblock'] + ) + if result: + files_data.append(result) + + return files_data \ No newline at end of file diff --git a/code2prompt/should_process_file.py b/code2prompt/should_process_file.py new file mode 100644 index 0000000..cdcbda5 --- /dev/null +++ b/code2prompt/should_process_file.py @@ -0,0 +1,23 @@ +from code2prompt.utils.is_binary import is_binary +from code2prompt.utils.is_filtered import is_filtered +from code2prompt.utils.is_ignored import is_ignored + +def should_process_file(file_path, gitignore_patterns, root_path, options): + """ + Determine whether a file should be processed based on several criteria. + + Args: + file_path (Path): The path to the file being considered. + gitignore_patterns (set): A set of patterns to ignore files. + root_path (Path): The root path of the project for relative comparisons. + options (dict): A dictionary of options including filter, exclude, and case sensitivity settings. + + Returns: + bool: True if the file should be processed, False otherwise. + """ + return ( + file_path.is_file() + and not is_ignored(file_path, gitignore_patterns, root_path) + and is_filtered(file_path, options['filter'], options['exclude'], options['case_sensitive']) + and not is_binary(file_path) + ) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 60201f1..cfdf805 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "code2prompt" -version = "0.5.0" +version = "0.6.0" description = "" authors = ["Raphael MANSUY "] readme = "README.md" diff --git a/tests/test_create_template_directory.py b/tests/test_create_template_directory.py new file mode 100644 index 0000000..9db16fd --- /dev/null +++ b/tests/test_create_template_directory.py @@ -0,0 +1,42 @@ +import pytest +import os +from pathlib import Path +from code2prompt.create_template_directory import create_templates_directory + + +@pytest.fixture +def temp_dir(tmp_path): + """Fixture to provide a temporary directory for testing.""" + original_cwd = Path.cwd() + os.chdir(tmp_path) + yield tmp_path + os.chdir(original_cwd) + +def test_create_templates_directory_existing(temp_dir, monkeypatch): + # Create the templates directory beforehand + templates_dir = temp_dir / "templates" + templates_dir.mkdir() + + # Mock the print function to capture output + printed_messages = [] + monkeypatch.setattr('builtins.print', lambda *args: printed_messages.append(' '.join(map(str, args)))) + + # Call the function + create_templates_directory() + + # Verify that the function doesn't raise an exception when the directory already exists + assert templates_dir.exists() + assert templates_dir.is_dir() + + # Check if the example template files were created + expected_files = ["basic.j2", "detailed.j2", "custom.md"] + for file in expected_files: + assert (templates_dir / file).exists() + assert (templates_dir / file).is_file() + + # Verify the printed output + assert len(printed_messages) == 5 + assert f"Templates directory created at: {templates_dir}" in printed_messages[0] + assert "Example templates added:" in printed_messages[1] + for i, file in enumerate(expected_files, start=2): + assert f"- {file}" in printed_messages[i] \ No newline at end of file