From 126c685634a1bd18eea2ca0d979a78a4f43ef552 Mon Sep 17 00:00:00 2001 From: Raphael MANSUY Date: Sun, 24 Mar 2024 12:24:26 +0100 Subject: [PATCH] improve markdown --- code2prompt/language_inference.py | 83 ++++++++++++++----------------- code2prompt/main.py | 6 ++- 2 files changed, 41 insertions(+), 48 deletions(-) diff --git a/code2prompt/language_inference.py b/code2prompt/language_inference.py index 8c8c53b..105dcd9 100644 --- a/code2prompt/language_inference.py +++ b/code2prompt/language_inference.py @@ -1,4 +1,6 @@ -""" This module contains the function to infer the programming language based on the file extension. """ +""" +This module contains the function to infer the programming language based on the file extension. +""" import os @@ -13,49 +15,36 @@ def infer_language(filename: str) -> str: _, extension = os.path.splitext(filename) extension = extension.lower() - if extension in [".c", ".h"]: - return "c" - elif extension in [".cpp", ".hpp", ".cc", ".cxx"]: - return "cpp" - elif extension in [".java"]: - return "java" - elif extension in [".js", ".jsx"]: - return "javascript" - elif extension in [".cs"]: - return "csharp" - elif extension in [".php"]: - return "php" - elif extension in [".go"]: - return "go" - elif extension in [".rs"]: - return "rust" - elif extension in [".kt"]: - return "kotlin" - elif extension in [".swift"]: - return "swift" - elif extension in [".scala"]: - return "scala" - elif extension in [".dart"]: - return "dart" - elif extension in [".py"]: - return "python" - elif extension in [".rb"]: - return "ruby" - elif extension in [".pl", ".pm"]: - return "perl" - elif extension in [".sh"]: - return "bash" - elif extension in [".ps1"]: - return "powershell" - elif extension in [".html", ".htm"]: - return "html" - elif extension in [".xml"]: - return "xml" - elif extension in [".sql"]: - return "sql" - elif extension in [".m"]: - return "matlab" - elif extension in [".r"]: - return "r" - else: - return "unknown" + language_map = { + ".c": "c", + ".h": "c", + ".cpp": "cpp", + ".hpp": "cpp", + ".cc": "cpp", + ".cxx": "cpp", + ".java": "java", + ".js": "javascript", + ".jsx": "javascript", + ".cs": "csharp", + ".php": "php", + ".go": "go", + ".rs": "rust", + ".kt": "kotlin", + ".swift": "swift", + ".scala": "scala", + ".dart": "dart", + ".py": "python", + ".rb": "ruby", + ".pl": "perl", + ".pm": "perl", + ".sh": "bash", + ".ps1": "powershell", + ".html": "html", + ".htm": "html", + ".xml": "xml", + ".sql": "sql", + ".m": "matlab", + ".r": "r" + } + + return language_map.get(extension, "unknown") \ No newline at end of file diff --git a/code2prompt/main.py b/code2prompt/main.py index 6388047..f39557f 100644 --- a/code2prompt/main.py +++ b/code2prompt/main.py @@ -152,7 +152,11 @@ def create_markdown_file(path, output, gitignore, filter, suppress_comments): file_info += f"- Created: {file_creation_time}\n" file_info += f"- Modified: {file_modification_time}\n\n" - file_code = f"### Code\n```{file_extension}\n{file_content}\n```\n\n" + language = infer_language(file_path.name) + if language == "unknown": + language = format(file_extension[1:]) + + file_code = f"### Code\n```{language}\n{file_content}\n```\n\n" content.append(file_info + file_code) table_of_contents.append(