<a href="https://colab.research.google.com/github/komoleekag/document-classifier/blob/main/Code_Documentation_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install gradio transformers

Collecting gradio
  Downloading gradio-5.15.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.0 (from gradio)
  Downloading gradio_client-1.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.9.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.meta

In [7]:
import gradio as gr
from transformers import pipeline
import ast
import re
from typing import Dict, List, Tuple
import inspect
import gradio as gr
from transformers import pipeline
import ast
import re
from typing import Dict, List, Tuple
import inspect

# Example of a reST-style docstring; extract_arg_details() below picks up its ":param" lines.
def my_function(x, y):
    """
    Add two numbers.

    :param x: The first number (int or float).
    :param y: The second number (int or float).

    :returns: The sum of x and y.
    :raises TypeError: If x or y are not numeric.
    """
    # Validate explicitly so that e.g. two strings are rejected instead of
    # being silently concatenated by "+".
    for arg_name, arg_value in (("x", x), ("y", y)):
        if not isinstance(arg_value, (int, float)):
            raise TypeError(
                f"{arg_name} must be int or float, got {type(arg_value).__name__}"
            )
    return x + y

def generate_documentation(code: str) -> str:
    """Build Markdown-style documentation for ``code`` from its AST.

    The result starts with a short overview generated by GPT-2, followed by
    one section per class (including its directly defined methods) and one
    section per remaining function. Each section shows the existing docstring
    and the ``:param`` details found by ``extract_arg_details``.

    NOTE: a later ``def generate_documentation`` in this file rebinds the
    name, so this version is only reachable if that definition is removed or
    renamed.

    :param code: Python source code to document.
    :returns: The generated documentation, or a
        ``"Could not generate documentation: ..."`` message on failure.
    """
    # The ast module has no "MethodDef" node: methods are ordinary
    # FunctionDef/AsyncFunctionDef nodes located inside a ClassDef body.
    func_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    try:
        tree = ast.parse(code)
        classes = [node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]

        parts = []

        # Overall code description (using GPT-2). A model failure (download
        # error, out of memory, ...) must not discard the AST-derived sections.
        prompt = f"Give a concise overview of the purpose of this Python code:\n```python\n{code}\n```"
        try:
            generator = pipeline('text-generation', model='gpt2')
            # return_full_text=False keeps the prompt (and the whole input
            # code) out of the overview; only max_new_tokens is set because
            # combining it with max_length is contradictory.
            overview = generator(
                prompt,
                max_new_tokens=150,
                num_return_sequences=1,
                return_full_text=False,
            )[0]['generated_text']
        except Exception as e:
            overview = f"(Overview unavailable: {e})"
        parts.append(overview + "\n\n")

        # Methods are documented under their class; remember them so the
        # function pass below does not list them a second time.
        documented_methods = set()

        for cls in classes:
            class_doc = ast.get_docstring(cls) or "No docstring provided."
            parts.append(f"## Class: {cls.name}\n{class_doc}\n\n")
            for method in (node for node in cls.body if isinstance(node, func_types)):
                documented_methods.add(method)
                method_doc = ast.get_docstring(method) or "No docstring provided."
                arg_string = ", ".join(arg.arg for arg in method.args.args)
                parts.append(
                    f"### Method: {cls.name}.{method.name}({arg_string})\n{method_doc}\n\n"
                )
                parts.append(extract_arg_details(method))
                parts.append("\n")

        for func in ast.walk(tree):
            if not isinstance(func, func_types) or func in documented_methods:
                continue
            func_doc = ast.get_docstring(func) or "No docstring provided."
            arg_string = ", ".join(arg.arg for arg in func.args.args)
            parts.append(f"## Function: {func.name}({arg_string})\n{func_doc}\n\n")
            parts.append(extract_arg_details(func))
            parts.append("\n")

        return "".join(parts)

    except Exception as e:
        return f"Could not generate documentation: {e}"

def extract_arg_details(node):
    """Render the ``:param`` entries of a node's docstring as a bullet list.

    :param node: An AST node accepted by ``ast.get_docstring``.
    :returns: One ``* `name`: description`` line (newline-terminated) per
        docstring line of the form ``:param name: description``; an empty
        string when the node has no docstring or no such lines.
    """
    docstring = ast.get_docstring(node)
    if not docstring:
        return ""

    param_line = re.compile(r"^:param\s+(\w+):\s+(.*)$")
    hits = (param_line.match(text) for text in docstring.splitlines())
    return "".join(f"* `{hit.group(1)}`: {hit.group(2)}\n" for hit in hits if hit)


# Static analysis helpers, the combined analyze_code() entry point, and the Gradio interface.
def analyze_code_security(code: str) -> List[Dict]:
    """Scan ``code`` for risky constructs using a fixed set of regex heuristics.

    Matching is purely textual and case-insensitive, so hits inside comments
    or string literals are reported too.

    :param code: Source code to scan.
    :returns: One ``{"type": "security", "message": ...}`` dict per pattern
        that matches anywhere in ``code``, in the order the patterns are
        declared; an empty list when nothing matches.
    """
    # \b anchors the call names at a word boundary so that longer identifiers
    # such as ast.literal_eval( or my_exec( are not reported as eval()/exec().
    security_patterns = {
        r"\beval\(": "🚨 Potentially dangerous eval() usage detected (consider ast.literal_eval)",
        r"\bexec\(": "🚨 Potentially dangerous exec() usage detected",
        r"os\.system\(": "🚨 Direct system command execution detected (use subprocess)",
        r"\binput\(": "🚨 Unvalidated input usage detected (sanitize input)",
        r"open\(.*?\).*?\.(read|write|readline)": "🚨 File operation without explicit path validation (check paths)",
        r"pickle\.(loads|dumps)": "🚨 Potential pickle injection vulnerability",
        r"re\.compile\(.*?\)\.": "🚨 Unsafe regex compilation (consider escaping user input)",
        r"password.*=": "🚨 Potential hardcoded password detected (use secrets management)",
        r"http://": "⚠️ Plaintext HTTP communication (use HTTPS)",
        r"assert\s+False": "⚠️ Assertion failure can halt execution (use logging for non-critical checks)",
    }

    return [
        {"type": "security", "message": message}
        for pattern, message in security_patterns.items()
        if re.search(pattern, code, re.IGNORECASE)  # Case-insensitive matching
    ]

def analyze_code_quality(code: str) -> List[Dict]:
    """Run simple AST-based quality checks on ``code``.

    Checks performed:

    * per function: a branch count (1 + number of ``if``/``for``/``while``/
      ``except`` nodes) above 10, and more than 20 top-level statements;
    * names: single-character names (``_`` and upper-case names excepted) and
      assigned names that do not start with a lowercase letter or underscore,
      each reported once per name;
    * names bound by a plain assignment that are never read anywhere.

    :param code: Python source code to analyse.
    :returns: A list of ``{"type": "quality", "message": ...}`` dicts, or a
        single ``{"type": "error", ...}`` entry when ``code`` cannot be parsed.
    """
    max_complexity = 10
    max_statements = 20

    # Only the parse step is guarded: wrapping the whole analysis in a broad
    # except would disguise bugs in the checks below as "could not parse".
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError, RecursionError, MemoryError) as e:
        return [{"type": "error", "message": f"Could not parse code: {e}"}]

    issues = []
    nodes = list(ast.walk(tree))

    # Function/method complexity and length. Methods are plain
    # FunctionDef/AsyncFunctionDef nodes; the ast module has no "MethodDef".
    for node in nodes:
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue

        # Start with 1 for the function itself, plus one per control-flow node.
        complexity = 1 + sum(
            isinstance(sub_node, (ast.If, ast.For, ast.While, ast.ExceptHandler))
            for sub_node in ast.walk(node)
        )
        if complexity > max_complexity:
            issues.append({
                "type": "quality",
                "message": f"⚠️ Function/Method '{node.name}' is too complex (Cyclomatic Complexity: {complexity} > {max_complexity})"
            })

        # len(node.body) counts top-level statements, not source lines, so the
        # message reports it as such next to the actual threshold.
        if len(node.body) > max_statements:
            issues.append({
                "type": "quality",
                "message": f"⚠️ Function '{node.name}' is too long ({len(node.body)} top-level statements > {max_statements})"
            })

    # Variable naming. Each name is reported at most once per check.
    # The pattern only requires a leading lowercase letter or underscore.
    naming_pattern = re.compile(r"^[a-z_][a-zA-Z0-9_]*$")
    reported_short = set()
    reported_case = set()
    for node in nodes:
        if not isinstance(node, ast.Name) or node.id.isupper():
            continue  # upper-case names are treated as constants
        name = node.id

        # "_" is the conventional throwaway name, not a poorly chosen one.
        if len(name) < 2 and name != "_" and name not in reported_short:
            reported_short.add(name)
            issues.append({
                "type": "quality",
                "message": f"⚠️ Variable '{name}' name is too short (consider descriptive names)"
            })

        # Only names being bound are checked for casing: Load references also
        # cover classes and exceptions (e.g. ValueError), which are PascalCase.
        if (
            isinstance(node.ctx, ast.Store)
            and not naming_pattern.match(name)
            and name not in reported_case
        ):
            reported_case.add(name)
            issues.append({
                "type": "quality",
                "message": f"⚠️ Variable '{name}' name is not following snake_case convention"
            })

    # Unused variables: assigned via a plain "name = ..." but never loaded.
    assigned_vars = {
        target.id
        for node in nodes
        if isinstance(node, ast.Assign)
        for target in node.targets
        if isinstance(target, ast.Name)
    }
    used_vars = {
        node.id
        for node in nodes
        if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load)
    }
    # Sorted so the report order does not depend on set iteration order.
    for var in sorted(assigned_vars - used_vars):
        issues.append({"type": "quality", "message": f"⚠️ Unused variable '{var}'"})

    return issues


def generate_documentation(code: str) -> str:
    """Ask GPT-2 to write documentation for ``code``.

    The names of all functions/methods and classes found in the AST are put
    into the prompt as context, followed by the code itself.

    :param code: Python source code to document.
    :returns: The text generated by the model (without the prompt), or a
        ``"Could not generate documentation: ..."`` message when parsing or
        generation fails.
    """
    try:
        tree = ast.parse(code)
        nodes = list(ast.walk(tree))
        # Methods are ordinary FunctionDef/AsyncFunctionDef nodes; the ast
        # module has no "MethodDef" class.
        functions = [
            node.name
            for node in nodes
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
        ]
        classes = [node.name for node in nodes if isinstance(node, ast.ClassDef)]

        context_parts = []
        if functions:
            context_parts.append(f"functions/methods: {', '.join(functions)}")
        if classes:
            context_parts.append(f"classes: {', '.join(classes)}")
        context = (
            f" The code contains {' and '.join(context_parts)}." if context_parts else ""
        )

        # Built line by line rather than as an indented triple-quoted string,
        # which would prefix every prompt line with the function's indentation
        # and mis-indent the first line of the embedded code.
        prompt = (
            f"Write comprehensive Python documentation for the following code.{context}\n"
            "\n"
            "Include:\n"
            "* A general description of the code's purpose.\n"
            "* For each function/method:\n"
            "    * A docstring explaining its functionality, arguments, return values, and any exceptions raised.\n"
            "    * Examples of how to use the function/method.\n"
            "* For each class:\n"
            "    * A docstring explaining its purpose and attributes.\n"
            "    * Documentation for each method within the class.\n"
            "\n"
            f"```python\n{code}\n```\n"
        )

        generator = pipeline('text-generation', model='gpt2')
        # Only max_new_tokens is set (combining it with max_length is
        # contradictory), and return_full_text=False keeps the prompt out of
        # the returned documentation.
        result = generator(
            prompt,
            max_new_tokens=500,
            num_return_sequences=1,
            return_full_text=False,
        )
        return result[0]['generated_text']
    except Exception as e:
        # Model loading/generation can fail in many ways; report the failure
        # in the documentation output instead of raising.
        return f"Could not generate documentation: {e}"

def analyze_code(code: str) -> Tuple[str, str, str]:
    """Run the security scan, the quality checks and the documentation step.

    :param code: Python source code to analyse.
    :returns: ``(security_report, quality_report, docs)``. Each report joins
        the finding messages with newlines, or is a "no issues" line when
        that text is empty.
    """
    def render(found: List[Dict], fallback: str) -> str:
        # One message per line; fall back when the joined text is empty.
        return "\n".join(str(entry['message']) for entry in found) or fallback

    security_issues = analyze_code_security(code)
    quality_issues = analyze_code_quality(code)
    docs = generate_documentation(code)

    return (
        render(security_issues, "✅ No security issues found"),
        render(quality_issues, "✅ No quality issues found"),
        docs,
    )


# Sample snippets offered in the UI; the first one is also the editor's
# initial content.
_SAMPLE_EVAL_INPUT = """def calculate_sum(a, b):
    x = eval(input("Enter expression: "))
    return a + b + x"""

_SAMPLE_FILE_READ = """def process_data(filename):
    with open(filename) as f:
        data = f.read()
    return data.split()"""

_SAMPLE_CLASS_AND_FUNCTION = """class MyClass:
    def __init__(self, value):
        self.value = value

    def get_value(self):
        return self.value

def my_function(x, y):
    if x > 0:
        return x + y
    else:
        return x - y"""

# Gradio UI: one code editor in, three text reports out. The outputs are
# listed in the same order as the tuple returned by analyze_code.
iface = gr.Interface(
    fn=analyze_code,
    inputs=gr.Code(language="python", lines=20, value=_SAMPLE_EVAL_INPUT),
    outputs=[
        gr.Textbox(label="Security Analysis", lines=10),
        gr.Textbox(label="Code Quality Analysis", lines=10),
        gr.Textbox(label="Generated Documentation", lines=20),
    ],
    title="CodeCritique: AI-Powered Code Analysis",
    description="Paste your Python code to get security analysis, quality recommendations, and auto-generated documentation.",
    examples=[
        [_SAMPLE_EVAL_INPUT],
        [_SAMPLE_FILE_READ],
        [_SAMPLE_CLASS_AND_FUNCTION],
    ],
    theme=gr.themes.Soft(),
)

# Start the web app only when this file is run as the main module.
if __name__ == "__main__":
    iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d8370b76215b3eb041.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
