In [1]:
import ast
import json
import os

In [11]:
def validate_json_classes_functions(data):
    """
    Check that the selected classes and functions exist in their source files.

    Args:
        data: Iterable of dicts, each with the keys "file_path" (path to a
            Python source file), "selected_functions" and "selected_classes"
            (iterables of names).

    Return:
        Valid: True when no error was found otherwise False
        Error_string: newline-separated string describing all errors
        filtered_data: list of dicts in the input shape that only contains
            the names which were found; files that do not exist, cannot be
            processed, or have no found names at all are left out
    """
    def get_definitions(file_content):
        """Parse Python source and return the class and function names found.

        The whole tree is walked, so methods and nested definitions are
        collected too. Both ``def`` and ``async def`` count as functions.
        """
        tree = ast.parse(file_content)
        definitions = {"classes": set(), "functions": set()}

        for node in ast.walk(tree):
            if isinstance(node, ast.ClassDef):
                definitions["classes"].add(node.name)
            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                definitions["functions"].add(node.name)

        return definitions

    valid = True
    error_messages = []
    filtered_data = []

    for item in data:
        file_path = item["file_path"]
        # De-duplicate while keeping the caller's order, so error messages and
        # filtered lists are deterministic (set iteration order is not).
        selected_functions = list(dict.fromkeys(item["selected_functions"]))
        selected_classes = list(dict.fromkeys(item["selected_classes"]))

        if not os.path.exists(file_path):
            valid = False
            error_messages.append(f"File not found: {file_path}")
            continue  # Skip to the next item, do not add this file to filtered_data

        try:
            # Read raw bytes: ast.parse then applies the file's own encoding
            # declaration / BOM instead of the platform's default encoding.
            with open(file_path, 'rb') as file:
                file_content = file.read()

            definitions = get_definitions(file_content)

            missing_functions = [
                name for name in selected_functions
                if name not in definitions["functions"]
            ]
            missing_classes = [
                name for name in selected_classes
                if name not in definitions["classes"]
            ]

            # Keep only the names that really exist in the file
            valid_functions = [
                name for name in selected_functions
                if name in definitions["functions"]
            ]
            valid_classes = [
                name for name in selected_classes
                if name in definitions["classes"]
            ]

            if missing_functions:
                valid = False
                error_messages.append(
                    f"Missing functions in {file_path}: {', '.join(missing_functions)}"
                )
            if missing_classes:
                valid = False
                error_messages.append(
                    f"Missing classes in {file_path}: {', '.join(missing_classes)}"
                )

            if valid_functions or valid_classes:
                filtered_data.append({
                    "file_path": file_path,
                    "selected_functions": valid_functions,
                    "selected_classes": valid_classes
                })

        except Exception as e:
            # Best effort: unreadable or unparsable files are reported in the
            # error string instead of aborting the whole validation run.
            valid = False
            error_messages.append(f"Error processing {file_path}: {str(e)}")

    return valid, "\n".join(error_messages), filtered_data

In [20]:
def parse_json_string(json_string):
    """
    Parse and validate a JSON list of file selections.

    The expected structure is a list of dictionaries, each with a string
    'file_path' and two lists, 'selected_functions' and 'selected_classes'.
    A '--- END OF LIST ---' marker in the input is removed before parsing.
    The entries of both lists are converted to strings.

    Returns:
        Valid: True when the JSON is valid otherwise False
        data: Either the parsed JSON data, or an error message
    """
    # Without this guard the marker check below raises TypeError for
    # None/bytes instead of returning the documented (Valid, data) tuple.
    if not isinstance(json_string, str):
        return (
            False,
            f"Error: Input should be a JSON string, got {type(json_string).__name__}.",
        )

    try:
        # Remove the '--- END OF LIST ---' if it's present
        if '--- END OF LIST ---' in json_string:
            json_string = json_string.replace('--- END OF LIST ---', '').strip()

        # Attempt to parse the JSON string into a Python object
        data = json.loads(json_string)
    except json.JSONDecodeError as e:
        return (False, f"Error: Failed to parse JSON. {str(e)}")

    # Check if the top-level structure is a list
    if not isinstance(data, list):
        return (False, "Error: JSON should start with a list of dictionaries.")

    for i, item in enumerate(data):
        if not isinstance(item, dict):
            return (False, f"Error: Element at index {i} should be a dictionary.")

        # Check for the 'file_path' key and its type
        if 'file_path' not in item:
            return (False, f"Error: Missing 'file_path' key in element at index {i}.")
        if not isinstance(item['file_path'], str):
            return (False, f"Error: 'file_path' at index {i} should be a string.")

        # Both selection keys must exist and hold a list; functions are
        # checked before classes so the first reported error is unchanged.
        for key in ('selected_functions', 'selected_classes'):
            if key not in item:
                return (False, f"Error: '{key}' key is missing at index {i}.")
            if not isinstance(item[key], list):
                return (False, f"Error: '{key}' at index {i} should be a list.")

        # Convert all elements in 'selected_functions' and 'selected_classes' to strings
        for key in ('selected_functions', 'selected_classes'):
            item[key] = [str(name) for name in item[key]]

    return (True, data)
    



In [23]:
import re

def check_unified_diff(diff_string: str) -> str:
    """
    Check that a string looks like a unified diff closed by an end marker.

    The text must end with '--- END OF DIFF ---'. After removing that marker,
    the first two lines must be the '--- ' and '+++ ' file headers and every
    following line must be a chunk header ('@@ -l,s +l,s @@'), a context
    line (' '), a removal ('-'), an addition ('+') or a
    '\\ No newline at end of file' note.

    Args:
        diff_string: The diff text including the trailing end marker.

    Returns:
        "The diff is correct." when all checks pass, otherwise a message
        describing the first problem found.
    """
    end_marker = '--- END OF DIFF ---'

    # Step 1: Ensure the string ends with "--- END OF DIFF ---"
    if not diff_string.endswith(end_marker):
        return "The diff does not end with '--- END OF DIFF ---'."

    # Step 2: Remove the end marker and split the lines.
    # Slice the suffix off: str.rstrip() takes a *set of characters*, so it
    # would also eat trailing diff content made of '-', ' ', 'E', 'N', 'D', ...
    diff_string = diff_string[:-len(end_marker)].strip()
    diff_lines = diff_string.splitlines()

    # Step 3: Validate unified diff structure
    if len(diff_lines) < 4:
        return "The diff is too short to be valid."

    # Unified diff header lines must start with '---' and '+++'
    if not diff_lines[0].startswith('--- '):
        return "The diff header must start with '--- ' followed by the original file name."
    if not diff_lines[1].startswith('+++ '):
        return "The diff header must have '+++ ' followed by the new file name."

    # Validate chunk headers and chunk bodies
    for i, line in enumerate(diff_lines[2:], start=2):
        if line.startswith('@@'):
            # Chunk header should follow this pattern: @@ -l,s +l,s @@
            if not re.match(r'^@@ -\d+(,\d+)? \+\d+(,\d+)? @@', line):
                return f"Invalid chunk header format at line {i+1}: '{line}'"
        elif line.startswith((' ', '-', '+', '\\ ')):
            # Valid lines in a chunk are context (' '), removal ('-'),
            # addition ('+') or the '\ No newline at end of file' note.
            continue
        else:
            return f"Unexpected line format at line {i+1}: '{line}'"

    return "The diff is correct."

In [26]:
# Smallest well-formed example: file headers, one chunk, and the end marker.
diff = "\n".join([
    "--- a/file.txt",
    "+++ b/file.txt",
    "@@ -1,3 +1,3 @@",
    "-Hello",
    "+Hi",
    " World",
    "--- END OF DIFF ---",
])

# Show the input first, then the checker's verdict for it.
print(diff)
result = check_unified_diff(diff)
print(result)

--- a/file.txt
+++ b/file.txt
@@ -1,3 +1,3 @@
-Hello
+Hi
 World
--- END OF DIFF ---
The diff is correct.


In [21]:
# Sample payload for parse_json_string.
# NOTE: the entries use the keys "classes" / "functions", not the
# "selected_classes" / "selected_functions" keys that parse_json_string
# checks for, so validating this payload reports a missing-key error.
test = """
[
  {
      "file_path": "./repos/sqlfluff/src/sqlfluff/core/cached_property.py",
      "classes": [
          "CachedProperty"
      ],
      "functions": []
  },
  {
      "file_path": "./repos/sqlfluff/src/sqlfluff/core/timing.py",
      "classes": [
          "TimingSummary"
      ],
      "functions": []
  },
  {
      "file_path": "./repos/sqlfluff/src/sqlfluff/core/linter/linted_file.py",
      "classes": [],
      "functions": ["get_violations"]
  }
  ]
"""

In [22]:
# Validate the sample payload; the cell displays the (valid, data-or-error) tuple.
parse_json_string(test)

(False, "Error: 'selected_functions' key is missing at index 0.")

In [39]:
# NOTE(review): `valid_data` is not assigned in any cell shown here, so this
# cell depends on kernel state from elsewhere — confirm where it is defined.
valid_data

[]