In [1]:
import os
from groq import Groq
import re
from typing import List, Dict, Any
import yaml
from tqdm import tqdm
import json
from openai import OpenAI

In [None]:
API_KEY = ""
DJANGO_PROJECT_PATH = "/Users/ryanmarr/Documents/saleor"
MODEL_NAME = "gpt-4o"
MAX_TOKENS = 2048
TEMPERATURE = 0.1
TOP_P = 1
STREAM = False

In [3]:
# Groq client setup
client = OpenAI(api_key=API_KEY)

In [4]:
def find_django_files(directory: str) -> List[str]:
    """Perform DFS to find all Python files in Django project"""
    django_files = []
    priority_files = []

    for root, dirs, files in os.walk(directory):
        # Skip common directories that don't contain Django code
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['node_modules', '__pycache__', 'venv', 'env', '.git']]
        
        for file in files:
            if file == 'views.py':
                priority_files.append(os.path.join(root, file))
            elif file.endswith('.py'):
                file_path = os.path.join(root, file)
                django_files.append(file_path)

    return priority_files + django_files

In [5]:
# def extract_rest_apis_from_file_with_openai(file_path: str) -> List[Dict[str, Any]]:
#     """Use OpenAI to analyze file content and find REST APIs"""
#     try:
#         with open(file_path, 'r', encoding='utf-8') as f:
#             content = f.read()  
        
#         # Skip files that don't contain common Django/API keywords
#         if not any(keyword in content.lower() for keyword in ['api', 'view', 'rest', 'http', 'request', 'response', 'serializer']):
#             return []

#         prompt = f"""
#         Analyze this Python file and identify all Django API functions or classes. Only return the functions that are API endpoints. Only return entire implemenation of the function including function signature and content. 
        
#         File: {file_path}
        
#         Look for:
#         1. Functions that handle HTTP methods (GET, POST, PUT, DELETE, PATCH)
#         2. Functions that process requests and return responses
        
#         File content:
#         {content}  
        
#         IMPORTANT: Return ONLY valid JSON with this exact structure:
#         {{
#             "apis": [
#                 {{
#                     "name": "function_or_class_name",
#                     "http_method": "GET|POST|PUT|DELETE|PATCH|UNKNOWN",
#                     "description": "Brief description of what this API does",
#                     "content_django": actual function and entire implementation funtion include function signature and content,
#                     "content_dafny": Based on the django function, create a Dafny function specification with preconditions and postconditions assume db schema exists as a dafny type
#                 }}
#             ]
#         }}
        
#         If no APIs are found, return: {{"apis": []}}
#         Do not include any text before or after the JSON.
#         """
        
#         try:
#             completion = client.chat.completions.create(
#                 model=MODEL_NAME,
#                 messages=[
#                     {
#                         "role": "user",
#                         "content": prompt
#                     }
#                 ],
#                 temperature=TEMPERATURE,
#                 max_tokens=MAX_TOKENS,
#                 top_p=TOP_P,
#                 stream=STREAM
#             )
            
#             response = completion.choices[0].message.content.strip()
            
#             # Improved JSON extraction
#             try:
#                 # First, try to parse the entire response as JSON
#                 result = json.loads(response)
#                 if 'apis' in result:
#                     for api in result.get('apis', []):
#                         api['file_path'] = file_path
#                     return result.get('apis', [])
#             except json.JSONDecodeError:
#                 # If that fails, try to find JSON within the response
#                 # Look for content between the first { and last }
#                 start = response.find('{')
#                 end = response.rfind('}')
                
#                 if start != -1 and end != -1 and end > start:
#                     json_str = response[start:end + 1]
#                     try:
#                         result = json.loads(json_str)
#                         if 'apis' in result:
#                             for api in result.get('apis', []):
#                                 api['file_path'] = file_path
#                             return result.get('apis', [])
#                     except json.JSONDecodeError:
#                         print(f"Failed to parse extracted JSON from response for {file_path}")
#                         print(f"Extracted JSON string: {json_str}")
#                         return []
#                 else:
#                     print(f"No JSON structure found in response for {file_path}")
#                     print(f"Response: {response}")
#                     return []
                
#         except Exception as e:
#             print(f"Error calling OpenAI API for {file_path}: {e}")
#             return []
            
#     except Exception as e:
#         print(f"Error reading file {file_path}: {e}")
#         return []

In [6]:
def extract_rest_apis_from_file_with_openai(file_path: str) -> List[Dict[str, Any]]:
    """Use OpenAI to analyze file content and find REST APIs"""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()  
        
        # Skip files that don't contain common Django/API keywords
        if not any(keyword in content.lower() for keyword in ['api', 'view', 'rest', 'http', 'request', 'response', 'serializer']):
            return []

        prompt = f"""
        Analyze this Python file and identify all Django REST API functions or classes. Only return the functions that are REST API endpoints. Only return entire implemenation of the function including function signature and content. 
        
        File: {file_path}
        
        Look for:
        1. Functions that handle HTTP methods (GET, POST, PUT, DELETE, PATCH)
        2. Functions that process requests and return responses
        3. Any other REST API endpoints
        
        File content:
        {content}  # First 4000 chars for analysis
        
        IMPORTANT: Return ONLY valid JSON with this exact structure:
        {{
            "apis": [
                {{
                    "name": "function_or_class_name",
                    "http_method": "GET|POST|PUT|DELETE|PATCH|UNKNOWN",
                    "description": "Brief description of what this API does",
                    "content_django": actual function and entire implementation funtion include function signature and content,
                    "content_dafny": Based on the django function, create a Dafny function specification with preconditions and postconditions assume db schema exists as a dafny type
                }}
            ]
        }}
        
        If no REST APIs are found, return: {{"apis": []}}
        Do not include any text before or after the JSON.
        """
        
        try:
            completion = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                temperature=TEMPERATURE,
                max_tokens=MAX_TOKENS,
                top_p=TOP_P,
                stream=STREAM
            )
            
            response = completion.choices[0].message.content.strip()
            
            # Improved JSON extraction
            try:
                # First, try to parse the entire response as JSON
                result = json.loads(response)
                if 'apis' in result:
                    for api in result.get('apis', []):
                        api['file_path'] = file_path
                    return result.get('apis', [])
            except json.JSONDecodeError:
                # If that fails, try to find JSON within the response
                # Look for content between the first { and last }
                start = response.find('{')
                end = response.rfind('}')
                
                if start != -1 and end != -1 and end > start:
                    json_str = response[start:end + 1]
                    try:
                        result = json.loads(json_str)
                        if 'apis' in result:
                            for api in result.get('apis', []):
                                api['file_path'] = file_path
                            return result.get('apis', [])
                    except json.JSONDecodeError:
                        print(f"Failed to parse extracted JSON from response for {file_path}")
                        print(f"Extracted JSON string: {json_str}")
                        return []
                else:
                    print(f"No JSON structure found in response for {file_path}")
                    print(f"Response: {response}")
                    return []
                
        except Exception as e:
            print(f"Error calling OpenAI API for {file_path}: {e}")
            return []
            
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return []

In [7]:
"""Analyze Django project and generate Dafny specifications"""

# Find all Python files
django_files = find_django_files(DJANGO_PROJECT_PATH)
print(f"Found {len(django_files)} Python files")

# Extract REST APIs using OpenAI
all_apis = []
for file_path in tqdm(django_files, desc="Analyzing files"):
    apis = extract_rest_apis_from_file_with_openai(file_path)
    print(f'apis: {apis}')
    all_apis.extend(apis)

Found 4045 Python files


Analyzing files:   0%|          | 1/4045 [00:30<33:56:38, 30.22s/it]

apis: [{'name': 'handle_thumbnail', 'http_method': 'UNKNOWN', 'description': 'Create and return thumbnail for given instance in provided size and format.', 'content_django': 'def handle_thumbnail(request, instance_id: str, size: str, format: str | None = None):\n    """Create and return thumbnail for given instance in provided size and format.\n\n    If the provided size is not in the available resolution list, the thumbnail with\n    the closest available size is created and returned, if it does not exist.\n    """\n    # try to find corresponding instance based on given instance_id\n    try:\n        object_type, pk = from_global_id_or_error(instance_id, raise_error=True)\n    except GraphQLError:\n        return HttpResponseNotFound("Cannot found instance with the given id.")\n\n    if object_type not in TYPE_TO_MODEL_DATA_MAPPING.keys():\n        return HttpResponseNotFound("Invalid instance type.")\n\n    # check formats\n    format = format.lower() if format else None\n    if obj

Analyzing files:   0%|          | 2/4045 [00:33<15:51:00, 14.11s/it]

apis: [{'name': 'jwks', 'http_method': 'GET', 'description': 'Returns JSON Web Key Set (JWKS) for JWT validation.', 'content_django': 'def jwks(request):\n    return JsonResponse(get_jwt_manager().get_jwks())', 'content_dafny': 'method jwks(request: Request) returns (response: JsonResponse)\n  requires request != null\n  ensures response != null\n  ensures response.content == get_jwt_manager().get_jwks()', 'file_path': '/Users/ryanmarr/Documents/saleor/saleor/core/views.py'}]


Analyzing files:   0%|          | 3/4045 [00:39<11:56:10, 10.63s/it]

apis: [{'name': 'handle_plugin_webhook', 'http_method': 'UNKNOWN', 'description': 'Handles a webhook for a specific plugin without a channel.', 'content_django': '@allow_writer()\ndef handle_plugin_webhook(request: SaleorContext, plugin_id: str) -> HttpResponse:\n    manager = get_plugins_manager(allow_replica=False)\n    return manager.webhook_endpoint_without_channel(request, plugin_id)', 'content_dafny': 'method handle_plugin_webhook(request: SaleorContext, plugin_id: string) returns (response: HttpResponse)\n  requires request != null && plugin_id != ""\n  ensures response != null\n{\n  var manager := get_plugins_manager(false);\n  response := manager.webhook_endpoint_without_channel(request, plugin_id);\n}', 'file_path': '/Users/ryanmarr/Documents/saleor/saleor/plugins/views.py'}, {'name': 'handle_global_plugin_webhook', 'http_method': 'UNKNOWN', 'description': 'Handles a global webhook for a specific plugin.', 'content_django': '@allow_writer()\ndef handle_global_plugin_webhook(\

Analyzing files:   0%|          | 4/4045 [00:46<10:28:23,  9.33s/it]

apis: [{'name': 'digital_product', 'http_method': 'GET', 'description': 'Return the direct download link to content if given token is still valid.', 'content_django': 'def digital_product(request, token: str) -> FileResponse | HttpResponseNotFound:\n    """Return the direct download link to content if given token is still valid."""\n\n    qs = DigitalContentUrl.objects.using(\n        settings.DATABASE_CONNECTION_REPLICA_NAME\n    ).prefetch_related("line__order__user")\n    content_url = get_object_or_404(qs, token=token)  # type: DigitalContentUrl\n    if not digital_content_url_is_valid(content_url):\n        return HttpResponseNotFound("Url is not valid anymore")\n\n    digital_content = content_url.content\n    digital_content.content_file.open()\n    opened_file = digital_content.content_file.file\n    filename = os.path.basename(digital_content.content_file.name)\n    file_expr = f\'filename="{filename}"\'\n\n    content_type = mimetypes.guess_type(str(filename))[0]\n    respons

Analyzing files:   0%|          | 5/4045 [01:01<12:36:14, 11.23s/it]

apis: [{'name': 'dispatch', 'http_method': 'GET|POST', 'description': 'Handles HTTP GET and POST requests for the GraphQL API.', 'content_django': 'def dispatch(self, request, *args, **kwargs):\n    # Handle options method the GraphQlView restricts it.\n    if request.method == "GET":\n        if settings.PLAYGROUND_ENABLED:\n            return self.render_playground(request)\n        return HttpResponseNotAllowed(["OPTIONS", "POST"])\n    if request.method == "POST":\n        return self.handle_query(request)\n    if settings.PLAYGROUND_ENABLED:\n        return HttpResponseNotAllowed(["GET", "OPTIONS", "POST"])\n    return HttpResponseNotAllowed(["OPTIONS", "POST"])', 'content_dafny': 'method dispatch(request: HttpRequest) returns (response: HttpResponse)\n    requires request.method in ["GET", "POST"]\n    ensures response != null\n{\n    if request.method == "GET" {\n        if settings.PLAYGROUND_ENABLED {\n            response := render_playground(request);\n        } else {\n    

Analyzing files:   0%|          | 9/4045 [01:02<4:16:34,  3.81s/it] 

apis: []
apis: []


Analyzing files:   0%|          | 11/4045 [01:03<3:08:23,  2.80s/it]

apis: []


Analyzing files:   0%|          | 12/4045 [01:04<2:42:12,  2.41s/it]

apis: []


Analyzing files:   0%|          | 13/4045 [01:05<2:15:18,  2.01s/it]

apis: []
apis: []


Analyzing files:   0%|          | 15/4045 [01:06<1:40:15,  1.49s/it]

apis: []


Analyzing files:   0%|          | 16/4045 [01:07<1:30:09,  1.34s/it]

apis: []


Analyzing files:   0%|          | 17/4045 [01:08<1:26:15,  1.28s/it]

apis: []


Analyzing files:   0%|          | 18/4045 [01:08<1:18:59,  1.18s/it]

apis: []
apis: []
apis: []
apis: []


Analyzing files:   1%|          | 22/4045 [01:10<50:53,  1.32it/s]  

apis: []
apis: []
apis: []


Analyzing files:   1%|          | 25/4045 [01:15<1:07:16,  1.00s/it]

apis: []


Analyzing files:   1%|          | 26/4045 [01:17<1:26:47,  1.30s/it]

apis: []
apis: []


Analyzing files:   1%|          | 28/4045 [01:18<1:08:22,  1.02s/it]

apis: []
apis: []
apis: []
apis: []
apis: []
apis: []
apis: []
apis: []
apis: []
apis: []


Analyzing files:   1%|          | 38/4045 [01:19<24:58,  2.67it/s]  

apis: []


Analyzing files:   1%|          | 39/4045 [01:20<26:35,  2.51it/s]

apis: []


Analyzing files:   1%|          | 40/4045 [01:22<37:31,  1.78it/s]

apis: []
apis: []


Analyzing files:   1%|          | 42/4045 [01:22<34:58,  1.91it/s]

apis: []
apis: []


Analyzing files:   1%|          | 44/4045 [01:27<1:05:47,  1.01it/s]

apis: []


Analyzing files:   1%|          | 45/4045 [01:37<2:38:57,  2.38s/it]

apis: []


Analyzing files:   1%|          | 46/4045 [01:42<3:08:04,  2.82s/it]

apis: []
apis: []


Analyzing files:   1%|          | 48/4045 [01:42<2:11:47,  1.98s/it]

apis: []


Analyzing files:   1%|          | 49/4045 [01:43<1:53:54,  1.71s/it]

apis: []


Analyzing files:   1%|          | 50/4045 [01:59<5:25:50,  4.89s/it]

apis: []


Analyzing files:   1%|▏         | 51/4045 [02:00<4:25:01,  3.98s/it]

apis: []
apis: []


Analyzing files:   1%|▏         | 53/4045 [02:01<2:51:25,  2.58s/it]

apis: []


Analyzing files:   1%|▏         | 54/4045 [02:04<2:49:34,  2.55s/it]

apis: []


Analyzing files:   1%|▏         | 55/4045 [02:08<3:13:01,  2.90s/it]

apis: []


Analyzing files:   1%|▏         | 56/4045 [02:10<3:01:42,  2.73s/it]

apis: []


Analyzing files:   1%|▏         | 57/4045 [02:15<3:48:01,  3.43s/it]

apis: []


Analyzing files:   1%|▏         | 58/4045 [02:22<4:54:56,  4.44s/it]

apis: []
apis: []
apis: []
apis: []


Analyzing files:   2%|▏         | 62/4045 [02:32<3:30:34,  3.17s/it]

apis: []
apis: []
apis: []


Analyzing files:   2%|▏         | 65/4045 [02:34<2:22:29,  2.15s/it]

apis: []


Analyzing files:   2%|▏         | 66/4045 [02:41<3:13:12,  2.91s/it]

apis: []


Analyzing files:   2%|▏         | 67/4045 [02:42<2:54:58,  2.64s/it]

apis: []
apis: []
apis: []


Analyzing files:   2%|▏         | 70/4045 [02:45<2:06:39,  1.91s/it]

apis: []
apis: []
apis: []


Analyzing files:   2%|▏         | 73/4045 [02:49<1:48:04,  1.63s/it]

apis: []
apis: []


Analyzing files:   2%|▏         | 75/4045 [02:49<1:23:43,  1.27s/it]

apis: []


Analyzing files:   2%|▏         | 76/4045 [02:53<1:53:40,  1.72s/it]

apis: []


Analyzing files:   2%|▏         | 77/4045 [02:54<1:41:29,  1.53s/it]

apis: []


Analyzing files:   2%|▏         | 78/4045 [03:03<3:31:24,  3.20s/it]

apis: []


Analyzing files:   2%|▏         | 79/4045 [03:04<2:54:14,  2.64s/it]

apis: []


Analyzing files:   2%|▏         | 80/4045 [03:08<3:15:18,  2.96s/it]

apis: []
apis: []
apis: []


Analyzing files:   2%|▏         | 83/4045 [04:05<12:15:17, 11.14s/it]

apis: []


Analyzing files:   2%|▏         | 84/4045 [04:05<9:56:04,  9.03s/it] 

apis: []


Analyzing files:   2%|▏         | 85/4045 [04:06<7:58:59,  7.26s/it]

apis: []


Analyzing files:   2%|▏         | 86/4045 [04:07<6:22:14,  5.79s/it]

apis: []


Analyzing files:   2%|▏         | 87/4045 [04:26<9:54:24,  9.01s/it]

apis: []
apis: []


Analyzing files:   2%|▏         | 89/4045 [04:30<6:42:49,  6.11s/it]

apis: []


Analyzing files:   2%|▏         | 90/4045 [04:30<5:18:16,  4.83s/it]

apis: []


Analyzing files:   2%|▏         | 91/4045 [04:31<4:12:14,  3.83s/it]

apis: []


Analyzing files:   2%|▏         | 92/4045 [04:35<4:21:03,  3.96s/it]

apis: []
apis: []
apis: []
apis: []


Analyzing files:   2%|▏         | 96/4045 [04:36<1:53:23,  1.72s/it]

apis: []


Analyzing files:   2%|▏         | 97/4045 [04:56<5:21:27,  4.89s/it]

apis: []


Analyzing files:   2%|▏         | 97/4045 [04:57<3:21:50,  3.07s/it]


KeyboardInterrupt: 

In [8]:
def display_full_api_info(apis: List[Dict[str, Any]]):
    """Display complete API information with full Django code and Dafny specs"""
    
    print("=" * 100)
    print("COMPLETE DJANGO REST API ANALYSIS")
    print("=" * 100)
    print(f"Total APIs found: {len(apis)}")
    print()
    
    # Group by file path
    files = {}
    for api in apis:
        file_path = api.get('file_path', 'Unknown')
        if file_path not in files:
            files[file_path] = []
        files[file_path].append(api)
    
    # Display by file
    for file_path, file_apis in files.items():
        print(f"📁 FILE: {file_path}")
        print("=" * 100)
        print(f"APIs found: {len(file_apis)}")
        print()
        
        for i, api in enumerate(file_apis, 1):
            print(f"🔗 API #{i}: {api.get('name', 'Unknown')}")
            print(f"   Method: {api.get('http_method', 'UNKNOWN')}")
            print(f"   Description: {api.get('description', 'No description')}")
            print()
            
            print("📝 DJANGO CODE:")
            print("-" * 50)
            django_code = api.get('content_django', 'No Django code available')
            print(django_code)
            print()
            
            print("🔬 DAFNY SPECIFICATION:")
            print("-" * 50)
            dafny_spec = api.get('content_dafny', 'No Dafny specification available')
            print(dafny_spec)
            print()
            
            print("─" * 100)
            print()
    
    # Summary statistics
    print("=" * 100)
    print("SUMMARY STATISTICS")
    print("=" * 100)
    
    # HTTP method distribution
    methods = {}
    for api in apis:
        method = api.get('http_method', 'UNKNOWN')
        methods[method] = methods.get(method, 0) + 1
    
    print("HTTP Methods:")
    for method, count in sorted(methods.items()):
        print(f"  {method}: {count}")
    
    print()
    print(f"Files analyzed: {len(files)}")
    print(f"Total APIs: {len(apis)}")

In [9]:
display_full_api_info(all_apis)

COMPLETE DJANGO REST API ANALYSIS
Total APIs found: 8

📁 FILE: /Users/ryanmarr/Documents/saleor/saleor/thumbnail/views.py
APIs found: 1

🔗 API #1: handle_thumbnail
   Method: UNKNOWN
   Description: Create and return thumbnail for given instance in provided size and format.

📝 DJANGO CODE:
--------------------------------------------------
def handle_thumbnail(request, instance_id: str, size: str, format: str | None = None):
    """Create and return thumbnail for given instance in provided size and format.

    If the provided size is not in the available resolution list, the thumbnail with
    the closest available size is created and returned, if it does not exist.
    """
    # try to find corresponding instance based on given instance_id
    try:
        object_type, pk = from_global_id_or_error(instance_id, raise_error=True)
    except GraphQLError:
        return HttpResponseNotFound("Cannot found instance with the given id.")

    if object_type not in TYPE_TO_MODEL_DATA_MAPPING