In [1]:
import os
from groq import Groq
import re
from typing import List, Dict, Any
import yaml
from tqdm import tqdm
import json

In [None]:
# --- Configuration ----------------------------------------------------------
# Credentials must not live in the notebook: the key is read from the
# GROQ_API_KEY environment variable. Falls back to "" (the previous value) when
# the variable is unset, so downstream cells still see a string.
API_KEY = os.environ.get("GROQ_API_KEY", "")
# Root directory of the Django project to scan. Override per machine with the
# DJANGO_PROJECT_PATH environment variable; the default is the original path.
DJANGO_PROJECT_PATH = os.environ.get("DJANGO_PROJECT_PATH", "/Users/ryanmarr/Documents/saleor")
# Chat-completion model identifier passed as `model=` on every request.
MODEL_NAME = "deepseek-r1-distill-llama-70b"
# Upper bound on generated tokens per request (`max_tokens=`).
MAX_TOKENS = 2048
# Sampling parameters (`temperature=` / `top_p=`).
TEMPERATURE = 0.1
TOP_P = 1
# The extraction code reads `completion.choices[0].message.content`, i.e. it
# expects a complete (non-streamed) response object.
STREAM = False

In [None]:
# Groq client setup.
# The notebook imports `Groq` (not `OpenAI`), and MODEL_NAME is a Groq-hosted
# model, so the client must be built from the Groq class; `OpenAI` is undefined
# here and would raise NameError on a fresh kernel.
client = Groq(api_key=API_KEY)

In [4]:
def find_django_files(directory: str) -> List[str]:
    """Collect the paths of every ``.py`` file underneath ``directory``.

    The tree is walked top-down with ``os.walk``. Hidden directories (names
    starting with ".") and a fixed set of tooling / virtual-environment
    directories are pruned so they are never descended into.

    Args:
        directory: Root directory to search.

    Returns:
        File paths (``root`` joined with the file name) in the order
        ``os.walk`` yields them. Empty when nothing matches.
    """
    pruned_names = ['node_modules', '__pycache__', 'venv', 'env', '.git']
    python_paths = []

    for current_dir, subdirs, filenames in os.walk(directory):
        # Slice-assign so os.walk sees the pruned list and skips those subtrees.
        subdirs[:] = [
            name for name in subdirs
            if not (name.startswith('.') or name in pruned_names)
        ]
        python_paths.extend(
            os.path.join(current_dir, filename)
            for filename in filenames
            if filename.endswith('.py')
        )

    return python_paths

In [None]:
def extract_rest_apis_from_file_with_openai(file_path: str, max_chars: int = 8000) -> List[Dict[str, Any]]:
    """Ask the configured chat model which REST API endpoints a file defines.

    The file is read, cheaply pre-filtered on a few API-related keywords,
    truncated, and sent to the module-level ``client`` together with a prompt
    that requests a JSON document of the form ``{"apis": [...]}``.

    This is a best-effort helper: every failure (unreadable file, API error,
    unparseable answer) is reported with ``print`` and yields ``[]`` instead of
    raising, so a long batch run is not aborted by a single file.

    Args:
        file_path: Path of the Python source file to analyze.
        max_chars: Maximum number of characters of the file sent to the model
            (the file is truncated to this length to limit request size).

    Returns:
        The dict entries of the model's ``"apis"`` list, each with an added
        ``"file_path"`` key. Empty when nothing was found or on any failure.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError for files that are not UTF-8.
        print(f"Error reading file {file_path}: {e}")
        return []

    # Skip files that don't contain common Django/API keywords
    # (lower-case once instead of once per keyword).
    lowered = content.lower()
    if not any(keyword in lowered for keyword in ['api', 'view', 'rest', 'http', 'request', 'response', 'serializer']):
        return []

    # Limit content to avoid token limits
    content = content[:max_chars]

    prompt = f"""
        Analyze this Python file and identify all Django REST API functions or classes. Only return the functions that are REST API endpoints. Only return entire implementation of the function including function signature and content. 
        
        File: {file_path}
        
        Look for:
        1. Functions that handle HTTP methods (GET, POST, PUT, DELETE, PATCH)
        2. Functions that process requests and return responses
        3. Any other REST API endpoints
        
        File content:
        {content}
        
        IMPORTANT: Return ONLY valid JSON with this exact structure:
        {{
            "apis": [
                {{
                    "name": "function_or_class_name",
                    "http_method": "GET|POST|PUT|DELETE|PATCH|UNKNOWN",
                    "description": "Brief description of what this API does",
                    "content_django": "actual function and entire implementation function include function signature and content",
                    "content_dafny": "Based on the django function, create a Dafny function specification with preconditions and postconditions assume db schema exists as a dafny type"
                }}
            ]
        }}
        
        If no REST APIs are found, return: {{"apis": []}}
        Do not include any text before or after the JSON.
        """

    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            top_p=TOP_P,
            stream=STREAM
        )
        # The message content may be None; treat that as an empty answer.
        response = (completion.choices[0].message.content or "").strip()
    except Exception as e:
        # The SDK's exception types are third-party; keep the batch running.
        print(f"Error calling LLM API for {file_path}: {e}")
        return []

    # Reasoning models may prepend a <think>...</think> block that can itself
    # contain braces; drop it before looking for the JSON answer.
    answer = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL)

    result = _find_apis_payload(answer)
    if result is None:
        if '{' in answer:
            print(f"Failed to parse JSON from response for {file_path}")
        else:
            print(f"No JSON found in response for {file_path}")
        print(f"Response: {response}")
        return []

    apis = result['apis']
    if not isinstance(apis, list):
        print(f"Failed to parse JSON from response for {file_path}")
        print(f"Response: {response}")
        return []

    found = []
    for api in apis:
        # Ignore malformed entries (e.g. bare strings) instead of failing the file.
        if isinstance(api, dict):
            api['file_path'] = file_path
            found.append(api)
    return found


def _find_apis_payload(text: str):
    """Return the first JSON object in ``text`` that has an "apis" key, else None.

    Decoding is attempted from every "{" with ``JSONDecoder.raw_decode``, which
    finds the matching end itself. Unlike a brace-counting regex this copes
    with arbitrary nesting and with braces inside string values (Python dicts,
    Dafny method bodies).
    """
    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and 'apis' in candidate:
            return candidate
        start = text.find('{', start + 1)
    return None


# Alias kept so callers that use the earlier `_with_groq` name keep working.
extract_rest_apis_from_file_with_groq = extract_rest_apis_from_file_with_openai

In [6]:
# Main execution: analyze the Django project and collect the REST API records
# (each record also carries the Dafny specification drafted by the model).

# Find all Python files
django_files = find_django_files(DJANGO_PROJECT_PATH)
print(f"Found {len(django_files)} Python files")

# Extract REST APIs.
# Call the extractor by the name it is actually defined under in this
# notebook; the previous `..._with_groq` name raised NameError on a fresh kernel.
all_apis = []
for file_path in tqdm(django_files, desc="Analyzing files"):
    apis = extract_rest_apis_from_file_with_openai(file_path)
    all_apis.extend(apis)

Found 4045 Python files


Analyzing files:   0%|          | 4/4045 [00:01<28:57,  2.33it/s]

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/settings.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 10782, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Analyzing files:   0%|          | 17/4045 [01:05<4:59:36,  4.46s/it] 

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/payloads.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 13265, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/event_types.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 9046, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_

Analyzing files:   1%|          | 44/4045 [03:46<7:59:40,  7.19s/it] 

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/transport/asynchronous/transport.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 8290, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Analyzing files:   2%|▏         | 78/4045 [10:43<9:47:48,  8.89s/it] 

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/tests/test_webhook_payloads.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 27405, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Analyzing files:   2%|▏         | 81/4045 [11:53<16:36:37, 15.09s/it]

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/tests/test_tasks.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 9361, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Analyzing files:   2%|▏         | 91/4045 [13:23<8:45:36,  7.98s/it] 

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/tests/response_schemas/test_transaction.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 9200, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Analyzing files:   2%|▏         | 96/4045 [14:04<8:31:43,  7.77s/it] 

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/tests/response_schemas/test_payment.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 6843, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Analyzing files:   2%|▏         | 100/4045 [15:10<13:58:17, 12.75s/it]

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/tests/subscription_webhooks/test_create_deliveries_for_taxes.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 14884, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Analyzing files:   3%|▎         | 104/4045 [15:21<7:29:29,  6.84s/it] 

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/tests/subscription_webhooks/test_create_deliveries_for_subscription.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 28295, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Analyzing files:   3%|▎         | 112/4045 [17:11<13:24:13, 12.27s/it]

Error calling Groq API for /Users/ryanmarr/Documents/saleor/saleor/webhook/tests/subscription_webhooks/subscription_queries.py: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01k1p9jeq1eprawa9nne146kdc` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 12199, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Analyzing files:   3%|▎         | 116/4045 [17:39<9:57:56,  9.13s/it] 


KeyboardInterrupt: 

In [7]:
# Inspect the raw API records accumulated by the analysis loop
# (a list of dicts; the extractor adds a 'file_path' key to each record).
all_apis

[{'name': 'GraphQLView',
  'http_method': 'UNKNOWN',
  'description': 'GraphQL API endpoint for handling GraphQL queries.',
  'content_django': 'GraphQLView.as_view(backend=backend, schema=schema)',
  'content_dafny': '// Dafny function specification for GraphQLView\n// This function handles GraphQL queries and mutations.\nmethod GraphQLView(backend: DatabaseBackend, schema: GraphQLSchema) returns (response: HttpResponse)\n    requires backend != null;\n    requires schema != null;\n    ensures response != null;',
  'file_path': '/Users/ryanmarr/Documents/saleor/saleor/urls.py'},
 {'name': 'digital_product',
  'http_method': 'GET',
  'description': 'Handles requests for digital product downloads.',
  'content_django': 'def digital_product(request, token):\n    # Implementation details would be here\n    pass',
  'content_dafny': '// Dafny function specification for digital_product\n// This function retrieves a digital product based on the provided token.\nmethod digital_product(request

In [None]:
def display_full_api_info(apis: List[Dict[str, Any]]):
    """Print a per-file report of the extracted APIs followed by summary counts.

    Records are grouped by their ``'file_path'`` value (``'Unknown'`` when the
    key is missing), keeping first-seen order. For each record the name, HTTP
    method, description, Django code and Dafny specification are printed, with
    a placeholder text for any missing key. The report ends with a count per
    HTTP method (sorted), the number of distinct files and the total number of
    records.

    Args:
        apis: API records as dictionaries.

    Returns:
        None. All output goes to stdout.
    """
    heavy_rule = "=" * 100
    light_rule = "-" * 50

    print(heavy_rule)
    print("COMPLETE DJANGO REST API ANALYSIS")
    print(heavy_rule)
    print(f"Total APIs found: {len(apis)}")
    print()

    # Bucket the records by originating file, preserving encounter order.
    by_file = {}
    for record in apis:
        by_file.setdefault(record.get('file_path', 'Unknown'), []).append(record)

    # (heading, record key, placeholder) for the two code sections of a record.
    code_sections = (
        ("📝 DJANGO CODE:", 'content_django', 'No Django code available'),
        ("🔬 DAFNY SPECIFICATION:", 'content_dafny', 'No Dafny specification available'),
    )

    for source_path, records in by_file.items():
        print(f"📁 FILE: {source_path}")
        print(heavy_rule)
        print(f"APIs found: {len(records)}")
        print()

        for position, record in enumerate(records, 1):
            print(f"🔗 API #{position}: {record.get('name', 'Unknown')}")
            print(f"   Method: {record.get('http_method', 'UNKNOWN')}")
            print(f"   Description: {record.get('description', 'No description')}")
            print()

            for heading, key, placeholder in code_sections:
                print(heading)
                print(light_rule)
                print(record.get(key, placeholder))
                print()

            print("─" * 100)
            print()

    print(heavy_rule)
    print("SUMMARY STATISTICS")
    print(heavy_rule)

    # Tally how many records use each HTTP method.
    method_counts = {}
    for record in apis:
        verb = record.get('http_method', 'UNKNOWN')
        method_counts[verb] = method_counts.get(verb, 0) + 1

    print("HTTP Methods:")
    for verb, total in sorted(method_counts.items()):
        print(f"  {verb}: {total}")

    print()
    print(f"Files analyzed: {len(by_file)}")
    print(f"Total APIs: {len(apis)}")

In [None]:
# Print the grouped per-file report and the summary statistics for the
# API records collected so far.
display_full_api_info(all_apis)