In [1]:
import importlib.util
import json
import os
import re
from typing import Any, Dict, List, Optional

import yaml
from groq import Groq
from openai import OpenAI
from tqdm import tqdm

In [None]:
# --- Configuration -----------------------------------------------------------
# Credentials and machine-specific paths are read from the environment when set,
# so the notebook can be shared without embedding secrets or editing paths. The
# fallbacks are the values that were previously hard-coded here.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
DJANGO_PROJECT_PATH = os.environ.get("DJANGO_PROJECT_PATH", "/Users/ryanmarr/Documents/sentry")

# Chat-completion request parameters.
MODEL_NAME = "gpt-4o"
TEMPERATURE = 0.1
TOP_P = 1
STREAM = False
# MAX_TOKENS used to be assigned twice in this cell (2048, then 30000). Only the
# last assignment ever took effect, so the dead one is removed and the effective
# value is kept.
# NOTE(review): the 429 responses captured in this notebook report a limit of
# 30000 tokens per minute; a completion budget this large may exhaust that limit
# on its own -- confirm the intended value.
MAX_TOKENS = 30000

In [3]:
# OpenAI client setup (authenticates with API_KEY from the configuration cell)
client = OpenAI(api_key=API_KEY)

In [4]:
def find_django_files(directory: str) -> List[str]:
    """Collect the paths of all ``.py`` files below ``directory``.

    Hidden directories and a fixed set of tooling/virtualenv directories are
    pruned from the walk. Files named ``views.py`` are listed first, followed
    by every other Python file, each group in the order ``os.walk`` visits them.
    """
    excluded_names = ['node_modules', '__pycache__', 'venv', 'env', '.git']
    views_first: List[str] = []
    remaining: List[str] = []

    for current_dir, subdirs, filenames in os.walk(directory):
        # Prune in place so os.walk never descends into the skipped directories.
        subdirs[:] = [
            name for name in subdirs
            if not (name.startswith('.') or name in excluded_names)
        ]

        for filename in filenames:
            if filename == 'views.py':
                bucket = views_first
            elif filename.endswith('.py'):
                bucket = remaining
            else:
                continue
            bucket.append(os.path.join(current_dir, filename))

    return views_first + remaining

In [6]:
# Import statement shapes recognised by expand_local_imports. In every pattern
# group(1) is the (possibly dotted) module name.
_IMPORT_PATTERNS = (
    re.compile(r'^from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import\s+([a-zA-Z_][a-zA-Z0-9_,\s*]*)'),
    re.compile(r'^import\s+([a-zA-Z_][a-zA-Z0-9_.]*)'),
    re.compile(r'^from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import\s*\*'),
)
_BANNER = f"# {'=' * 50}"


def _banner_block(title, body):
    """Wrap ``body`` in the banner comments used to mark inlined source."""
    return f"\n{_BANNER}\n# {title}\n{_BANNER}\n{body}\n{_BANNER}\n"


def _find_local_module(module_name, file_dir):
    """Return the first existing file that could define ``module_name``, or None.

    Looks in ``file_dir`` and then in up to two parent directories (the
    filesystem root itself is never probed). Dotted names are mapped onto
    nested directories, e.g. ``pkg.sub`` -> ``pkg/sub.py``.
    """
    parts = module_name.split('.')
    relative = os.path.join(*parts)
    leaf = parts[-1]

    current_dir = file_dir
    for _ in range(3):  # Go up 3 levels max to avoid going too far
        candidates = (
            os.path.join(current_dir, f"{relative}.py"),
            os.path.join(current_dir, relative, "__init__.py"),
            os.path.join(current_dir, relative, f"{leaf}.py"),
        )
        for candidate in candidates:
            if os.path.exists(candidate):
                return candidate
        current_dir = os.path.dirname(current_dir)
        if current_dir == os.path.dirname(current_dir):  # Reached root
            break
    return None


def _expand_import_line(original_line, file_dir, visited_files):
    """Return the text that should stand in for one source line.

    Lines that are not a recognised import are returned unchanged.
    """
    stripped = original_line.strip()
    if not stripped.startswith(('from', 'import')):
        return original_line

    match = None
    for pattern in _IMPORT_PATTERNS:
        match = pattern.match(stripped)
        if match:
            break
    if match is None:
        return original_line
    module_name = match.group(1)

    local_file_path = _find_local_module(module_name, file_dir)
    if local_file_path:
        print(f"Expanding local import: {module_name} -> {local_file_path}")
        # Each branch gets its own copy, so the set only holds the chain of
        # files currently being expanded; a module imported from two different
        # places is expanded in both.
        local_content = expand_local_imports(local_file_path, visited_files.copy())
        if local_content:
            return _banner_block(f"Expanded from: {local_file_path}", local_content)
        return original_line

    # Handle external packages (simplified): inline the module's source file
    # when it can be located and read as UTF-8 text.
    try:
        # NOTE: for dotted names find_spec imports the parent package, which
        # executes that package's import-time code.
        spec = importlib.util.find_spec(module_name)
        if spec and spec.origin and os.path.exists(spec.origin):
            with open(spec.origin, 'r', encoding='utf-8') as f:
                package_source = f.read()
            return _banner_block(f"External package: {module_name}", package_source)
    except Exception:
        # Comment out external imports we can't expand. ``Exception`` (rather
        # than a bare except) lets Ctrl-C interrupt a long run.
        return f"# {original_line} # External import - not expanded"
    return original_line


def expand_local_imports(file_path: str, visited_files: Optional[set] = None) -> Optional[str]:
    """Recursively inline the source of modules imported by a Python file.

    Every single-line ``import x`` / ``from x import ...`` statement whose
    module resolves to a file near ``file_path`` is replaced by that file's
    (recursively expanded) source between banner comments. Imports that do not
    resolve locally are looked up with ``importlib`` and inlined from the
    installed module's source when possible. Relative imports (``from . import
    x``) and parenthesised multi-line imports do not match the recognised
    patterns and are left untouched.

    Each line is rewritten individually, so expanding ``import foo`` can no
    longer corrupt another line that merely contains that text (for example
    ``import foo_bar``).

    Args:
        file_path: Path of the Python file to expand.
        visited_files: Files already on the current expansion chain; used to
            stop circular imports. ``file_path`` is added to the set passed in.

    Returns:
        The expanded source text; a one-line ``# Circular import detected``
        comment when ``file_path`` is already being expanded; or ``None`` when
        ``file_path`` is not a ``.py`` file or an error occurs (the error is
        printed).
    """
    if not file_path.endswith('.py'):
        return None

    # Prevent infinite recursion by tracking visited files
    if visited_files is None:
        visited_files = set()

    if file_path in visited_files:
        return f"# Circular import detected: {file_path}"

    visited_files.add(file_path)

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Local modules are searched for relative to the current file.
        file_dir = os.path.dirname(os.path.abspath(file_path))

        expanded_lines = [
            _expand_import_line(line, file_dir, visited_files)
            for line in content.split('\n')
        ]
        return '\n'.join(expanded_lines)

    except Exception as e:
        print(f"Error expanding imports in {file_path}: {e}")
        return None

In [7]:
# Smoke-test the import expander on a single module of the target project.
# The path is derived from DJANGO_PROJECT_PATH so it follows the configured
# project location instead of repeating an absolute path.
file_path = os.path.join(DJANGO_PROJECT_PATH, 'src', 'sentry', 'utils', 'assets.py')
content = expand_local_imports(file_path)

In [None]:
def _parse_apis_response(response: str, file_path: str) -> List[Dict[str, Any]]:
    """Pull the ``apis`` list out of a model reply and tag each entry with ``file_path``.

    Always returns a list; any reply that cannot be interpreted yields ``[]``.
    """
    try:
        # First, try to parse the entire response as JSON
        result = json.loads(response)
    except json.JSONDecodeError:
        # If that fails, look for content between the first { and last }
        # (covers replies wrapped in prose or Markdown code fences).
        start = response.find('{')
        end = response.rfind('}')
        if start == -1 or end <= start:
            print(f"No JSON structure found in response for {file_path}")
            print(f"Response: {response}")
            return []

        json_str = response[start:end + 1]
        try:
            result = json.loads(json_str)
        except json.JSONDecodeError:
            print(f"Failed to parse extracted JSON from response for {file_path}")
            print(f"Extracted JSON string: {json_str}")
            return []

    apis = result.get('apis') if isinstance(result, dict) else None
    if not isinstance(apis, list):
        # Valid JSON but not the requested shape: report it and return an empty
        # list so callers can always extend() with the result.
        print(f"Response for {file_path} does not contain an 'apis' list")
        return []

    tagged = []
    for api in apis:
        if isinstance(api, dict):
            api['file_path'] = file_path
            tagged.append(api)
    return tagged


def extract_rest_apis_from_file_with_openai(file_path: str, max_content_chars: int = 4000) -> List[Dict[str, Any]]:
    """Use OpenAI to analyze file content and find REST APIs.

    The file's imports are expanded with ``expand_local_imports``, the first
    ``max_content_chars`` characters of the result are embedded in a prompt, and
    the model's JSON reply is parsed.

    Args:
        file_path: Path of the Python file to analyze.
        max_content_chars: Number of leading characters of the expanded source
            sent to the model.

    Returns:
        A list of API description dicts as returned by the model, each with an
        added ``file_path`` key. The result is always a list: ``[]`` is returned
        when the file is not a ``.py`` file, yields no source, the request
        fails, or the reply cannot be parsed into the expected structure.
    """
    try:
        content = expand_local_imports(file_path) if file_path.endswith('.py') else None
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return []

    if not content:
        # Non-Python path, failed expansion, or empty file: nothing to send.
        print(f"Skipping {file_path}: no Python source to analyze")
        return []

    prompt = f"""
        Analyze this Python file and identify all Django REST API functions or classes. Only return the functions that are REST API endpoints. 
        Only return entire implemenation of the function including function signature and content. 
        
        File: {file_path}
        
        Look for:
        1. Functions that handle HTTP methods (GET, POST, PUT, DELETE, PATCH)
        2. Functions that process requests and return responses
        3. Any other REST API endpoints
        
        File content:
        {content[:max_content_chars]}
        
        IMPORTANT: Return ONLY valid JSON with this exact structure:
        {{
            "apis": [
                {{
                    "name": "function_or_class_name",
                    "http_method": "GET|POST|PUT|DELETE|PATCH|UNKNOWN",
                    "description": "Brief description of what this API does",
                    "content_django": actual function and entire implementation funtion include function signature and content,
                    "content_dafny": Based on the django function, create a Dafny function specification with preconditions and postconditions assume db schema exists as a dafny type
                }}
            ]
        }}
        
        If no REST APIs are found, return: {{"apis": []}}
        Do not include any text before or after the JSON.
        """

    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            top_p=TOP_P,
            stream=STREAM
        )

        response = completion.choices[0].message.content.strip()
        return _parse_apis_response(response, file_path)

    except Exception as e:
        print(f"Error calling OpenAI API for {file_path}: {e}")
        return []

In [9]:
"""Analyze Django project and generate Dafny specifications"""

# Collect every Python file under DJANGO_PROJECT_PATH; find_django_files lists
# views.py files ahead of all other modules.
django_files = find_django_files(DJANGO_PROJECT_PATH)
print(f"Found {len(django_files)} Python files")

Found 6673 Python files


In [None]:
#django_files

['/Users/ryanmarr/Documents/sentry/django_migration_demo/myapp/views.py',
 '/Users/ryanmarr/Documents/sentry/src/social_auth/views.py',
 '/Users/ryanmarr/Documents/sentry/src/sudo/views.py',
 '/Users/ryanmarr/Documents/sentry/src/sentry/auth/providers/fly/views.py',
 '/Users/ryanmarr/Documents/sentry/src/sentry/auth/providers/google/views.py',
 '/Users/ryanmarr/Documents/sentry/src/sentry/auth/providers/saml2/views.py',
 '/Users/ryanmarr/Documents/sentry/src/sentry/auth/providers/saml2/generic/views.py',
 '/Users/ryanmarr/Documents/sentry/src/sentry/auth/providers/github/views.py',
 '/Users/ryanmarr/Documents/sentry/migration_to_sql.py',
 '/Users/ryanmarr/Documents/sentry/django_migration_demo/manage.py',
 '/Users/ryanmarr/Documents/sentry/django_migration_demo/myproject/asgi.py',
 '/Users/ryanmarr/Documents/sentry/django_migration_demo/myproject/__init__.py',
 '/Users/ryanmarr/Documents/sentry/django_migration_demo/myproject/settings.py',
 '/Users/ryanmarr/Documents/sentry/django_migr

In [11]:
# Extract REST APIs using OpenAI: one chat-completion request per discovered file.
all_apis = []
for file_path in tqdm(django_files, desc="Analyzing files"):
    apis = extract_rest_apis_from_file_with_openai(file_path)
    # Skip empty or missing results so a single unusable reply cannot abort the
    # whole run with a TypeError from extend(None).
    if apis:
        all_apis.extend(apis)

Analyzing files:   0%|          | 3/6673 [00:26<16:16:46,  8.79s/it]

Error calling OpenAI API for /Users/ryanmarr/Documents/sentry/src/sudo/views.py: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-MaKplQDRjBAzeGPYf6lsMrHF on tokens per min (TPM): Limit 30000, Requested 55486. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}


Analyzing files:   0%|          | 23/6673 [04:20<3:38:21,  1.97s/it] 

Error calling OpenAI API for /Users/ryanmarr/Documents/sentry/tools/flake8_plugin.py: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-MaKplQDRjBAzeGPYf6lsMrHF on tokens per min (TPM): Limit 30000, Requested 36826. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}


Analyzing files:   0%|          | 24/6673 [04:22<3:36:58,  1.96s/it]

Error calling OpenAI API for /Users/ryanmarr/Documents/sentry/tools/lib.py: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-MaKplQDRjBAzeGPYf6lsMrHF on tokens per min (TPM): Limit 30000, Requested 36485. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}


Analyzing files:   0%|          | 25/6673 [04:24<3:36:58,  1.96s/it]

Error calling OpenAI API for /Users/ryanmarr/Documents/sentry/tools/docker_memory_check.py: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-MaKplQDRjBAzeGPYf6lsMrHF on tokens per min (TPM): Limit 30000, Requested 34392. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}


Analyzing files:   0%|          | 26/6673 [04:26<3:49:45,  2.07s/it]

Error calling OpenAI API for /Users/ryanmarr/Documents/sentry/tools/freeze_requirements.py: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-MaKplQDRjBAzeGPYf6lsMrHF on tokens per min (TPM): Limit 30000, Requested 71794. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}


Analyzing files:   0%|          | 26/6673 [05:23<22:57:14, 12.43s/it]


KeyboardInterrupt: 

In [None]:
# Show the raw API records accumulated by the analysis loop.
all_apis

In [None]:
def display_full_api_info(apis: List[Dict[str, Any]]):
    """Print a full report of the extracted APIs.

    The report has a header, one section per source file listing each API's
    name, HTTP method, description, Django code and Dafny specification, and a
    closing summary with per-method counts and totals. Missing keys fall back
    to placeholder text.
    """
    heavy_rule = "=" * 100
    light_rule = "-" * 50

    def banner(title):
        print(heavy_rule)
        print(title)
        print(heavy_rule)

    banner("COMPLETE DJANGO REST API ANALYSIS")
    print(f"Total APIs found: {len(apis)}")
    print()

    # Bucket the records by the file they came from, keeping first-seen order.
    by_file = {}
    for entry in apis:
        by_file.setdefault(entry.get('file_path', 'Unknown'), []).append(entry)

    code_sections = (
        ("📝 DJANGO CODE:", 'content_django', 'No Django code available'),
        ("🔬 DAFNY SPECIFICATION:", 'content_dafny', 'No Dafny specification available'),
    )

    for source_path, entries in by_file.items():
        print(f"📁 FILE: {source_path}")
        print(heavy_rule)
        print(f"APIs found: {len(entries)}")
        print()

        for position, entry in enumerate(entries, start=1):
            print(f"🔗 API #{position}: {entry.get('name', 'Unknown')}")
            print(f"   Method: {entry.get('http_method', 'UNKNOWN')}")
            print(f"   Description: {entry.get('description', 'No description')}")
            print()

            for heading, key, fallback in code_sections:
                print(heading)
                print(light_rule)
                print(entry.get(key, fallback))
                print()

            print("─" * 100)
            print()

    banner("SUMMARY STATISTICS")

    # Tally how many APIs use each HTTP method.
    method_counts = {}
    for entry in apis:
        verb = entry.get('http_method', 'UNKNOWN')
        method_counts[verb] = method_counts.get(verb, 0) + 1

    print("HTTP Methods:")
    for verb in sorted(method_counts):
        print(f"  {verb}: {method_counts[verb]}")

    print()
    print(f"Files analyzed: {len(by_file)}")
    print(f"Total APIs: {len(apis)}")

In [None]:
# Print the per-file report and summary statistics for everything extracted.
display_full_api_info(all_apis)