In [None]:
import os
from groq import Groq
import re
from typing import List, Dict, Any
import yaml
from tqdm import tqdm

In [None]:
# Groq API key. Read from the GROQ_API_KEY environment variable so that no
# credential is ever stored in the notebook; falls back to an empty string
# (the previous placeholder value) when the variable is not set.
API_KEY = os.environ.get("GROQ_API_KEY", "")

In [None]:
# from groq import Groq

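# NOTE: a literal API key was previously committed on the next line; it has been
# redacted here and should be revoked/rotated in the Groq console.
# client = Groq(api_key=os.environ["GROQ_API_KEY"])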
# completion = client.chat.completions.create(
#     model="meta-llama/llama-4-scout-17b-16e-instruct",
#     messages=[
#       {
#         "role": "user",
#         "content": "Explain the importance of fast language models"
#       }
#     ],
#     temperature=1,
#     max_completion_tokens=1024,
#     top_p=1,
#     stream=True,
#     stop=None,
# )

# for chunk in completion:
#     print(chunk.choices[0].delta.content or "", end="")


In [None]:
import os
import re
import json
from typing import List, Dict, Any
from groq import Groq
import sys

In [None]:
# Shared Groq client: every LLM call in this notebook goes through this object.
client = Groq(
    api_key=API_KEY,
)

In [None]:
def find_django_files(directory: str) -> List[str]:
    """Collect the paths of every ``.py`` file found under ``directory``.

    The tree is traversed with ``os.walk``. Hidden directories (names starting
    with a dot) and a fixed set of tooling/virtualenv directories are pruned
    so they are never entered.

    Args:
        directory: Root folder to scan.

    Returns:
        Paths (joined onto ``directory``) of the Python files, in walk order.
    """
    excluded_dirs = {'node_modules', '__pycache__', 'venv', 'env', '.git'}
    python_paths = []

    for current_dir, subdirs, filenames in os.walk(directory):
        # Prune in place: os.walk only descends into what remains in this list
        subdirs[:] = [
            name for name in subdirs
            if not (name.startswith('.') or name in excluded_dirs)
        ]

        python_paths.extend(
            os.path.join(current_dir, filename)
            for filename in filenames
            if filename.endswith('.py')
        )

    return python_paths

In [None]:
def extract_rest_apis_from_file_with_groq(file_path: str) -> List[Dict[str, Any]]:
    """Use Groq to analyze one Python file and list the REST APIs it defines.

    The file is skipped (empty list) when it cannot be read, is longer than
    50,000 characters, or contains none of a small set of API-related
    keywords. Otherwise its full content is sent to the model, the JSON object
    in the reply is parsed, and each reported API is normalized into a record.

    Args:
        file_path: Path of the Python source file to analyze.

    Returns:
        A list of dicts with the keys ``name``, ``file``, ``content``,
        ``http_method`` and ``description``. ``file`` is always ``file_path``;
        ``content`` is the snippet reported by the model, or the whole file
        content when the model did not return a usable string. The list is
        empty on any failure; this function does not raise for read, API or
        JSON errors (they are printed instead).
    """
    max_file_chars = 50000
    relevance_keywords = ['api', 'view', 'rest', 'http', 'request', 'response', 'serializer']

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return []

    # Skip files that are too large or don't seem relevant
    if len(content) > max_file_chars:
        return []

    lowered = content.lower()
    if not any(keyword in lowered for keyword in relevance_keywords):
        return []

    prompt = f"""
        Analyze this Python file and identify all Django REST API functions or classes.
        
        File: {file_path}
        
        Look for:
        1. Functions that handle HTTP methods (GET, POST, PUT, DELETE, PATCH)
        2. Functions that process requests and return responses
        3. Any other REST API endpoints
        
        File content:
        {content}
        
        IMPORTANT: Return ONLY valid JSON with this exact structure:
        {{
            "apis": [
                {{
                    "name": "function_or_class_name",
                    "http_method": "GET|POST|PUT|DELETE|PATCH|UNKNOWN",
                    "description": "Brief description of what this API does",
                    "content": "actual function and implementation"
                }}
            ]
        }}
        
        If no REST APIs are found, return: {{"apis": []}}
        Do not include any text before or after the JSON.
        """

    try:
        completion = client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=0.1,  # Lower temperature for more consistent JSON
            max_tokens=1024,
            top_p=1,
            stream=False
        )
        response = (completion.choices[0].message.content or "").strip()
    except Exception as e:
        print(f"Error calling Groq API for {file_path}: {e}")
        return []

    # Reasoning models may prepend a <think>...</think> block; any braces in it
    # would be swallowed by the greedy JSON regex below, so drop it first.
    answer = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL)

    json_match = re.search(r'\{.*\}', answer, re.DOTALL)
    if not json_match:
        print(f"No JSON found in response for {file_path}")
        print(f"Response: {response}")
        return []

    try:
        result = json.loads(json_match.group(0))
    except json.JSONDecodeError:
        print(f"Failed to parse JSON from response for {file_path}")
        print(f"Response: {response}")
        return []

    raw_apis = result.get('apis', []) if isinstance(result, dict) else []
    if not isinstance(raw_apis, list):
        raw_apis = []

    apis = []
    for api_info in raw_apis:
        if not isinstance(api_info, dict):
            continue
        snippet = api_info.get('content')
        if not isinstance(snippet, str) or not snippet:
            # Fall back to the whole file so later stages still have context
            snippet = content
        apis.append({
            'name': api_info.get('name', 'unknown'),
            'file': file_path,
            'content': snippet,
            'http_method': api_info.get('http_method', 'UNKNOWN'),
            'description': api_info.get('description', ''),
        })

    return apis

In [None]:
def analyze_api_with_groq(api_info: Dict[str, Any]) -> Dict[str, Any]:
    """Use Groq to analyze one API and produce a Dafny specification for it.

    Args:
        api_info: Record describing the API. The keys ``file``, ``name`` and
            ``content`` are read; only the first 2000 characters of
            ``content`` are sent to the model.

    Returns:
        The JSON object returned by the model when one can be parsed from the
        reply (either the reply as a whole, or the outermost ``{...}`` span
        when the model wrapped it in prose or Markdown fences). Otherwise a
        placeholder dict with trivial ``requires true`` / ``ensures true``
        clauses: ``http_method`` is ``"UNKNOWN"`` (plus a ``raw_response``
        key) when the reply was not a JSON object, and ``"ERROR"`` when the
        API call itself failed.

    Raises:
        KeyError: If ``api_info`` lacks ``file``, ``name`` or ``content``.
    """

    def _placeholder(http_method: str, return_type: str) -> Dict[str, Any]:
        # Minimal, always-valid analysis used when no model output is usable
        return {
            "http_method": http_method,
            "parameters": [],
            "return_type": return_type,
            "database_operations": [],
            "dafny_function": {
                "name": api_info['name'],
                "preconditions": ["requires true"],
                "postconditions": ["ensures true"]
            }
        }

    prompt = f"""
    Analyze this Django REST API function and create Dafny preconditions and postconditions.
    
    File: {api_info['file']}
    Function: {api_info['name']}
    
    Code context:
    {api_info['content'][:2000]}  # First 2000 chars for context
    
    Please identify:
    1. What HTTP method this API handles (GET, POST, PUT, DELETE, etc.)
    2. What parameters it accepts
    3. What it returns
    4. What database operations it performs
    5. What business logic it implements
    
    Then create a Dafny function specification with:
    - Preconditions (requires clauses)
    - Postconditions (ensures clauses)
    - Assume database types are already defined in Dafny
    
    Return the result as JSON with this structure:
    {{
        "http_method": "GET|POST|PUT|DELETE|PATCH",
        "parameters": ["param1", "param2"],
        "return_type": "string|int|bool|object",
        "database_operations": ["read", "write", "update", "delete"],
        "dafny_function": {{
            "name": "function_name",
            "preconditions": ["requires clause1", "requires clause2"],
            "postconditions": ["ensures clause1", "ensures clause2"]
        }}
    }}
    """

    try:
        completion = client.chat.completions.create(
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=0.3,
            max_tokens=2048,
            top_p=1,
            stream=False
        )
        response = completion.choices[0].message.content
    except Exception as e:
        print(f"Error calling Groq API: {e}")
        return _placeholder("ERROR", "error")

    text = response or ""

    # First try the reply verbatim, then the outermost {...} span, because
    # models frequently wrap the JSON in ```json fences or explanatory prose.
    candidates = [text]
    without_reasoning = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    json_match = re.search(r'\{.*\}', without_reasoning, re.DOTALL)
    if json_match:
        candidates.append(json_match.group(0))

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Callers attach keys to the result, so only a JSON object is usable
        if isinstance(parsed, dict):
            return parsed

    # If JSON parsing fails, create a basic structure and keep the raw reply
    fallback = _placeholder("UNKNOWN", "unknown")
    fallback["raw_response"] = response
    return fallback

In [None]:
def generate_dafny_content(apis_analysis: List[Dict[str, Any]]) -> str:
    """Render the analyzed APIs as a Dafny source file of method stubs.

    One ``method`` is emitted per entry that has a dict under
    ``dafny_function``; other entries are skipped. The ``requires`` and
    ``ensures`` clauses are placed between the signature and the body, and the
    body assigns a placeholder to the ``result`` out-parameter.

    Args:
        apis_analysis: Analysis dicts. Keys read: ``dafny_function`` (with
            ``name``, ``preconditions``, ``postconditions``), ``http_method``,
            ``file`` and, as a fallback source for the file path,
            ``original_api['file']``.

    Returns:
        The Dafny source text. With no usable entries this is just the
        two-line header comment.
    """

    def _clauses(raw: Any, keyword: str) -> List[str]:
        # Accept a single string as well as a list; make sure every clause
        # carries its Dafny keyword so the emitted specification parses.
        if isinstance(raw, str):
            raw = [raw]
        if not isinstance(raw, (list, tuple)):
            return []
        rendered = []
        for clause in raw:
            text = str(clause).strip()
            if not text:
                continue
            if not text.startswith(keyword):
                text = f"{keyword} {text}"
            rendered.append(f"    {text}")
        return rendered

    lines = [
        "// Generated Dafny functions from Django REST APIs",
        "// Database types are assumed to be already defined",
        "",
    ]

    for api in apis_analysis:
        func = api.get('dafny_function')
        if not isinstance(func, dict):
            continue

        name = func.get('name') or 'unknown'

        # The analysis dict itself has no 'file' key; the notebook stores the
        # originating record (which does) under 'original_api'.
        original = api.get('original_api')
        source_file = api.get('file')
        if not source_file and isinstance(original, dict):
            source_file = original.get('file')
        source_file = source_file or 'unknown'

        lines.append(f"// API: {api.get('http_method', 'UNKNOWN')} - {name}")
        lines.append(f"// File: {source_file}")
        # `method` (not `function`) is the Dafny declaration that takes a
        # `returns (...)` out-parameter and an imperative body.
        lines.append(f"method {name}(")
        lines.append("    // Parameters would be defined here based on API analysis")
        lines.append(") returns (result: string)")
        lines.append("    // Preconditions")
        lines.extend(_clauses(func.get('preconditions'), 'requires'))
        lines.append("    // Postconditions")
        lines.extend(_clauses(func.get('postconditions'), 'ensures'))
        lines.append("{")
        lines.append("    // Implementation would go here")
        lines.append("    // For now, just return a placeholder")
        lines.append('    result := "placeholder";')
        lines.append("}")
        lines.append("")

    return "\n".join(lines)

In [None]:
# Root of the Django project to analyze. Override with the DJANGO_PROJECT_PATH
# environment variable; the default is the original author's local checkout
# and will not exist on other machines.
django_project_path = os.environ.get("DJANGO_PROJECT_PATH", "/Users/ryanmarr/Documents/saleor")

In [None]:
# Main execution for notebook, step 1:
# scan the Django project and collect candidate REST APIs file by file.

print(f"Analyzing Django project at: {django_project_path}")

# Every Python source file under the project root
django_files = find_django_files(django_project_path)
print(f"Found {len(django_files)} Python files")

# One Groq request per file; pool whatever each file yields
all_apis = []
for file_path in tqdm(django_files, desc="Analyzing files"):
    apis = extract_rest_apis_from_file_with_groq(file_path)
    all_apis += apis

In [None]:
# Main execution for notebook, step 2:
# analyze each collected API, render the Dafny stubs and save both artifacts.
print(f"Found {len(all_apis)} potential REST APIs")

# Analyze each API with Groq
apis_analysis = []
for i, api in enumerate(all_apis, start=1):
    print(f"Analyzing API {i}/{len(all_apis)}: {api['name']}")
    analysis = analyze_api_with_groq(api)
    # Keep the source record next to its analysis for traceability
    analysis['original_api'] = api
    apis_analysis.append(analysis)

# Generate Dafny content
dafny_content = generate_dafny_content(apis_analysis)

# Save to files (explicit encoding so the output does not depend on the
# platform default)
with open('generated_dafny_functions.dfy', 'w', encoding='utf-8') as f:
    f.write(dafny_content)

with open('api_analysis.json', 'w', encoding='utf-8') as f:
    json.dump(apis_analysis, f, indent=2)

print("Analysis complete!")
print("Generated files:")
print("- generated_dafny_functions.dfy")
print("- api_analysis.json")

# No `return` here: this is top-level cell code, where `return` is a
# SyntaxError. The results remain available as the notebook variables
# `apis_analysis` and `dafny_content`.