In [None]:
import os
from groq import Groq
import re
from typing import List, Dict, Any
import yaml
from tqdm import tqdm
import json
from openai import OpenAI
import tiktoken
from typing import Dict, Set, Optional, Tuple
import ast
from dataset_utils import *

In [None]:
# Root of the Django project to scan. Set the DJANGO_PROJECT_PATH environment
# variable to point at another checkout instead of editing this cell; the
# original hard-coded location remains the default.
DJANGO_PROJECT_PATH = os.environ.get("DJANGO_PROJECT_PATH", "/Users/ryanmarr/Documents/sentry")

# Arguments forwarded unchanged to client.chat.completions.create(...) below.
MODEL_NAME = "gpt-4o"
TEMPERATURE = 0.1
TOP_P = 1
STREAM = False
MAX_TOKENS = 16384

# NOTE(review): not referenced by any cell shown here; presumably the token
# count of the fixed prompt text - confirm or remove.
PROMPT_TOKENS = 251

In [None]:
# Scratch check: run expand_function_with_imports_recursive on function "a" of the
# dependency_test fixture and print whatever it returns.
# NOTE(review): the helper is not defined in this notebook (presumably it comes from
# `from dataset_utils import *`), and the path is an absolute local path - confirm both.
file_path = "/Users/ryanmarr/Documents/duct_env/duct/dependency_test/a.py"
content = expand_function_with_imports_recursive(file_path, "a")
print(content)

In [None]:
# OpenAI client setup (the Groq import above is not used here).
# NOTE(review): no cell above defines API_KEY; presumably it arrives through
# `from dataset_utils import *` - confirm, and avoid committing the key itself.
client = OpenAI(api_key=API_KEY)

In [None]:
def _parse_rest_api_response(response: str, file_path: str) -> List[Dict[str, Any]]:
    """Decode the model reply and return its ``apis`` entries tagged with ``file_path``.

    The whole reply is parsed as JSON first. If that fails, the text between the
    first ``{`` and the last ``}`` is parsed instead, which tolerates prose or
    Markdown fences around the JSON. A reply that does not yield a dict holding
    a list under ``"apis"`` produces an empty list rather than ``None``.
    """
    try:
        result = json.loads(response)
    except json.JSONDecodeError:
        start = response.find('{')
        end = response.rfind('}')
        if start == -1 or end <= start:
            print(f"No JSON structure found in response for {file_path}")
            print(f"Response: {response}")
            return []
        json_str = response[start:end + 1]
        try:
            result = json.loads(json_str)
        except json.JSONDecodeError:
            print(f"Failed to parse extracted JSON from response for {file_path}")
            print(f"Extracted JSON string: {json_str}")
            return []

    apis = result.get('apis') if isinstance(result, dict) else None
    if not isinstance(apis, list):
        # Valid JSON without a usable "apis" list: report "nothing found" so the
        # caller can always extend() the result.
        return []

    tagged = []
    for api in apis:
        # Entries that are not objects cannot carry a file_path; drop them
        # instead of discarding the whole file's results.
        if isinstance(api, dict):
            api['file_path'] = file_path
            tagged.append(api)
    return tagged


def extract_rest_apis_from_file_with_openai(file_path: str) -> List[Dict[str, Any]]:
    """Ask the OpenAI chat model which REST API endpoints a Python file defines.

    Args:
        file_path: Path of the file to analyze. Only paths ending in ``.py``
            are sent to the model.

    Returns:
        A list of dicts as returned by the model under ``"apis"``, each with an
        added ``"file_path"`` key. Always a list: empty for non-Python paths,
        read failures, API errors, and replies without a usable ``"apis"`` list.
    """
    try:
        if not file_path.endswith('.py'):
            # Previously this path reached the prompt with `content` unbound and
            # was reported as a read error; skip it explicitly instead.
            print(f"Skipping non-Python file {file_path}")
            return []

        content = python_file_to_string(file_path)

        prompt = f"""
        Analyze this Python file and identify all Django REST API functions or classes. Only return the functions that are REST API endpoints. 
        Only return entire implemenation of the function including function signature and content. 
        
        Look for:
        1. Functions that handle HTTP methods (GET, POST, PUT, DELETE, PATCH)
        2. Functions that process requests and return responses
        3. Any other REST API endpoints
        
        File content:
        {content}
        
        IMPORTANT: Return ONLY valid JSON with this exact structure:
        {{
            "apis": [
                {{
                    "name": "function name",
                    "http_method": "GET|POST|PUT|DELETE|PATCH|UNKNOWN",
                    "description": "Brief description of what this API does",
                    "django_function_name": include name of the function that is the api,
                }}
            ]
        }}
        
        If no REST APIs are found, return: {{"apis": []}}
        Do not include any text before or after the JSON.
        """
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return []

    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            top_p=TOP_P,
            stream=STREAM,
        )
        response = completion.choices[0].message.content.strip()
        return _parse_rest_api_response(response, file_path)
    except Exception as e:
        print(f"Error calling OpenAI API for {file_path}: {e}")
        return []

In [None]:
"""Analyze Django project and generate Dafny specifications"""

# Collect the files to analyze under DJANGO_PROJECT_PATH and report how many were found.
# NOTE(review): the message says "Python files", but whether find_django_files
# filters to .py is not visible in this notebook - confirm in dataset_utils.
django_files = find_django_files(DJANGO_PROJECT_PATH)
print(f"Found {len(django_files)} Python files")

In [None]:
# Extract REST APIs using OpenAI: one model call per discovered file, with the
# per-file results flattened into a single list.
all_apis = []
for file_path in tqdm(django_files, desc="Analyzing files"):
    apis = extract_rest_apis_from_file_with_openai(file_path)
    # The extractor can return None (valid JSON without an "apis" key);
    # treat that as "no APIs" instead of letting extend() raise TypeError.
    all_apis.extend(apis or [])

In [None]:
# Show the endpoint records collected by the extraction loop.
all_apis

In [None]:
# Scratch check: expand the `dispatch` function from sentry's sudo views and print the result.
# NOTE(review): absolute local path; this also overwrites the module-level
# `file_path` and `content` left by earlier cells.
file_path =  "/Users/ryanmarr/Documents/sentry/src/sudo/views.py"
django_function_name = "dispatch"
content = expand_function_with_imports_recursive(file_path, django_function_name)
print(content)

In [None]:
# Display all_apis again (same inspection as the earlier cell).
all_apis

In [None]:
# Repeat of the earlier dependency_test scratch check: expand function "a" and print it.
# NOTE(review): identical to the cell near the top of the notebook - consider keeping only one.
file_path = "/Users/ryanmarr/Documents/duct_env/duct/dependency_test/a.py"
content = expand_function_with_imports_recursive(file_path, "a")
print(content)

In [None]:
def extract_dafny_func_specs(apis):
    """Request a Dafny function specification from the model for each API record.

    Args:
        apis: Iterable of dicts carrying ``"file_path"`` and
            ``"django_function_name"`` (as produced by the REST API extraction
            step). ``None`` is treated as empty.

    Returns:
        A list with one entry per input record, in order. Each entry is the
        list found under ``"apis"`` in the model reply, with ``"file_path"``
        added to every spec dict. The entry is an empty list when the record is
        malformed, the source expansion fails, the API call fails, or the reply
        has no usable ``"apis"`` list, so one bad record no longer aborts the
        batch or leaves ``None`` in the result.
    """
    extracted_apis = []

    def parse_specs(response, file_path):
        """Decode the reply; fall back to the outermost {...} span if needed."""
        try:
            result = json.loads(response)
        except json.JSONDecodeError:
            start = response.find('{')
            end = response.rfind('}')
            if start == -1 or end <= start:
                print(f"No JSON structure found in response for {file_path}")
                print(f"Response: {response}")
                return []
            json_str = response[start:end + 1]
            try:
                result = json.loads(json_str)
            except json.JSONDecodeError:
                print(f"Failed to parse extracted JSON from response for {file_path}")
                print(f"Extracted JSON string: {json_str}")
                return []

        specs = result.get('apis') if isinstance(result, dict) else None
        if not isinstance(specs, list):
            return []
        # Tag every spec the same way regardless of which parse path succeeded
        # (the fallback path previously skipped this).
        tagged = []
        for spec in specs:
            if isinstance(spec, dict):
                spec['file_path'] = file_path
                tagged.append(spec)
        return tagged

    def extract_dafny_function_spec(api):
        """Build the prompt for one record, call the model, return its specs."""
        try:
            file_path = api['file_path']
            django_function_name = api['django_function_name']
            content = expand_function_with_imports_recursive(file_path, django_function_name)
        except Exception as e:
            # Records come from model output, so keys may be missing and the
            # named function may not exist in the file.
            print(f"Error preparing Dafny prompt for {api}: {e}")
            return []
        prompt = f"""
            For each function in the following code, create a Dafny function specification with preconditions and postconditions assume db schema exists as a dafny type.
            Only return the Dafny function name and specification.
            Function name : {django_function_name}
            Function content: {content}
            Return ONLY valid JSON with this exact structure:
            {{
                "apis": [
                    {{
                        "description": "Brief description of what this API does",
                        "dafny_function_spec": Convert the function to a Dafny function specification with preconditions and postconditions assume db schema exists as a dafny type.
                    }}
                ]
            }}
        """
        try:
            completion = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                temperature=TEMPERATURE,
                max_tokens=MAX_TOKENS,
                top_p=TOP_P,
                stream=STREAM,
            )
            response = completion.choices[0].message.content.strip()
            return parse_specs(response, file_path)
        except Exception as e:
            print(f"Error calling OpenAI API for {file_path}: {e}")
            return []

    for api in apis or []:
        extracted_apis.append(extract_dafny_function_spec(api))
    return extracted_apis

# Generate Dafny specs for every extracted endpoint (one model call per entry
# in all_apis) and print the raw nested result.
dafny_apis = extract_dafny_func_specs(all_apis)
print(dafny_apis)

In [None]:
# Number of results; extract_dafny_func_specs appends one entry per input API.
len(dafny_apis)

In [None]:
# Peek at the result for the first API; raises IndexError if dafny_apis is empty.
dafny_apis[0]

In [None]:
# Print the first spec returned for each API, separated by a rule.
for api in dafny_apis:
    # A failed or empty extraction leaves an empty list (or None) here;
    # indexing [0] on it would raise, so skip it.
    if not api:
        continue
    print(api[0]['description'])
    # print() already renders embedded newlines, so the spec is emitted once
    # rather than a second time line by line.
    print(api[0]['dafny_function_spec'])
    print('--------------------------------')

In [None]:
def break_string_into_lines(text: str) -> list:
    r"""Split ``text`` into pieces at every ``\n`` character.

    Args:
        text (str): String to split; may contain ``\n`` characters.

    Returns:
        list: The pieces between newlines, in order. Falsy input (``None`` or
        the empty string) gives an empty list. Only ``'\n'`` is a separator,
        so a trailing newline produces a final empty string and ``\r``
        characters stay attached to their line.
    """
    return text.split('\n') if text else []

In [None]:
# Render the collected endpoint records.
# NOTE(review): display_full_api_info is not defined in this notebook; presumably
# it comes from `from dataset_utils import *` - confirm.
display_full_api_info(all_apis)