In [1]:
import os
from groq import Groq
import re
from typing import List, Dict, Any
import yaml

In [2]:
# api_key = os.getenv("GROQ_API_KEY", "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")

# client = Groq(api_key=api_key)

# chat_completion = client.chat.completions.create(
#     model="meta-llama/llama-4-scout-17b-16e-instruct",
#     messages=[
#         {
#             "role": "user",
#             "content": "Explain the importance of fast language models"
#         }
#     ],
# )

# print(chat_completion.choices[0].message.content)


In [3]:
API_KEY = "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"
OUTPUT_FILE = "output.yml"
DIRECTORY = "/Users/ryanmarr/Documents/sentry/src/sentry/models"

In [4]:
class DjangoModelToYAMLConverter:
    def __init__(self, api_key: str = None):
        """Initialize the converter with Groq API key."""
        if api_key is None:
            api_key = os.getenv("GROQ_API_KEY")
        
        if not api_key:
            raise ValueError("Groq API key is required. Set GROQ_API_KEY environment variable or pass it to constructor.")
        
        self.client = Groq(api_key=api_key)
    
    def find_django_model_files(self, directory: str) -> List[str]:
        """
        Perform DFS search for Python files that contain Django models.
        
        Args:
            directory: Root directory to search
            
        Returns:
            List of file paths containing Django models
        """
        django_model_files = []
        
        for root, dirs, files in os.walk(directory):
            for file in files:
                if file.endswith('.py'):
                    file_path = os.path.join(root, file)
                    if self._contains_django_models(file_path):
                        django_model_files.append(file_path)
        
        return django_model_files
    
    def _contains_django_models(self, file_path: str) -> bool:
        """Check if a Python file contains Django model definitions."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
                
            # Look for Django model patterns
            model_patterns = [
                r'class\s+\w+\(models\.Model\)',
                r'class\s+\w+\(models\.Model\):',
                r'from\s+django\.db\s+import\s+models',
                r'import\s+models',
            ]
            
            for pattern in model_patterns:
                if re.search(pattern, content, re.IGNORECASE):
                    return True
                    
        except Exception as e:
            print(f"Error reading file {file_path}: {e}")
            
        return False
    
    def extract_models_from_file(self, file_path: str) -> str:
        """Extract Django model code from a file."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            
            # Find model classes
            model_pattern = r'class\s+(\w+)\s*\(models\.Model\):.*?(?=class\s+\w+\(|$)'
            models = re.findall(model_pattern, content, re.DOTALL | re.IGNORECASE)
            
            if models:
                return content
            else:
                return ""
                
        except Exception as e:
            print(f"Error extracting models from {file_path}: {e}")
            return ""
    
    def convert_models_to_yaml(self, model_code: str, file_path: str) -> Dict[str, Any]:
        """
        Use Groq to convert Django models to YAML format.
        
        Args:
            model_code: Django model code
            file_path: Path to the model file for context
            
        Returns:
            Dictionary representing the YAML structure
        """
        prompt = f"""
        Convert the following Django models to YAML format. 
        The YAML should follow this exact structure:
        
        tables:
          - name: table_name
            columns:
              - name: column_name
                type: data_type
              - name: another_column
                type: data_type
        
        Rules:
        1. Convert Django field types to appropriate YAML types:
           - CharField, TextField -> string
           - IntegerField, BigIntegerField -> int
           - FloatField, DecimalField -> float
           - BooleanField -> boolean
           - DateField, DateTimeField -> string
           - ForeignKey -> int (for the foreign key ID)
        2. Use the model class name as the table name
        3. Convert field names to column names
        4. Skip Meta classes and methods
        5. Handle relationships appropriately
        
        Django models from file {file_path}:
        
        {model_code}
        
        Return only the YAML structure, no explanations.
        """
        
        try:
            response = self.client.chat.completions.create(
                model="meta-llama/llama-4-scout-17b-16e-instruct",
                messages=[
                    {"role": "user", "content": prompt}
                ],
                temperature=0.1
            )
            
            yaml_content = response.choices[0].message.content.strip()
            
            # Try to parse the YAML response
            try:
                return yaml.safe_load(yaml_content)
            except yaml.YAMLError as e:
                print(f"Error parsing YAML response: {e}")
                print(f"Raw response: {yaml_content}")
                return {"tables": []}
                
        except Exception as e:
            print(f"Error calling Groq API: {e}")
            return {"tables": []}
    
    def process_directory(self, directory: str, output_file: str = None) -> Dict[str, Any]:
        """
        Process all Django model files in a directory and convert to YAML.
        
        Args:
            directory: Directory containing Django models
            output_file: Optional file to save the YAML output
            
        Returns:
            Combined YAML structure
        """
        print(f"Searching for Django model files in: {directory}")
        
        # Find all Django model files
        model_files = self.find_django_model_files(directory)
        print(f"Found {len(model_files)} Django model files:")
        
        all_tables = []
        
        for file_path in model_files:
            print(f"Processing: {file_path}")
            
            # Extract model code
            model_code = self.extract_models_from_file(file_path)
            
            if model_code:
                # Convert to YAML
                yaml_structure = self.convert_models_to_yaml(model_code, file_path)
                
                if yaml_structure and "tables" in yaml_structure:
                    all_tables.extend(yaml_structure["tables"])
                    print(f"  - Converted {len(yaml_structure['tables'])} tables")
                else:
                    print(f"  - No tables found or conversion failed")
            else:
                print(f"  - No models found in file")
        
        result = {"tables": all_tables}
        
        # Save to file if specified
        if output_file:
            with open(output_file, 'w') as f:
                yaml.dump(result, f, default_flow_style=False, sort_keys=False)
            print(f"YAML output saved to: {output_file}")
        
        return result



In [7]:
"""Main function to run the converter."""
# import argparse

# parser = argparse.ArgumentParser(description='Convert Django models to YAML format using Groq')
# parser.add_argument('directory', help='Directory containing Django model files')
# parser.add_argument('--output', '-o', help='Output YAML file path', default="output.yml")
# parser.add_argument('--api-key', help='Groq API key (or set GROQ_API_KEY env var)', default="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")

# args = parser.parse_args()
converter = DjangoModelToYAMLConverter(api_key=API_KEY)
result = converter.process_directory(DIRECTORY, OUTPUT_FILE)

if not OUTPUT_FILE:
    # Print to stdout if no output file specified
    print("\n" + "="*50)
    print("CONVERTED YAML:")
    print("="*50)
    print(yaml.dump(result, default_flow_style=False, sort_keys=False))

Searching for Django model files in: /Users/ryanmarr/Documents/sentry/src/sentry/models
Found 108 Django model files:
Processing: /Users/ryanmarr/Documents/sentry/src/sentry/models/groupsearchviewstarred.py
  - No models found in file
Processing: /Users/ryanmarr/Documents/sentry/src/sentry/models/groupresolution.py
  - No models found in file
Processing: /Users/ryanmarr/Documents/sentry/src/sentry/models/grouphistory.py
  - No models found in file
Processing: /Users/ryanmarr/Documents/sentry/src/sentry/models/releaseactivity.py
  - No models found in file
Processing: /Users/ryanmarr/Documents/sentry/src/sentry/models/orgauthtoken.py
  - No models found in file
Processing: /Users/ryanmarr/Documents/sentry/src/sentry/models/apiscopes.py
Error parsing YAML response: while scanning for the next token
found character '`' that cannot start any token
  in "<unicode string>", line 1, column 1:
    ```
    ^
Raw response: ```
tables:
- name: HasApiScopes
  columns:
  - name: id
    type: int
  

In [None]:
def extract_models_from_file(self, file_path: str) -> str:
        """Extract Django model code from a file."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            
            # Find model classes
            model_pattern = r'class\s+(\w+)\s*\(models\.Model\):.*?(?=class\s+\w+\(|$)'
            models = re.findall(model_pattern, content, re.DOTALL | re.IGNORECASE)
            
            if models:
                return content
            else:
                return ""
                
        except Exception as e:
            print(f"Error extracting models from {file_path}: {e}")
            return ""

In [None]:
extract_models_from_file()