In [33]:
%pip install langchain_community langchain-openai python-dotenv tqdm



Note: you may need to restart the kernel to use updated packages.


In [34]:
# setup
import os

# Location of the Flutter/Dart project's `lib` folder that will be annotated
repo_directory = "/Volumes/Mac-External/Development/test-twplatform/lib"

# Report up front whether the configured folder is actually present on disk
if not os.path.exists(repo_directory):
    print(f"Directory does not exist: {repo_directory}")
else:
    print(f"Directory exists: {repo_directory}")

import os
from dotenv import load_dotenv

# Load the variables defined in the .env file into the process environment.
# The call's return value is the last expression of the cell, so it is shown as the cell output.
# NOTE(review): presumably this is where the OpenAI API key used by ChatOpenAI comes from — confirm.
load_dotenv()

Directory exists: /Volumes/Mac-External/Development/test-twplatform/lib


True

In [35]:

# Line-comment prefix to use for each supported file extension.
# Keys are lower-case extensions (including the leading dot).
COMMENT_STYLES = {
    '.dart': '//',
    '.py': '#',
    '.yaml': '#',
    '.yml': '#',
    '.json': '//',     # JSON typically doesn't support comments, but we use // for this purpose
    '.html': '<!--',   # HTML uses <!-- -->
    '.css': '/*',      # CSS uses /* */
    '.js': '//',       # JavaScript uses //
    '.ts': '//',       # TypeScript uses //
    '.md': '```',      # Markdown for code blocks (if needed)
    '.xml': '<!--',    # XML uses <!-- -->
    '.cpp': '//',      # C++ style
    '.java': '//',     # Java style
}

def get_comment_style(file_path):
    """Determine the comment prefix for a file based on its extension.

    Args:
        file_path: Path or file name; only the final extension is inspected.

    Returns:
        The prefix registered in COMMENT_STYLES for the extension, or '#'
        when the extension is missing or unknown.
    """
    _, ext = os.path.splitext(file_path)
    # Extensions are matched case-insensitively so 'MAIN.DART' resolves like 'main.dart'
    return COMMENT_STYLES.get(ext.lower(), '#')  # Default to '#' if extension not found


In [36]:
# Prompt template for generating the metadata header.
# Placeholders: file_name, module_name, date_created, last_updated, existing_comments,
# app_context and code_content. The last two carry the project context and the actual
# source of the file, so the model can describe the file instead of guessing from its name.
metadata_prompt = """
You are an AI assistant helping with code documentation for the TradeWork platform. TradeWork is a UK-based property management platform built with Firebase as the backend and Flutter as the Web App frontend. The platform connects landlords, developers, contractors, and subcontractors, supporting project and tender management, vendor selection, and payment processing.

The following information is provided to you:
- File: {file_name}
- Module: {module_name}
- Date Created: {date_created}
- Last Updated: {last_updated}
- Existing Comments: {existing_comments}

Platform and project context:
{app_context}

File content:
{code_content}

Based on the file content and the provided information, generate a detailed metadata block. If any field cannot be determined from the file content, infer the best possible answer based on the context of the platform.

The format should be:

# File: {file_name}
# Module: {module_name}
# Description: A brief description of what the file does based on its content.
# Dependencies: Any dependencies or related files (e.g., Firebase, chat, payment integration, AI chatbot).
# Components: The major components or classes defined in the file, such as widgets, BLoCs, models.
# Role: Whether the file is role-specific (e.g., Landlord, Developer, Contractor, Subcontractor).
# Author: Piers
# Date Created: {date_created}
# Last Updated: {last_updated}
# Related Files: Any related files (BLoC, models, services, etc.).
# Key: Keywords like bloc, widget, model, firebase, payment, chat, etc.
"""

In [37]:
# Static description of the TradeWork platform (purpose, tech stack, roles and folder layout).
# process_file passes this text, followed by the contents of the core files, to the chain
# under the "app_context" key.
app_context = """
TradeWork is a UK-based platform that aggregates resources and professionals within the property market. It allows landlords, developers, and contractors to manage projects, jobs, and tasks. The platform is designed to streamline property-related workflows by providing access to a wide network of skilled professionals and data resources.

Key details:
- Backend: Firebase (authentication, Firestore, functions for backend logic).
- Frontend: Flutter Web App.
- Role-based workflows: Landlord, Developer, Contractor, Subcontractor.
- Core functionalities: Project creation, tender management, vendor selection, chat, peer ratings, invoice management, payment integration, and AI assistance.

Folder structure:
1. Core Shared Components (twcore/):
   - application/: Manages common logic and state using BLoCs (e.g., authentication, chat).
   - models/: Core data models (e.g., user, auth, company) shared across all users.
   - services/: Shared services for external integrations, AI, payment, and utilities.
   - widgets/: Reusable UI components used throughout the app.

2. Shared Features (shared_features/):
   - Shared features like AI, project management, and document viewing follow the structure:
     - application/: Contains BLoCs managing feature-specific state.
     - models/: Feature-specific models.
     - ui/: Desktop and mobile-specific UI components.
     - services/: Services specific to each feature.
     - widgets/: Reusable widgets for feature-related tasks.

3. User-Specific Features (users/):
   - Each user type (e.g., landlord, contractor, developer) has a similar internal structure:
     - Top-Level: Contains user-specific application/, models/, ui/, services/, and widgets/ to handle the overall user interface and core tasks.
     - Features: Within each user, specific features (e.g., property management for landlords or bid management for subcontractors) follow the same folder structure:
       - application/: BLoCs for managing state related to the feature.
       - models/: Feature-specific models.
       - ui/: Desktop/mobile-specific UI components for the feature.
       - services/: Services for handling feature-specific logic.
       - widgets/: Reusable widgets for feature-related tasks.
"""

In [38]:
from datetime import datetime


def get_file_metadata(file_path):
    """Return the creation and last-modification dates of a file.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A ``(date_created, last_updated)`` tuple of ``'YYYY-MM-DD'`` strings
        expressed in local time.

    Raises:
        OSError: If the file cannot be stat'ed.
    """
    file_stats = os.stat(file_path)
    # On Unix st_ctime is the last inode/metadata change, not the creation time.
    # Use st_birthtime where the platform exposes it (e.g. macOS) and fall back
    # to st_ctime elsewhere so the function keeps working everywhere.
    created_timestamp = getattr(file_stats, 'st_birthtime', file_stats.st_ctime)
    date_created = datetime.fromtimestamp(created_timestamp).strftime('%Y-%m-%d')
    last_updated = datetime.fromtimestamp(file_stats.st_mtime).strftime('%Y-%m-%d')
    return date_created, last_updated

# Map a file path to the high-level area of the code base it belongs to
def detect_folder_context(file_path):
    """Classify a path as core shared, shared feature or user-specific code.

    The markers are tested in order as plain substrings of ``file_path``;
    the label of the first one found is returned, otherwise "Unknown".
    """
    area_markers = (
        ('twcore', "Core Shared Components"),
        ('shared_features', "Shared Features"),
        ('users', "User-Specific Features"),
    )
    return next(
        (label for marker, label in area_markers if marker in file_path),
        "Unknown",
    )

def extract_top_comments(file_content, comment_style):
    """Split a file into its leading comment block and the remaining code.

    Args:
        file_content: Full text of the file.
        comment_style: Line-comment prefix (e.g. '//' or '#').

    Returns:
        A ``(comments, rest)`` tuple. ``comments`` holds the consecutive
        comment lines found at the very top of the file (stripped and joined
        with newlines). ``rest`` is everything after them; a trailing newline
        present in the original content is kept so rewritten files still end
        with one.
    """
    lines = file_content.splitlines()  # split once and reuse for both halves

    comment_lines = []
    for raw_line in lines:
        stripped = raw_line.strip()
        if not stripped.startswith(comment_style):
            break  # Stop when we hit the first non-comment line
        comment_lines.append(stripped)

    remaining_code = "\n".join(lines[len(comment_lines):])
    # splitlines()/join drops the final line terminator; restore it when there is code left
    if remaining_code and file_content.endswith(("\n", "\r")):
        remaining_code += "\n"

    return "\n".join(comment_lines), remaining_code


def extract_imports(file_content):
    """Collect every line of a Dart file that begins with ``import``.

    Leading whitespace is ignored for the check, but matching lines are
    returned exactly as they appear in ``file_content``.
    """
    return [
        source_line
        for source_line in file_content.splitlines()
        if source_line.lstrip().startswith('import')
    ]

In [44]:
import re
def extract_import_path(import_statement, current_file_path, lib_directory,
                        package_name='tradework_platform'):
    """Resolve a Dart import statement to a local file path.

    Handles relative imports (with or without a leading '.') and
    ``package:<package_name>/`` imports that map onto ``lib_directory``.
    Imports carrying ``as`` / ``show`` / ``hide`` clauses are supported.

    Args:
        import_statement: A single source line, e.g. ``import '../a.dart' as a;``.
        current_file_path: Path of the file containing the import; relative
            imports are resolved against its directory.
        lib_directory: The project's ``lib`` folder, used for package imports.
        package_name: Name of the project's own package. Defaults to
            'tradework_platform'.

    Returns:
        The path of the imported file if it exists on disk, otherwise None
        (not an import line, an SDK/third-party import, or a missing file).
    """
    # The closing ';' is deliberately not required right after the quote so that
    # "import 'x.dart' as x;" and "import 'x.dart' show Y;" are matched too.
    match = re.search(r"import\s+['\"]([^'\"]+)['\"]", import_statement)
    if not match:
        return None
    import_path = match.group(1)

    own_package_prefix = f'package:{package_name}/'
    if import_path.startswith(own_package_prefix):
        # Strip only the leading prefix and map the remainder onto lib_directory
        candidate = os.path.join(lib_directory, import_path[len(own_package_prefix):])
    elif re.match(r"[A-Za-z][A-Za-z0-9+.\-]*:", import_path):
        # Any other URI scheme (package:flutter/..., dart:async, ...) is not a local file
        return None
    else:
        # Dart resolves scheme-less URIs relative to the importing file,
        # whether or not they start with './' or '../'
        base_dir = os.path.dirname(current_file_path)
        candidate = os.path.abspath(os.path.join(base_dir, import_path))
        # Ensure the path points to a Dart file
        if not candidate.endswith('.dart'):
            candidate += '.dart'

    return candidate if os.path.isfile(candidate) else None

In [45]:
def _trim_generated_metadata(raw_text):
    """Cut an LLM reply down to the '# File:' ... '# Key: <keywords>' block."""
    start = raw_text.find("# File:")
    if start == -1:
        # No recognisable header in the reply: keep it as returned
        return raw_text.strip()

    key_pos = raw_text.rfind("# Key:")
    if key_pos < start:
        # '# Key:' missing (or before the header): keep everything after the header
        end = len(raw_text)
    else:
        # Keep the whole '# Key:' line, including the keywords that follow the marker
        line_break = raw_text.find("\n", key_pos)
        end = len(raw_text) if line_break == -1 else line_break
    return raw_text[start:end].strip()


def process_file(file_path, memory, core_files, processed_files, chain, app_context, lib_directory):
    """Generate a metadata header for one file, write it, then follow its imports.

    The file's leading comment block is removed and replaced by the generated
    metadata (each line prefixed with the file's comment style). Local files
    imported by this file are then processed recursively.

    Args:
        file_path: File to annotate.
        memory: Conversation memory object; accepted for interface
            compatibility, not used by this function.
        core_files: Paths whose contents are appended to the app context.
        processed_files: Set of already-annotated paths; updated in place.
        chain: Object exposing ``invoke(dict)`` and returning a mapping with a
            'text' entry (the LLM chain).
        app_context: Static description of the application.
        lib_directory: Project ``lib`` folder, used to resolve package imports.

    Returns:
        None. Failures are reported with ``print`` and the file is left
        untouched, except for a failure in the middle of the final write.
    """
    # Skip generated files (.freezed.dart / .g.dart) and anything already processed
    if file_path.endswith(('.freezed.dart', '.g.dart')) or file_path in processed_files:
        print(f"Skipping file: {file_path}")
        return

    print(f"Processing file: {file_path}")

    try:
        # Read the file content
        with open(file_path, 'r', encoding='utf-8') as file:
            code_content = file.read()
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return

    # Get file metadata (creation and last updated dates)
    date_created, last_updated = get_file_metadata(file_path)

    # Get folder context to add to the metadata
    folder_context = detect_folder_context(file_path)

    # Extract the filename
    file_name = os.path.basename(file_path)

    # Get the appropriate comment style based on the file type
    comment_style = get_comment_style(file_path)

    # Extract existing comments from the top of the file
    existing_comments, code_content_without_comments = extract_top_comments(code_content, comment_style)

    # Prepare context by including core files content
    context_parts = []
    for core_file in core_files:
        if os.path.exists(core_file):
            try:
                with open(core_file, 'r', encoding='utf-8') as cf:
                    context_parts.append(cf.read() + "\n")
            except Exception as e:
                print(f"Error reading core file {core_file}: {e}")
    context_files_content = "".join(context_parts)

    # Use the LLM to generate the metadata
    try:
        result = chain.invoke({
            "code_content": code_content_without_comments,
            "app_context": app_context + "\n" + context_files_content,
            "last_updated": last_updated,
            "date_created": date_created,
            "file_name": file_name,
            "module_name": folder_context,
            "existing_comments": existing_comments
        })
    except Exception as e:
        print(f"Error invoking LLM for file {file_path}: {e}")
        return

    # Extract the generated metadata and drop any text around the metadata block
    generated_metadata = _trim_generated_metadata(result.get('text', ''))

    # Never rewrite the file when nothing usable came back: doing so would only
    # strip the existing header comments without replacing them.
    if not generated_metadata:
        print(f"No metadata generated for file {file_path}; leaving it unchanged")
        return

    # Add comment style to each line of the metadata
    metadata_comment = "\n".join(f"{comment_style} {line}" for line in generated_metadata.splitlines())

    # Insert generated metadata at the start of the file
    try:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(metadata_comment + "\n" + code_content_without_comments)
    except Exception as e:
        print(f"Error writing metadata to file {file_path}: {e}")
        return

    # Mark as processed BEFORE following imports so circular imports terminate
    processed_files.add(file_path)

    # Extract imports and process dependencies
    for import_line in extract_imports(code_content_without_comments):
        # Extract the path from the import and process the imported file
        import_path = extract_import_path(import_line, file_path, lib_directory)
        if import_path and import_path not in processed_files:
            process_file(import_path, memory, core_files, processed_files, chain, app_context, lib_directory)

In [46]:
def process_repository(directory, memory, chain, app_context, chunk_size=20):
    """Annotate every hand-written Dart file under ``directory`` with metadata.

    Core files (main.dart, the service locator and the routes file) are
    processed first, together with whatever they import; the remaining
    ``.dart`` files are then processed in case-insensitive path order.
    Generated ``.freezed.dart`` / ``.g.dart`` files are ignored.

    Args:
        directory: The project's ``lib`` folder.
        memory: Passed through to ``process_file``.
        chain: LLM chain passed through to ``process_file``.
        app_context: Application description passed through to ``process_file``.
        chunk_size: Currently unused; kept for interface compatibility.

    Returns:
        None. Progress is reported on stdout and with a tqdm progress bar
        (``tqdm`` must already be imported in the notebook).
    """
    # Work with an absolute path: relative imports are resolved to absolute paths,
    # so the walk must produce absolute paths too or the processed-files check
    # would miss them and files would be sent to the LLM twice.
    lib_directory = os.path.abspath(directory)  # Assuming 'directory' is the 'lib' folder

    if not os.path.isdir(lib_directory):
        print(f"Directory does not exist: {lib_directory}")
        return

    # Prioritize certain core files (main.dart, service locator, routes); keep only those present
    core_candidates = [
        os.path.join(lib_directory, "main.dart"),
        os.path.join(lib_directory, "services", "locator.dart"),
        os.path.join(lib_directory, "routes", "routes.dart"),
    ]
    core_files = [path for path in core_candidates if os.path.exists(path)]

    # Initialize set to track processed files
    processed_files = set()

    # Process core files first (an earlier core file may already have pulled in a later one)
    for core_file in core_files:
        if core_file not in processed_files:
            process_file(core_file, memory, core_files, processed_files, chain, app_context, lib_directory)

    # Systematically process other files by module or type
    all_files = sorted(
        (
            os.path.join(root, file_name)
            for root, _dirs, files in os.walk(lib_directory)
            for file_name in files
            if file_name.endswith(".dart") and not file_name.endswith(('.freezed.dart', '.g.dart'))
        ),
        key=str.lower,
    )

    print(f"Total files to process: {len(all_files)}\n")

    # Use tqdm to display a progress bar; already-processed files still advance it
    for file_path in tqdm(all_files, desc="Processing files", unit="file"):
        if file_path not in processed_files:
            process_file(file_path, memory, core_files, processed_files, chain, app_context, lib_directory)

    print("\nProcessing complete.")

In [47]:

from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferWindowMemory
from langchain.memory import ConversationBufferWindowMemory
from tqdm import tqdm







# Initialize the LLM using GPT-4o-mini (or another GPT model of your choice)
llm = ChatOpenAI(temperature=0.7, model_name="gpt-4o-mini")

# Create a LangChain prompt template for file processing.
# from_template derives the input variables from the placeholders in metadata_prompt,
# so the declared variables can never drift from what the template really uses.
prompt = PromptTemplate.from_template(metadata_prompt)

# Define a chain to process files with the given LLM and prompt template
chain = LLMChain(llm=llm, prompt=prompt)

# Windowed conversation memory (k=5).
# NOTE: it is handed to process_repository/process_file but is not attached to the chain,
# so it does not currently influence the generated metadata.
memory = ConversationBufferWindowMemory(k=5)




In [49]:
# Run the processing over repo_directory, starting with the core files (main.dart first).
# WARNING: every processed file is rewritten in place and triggers one LLM call.
# To target a different project, reassign repo_directory before running this cell.
process_repository(repo_directory, memory, chain, app_context)

Processing file: /Volumes/Mac-External/Development/test-twplatform/lib/main.dart
Processing file: /Volumes/Mac-External/Development/test-twplatform/lib/_twcore/application/tenders/bloc/tenders_bloc.dart
Processing file: /Volumes/Mac-External/Development/test-twplatform/lib/_twcore/models/basic_user_details/basic_user_details.dart
Processing file: /Volumes/Mac-External/Development/test-twplatform/lib/_twcore/models/invoicing/invoicing_details_model.dart
Processing file: /Volumes/Mac-External/Development/test-twplatform/lib/_twcore/models/location/location_model.dart
Processing file: /Volumes/Mac-External/Development/test-twplatform/lib/_twcore/models/userpermissions/user_permissions_model.dart
Processing file: /Volumes/Mac-External/Development/test-twplatform/lib/_twcore/models/bid_on_tender/tender_bid.dart
Processing file: /Volumes/Mac-External/Development/test-twplatform/lib/_twcore/models/feedback/bid_feedback/tender_bid_feedback/tender_bid_feedback.dart
Processing file: /Volumes/Mac

KeyboardInterrupt: 