diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 02771efd..586ede6c 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -1,5 +1,7 @@ # GitHub Copilot Instructions for Git AI +> **Note**: This file is specifically tailored for GitHub Copilot's AI assistant. For general Claude Code guidance, see [CLAUDE.md](../CLAUDE.md) in the repository root. + ## Project Overview Git AI is a Rust-based tool that automates intelligent commit message generation using AI. It uses a sophisticated multi-step analysis process to analyze git diffs, score file impacts, and generate meaningful commit messages via OpenAI's API. @@ -122,6 +124,123 @@ fn test_parse_diff_with_multiple_files() { /* ... */ } fn test_score_calculation_for_high_impact_source_file() { /* ... */ } ``` +### Real Test Examples from Codebase + +```rust +// Function calling tests (tests/function_calling_test.rs) +#[test] +fn test_create_commit_function_tool_default() { + let tool = create_commit_function_tool(None).unwrap(); + assert_eq!(tool.function.name, "commit"); + // Verify JSON schema structure +} + +#[test] +fn test_parse_commit_function_response_invalid_json() { + let response = CommitFunctionCall { + name: "commit".to_string(), + arguments: "invalid json".to_string(), + }; + let result = parse_commit_function_response(response); + assert!(result.is_err()); +} + +// Multi-step integration tests (src/multi_step_integration.rs:726) +#[test] +fn test_parse_diff_with_c_i_prefixes() { + // Test with c/ and i/ prefixes that appear in git hook diffs + let diff = r#"diff --git c/test.md i/test.md +new file mode 100644 +index 0000000..6c61a60 +--- /dev/null ++++ i/test.md +@@ -0,0 +1 @@ ++# Test File + +diff --git c/test.js i/test.js +new file mode 100644 +index 0000000..a730e61 +--- /dev/null ++++ i/test.js +@@ -0,0 +1 @@ ++console.log('Hello'); +"#; + + let files = parse_diff(diff).unwrap(); + assert_eq!(files.len(), 2); +} + +// Model token tests (tests/model_token_test.rs:8) +#[test] +fn test_token_counting_accuracy() { + let model = Model::GPT4; + let text = "Hello world"; + let tokens = model.count_tokens(text).unwrap(); + assert!(tokens > 0); + assert!(tokens < 10); // Reasonable bounds +} + +// Hook tests (src/hook.rs) +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_string_pool_get() { + let mut pool = StringPool::new(); + let result = pool.get("test"); + assert_eq!(result, "test"); + } + + #[test] + fn test_string_pool_limit() { + let mut pool = StringPool::new(); + // Test memory limits + for i in 0..1000 { + pool.put(format!("test_{}", i)); + } + assert!(pool.len() <= StringPool::MAX_SIZE); + } +} +``` + +### Integration Test Patterns + +```rust +// Use tests/common.rs for shared utilities +use crate::common::*; + +// Test helper functions pattern +fn setup_test_repo() -> TempDir { + let temp_dir = TempDir::new().unwrap(); + // Initialize git repo, add files, etc. + temp_dir +} + +fn create_test_diff(file_content: &str) -> String { + format!(r#"diff --git a/test.rs b/test.rs +index abc123..def456 100644 +--- a/test.rs ++++ b/test.rs +@@ -1,1 +1,1 @@ +-// Old content ++{}"#, file_content) +} + +#[tokio::test] +async fn test_end_to_end_commit_generation() { + let _temp_repo = setup_test_repo(); + let diff = create_test_diff("// New content"); + + let result = generate_commit_message_local(diff, Some(72)); + assert!(result.is_ok()); + + let message = result.unwrap(); + assert!(message.len() <= 72); + assert!(!message.is_empty()); +} +``` + ## Verification Workflow ### ALWAYS Verify Changes Before Committing @@ -325,6 +444,73 @@ Always include verification steps in your suggestions: - Host exclusive assistant instance locally - Accumulate learning across all projects +### AI Provider Integration + +#### OpenAI Integration (src/openai.rs) + +The OpenAI integration uses `async-openai` client for API communication. See src/openai.rs for the full implementation. + +```rust +// Example API call pattern (simplified from src/model.rs:220) +use async_openai::Client; +use async_openai::types::{ + CreateChatCompletionRequestArgs, + ChatCompletionRequestUserMessageArgs +}; + +pub async fn run(settings: Settings, content: String) -> Result { + let client = async_openai::Client::new(); + let model: Model = settings.model.as_deref().unwrap_or("gpt-4o-mini").into(); + + let request = CreateChatCompletionRequestArgs::default() + .model(model.to_string()) + .messages([ChatCompletionRequestUserMessageArgs::default() + .content(content) + .build()? + .into()]) + .temperature(0.7) + .max_tokens((model.context_size() - tokens) as u16) + .build()?; + + let response = client.chat().create(request).await?; + Ok(response.choices[0].message.content.clone().unwrap_or_default()) +} +``` + +#### Ollama Integration (src/ollama.rs) + +**Note**: Ollama support is planned but not yet implemented in the codebase. The tool currently supports OpenAI models only. + +#### Model-Specific Optimizations (src/model.rs) + +See src/model.rs:43 for the Model enum definition and src/model.rs:52 for token counting implementation. + +```rust +// Simplified from src/model.rs:43-80 +impl Model { + /// Counts tokens using tiktoken for accurate API usage + pub fn count_tokens(&self, text: &str) -> Result { + if text.is_empty() { + return Ok(0); + } + + let tokenizer = TOKENIZER.get_or_init(|| { + let model_str: &str = self.into(); + get_tokenizer(model_str) + }); + + let tokens = tokenizer.encode_ordinary(text); + Ok(tokens.len()) + } + + /// Gets maximum context size for the model (src/model.rs:76) + pub fn context_size(&self) -> usize { + let model_str: &str = self.into(); + get_context_size(model_str) // From tiktoken-rs + } +} +``` + ## Commit Message Format ### Style Guidelines @@ -342,28 +528,67 @@ Always include verification steps in your suggestions: ### Command Structure ```bash +# Configuration management git-ai config set # Set configuration git-ai config get # Get configuration value git-ai config reset # Reset to defaults + +# Hook management git-ai hook install # Install git hook git-ai hook uninstall # Remove git hook git-ai hook reinstall # Reinstall hook + +# Available for debugging and testing +git-ai --help # Show help information ``` +### Configuration Keys Reference + +| Key | Default | Description | Example | +| ------------------- | ------------- | ----------------------------------------------------- | ---------------------------- | +| `openai-api-key` | (required) | OpenAI API key | `sk-...` | +| `model` | `gpt-4o-mini` | AI model to use (see src/config.rs:15) | `gpt-4o`, `gpt-4.1`, `gpt-4` | +| `max-tokens` | `2024` | API request token limit (see src/config.rs:14) | `256`, `1024` | +| `max-commit-length` | `72` | Commit message character limit (see src/config.rs:13) | `50`, `100` | + ## Development Workflow ### Local Development -- Use `just local-install` for quick dev installation -- Run `cargo test` before committing +- Use `just local-install` for quick dev installation with hook setup +- Run `cargo test --all` before committing - Check `./scripts/integration-tests` for comprehensive testing - Run `./scripts/hook-stress-test` for hook testing +- Use `./scripts/comprehensive-tests` for full test coverage + +### Justfile Commands + +The project includes essential development commands: + +```bash +just local-install # Install locally with debug symbols and setup hooks +just integration-test # Run integration tests in Docker +just docker-build # Build Docker image +just local-github-actions # Run GitHub Actions locally with act +``` + +### Script Utilities + +```bash +# Testing scripts (Fish shell) +./scripts/integration-tests # Core integration test suite +./scripts/comprehensive-tests # Extensive test coverage (11 test categories) +./scripts/hook-stress-test # Git hook functionality testing +./scripts/current-version # Get current version information +``` ### Building -- Release builds use LTO and aggressive optimization -- Include debug symbols even in release builds -- Multi-target support: Linux (GNU/musl), macOS +- **Development builds**: `cargo build` (fast compilation) +- **Release builds**: `cargo build --release` (LTO and aggressive optimization) +- **Debug symbols**: Include debug symbols even in release builds +- **Multi-target support**: Linux (GNU/musl), macOS, Windows +- **Local installation**: `cargo install --debug --path .` for development ## Documentation Standards @@ -438,16 +663,58 @@ git-ai hook reinstall # Reinstall hook ```rust use serde::{Deserialize, Serialize}; +use serde_json::json; +use async_openai::types::{ChatCompletionTool, ChatCompletionToolType, FunctionObjectArgs}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileAnalysisResult { + pub lines_added: u32, + pub lines_removed: u32, + pub file_category: String, + pub summary: String, +} -#[derive(Serialize, Deserialize)] -struct FileAnalysis { - lines_added: u32, - lines_removed: u32, - category: FileCategory, - summary: String, +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileChange { + #[serde(rename = "type")] + pub change_type: String, + pub summary: String, + pub lines_changed: u32, + pub impact_score: f32, + pub file_category: String, } -// Use with OpenAI function calling for structured outputs +// Create OpenAI function tools with JSON schemas +pub fn create_analyze_function_tool() -> Result { + let function = FunctionObjectArgs::default() + .name("analyze") + .description("Analyze a single file's changes from the git diff") + .parameters(json!({ + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Relative path to the file" + }, + "diff_content": { + "type": "string", + "description": "The git diff content for this specific file only" + }, + "operation_type": { + "type": "string", + "enum": ["added", "modified", "deleted", "renamed", "binary"], + "description": "Type of operation performed on the file" + } + }, + "required": ["file_path", "diff_content", "operation_type"] + })) + .build()?; + + Ok(ChatCompletionTool { + r#type: ChatCompletionToolType::Function, + function + }) +} ``` ### Fallback Strategy @@ -471,13 +738,238 @@ single_step_api(diff).await ```rust use rayon::prelude::*; +use tokio::task::JoinSet; +// CPU-bound parallel processing with rayon let analyses: Vec = files .par_iter() .map(|file| analyze_file(file)) .collect(); + +// Async parallel processing with tokio +let mut join_set = JoinSet::new(); +for file in files { + join_set.spawn(async move { + analyze_file_async(&file).await + }); +} + +let results = join_all(futures).await; +``` + +## Debugging and Troubleshooting + +### Environment Setup for Debugging + +```bash +# Enable debug logging for all components +export RUST_LOG=debug + +# Enable trace logging for specific modules +export RUST_LOG=ai::multi_step_analysis=trace,ai::openai=debug + +# Run with debug output +cargo run --example multi_step_commit ``` +### Debug Output Analysis + +Git AI provides comprehensive debug output when `RUST_LOG=debug` is set: + +```text +=== GIT AI HOOK DEBUG SESSION === + +📋 INITIALIZATION + Args: commit_msg_file='.git/COMMIT_EDITMSG', source=None, sha1=None + Build: Debug build with performance profiling enabled + +⚙️ SETUP & PREPARATION + │ Generate instruction template 1.56ms ✓ + │ Count tokens 306.13ms ✓ + │ Calculate instruction tokens 307.77ms ✓ + └ Get context size 959.00ns ✓ + +🤖 AI PROCESSING + Multi-Step Attempt: FAILED + └ Error: Invalid function_call ✗ No function named 'required' specified + + Single-Step Fallback: SUCCESS + │ Creating commit function tool ✓ max_length=72 + │ OpenAI API call 2.78s ✓ + └ Response parsing ✓ +``` + +### Common Issues and Solutions + +#### API-Related Issues + +```bash +# Test API connectivity +cargo run --example function_calling_demo + +# Check API key configuration +git-ai config get openai-api-key + +# Test with different models +git-ai config set model gpt-4o-mini # Faster, cheaper +git-ai config set model gpt-4o # Better quality +``` + +#### Hook Issues + +```bash +# Reinstall hook if not working +git-ai hook reinstall + +# Check hook file directly +ls -la .git/hooks/prepare-commit-msg +cat .git/hooks/prepare-commit-msg + +# Test in isolation +RUST_LOG=debug git commit --no-edit +``` + +#### Performance Issues + +```bash +# Profile token usage +RUST_LOG=debug git-ai 2>&1 | grep "tokens" + +# Monitor API call timing +RUST_LOG=debug git-ai 2>&1 | grep "OpenAI API call" + +# Reduce token limit for large diffs +git-ai config set max-tokens 256 +``` + +### Testing Specific Scenarios + +```bash +# Test empty diffs +git commit --allow-empty --no-edit + +# Test large diffs +dd if=/dev/zero of=large_file bs=1M count=1 +git add large_file && git commit --no-edit + +# Test binary files +cp /bin/ls binary_file && git add binary_file && git commit --no-edit + +# Test Unicode and special characters +echo "🚀 Unicode test 你好" > unicode.txt && git add . && git commit --no-edit +``` + +### Integration Test Categories + +The comprehensive test suite covers 11 categories: + +1. **Hook Installation and Configuration** - Basic setup and config management +2. **Basic Git Operations** - Standard commit workflows +3. **File Creation Permutations** - Empty files, multiple files, different content types +4. **File Modification Permutations** - Start, middle, end modifications +5. **Advanced Git Operations** - Amend, squash, template commits +6. **Branch and Merge Operations** - Feature branches, merging +7. **File Operations** - Deletions, mixed operations +8. **Special Content** - Binary files, Unicode, special characters +9. **File System Operations** - Directory moves, symlinks, permissions +10. **Edge Cases** - Empty commits, case sensitivity, file/directory conversion +11. **Bulk Operations** - Many files, large changes + +### Performance Monitoring + +```rust +// Use profiling macros for performance tracking +use crate::profile; + +pub fn expensive_operation() -> Result { + profile!("Generate instruction template"); + // ... implementation +} +``` + +## API Usage Optimization + +### Token Management Strategies + +```rust +// Smart truncation preserving file boundaries +let truncated_diff = if estimated_tokens > max_tokens { + truncate_diff_intelligently(&diff, max_tokens) +} else { + diff +}; + +// Prioritize high-impact files +let sorted_files: Vec<_> = files + .iter() + .sorted_by(|a, b| b.impact_score.partial_cmp(&a.impact_score).unwrap()) + .take(max_files_for_context) + .collect(); +``` + +### Model Selection Guidelines + +| Model | Use Case | Speed | Quality | Cost | +| ------------- | ------------------------------------------- | ------ | ------- | ------ | +| `gpt-4o-mini` | Default model, fast iteration, simple diffs | Fast | Good | Low | +| `gpt-4.1` | Balanced performance | Medium | High | Medium | +| `gpt-4o` | Best quality, complex diffs | Slow | Highest | High | +| `gpt-4` | Stable, proven performance | Medium | High | High | + +### Fallback Strategy Implementation + +```rust +// Implement graceful degradation +pub async fn generate_commit_message(diff: String) -> Result { + // Try multi-step with API + if let Ok(msg) = generate_commit_message_multi_step(&client, diff.clone()).await { + return Ok(msg); + } + + // Fallback to local analysis + if let Ok(msg) = generate_commit_message_local(diff.clone(), Some(72)) { + return Ok(msg); + } + + // Last resort: single-step API + generate_commit_message_single_step(&client, diff).await +} +``` + +## Release and Deployment + +### Version Management + +```bash +# Check current version +./scripts/current-version + +# Prepare release build +cargo build --release + +# Multi-platform builds +cargo build --target x86_64-unknown-linux-gnu +cargo build --target x86_64-unknown-linux-musl +cargo build --target x86_64-apple-darwin +``` + +### CI/CD Pipeline + +```bash +# Test locally with act (GitHub Actions simulation) +just local-github-actions + +# Run full release pipeline in Docker +just release +``` + +### Distribution Channels + +- **Crates.io**: Primary distribution via `cargo install git-ai` +- **Pre-compiled binaries**: Via `cargo-binstall` for faster installation +- **Docker**: Containerized builds and testing +- **GitHub Releases**: Tagged releases with pre-built binaries + ## Workspace Rules The project includes two important workspace rules: @@ -784,7 +1276,7 @@ let result = tokio::task::spawn_blocking(|| { **All public functions**: -```rust +````rust /// Parses a git diff into individual file changes. /// /// Handles various diff formats including standard git diff output, @@ -810,7 +1302,7 @@ let result = tokio::task::spawn_blocking(|| { /// # Ok::<(), anyhow::Error>(()) /// ``` pub fn parse_diff(diff_content: &str) -> Result> -``` +```` **Module docs**: @@ -917,6 +1409,7 @@ config = "0.15.11" ``` **When adding**: + - ✅ Justify new dependencies - ✅ Use minimal feature flags - ✅ Prefer maintained crates @@ -926,7 +1419,7 @@ config = "0.15.11" When providing feedback: -``` +```text ❌ Line 42: Using unwrap() can panic ✅ Fix: option.ok_or_else(|| anyhow!("no value"))? 📝 Why: Provides better error context and prevents panics