From 0accca8f454bda1b1f4ba9b27923daaff3f00061 Mon Sep 17 00:00:00 2001 From: Linus Oleander Date: Tue, 28 May 2024 07:47:25 +0200 Subject: [PATCH 1/3] Remove old training and example files for model updates --- src/bin/clear.rs | 24 ---- src/bin/fine-tune-json.rs | 183 ------------------------------ src/bin/fine-tune-md.rs | 190 ------------------------------- src/bin/fine-tune.rs | 227 ------------------------------------- src/examples.rs | 95 ---------------- train/Justfile | 4 - train/prompt.md | 20 ---- train/promptfooconfig.yaml | 108 ------------------ train/scripts/train | 80 ------------- train/tests.csv | 2 - 10 files changed, 933 deletions(-) delete mode 100644 src/bin/clear.rs delete mode 100644 src/bin/fine-tune-json.rs delete mode 100644 src/bin/fine-tune-md.rs delete mode 100644 src/bin/fine-tune.rs delete mode 100644 src/examples.rs delete mode 100644 train/Justfile delete mode 100644 train/prompt.md delete mode 100644 train/promptfooconfig.yaml delete mode 100755 train/scripts/train delete mode 100644 train/tests.csv diff --git a/src/bin/clear.rs b/src/bin/clear.rs deleted file mode 100644 index 7dbf345c..00000000 --- a/src/bin/clear.rs +++ /dev/null @@ -1,24 +0,0 @@ -use git2::Repository; -use anyhow::{Context, Result}; - -#[tokio::main] -async fn main() -> Result<()> { - env_logger::init(); - - let repo = Repository::open_from_env().context("Failed to open repository")?; - let mut config = repo.config().context("Failed to load config")?; - config - .remove("ai.thread-id") - .context("Failed to delete thread-id")?; - config.snapshot().context("Failed to save config")?; - let mut global_config = config - .open_global() - .context("Failed to open global config")?; - global_config - .remove("ai.assistant-id") - .context("Failed to delete assistant-id")?; - global_config - .snapshot() - .context("Failed to save global config")?; - Ok(()) -} diff --git a/src/bin/fine-tune-json.rs b/src/bin/fine-tune-json.rs deleted file mode 100644 index d92ba76f..00000000 --- a/src/bin/fine-tune-json.rs +++ /dev/null @@ -1,183 +0,0 @@ -use std::io::Write; -use std::fs::File; - -use git2::{Commit, DiffFormat, DiffOptions, Repository}; -use anyhow::{Context, Result}; -use serde_json::json; - -fn main() -> Result<()> { - env_logger::init(); - - let max_tokens = 16385; - let file_name = "examples.jsonl"; - let max_commits = 20; - - log::info!("Creating fine-tune file with {} commits and {} tokens", max_commits, max_tokens); - - let repo = Repository::open(".").context("Failed to open git repository")?; - let mut revwalk = repo.revwalk().context("Failed to create Revwalk")?; - let mut file = File::create(file_name).context("Failed to create file")?; - - file.write_all(b"").context("Failed to write to file")?; - - revwalk.push_head().expect("Failed to push head"); - - let mut curr_size = 0; - let mut commit_count = 0; - - let example = json!({ - "": "" - }); - - writeln!(file, "{}\n", example).context("Failed to write to file")?; - - for oid in revwalk { - let oid = oid.context("Failed to get oid")?; - let commit = repo.find_commit(oid).context("Couldn't find commit")?; - - if commit.parent_count() > 1 { - continue; - } - - // let weight = if commit.author().email() == Some(&user_email) { - // 1 - // } else if commit.committer().email() == Some(&user_email) { - // 1 - // } else { - // 0 - // }; - - let Ok(Some(content)) = generate_commit_diff(&repo, &commit) else { - continue; - }; - - let Some(commit) = commit.message() else { - continue; - }; - - if commit.starts_with("Merge") { - continue; - } - - if commit.starts_with("Revert") { - continue; - } - - if commit.len() > 72 { - continue; - } - - // Check if it contains a new line - if commit.trim().contains("\n") { - continue; - } - - if commit.contains("[") && commit.contains("]") { - continue; - } - - let message = json!({ - commit.trim() : content.trim() - }); - - let content = serde_json::to_string(&message)?; - curr_size += content.split_whitespace().count(); - - if curr_size > max_tokens { - log::warn!("Max tokens reached: {}", max_tokens); - break; - } - - commit_count += 1; - - writeln!(file, "{}\n", content).context("Failed to write to file")?; - - if commit_count >= max_commits { - break; - } - } - - Ok(()) -} - -fn should_exclude_path(file_path: &str) -> bool { - let exclude_patterns = vec![ - "/docs/", "/documentation/", "/guides/", // Documentation - "/assets/", "/images/", "/graphics/", "/designs/", // Assets and design-related files - "Gemfile", "Gemfile.lock", // Dependency files - "/config/", "/settings/", "/initializers/", // Configuration files - "/vendor/", "/third-party/", "/external/", // Third-party and vendor code - "/submodules/", // Git submodules - "/.github/", "/.gitignore", "/.gitmodules", "/.gitattributes", // Git and GitHub specific files - "/.gitlab-ci.yml", "/.travis.yml", "/appveyor.yml", // CI/CD configuration files - "/Dockerfile", "/docker-compose.yml", "/.dockerignore", // Docker files - "/.editorconfig", "/.rubocop.yml", "/.eslintignore", "/.eslintrc", // Linter and editor configuration - "/test/", "/spec/", "/tests/", "/specs/", // Test files and directories - "/locales/", "/i18n/", // Localization files - "/logs/", "/tmp/", // Logs and temporary files - "/public/", // Public assets - "/node_modules/", "/package.json", "/yarn.lock", // Node.js specific files - "/.env", "/.env.example", // Environment files - "/db/schema.rb", "/db/migrate/", // Database schema and migrations - "/scripts/", "/tools/", // Utility scripts and tools - "/CHANGELOG", "/LICENSE", "/README.md", // Project meta-files - ]; - - exclude_patterns - .iter() - .any(|pattern| file_path.contains(pattern)) -} - -fn generate_commit_diff(repo: &Repository, commit: &Commit) -> Result> { - let parent = commit.parents().next().unwrap_or_else(|| commit.clone()); - let tree = commit.tree().expect("Couldn't get commit tree"); - let parent_tree = parent.tree().expect("Couldn't get parent tree"); - let mut opts = DiffOptions::new(); - opts - .ignore_whitespace_change(true) - .recurse_untracked_dirs(false) - .recurse_ignored_dirs(false) - .ignore_whitespace_eol(true) - .ignore_blank_lines(true) - .include_untracked(false) - .ignore_whitespace(true) - .indent_heuristic(false) - .ignore_submodules(true) - .include_ignored(false) - .interhunk_lines(0) - .context_lines(0) - .patience(true) - .minimal(true); - - let diff = repo - .diff_tree_to_tree(Some(&parent_tree), Some(&tree), Some(&mut opts)) - .context("Failed to get diff")?; - - let mut patch: Vec = Vec::new(); - - #[rustfmt::skip] - diff.print(DiffFormat::Patch, |delta, _, line| { - // Ignore if line is a binary file - if line.origin() == 'B' { - return false; - } - - let file_path = delta.new_file().path().unwrap_or_else(|| delta.old_file().path().unwrap()); - - if should_exclude_path(file_path.to_str().unwrap()) { - return false; - } - - let content = line.content(); - patch.extend_from_slice(content); - - true - }).context("Failed to print diff")?; - - let content = String::from_utf8(patch).context("Failed to convert patch to string")?; - if content.split_whitespace().count() > 600 { - Ok(None) - } else { - Ok(Some(content)) - } -} diff --git a/src/bin/fine-tune-md.rs b/src/bin/fine-tune-md.rs deleted file mode 100644 index 4de2fc82..00000000 --- a/src/bin/fine-tune-md.rs +++ /dev/null @@ -1,190 +0,0 @@ -use std::io::Write; -use std::fs::File; - -use git2::{Commit, DiffFormat, DiffOptions, Repository}; -use anyhow::{Context, Result}; - -fn main() -> Result<()> { - env_logger::init(); - - let max_tokens = 49999; - let file_name = "fine-tune.md"; - let max_commits = 100; - - log::info!("Creating fine-tune file with {} commits and {} tokens", max_commits, max_tokens); - - let repo = Repository::open(".").context("Failed to open git repository")?; - // let config = repo.config().context("Couldn't access repository config")?; - // let user_email = config.get_string("user.email").context("Couldn't get user email")?; - let mut revwalk = repo.revwalk().context("Failed to create Revwalk")?; - let mut file = File::create(file_name).context("Failed to create file")?; - - file - .write_all(b"# Examples\n\nExamples of best practices for writing git commit messages:\n\n") - .context("Failed to write to file")?; - - revwalk.push_head().expect("Failed to push head"); - - let mut curr_size = 0; - let mut commit_count = 0; - - for oid in revwalk { - let oid = oid.context("Failed to get oid")?; - let commit = repo.find_commit(oid).context("Couldn't find commit")?; - - if commit.parent_count() > 1 { - continue; - } - - // let weight = if commit.author().email() == Some(&user_email) { - // 1 - // } else if commit.committer().email() == Some(&user_email) { - // 1 - // } else { - // 0 - // }; - - let Ok(Some(content)) = generate_commit_diff(&repo, &commit) else { - continue; - }; - - let Some(commit) = commit.message() else { - continue; - }; - - if commit.starts_with("Merge") { - continue; - } - - if commit.starts_with("Revert") { - continue; - } - - if commit.len() > 72 { - continue; - } - - // Check if it contains a new line - if commit.trim().contains("\n") { - continue; - } - - if commit.contains("[") && commit.contains("]") { - continue; - } - - let message = format!( - "## Example {}\n\n### GIT DIFF:\n\n{}\n### COMMIT MESSAGE:\n\n{}\n", - commit_count, content, commit - ); - - curr_size += message.split_whitespace().count(); - - if curr_size > max_tokens { - log::warn!("Max tokens reached: {}", max_tokens); - break; - } - - commit_count += 1; - - // append to file - - if commit_count >= max_commits { - break; - } - - file - .write_all(message.as_bytes()) - .context("Failed to write to file")?; - } - - log::info!( - "Wrote {} commits to train file and {} commits to validate file", - commit_count / 2, - commit_count / 2 - ); - - Ok(()) -} - -fn should_exclude_path(file_path: &str) -> bool { - let exclude_patterns = vec![ - "/docs/", "/documentation/", "/guides/", // Documentation - "/assets/", "/images/", "/graphics/", "/designs/", // Assets and design-related files - "Gemfile", "Gemfile.lock", // Dependency files - "/config/", "/settings/", "/initializers/", // Configuration files - "/vendor/", "/third-party/", "/external/", // Third-party and vendor code - "/submodules/", // Git submodules - "/.github/", "/.gitignore", "/.gitmodules", "/.gitattributes", // Git and GitHub specific files - "/.gitlab-ci.yml", "/.travis.yml", "/appveyor.yml", // CI/CD configuration files - "/Dockerfile", "/docker-compose.yml", "/.dockerignore", // Docker files - "/.editorconfig", "/.rubocop.yml", "/.eslintignore", "/.eslintrc", // Linter and editor configuration - "/test/", "/spec/", "/tests/", "/specs/", // Test files and directories - "/locales/", "/i18n/", // Localization files - "/logs/", "/tmp/", // Logs and temporary files - "/public/", // Public assets - "/node_modules/", "/package.json", "/yarn.lock", // Node.js specific files - "/.env", "/.env.example", // Environment files - "/db/schema.rb", "/db/migrate/", // Database schema and migrations - "/scripts/", "/tools/", // Utility scripts and tools - "/CHANGELOG", "/LICENSE", "/README.md", // Project meta-files - ]; - - exclude_patterns - .iter() - .any(|pattern| file_path.contains(pattern)) -} - -fn generate_commit_diff(repo: &Repository, commit: &Commit) -> Result> { - let parent = commit.parents().next().unwrap_or_else(|| commit.clone()); - let tree = commit.tree().expect("Couldn't get commit tree"); - let parent_tree = parent.tree().expect("Couldn't get parent tree"); - let mut opts = DiffOptions::new(); - opts - .ignore_whitespace_change(true) - .recurse_untracked_dirs(false) - .recurse_ignored_dirs(false) - .ignore_whitespace_eol(true) - .ignore_blank_lines(true) - .include_untracked(false) - .ignore_whitespace(true) - .indent_heuristic(false) - .ignore_submodules(true) - .include_ignored(false) - .interhunk_lines(0) - .context_lines(0) - .patience(true) - .minimal(true); - - let diff = repo - .diff_tree_to_tree(Some(&parent_tree), Some(&tree), Some(&mut opts)) - .context("Failed to get diff")?; - - let mut patch: Vec = Vec::new(); - - #[rustfmt::skip] - diff.print(DiffFormat::Patch, |delta, _, line| { - // Ignore if line is a binary file - if line.origin() == 'B' { - return false; - } - - let file_path = delta.new_file().path().unwrap_or_else(|| delta.old_file().path().unwrap()); - - if should_exclude_path(file_path.to_str().unwrap()) { - return false; - } - - let content = line.content(); - patch.extend_from_slice(content); - - true - }).context("Failed to print diff")?; - - let content = String::from_utf8(patch).context("Failed to convert patch to string")?; - if content.split_whitespace().count() > 600 { - Ok(None) - } else { - Ok(Some(content)) - } -} diff --git a/src/bin/fine-tune.rs b/src/bin/fine-tune.rs deleted file mode 100644 index 112ee3d6..00000000 --- a/src/bin/fine-tune.rs +++ /dev/null @@ -1,227 +0,0 @@ -// use std::io::Write; -// use std::fs::File; - -// use git2::{Commit, DiffFormat, DiffOptions, Repository}; -// use anyhow::{Context, Result}; -// use serde_json::json; - -// fn prompt() -> String { -// let language = config::APP.language; -// let max_length = config::APP.max_length; - -// format!("Create concise and meaningful git commit messages based on diffs, incorporating these practices: - -// - Language: {language}. -// - Maximum Length: {max_length} characters for the summary. -// - Structure: Begin with a clear summary. Use present tense. -// - Clarity and Relevance: Focus on detailing the changes and their reasons. Exclude irrelevant details. -// - Consistency: Maintain a consistent style of tense, punctuation, and capitalization. -// - Review: Ensure the commit message accurately reflects the changes made and their purpose without leaving the description blank. - -// Refer to examples.jsonl for examples of how commit messages can be mapped to git diffs") -// } - -// fn main() -> Result<()> { -// env_logger::init(); - -// let max_tokens = 16385; -// let validate_file_name = "validate.jsonl"; -// let train_file_name = "train.jsonl"; -// let max_commits = 20; - -// log::info!("Creating fine-tune file with {} commits and {} tokens", max_commits, max_tokens); - -// let repo = Repository::open(".").context("Failed to open git repository")?; -// // let config = repo.config().context("Couldn't access repository config")?; -// // let user_email = config.get_string("user.email").context("Couldn't get user email")?; -// let mut revwalk = repo.revwalk().context("Failed to create Revwalk")?; -// let mut validate_file = File::create(validate_file_name).context("Failed to create file")?; -// let mut train_file = File::create(train_file_name).context("Failed to create file")?; - -// validate_file.write_all(b"").context("Failed to write to file")?; -// train_file.write_all(b"").context("Failed to write to file")?; - -// revwalk.push_head().expect("Failed to push head"); - -// let mut curr_size = 0; -// let mut commit_count = 0; -// let mut result = vec![]; - -// for oid in revwalk { -// let oid = oid.context("Failed to get oid")?; -// let commit = repo.find_commit(oid).context("Couldn't find commit")?; - -// if commit.parent_count() > 1 { -// continue; -// } - -// // let weight = if commit.author().email() == Some(&user_email) { -// // 1 -// // } else if commit.committer().email() == Some(&user_email) { -// // 1 -// // } else { -// // 0 -// // }; - -// let Ok(Some(content)) = generate_commit_diff(&repo, &commit) else { -// continue; -// }; - -// let Some(commit) = commit.message() else { -// continue; -// }; - -// if commit.starts_with("Merge") { -// continue; -// } - -// if commit.starts_with("Revert") { -// continue; -// } - -// if commit.len() > 72 { -// continue; -// } - -// // Check if it contains a new line -// if commit.trim().contains("\n") { -// continue; -// } - -// if commit.contains("[") && commit.contains("]") { -// continue; -// } - -// let message = json!({ -// "messages": [ -// { "role": "assistant", "content": commit.trim() }, -// { "role": "user", "content": content.trim() }, -// { "role": "system", "content": "Convert from git patch into git commit message" } -// ] -// }); - -// let content = serde_json::to_string(&message)?; -// curr_size += content.split_whitespace().count(); - -// if curr_size > max_tokens { -// log::warn!("Max tokens reached: {}", max_tokens); -// break; -// } - -// commit_count += 1; - -// result.push(message); - -// if commit_count >= max_commits { -// break; -// } -// } - -// let train_result = result[..(result.len() / 2)].to_vec(); -// for (i, message) in train_result.iter().enumerate() { -// let content = serde_json::to_string(&message)?; -// if i > 0 { -// train_file.write_all(b"\n").context("Failed to write to file")?; -// } -// train_file.write_all(content.as_bytes()).context("Failed to write to file")?; -// } - -// let validate_result = result[(result.len() / 2)..].to_vec(); -// for (i, message) in validate_result.iter().enumerate() { -// let content = serde_json::to_string(&message)?; -// if i > 0 { -// validate_file.write_all(b"\n").context("Failed to write to file")?; -// } -// validate_file.write_all(content.as_bytes()).context("Failed to write to file")?; -// } - -// log::info!( -// "Wrote {} commits to train file and {} commits to validate file", -// commit_count / 2, -// commit_count / 2 -// ); - -// Ok(()) -// } - -// fn should_exclude_path(file_path: &str) -> bool { -// let exclude_patterns = vec![ -// "/docs/", "/documentation/", "/guides/", // Documentation -// "/assets/", "/images/", "/graphics/", "/designs/", // Assets and design-related files -// "Gemfile", "Gemfile.lock", // Dependency files -// "/config/", "/settings/", "/initializers/", // Configuration files -// "/vendor/", "/third-party/", "/external/", // Third-party and vendor code -// "/submodules/", // Git submodules -// "/.github/", "/.gitignore", "/.gitmodules", -// "/.gitattributes", // Git and GitHub specific files -// "/.gitlab-ci.yml", "/.travis.yml", "/appveyor.yml", // CI/CD configuration files -// "/Dockerfile", "/docker-compose.yml", "/.dockerignore", // Docker files -// "/.editorconfig", "/.rubocop.yml", "/.eslintignore", -// "/.eslintrc", // Linter and editor configuration -// "/test/", "/spec/", "/tests/", "/specs/", // Test files and directories -// "/locales/", "/i18n/", // Localization files -// "/logs/", "/tmp/", // Logs and temporary files -// "/public/", // Public assets -// "/node_modules/", "/package.json", "/yarn.lock", // Node.js specific files -// "/.env", "/.env.example", // Environment files -// "/db/schema.rb", "/db/migrate/", // Database schema and migrations -// "/scripts/", "/tools/", // Utility scripts and tools -// "/CHANGELOG", "/LICENSE", "/README.md", // Project meta-files -// ]; - -// exclude_patterns.iter().any(|pattern| file_path.contains(pattern)) -// } - -// fn generate_commit_diff(repo: &Repository, commit: &Commit) -> Result> { -// let parent = commit.parents().next().unwrap_or_else(|| commit.clone()); -// let tree = commit.tree().expect("Couldn't get commit tree"); -// let parent_tree = parent.tree().expect("Couldn't get parent tree"); -// let mut opts = DiffOptions::new(); -// opts -// .ignore_whitespace_change(true) -// .recurse_untracked_dirs(false) -// .recurse_ignored_dirs(false) -// .ignore_whitespace_eol(true) -// .ignore_blank_lines(true) -// .include_untracked(false) -// .ignore_whitespace(true) -// .indent_heuristic(false) -// .ignore_submodules(true) -// .include_ignored(false) -// .interhunk_lines(0) -// .context_lines(0) -// .patience(true) -// .minimal(true); - -// let diff = repo -// .diff_tree_to_tree(Some(&parent_tree), Some(&tree), Some(&mut opts)) -// .context("Failed to get diff")?; - -// let mut patch: Vec = Vec::new(); - -// #[rustfmt::skip] -// diff.print(DiffFormat::Patch, |delta, _, line| { -// // Ignore if line is a binary file -// if line.origin() == 'B' { -// return false; -// } - -// let file_path = delta.new_file().path().unwrap_or_else(|| delta.old_file().path().unwrap()); - -// if should_exclude_path(file_path.to_str().unwrap()) { -// return false; -// } - -// let content = line.content(); -// patch.extend_from_slice(content); - -// true -// }).context("Failed to print diff")?; - -// let content = String::from_utf8(patch).context("Failed to convert patch to string")?; -// if content.split_whitespace().count() > 600 { Ok(None) } else { Ok(Some(content)) } -// } - -fn main() { - println!("Hello, world!"); -} diff --git a/src/examples.rs b/src/examples.rs deleted file mode 100644 index c229c028..00000000 --- a/src/examples.rs +++ /dev/null @@ -1,95 +0,0 @@ -// Hook: prepare-commit-msg - -use std::path::Path; -use std::time::Duration; - -use colored::Colorize; -use git2::{DiffOptions, Repository, RepositoryOpenFlags}; -use anyhow::{Context, Result}; -use ai::config::APP; -use ai::commit; - -const MAX_NUMBER_OF_COMMITS: usize = 5; - -trait RepositoryExt { - fn get_last_n_commits(&self, max_commits: usize) -> Result, git2::Error>; -} - -impl RepositoryExt for Repository { - fn get_last_n_commits(&self, max_commits: usize) -> Result, git2::Error> { - let mut revwalk = self.revwalk()?; - revwalk.push_head()?; - Ok( - revwalk - .take(max_commits) - .map(move |id| { - self - .find_commit(id.unwrap()) - .expect("Failed to find commit") - }) - .collect() - ) - } -} - -// TODO: Duplicate code from src/commit.rs -trait CommitExt { - fn show(&self, repo: &Repository, max_tokens: usize) -> Result; -} - -impl CommitExt for git2::Commit<'_> { - fn show(&self, repo: &Repository, max_tokens: usize) -> Result { - let mut commit_info = "".to_string(); - let mut opts = DiffOptions::new(); - let tree = self.tree()?; - let parent_tree = self.parent(0).ok().as_ref().and_then(|c| c.tree().ok()); - let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut opts))?; - - _ = diff - .print(git2::DiffFormat::Patch, |_delta, _hunk, line| { - commit_info.push_str(std::str::from_utf8(line.content()).unwrap()); - commit_info.len() < max_tokens - }) - .ok(); - - Ok(commit_info) - } -} - -use console::Style; -use indicatif::{ProgressBar, ProgressStyle}; - -pub async fn run(_args: &clap::ArgMatches) -> Result<()> { - let max_tokens = APP.max_tokens; - - let current_dir = std::env::current_dir().context("Failed to get current directory")?; - let repo = Repository::open_ext(¤t_dir, RepositoryOpenFlags::empty(), Vec::<&Path>::new())?; - let commits = repo - .get_last_n_commits(MAX_NUMBER_OF_COMMITS) - .context("Failed to get last commits")?; - - // Create and configure the progress bar - let spinner_style = ProgressStyle::default_spinner() - .tick_strings(&["-", "\\", "|", "/"]) - .template("{spinner:.blue} {msg}") - .context("Failed to create progress bar style")?; - - let pb = ProgressBar::new_spinner(); - pb.set_style(spinner_style); - pb.enable_steady_tick(Duration::from_millis(100)); - - let header_style = Style::new().bold(); - println!("{}", header_style.apply_to("🛠️ AI-Generated Commit Message Examples")); - - for (index, commit) in commits.iter().enumerate() { - pb.set_message(format!("Loading commit #{} ...\n", index + 1)); - let response = commit::generate(commit.show(&repo, max_tokens)?).await?; - - let commit_message = response.response.trim(); - pb.println(format!("Commit #{}:", index + 1)); - pb.println(format!("\tOriginal: {}", commit.message().unwrap_or_default().trim().italic())); - pb.println(format!("\tGenerated: {}", commit_message.italic())); - } - - Ok(()) -} diff --git a/train/Justfile b/train/Justfile deleted file mode 100644 index 6356e6ed..00000000 --- a/train/Justfile +++ /dev/null @@ -1,4 +0,0 @@ -set dotenv-load - -eval: - promptfoo eval diff --git a/train/prompt.md b/train/prompt.md deleted file mode 100644 index 47e9e27b..00000000 --- a/train/prompt.md +++ /dev/null @@ -1,20 +0,0 @@ -# Git Commit Message Generation - -## Instructions: - -You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs. Please adhere to the following guidelines: - -- Language: English -- Summary Length: Up to 72 characters. -- Structure: Begin with a clear, present-tense summary. -- Content: Emphasize the changes and their rationale, excluding irrelevant details. -- Consistency: Maintain uniformity in tense, punctuation, and capitalization. -- Accuracy: Ensure the message accurately reflects the changes and their purpose. - -## Output: - -Your output should be a commit message generated from the input diff. - -## Input: - -INPUT: {{diff}} diff --git a/train/promptfooconfig.yaml b/train/promptfooconfig.yaml deleted file mode 100644 index d9cdec80..00000000 --- a/train/promptfooconfig.yaml +++ /dev/null @@ -1,108 +0,0 @@ ---- - -prompts: - - | - You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs. Please adhere to the following guidelines: - - - Structure: Begin with a clear, present-tense summary. - - Content: Emphasize the changes and their rationale, excluding irrelevant details. - - Consistency: Maintain uniformity in tense, punctuation, and capitalization. - - Accuracy: Ensure the message accurately reflects the changes and their purpose. - - Present tense, imperative mood. (e.g., "Add x to y" instead of "Added x to y") - - ## Output: - - Your output should be a commit message generated from the input diff and nothing else. - - ## Input: - - INPUT: {{diff}} - -defaultTest: - options: - provider: openai:gpt-4-turbo - -providers: - # starcoder2 - - id: ollama:starcoder2:latest - label: starcoder2-temp-0-1 - config: - temperature: 0.1 - max_tokens: 72 - - id: ollama:starcoder2:latest - label: starcoder2-temp-0-7 - config: - temperature: 0.9 - max_tokens: 72 - # deepseek-coder - # - id: ollama:deepseek-coder:latest - # label: deepseek-coder-temp-0-1 - # config: - # temperature: 0.1 - # max_tokens: 72 - # - id: ollama:deepseek-coder:latest - # label: deepseek-coder-temp-0-7 - # config: - # temperature: 0.9 - # max_tokens: 72 - # codeup - - id: ollama:codeup:latest - label: codeup-temp-0-1 - config: - temperature: 0.1 - max_tokens: 72 - - id: ollama:codeup:latest - label: codeup-temp-0-7 - config: - temperature: 0.9 - max_tokens: 72 - # codellama - - id: ollama:codellama:13b - label: codellama-13b-temp-0-1 - config: - temperature: 0.1 - max_tokens: 72 - - id: ollama:codellama:13b - label: codellama-13b-temp-0-7 - config: - temperature: 0.9 - max_tokens: 72 - # codegemma - - id: ollama:codegemma:latest - label: temperature-0-1 - config: - temperature: 0.1 - max_tokens: 72 - - id: ollama:codegemma:latest - label: temperature-0-7 - config: - temperature: 0.9 - max_tokens: 72 - # gpt-4 - - id: openai:gpt-4 - label: gpt-4 - config: - max_tokens: 72 - temperature: 0.1 - - id: openai:gpt-4 - label: gpt-4 - config: - max_tokens: 72 - temperature: 0.7 -scenarios: - - config: - - vars: - diff: "diff --git a/README.md b/README.md\nindex 5f4a4b3..b3b1b3e 100644\n--- a/README.md\n+++ b/README.md\n@@ -1,3 +1,5 @@\n # AI Assistant\n\n+## Welcome\n+\n You are an AI assistant that generates concise and meaningful git commit messages based on provided diffs." - commit: "Added welcome section to readme" - tests: - - assert: - - type: factuality - value: "{{commit}}" - - type: llm-rubric - value: No wrapper text followed by :, like "Output:", "Commit message:" or "##" - - type: llm-rubric - value: "Ensure output has a similar mening to {{commit}}" - - type: llm-rubric - value: The output uses present tense and imperative mood, i.e., "Add x to y" instead of "Added x to y" - - type: javascript - value: Math.abs(output.length) < (72 * 1.3) diff --git a/train/scripts/train b/train/scripts/train deleted file mode 100755 index d81418cc..00000000 --- a/train/scripts/train +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env fish - -set repo_url "https://github.com/rails/rails.git" -set output_file "commits_diffs.csv" -set tmp_dir /tmp/test-git-dir-for-git-ai -set commit_limit 2 -set header "diff,commit" - -set -x GIT_DIR "$tmp_dir/.git" -echo "Using $GIT_DIR as GIT_DIR" - -if test -d "$tmp_dir" - echo "Using existing repo in $tmp_dir" -else - echo "Cloning $repo_url to $tmp_dir" - git clone "$repo_url" "$tmp_dir" -end - -echo $header >$output_file - -set commit_index 0 -set collected_commits 0 - -while test $collected_commits -lt $commit_limit - set i $commit_index - - echo "Processing commit $i" - - set commit_hash (git rev-parse HEAD~$i 2>/dev/null) - if test -z "$commit_hash" - echo "[BUG] Commit $i not found" - exit 1 - end - - set parent_count (git rev-list --count $commit_hash^@ 2>/dev/null) - if not set -q parent_count - echo "[BUG] Commit $commit_hash has no parent" - exit 1 - end - - set diff (git diff HEAD~$i^ HEAD~$i 2>/dev/null) - if not set -q diff - echo "[BUG] Diff not found for commit $i" - exit 1 - end - - # set diff (echo $diff | sed '1d') # Remove first line of diff - # set diff (echo $diff | sed -e ':a' -e 'N' -e '$!ba' -e 's/\n/ /g') # Remove trailing newline - - set newline_count (echo $diff | tr -cd '\n' | wc -c) - if test $newline_count -gt 1 - echo "Skipping commit $i with $newline_count newlines" - set commit_index (math $commit_index + 1) - continue - end - - if test (string length "$diff") -gt 100000 - echo "Skipping merge commit $i" - set commit_index (math $commit_index + 1) - continue - end - - set commit_msg (git log --format=%B -n 1 HEAD~$i 2>/dev/null) - if string match -q "*Merge pull*" $commit_msg - echo "Skipping commit $i with message: $commit_msg" - set commit_index (math $commit_index + 1) - continue - end - - echo "Commit: $commit_msg" - echo "Diff: $diff" - - echo "$diff,$commit_msg" >>$output_file - - set commit_index (math $commit_index + 1) - set collected_commits (math $collected_commits + 1) - echo "Remaining commits: $commit_limit" -end - -echo "Output stored in $output_file" diff --git a/train/tests.csv b/train/tests.csv deleted file mode 100644 index 82b985f9..00000000 --- a/train/tests.csv +++ /dev/null @@ -1,2 +0,0 @@ -diff,commit -services: - <%= dependency %> <%- end -%> <%- end -%> <%- if depends_on_system_test? -%> selenium: image: seleniarm/standalone-chromium restart: unless-stopped @@ -4310 +439 @@ services: - default volumes: - redis-data:/data <%- end -%> <%- end -%> <%= devcontainer_db_service_yaml(indentation: 4) %> <%- if !devcontainer_volumes.empty? -%> volumes: <%- devcontainer_volumes.each do |volume| -%>,Remove extra line generation from devecontainer compose.yaml From 301ac334ede9d07f0c1643d3e5c76a8255627e78 Mon Sep 17 00:00:00 2001 From: Linus Oleander Date: Tue, 28 May 2024 07:53:56 +0200 Subject: [PATCH 2/3] Remove examples module and update dependencies --- Cargo.lock | 12 ++++++------ Cargo.toml | 47 ++++++++++++++++++----------------------------- src/main.rs | 4 ---- 3 files changed, 24 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f53c605e..fa0085e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -88,9 +88,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27a4bd113ab6da4cd0f521068a6e2ee1065eab54107266a11835d02c8ec86a37" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" [[package]] name = "async-convert" @@ -1605,18 +1605,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.202" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.202" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 7416e6b8..78ce6ed5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,43 +18,32 @@ path = "src/main.rs" name = "git-ai-hook" path = "src/bin/hook.rs" -[[bin]] -name = "git-ai-clear" -path = "src/bin/clear.rs" - [dependencies] -tokio = { version = "1.37.0", features = ["rt-multi-thread", "macros"] } -reqwest = { version = "0.11.27", features = ["json"] } -serde = { version = "1", features = ["derive"] } -serde_derive = "1.0.202" +anyhow = "1.0.86" async-openai = "0.18.3" -serde_json = "1.0.117" -env_logger = "0.10.2" -lazy_static = "1.4.0" -thiserror = "1.0.61" -indicatif = "0.17.8" -serde_ini = "0.2.0" -console = "0.15.8" -anyhow = "1.0.85" -dotenv = "0.15.0" -config = "0.13.4" +clap = { version = "4.5.4", features = ["derive", "wrap_help"] } colored = "2.1.0" +config = "0.13.4" +console = "0.15.8" ctrlc = "3.4.4" -log = "0.4.21" +dotenv = "0.15.0" +env_logger = "0.10.2" +git2 = { version = "0.18.3" } home = "0.5.9" - -[dependencies.clap] -features = ["derive", "wrap_help"] -default-features = true -version = "4.5.4" - -[dependencies.git2] -default-features = false -version = "0.18.3" +indicatif = "0.17.8" +lazy_static = "1.4.0" +log = "0.4.21" +reqwest = { version = "0.11.27", features = ["json"] } +serde = { version = "1", features = ["derive"] } +serde_derive = "1.0.203" +serde_ini = "0.2.0" +serde_json = "1.0.117" +thiserror = "1.0.61" +tokio = { version = "1.37.0", features = ["rt-multi-thread", "macros"] } [dev-dependencies] tempfile = "3.10.1" -anyhow = "1.0.85" +anyhow = "1.0.86" git2 = "0.18.3" rand = "0.8.5" diff --git a/src/main.rs b/src/main.rs index 62fc62b0..9a3a3e05 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,6 @@ mod uninstall; mod install; mod reinstall; mod config; -mod examples; use clap::{Arg, Command}; use anyhow::Result; @@ -99,9 +98,6 @@ async fn main() -> Result<()> { } _ => unreachable!() }, - Some(("examples", args)) => { - examples::run(args).await?; - } _ => unreachable!() } From ebcbed362cd20eb12df8efb0a264591f99df5e81 Mon Sep 17 00:00:00 2001 From: Linus Oleander Date: Tue, 28 May 2024 07:55:58 +0200 Subject: [PATCH 3/3] Implement directory traversal function in filesystem.rs --- src/filesystem.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/filesystem.rs b/src/filesystem.rs index e4296cd3..3c797332 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -52,9 +52,9 @@ impl File { } impl From<&File> for Dir { - fn from(file: &File) -> Self { - file.parent() - } + fn from(file: &File) -> Self { + file.parent() + } } impl std::fmt::Display for File {