From e9bcd120d597ece207ecf0cb0a4fcef4f074a797 Mon Sep 17 00:00:00 2001 From: iamprecieee Date: Wed, 19 Feb 2025 22:44:10 +0100 Subject: [PATCH] feat(analyzer): add extra checks for quality of commit messages - add gibberish check to ensure message contains valid words - add context check to ensure messages contain a subject and body - update README to reflect changes Also, improve training data and examples to match standard commit messages --- README.md | 9 +- src/config/data.py | 144 +++++++++++++++---------------- src/config/integration_config.py | 4 +- src/core/analyzer.py | 76 +++++++++++++++- src/routers/telex.py | 2 +- tests/test_telex.py | 41 ++++++++- 6 files changed, 196 insertions(+), 80 deletions(-) diff --git a/README.md b/README.md index f77cea5..467e830 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ project_root/ ### Core Analysis Engine -The system implements a three-stage commit analysis pipeline: +The system implements a multi-step process to evaluate the quality of commit messages: #### Direct Pattern Matching - Matches against predefined commit types @@ -173,6 +173,13 @@ semantic_patterns = { } ``` +#### Content Quality +- It verifies that the commit message contains enough words. Messages with fewer than 5 words are flagged with a high-severity warning, while those with 5–9 words are flagged with a medium-severity warning. +- Scans the commit message for words that might be gibberish. + +#### Context Evaluation +- Ensures that the commit message provides adequate context. It looks for a clear separation between the subject line and the detailed body (detected via a double newline \n\n). If this separation is missing, the method suggests splitting the message to improve clarity. + ## API Documentation ### GitHub Webhook Endpoint diff --git a/src/config/data.py b/src/config/data.py index 29d799c..740bc72 100644 --- a/src/config/data.py +++ b/src/config/data.py @@ -12,96 +12,96 @@ } example_commits = { - "feat": "feat(auth): implement OAuth2 with role-based access", - "fix": "fix(api): resolve data race in concurrent requests", - "docs": "docs(api): update authentication documentation", - "refactor": "refactor(core): simplify error handling logic", - "chore": "chore(deps): update dependency versions to latest", - "style": "style(components): format according to style guide", - "perf": "perf(queries): optimize database index for faster lookups", - "test": "test(api): add integration tests for payment flow", + "feat": "feat(auth): implement OAuth2 with role-based access\n\nImplemented OAuth2 protocol with role-based control to enhance security and scalability.", + "fix": "fix(api): resolve data race in concurrent requests\n\nFixed a race condition by adding synchronization mechanisms to prevent concurrent data modifications.", + "docs": "docs(api): update authentication documentation\n\nUpdated API documentation to detail the new authentication methods and error handling procedures.", + "refactor": "refactor(core): simplify error handling logic\n\nRefactored error handling to remove redundancies and improve code maintainability.", + "chore": "chore(deps): update dependency versions to latest\n\nUpgraded dependencies to address security vulnerabilities and improve performance.", + "style": "style(components): format according to style guide\n\nReformatted code to comply with style guidelines for better readability and consistency.", + "perf": "perf(queries): optimize database index for faster lookups\n\nEnhanced database indexing strategy to improve query performance on large datasets.", + "test": "test(api): add integration tests for payment flow\n\nAdded integration tests to ensure reliable and consistent performance of the payment processing system." } commit_training_data = { "feat": [ - "feat(auth): implement JWT authentication flow", - "feat(ui): add dark mode toggle with system preference detection", - "feat(api): implement rate limiting middleware", - "feat(forms): add client-side form validation", - "feat(search): implement elasticsearch integration", - "feat(cache): add Redis caching layer for API responses", - "feat(auth): implement social login providers", - "feat(security): add two-factor authentication support", + "feat(auth): implement JWT authentication flow\n\nImplemented JWT-based authentication with token expiration handling to secure user sessions.", + "feat(ui): add dark mode toggle with system preference detection\n\nAdded dark mode toggle that automatically adjusts based on system settings for improved user experience.", + "feat(api): implement rate limiting middleware\n\nIntroduced rate limiting to prevent API abuse and ensure system stability under high load.", + "feat(forms): add client-side form validation\n\nImplemented real-time form validation to provide immediate feedback and improve data integrity.", + "feat(search): implement elasticsearch integration\n\nIntegrated Elasticsearch to boost search performance and enhance result accuracy.", + "feat(cache): add Redis caching layer for API responses\n\nAdded a Redis caching layer to reduce response times and improve overall scalability.", + "feat(auth): implement social login providers\n\nEnabled social login functionality to simplify the authentication process for users.", + "feat(security): add two-factor authentication support\n\nIntroduced two-factor authentication to enhance account security and reduce fraud risks.", ], "fix": [ - "fix(auth): resolve token refresh race condition", - "fix(api): handle concurrent request deadlocks", - "fix(validation): correct email regex pattern", - "fix(memory): resolve memory leak in WebSocket connections", - "fix(security): patch SQL injection vulnerability", - "fix(cors): resolve cross-origin request issues", - "fix(cache): handle cache invalidation edge cases", - "fix(ui): resolve mobile viewport rendering issues", + "fix(auth): resolve token refresh race condition\n\nFixed a race condition in the token refresh logic by implementing proper synchronization mechanisms.", + "fix(api): handle concurrent request deadlocks\n\nResolved API deadlocks by optimizing resource locking and request handling procedures.", + "fix(validation): correct email regex pattern\n\nUpdated the email validation regex to accurately handle various valid email formats.", + "fix(memory): resolve memory leak in WebSocket connections\n\nAddressed a memory leak by ensuring WebSocket connections are properly closed after use.", + "fix(security): patch SQL injection vulnerability\n\nPatched a SQL injection vulnerability by sanitizing user inputs and using parameterized queries.", + "fix(cors): resolve cross-origin request issues\n\nAdjusted CORS settings to correctly handle cross-origin requests and improve security.", + "fix(cache): handle cache invalidation edge cases\n\nFixed issues with cache invalidation to ensure data consistency across different layers.", + "fix(ui): resolve mobile viewport rendering issues\n\nCorrected viewport meta tag settings to improve rendering on mobile devices.", ], "docs": [ - "docs(api): update REST endpoints documentation", - "docs(setup): improve installation instructions", - "docs(auth): document OAuth2 implementation details", - "docs(deploy): add AWS deployment guide", - "docs(contributing): update PR guidelines", - "docs(api): add GraphQL schema documentation", - "docs(security): document security best practices", - "docs(testing): update e2e testing guide", + "docs(api): update REST endpoints documentation\n\nRevised REST API documentation to include detailed information on new endpoints and error handling.", + "docs(setup): improve installation instructions\n\nEnhanced installation guide with step-by-step instructions and troubleshooting tips for new users.", + "docs(auth): document OAuth2 implementation details\n\nProvided comprehensive documentation covering OAuth2 flows, configuration, and security considerations.", + "docs(deploy): add AWS deployment guide\n\nCreated a detailed guide for deploying the application on AWS, including best practices and configuration tips.", + "docs(contributing): update PR guidelines\n\nUpdated contributing guidelines to reflect new review processes and code standards.", + "docs(api): add GraphQL schema documentation\n\nIncluded detailed documentation for the GraphQL schema to help developers understand query structures.", + "docs(security): document security best practices\n\nOutlined security best practices and compliance requirements for developers and auditors.", + "docs(testing): update e2e testing guide\n\nRevised the end-to-end testing documentation with new scenarios and tool integrations.", ], "refactor": [ - "refactor(api): split monolithic controller into modules", - "refactor(db): optimize database query patterns", - "refactor(auth): separate authentication logic", - "refactor(middleware): improve error handling flow", - "refactor(utils): create shared utility functions", - "refactor(services): implement repository pattern", - "refactor(validation): centralize validation logic", - "refactor(config): improve configuration management", + "refactor(api): split monolithic controller into modules\n\nRefactored the API controller into modular components to enhance maintainability and scalability.", + "refactor(db): optimize database query patterns\n\nImproved database performance by optimizing complex queries and reducing unnecessary joins.", + "refactor(auth): separate authentication logic\n\nIsolated authentication logic into a dedicated module for clearer structure and easier testing.", + "refactor(middleware): improve error handling flow\n\nStreamlined error handling within middleware to ensure consistent responses across the application.", + "refactor(utils): create shared utility functions\n\nExtracted common code into shared utilities to reduce duplication and simplify maintenance.", + "refactor(services): implement repository pattern\n\nAdopted the repository pattern in the services layer to decouple business logic from data access.", + "refactor(validation): centralize validation logic\n\nCentralized various validation routines into a single module for consistency and reuse.", + "refactor(config): improve configuration management\n\nRefactored configuration handling by separating environment-specific settings into distinct files.", ], "chore": [ - "chore(deps): update package dependencies to latest", - "chore(ci): update GitHub Actions workflow", - "chore(docker): optimize container build process", - "chore(lint): update ESLint configuration", - "chore(git): update gitignore patterns", - "chore(deps): remove unused dependencies", - "chore(scripts): update build scripts", - "chore(types): update TypeScript definitions", + "chore(deps): update package dependencies to latest\n\nUpgraded all package dependencies to their latest versions to address security issues and improve performance.", + "chore(ci): update GitHub Actions workflow\n\nRevised the CI pipeline to streamline automated testing and deployment processes.", + "chore(docker): optimize container build process\n\nOptimized the Dockerfile to reduce image build times and improve container efficiency.", + "chore(lint): update ESLint configuration\n\nUpdated ESLint rules to enforce new coding standards and remove deprecated configurations.", + "chore(git): update gitignore patterns\n\nRefined the .gitignore file to exclude unnecessary files and reduce repository clutter.", + "chore(deps): remove unused dependencies\n\nCleaned up the project by removing outdated and unused dependencies to simplify maintenance.", + "chore(scripts): update build scripts\n\nEnhanced build scripts for better readability and efficiency during the deployment process.", + "chore(types): update TypeScript definitions\n\nUpdated TypeScript definition files to reflect recent changes in the codebase.", ], "style": [ - "style(css): align with design system guidelines", - "style(components): update button styling", - "style(layout): improve responsive grid system", - "style(theme): update color palette variables", - "style(forms): standardize input field styling", - "style(fonts): update typography system", - "style(animations): refine transition effects", - "style(icons): update icon system to SVG", + "style(css): align with design system guidelines\n\nUpdated CSS styles to conform with the latest design system standards for better consistency.", + "style(components): update button styling\n\nRefined button styling to improve visual hierarchy and overall usability in the UI.", + "style(layout): improve responsive grid system\n\nEnhanced the grid layout to ensure consistent behavior across multiple device sizes.", + "style(theme): update color palette variables\n\nModified theme variables to reflect new branding and improve the overall aesthetic appeal.", + "style(forms): standardize input field styling\n\nStandardized the styling of form inputs for a cohesive look throughout the application.", + "style(fonts): update typography system\n\nUpdated typography settings to enhance readability and maintain visual consistency.", + "style(animations): refine transition effects\n\nImproved transition effects for smoother animations and better user interaction.", + "style(icons): update icon system to SVG\n\nReplaced icon fonts with SVG icons to ensure scalability and clarity on all devices.", ], "perf": [ - "perf(images): implement lazy loading strategy", - "perf(api): add query result caching", - "perf(db): optimize database indices", - "perf(bundle): reduce JavaScript bundle size", - "perf(assets): implement CDN distribution", - "perf(queries): optimize database join operations", - "perf(cache): implement LRU caching strategy", - "perf(api): implement response compression", + "perf(images): implement lazy loading strategy\n\nImplemented lazy loading for images to defer off-screen loading and improve page load times.", + "perf(api): add query result caching\n\nIntroduced caching for API query results to reduce response times and lower server load.", + "perf(db): optimize database indices\n\nRevised database indices to accelerate query performance and reduce data retrieval latency.", + "perf(bundle): reduce JavaScript bundle size\n\nMinimized bundle size by removing unused code and optimizing dependency imports.", + "perf(assets): implement CDN distribution\n\nConfigured CDN distribution for static assets to boost load times and global accessibility.", + "perf(queries): optimize database join operations\n\nEnhanced join query efficiency to better handle large datasets and reduce processing time.", + "perf(cache): implement LRU caching strategy\n\nAdopted an LRU caching strategy to improve memory management and response speed.", + "perf(api): implement response compression\n\nEnabled compression for API responses to decrease payload size and improve transfer speeds.", ], "test": [ - "test(api): add integration tests for auth flow", - "test(ui): add unit tests for form validation", - "test(e2e): add checkout flow tests", - "test(utils): improve test coverage for helpers", - "test(auth): add OAuth callback tests", - "test(api): add load testing scenarios", - "test(security): add penetration testing suite", - "test(performance): add benchmark tests", + "test(api): add integration tests for auth flow\n\nAdded comprehensive integration tests to validate the authentication flow under various scenarios.", + "test(ui): add unit tests for form validation\n\nImplemented unit tests to ensure that all form validations perform correctly and reliably.", + "test(e2e): add checkout flow tests\n\nDeveloped end-to-end tests to simulate the complete checkout process and identify any issues.", + "test(utils): improve test coverage for helpers\n\nEnhanced test coverage for utility functions to catch edge cases and improve overall stability.", + "test(auth): add OAuth callback tests\n\nAdded tests specifically for OAuth callback functionality to ensure proper third-party integration.", + "test(api): add load testing scenarios\n\nImplemented load testing to evaluate API performance under high traffic conditions.", + "test(security): add penetration testing suite\n\nIntroduced a penetration testing suite to identify and mitigate potential security vulnerabilities.", + "test(performance): add benchmark tests\n\nAdded benchmark tests to measure performance improvements and track regression over time.", ], } diff --git a/src/config/integration_config.py b/src/config/integration_config.py index 071c16b..111e5f4 100644 --- a/src/config/integration_config.py +++ b/src/config/integration_config.py @@ -39,14 +39,14 @@ def generate_json_config(): "type": "text", "required": True, "description": "Set example commits for each custom commit type to guide new devs. These appear in suggestions when similar commits need fixing. Format: {'type1': 'example message1', 'type2': 'example message 2'}.", - "default": "{'feat': 'feat(auth): implement 0Auth2 with role-based access', 'fix': 'fix(api): resolve data race in concurrent requests'}" + "default": "{'feat': 'feat(auth): implement OAuth2 with role-based access\n\nImplemented OAuth2 protocol with role-based control to enhance security and scalability.', 'fix': 'fix(api): resolve data race in concurrent requests\n\nFixed a race condition by adding synchronization mechanisms to prevent concurrent data modifications.'}" }, { "label": "Training Data", "type": "text", "required": True, "description": "Add custom data to train the analyzer with commits that match preferred style. More examples = better suggestions. Format: {'type1': ['example1', 'example2'], 'type2': ['example3', 'example4']}. The analyzer learns from these to better match preferred conventions.", - "default": "{'feat': ['feat(ui): add dark mode toggle with system preference detection','feat(auth): implement JWT authentication flow','feat(api): implement rate limiting middleware','feat(forms): add client-side form validation','feat(search): implement elasticsearch integration','feat(cache): add Redis caching layer for API responses','feat(auth): implement social login providers','feat(security): add two-factor authentication support']}" + "default": "{'feat': ['feat(auth): implement OAuth2 with role-based access\n\nImplemented OAuth2 protocol with role-based control to enhance security and scalability.','feat(search): implement elasticsearch integration\n\nIntegrated Elasticsearch to boost search performance and enhance result accuracy.']}" } ], "target_url": settings.target_url diff --git a/src/core/analyzer.py b/src/core/analyzer.py index 96d43ec..3db4dee 100644 --- a/src/core/analyzer.py +++ b/src/core/analyzer.py @@ -9,7 +9,7 @@ from sklearn.metrics.pairwise import cosine_similarity from .models import CommitIssue from datetime import datetime -import ast +import ast, string class CommitAnalyzer: @@ -114,11 +114,83 @@ def _suggest_commit_type(self, message: str) -> str: return max(semantic_scores.items(), key=lambda x: x[1])[0] return "chore" - + + def _check_gibberish(self, word: str) -> bool: + """ + Determines if a word is likely to be gibberish based on vowel content. + + Criteria for identifying gibberish: + - Words shorter than 2 characters must contain at least one vowel + - Words 2 characters or longer must have a vowel ratio of at least 0.2 + """ + cleaned_word = word.strip(string.punctuation) + if not cleaned_word: + return False + + vowels = set("aeiouyAEIOUY") + vowel_count = sum(1 for char in cleaned_word if char in vowels) + + if len(cleaned_word) < 2: + return vowel_count == 0 + else: + vowel_ratio = vowel_count / len(cleaned_word) + return vowel_ratio < 0.2 + + def _check_content_quality(self, message: str) -> list[CommitIssue]: + """ + Assesses the quality of the commit message content. + Checks if the message is too short or lacks sufficient detail. + Also checks the commit message for potential gibberish words. + """ + issues = [] + words = message.split() + word_count = len(words) + if word_count < 5: + issues.append(CommitIssue( + severity="high", + message="Commit message is too short", + suggestion="Try providing a brief summary that explains what change was made and why." + )) + elif word_count < 10: + issues.append(CommitIssue( + severity="medium", + message="Commit message might be too brief", + suggestion="Consider adding a bit more detail." + )) + + gibberish_words = [ + word.strip(string.punctuation) + for word in words + if self._check_gibberish(word) and word.strip(string.punctuation) + ] + if gibberish_words: + issues.append(CommitIssue( + severity="high", + message="Potential gibberish words detected in commit message", + suggestion=f"Review and correct the following words: {', '.join(gibberish_words)}" + )) + return issues + + def _check_context(self, message: str) -> list[CommitIssue]: + """ + Evaluates whether the commit message provides adequate context. + Checks for and suggests separation of the message into a subject and a detailed body if needed. + """ + issues = [] + if "\n\n" not in message: + issues.append(CommitIssue( + severity="medium", + message="Commit message may be missing detailed context", + suggestion="Consider splitting your commit message into a concise subject and a detailed body." + )) + return issues + def analyze_commit(self, message: str) -> list[CommitIssue]: """Analyzes a commit message and returns any quality issues found.""" issues = [] issues.extend([*self._check_format(message)]) + issues.extend([*self._check_content_quality(message)]) + issues.extend([*self._check_context(message)]) return [issue for issue in issues if issue] def format_analysis(self, commit: dict, issues: list[CommitIssue]) -> str: diff --git a/src/routers/telex.py b/src/routers/telex.py index 5a7f8c9..79e5890 100644 --- a/src/routers/telex.py +++ b/src/routers/telex.py @@ -23,7 +23,7 @@ async def telex_webhook( results to Slack if issues are found. """ try: - commit_message = ast.literal_eval(payload.message) + commit_message = ast.literal_eval(payload.message.replace("\n", "\\n")) except Exception as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, diff --git a/tests/test_telex.py b/tests/test_telex.py index 3e633a6..971ebdb 100644 --- a/tests/test_telex.py +++ b/tests/test_telex.py @@ -6,7 +6,7 @@ def test_send_from_telex_success(): response = client.post( "/webhook/telex?is_test=true", json={ - "message": '[{"id": "8ce4cf04f4rw6w8600675237350b14b4", "message": "cleanup user auth", "timestamp": "2025-02-18T10:17:54+01:00", "url": "https://github.com/8", "author": {"name": "test", "email": "test@gmail.com"}}]', + "message": '[{"id": "8ce4cf04f4rw6w8600675237350b14b4", "message": "fix(auth): child\n\nFixed a race condition in the token refresh logic by implementing proper synchronization mechanisms.", "timestamp": "2025-02-18T10:17:54+01:00", "url": "https://github.com/8", "author": {"name": "test", "email": "test@gmail.com"}}]', "settings": [ { "label": "commit_types", @@ -14,10 +14,47 @@ def test_send_from_telex_success(): "description": "Custom commit types and keywords", "required": False, "default": "{'feat': ['add', 'implement', 'new', 'introduce'], 'fix': ['fix', 'resolve', 'patch', 'address']}", + }, + { + "label": "Example Commits", + "type": "text", + "required": True, + "description": "Set example commits for each custom commit type to guide new devs. These appear in suggestions when similar commits need fixing. Format: {'type1': 'example message1', 'type2': 'example message 2'}.", + "default": "{'feat': 'feat(auth): implement OAuth2 with role-based access\n\nImplemented OAuth2 protocol with role-based control to enhance security and scalability.', 'fix': 'fix(api): resolve data race in concurrent requests\n\nFixed a race condition by adding synchronization mechanisms to prevent concurrent data modifications.'}" + } + ], + }, + ) + assert response.status_code == 200 + + + + + +def test_send_from_telex_failure(): + response = client.post( + "/webhook/telex?is_test=true", + json={ + "message": '[{"id": "8ce4cf04f4rw6w8600675237350b14b4", "message": "fix(auth): child\n\nFixed a race condcvghdczhjvjhzcvhjvzhjvhjvczjonization mechanisms.", "timestamp": "2025-02-18T10:17:54+01:00", "url": "https://github.com/8", "author": {"name": "test", "email": "test@gmail.com"}}]', + "settings": [ + { + "label": "commit_types", + "type": "text", + "description": "Custom commit types and keywords", + "required": False, + "default": "{'feat': ['add', 'implement', 'new', 'introduce'], 'fix': ['fix', 'resolve', 'patch', 'address']}", + }, + { + "label": "Example Commits", + "type": "text", + "required": True, + "description": "Set example commits for each custom commit type to guide new devs. These appear in suggestions when similar commits need fixing. Format: {'type1': 'example message1', 'type2': 'example message 2'}.", + "default": "{'feat': 'feat(auth): implement OAuth2 with role-based access\n\nImplemented OAuth2 protocol with role-based control to enhance security and scalability.', 'fix': 'fix(api): resolve data race in concurrent requests\n\nFixed a race condition by adding synchronization mechanisms to prevent concurrent data modifications.'}" } ], }, ) assert response.status_code == 200 response_data = json.loads(response.content.decode()) - assert "refactor" in response_data + for word in ("Potential gibberish words", "too brief"): + assert word in response_data