Description:

Classifies the source files of a sample microservice project, PlantBasedPizza.LoyaltyPoints, according to business and technical concerns.
Classification results are stored as an output.csv file in the results/classification folder, in a subfolder named after the LLM that performed the classification.

Key Features:

- LLM Provider Abstraction: Strategy pattern with OpenAI, Anthropic, and Ollama providers
- File Scanner: Scans all 4 LoyaltyPoints projects for .cs and appsettings.json files
- Semantic Classification: Extracts business context, rules, workflows, and integration points
- CSV Output: Structured results for the vector database
- Intermediate Results: JSON files for debugging and comparison
- Provider Comparison: Test all 3 LLM providers on the same code

In [1]:
from classification.code_scanner import CodeScanner
from pathlib import Path
from classification.classification_pipeline import ClassificationPipeline
from dotenv import load_dotenv
import os


# Load variables from a local .env file (if one exists) into the process
# environment so the os.getenv lookups in this notebook can see them.
load_dotenv()

# Root folder of the LoyaltyPoints application, configured through the
# LOYALTY_POINTS_PROJECT_ROOT environment variable rather than hardcoded here.
LOYALTY_POINTS_APPLICATION = os.getenv("LOYALTY_POINTS_PROJECT_ROOT")
if not LOYALTY_POINTS_APPLICATION:
    # Stop here with an actionable message instead of handing None to
    # CodeScanner and failing later with an unrelated error.
    raise RuntimeError(
        "LOYALTY_POINTS_PROJECT_ROOT is not set; "
        "define it in the environment or in the .env file."
    )
print(LOYALTY_POINTS_APPLICATION)

# Project files that are passed to CodeScanner together with the root folder.
projects = [
    "PlantBasedPizza.LoyaltyPoints.Api.csproj",
    "PlantBasedPizza.LoyaltyPoints.Internal.csproj",
    "PlantBasedPizza.LoyaltyPoints.Shared.csproj",
    "PlantBasedPizza.LoyaltyPoints.Worker.csproj"
]

# Base folder (relative to the working directory) under which each model
# gets its own results subfolder.
classification_results = "results/classification"


def create_model_folder(model):
    """Ensure the results folder for ``model`` exists and return its output paths.

    The folder ``<classification_results>/<model>`` is created together with
    any missing parents; an already existing folder is left as is. Only that
    folder is created here: neither the CSV file nor the intermediate results
    folder is touched.

    Args:
        model: Name used for the per-model subfolder.

    Returns:
        A ``(output_csv, intermediate_results)`` tuple of path strings, both
        located inside the per-model folder.
    """
    target_dir = Path(f"{classification_results}/{model}")
    target_dir.mkdir(parents=True, exist_ok=True)
    return f"{target_dir}/output.csv", f"{target_dir}/intermediate_results"

# One scanner instance, passed as `scanner=` to every provider pipeline below
# so each LLM run uses the same root folder and project list.
code_scanner = CodeScanner(
    LOYALTY_POINTS_APPLICATION,
    projects,
)

D:/src/learning/dotnet/event-driven-course/module5/src/PlantBasedPizza.LoyaltyPoints/application


In [4]:
from llms.anthropic import AnthropicClassifier

# Read the key with os.getenv, matching how the rest of the notebook reads
# its settings, and stop early when it is missing so the run does not start
# with api_key=None.
anthropic_key = os.getenv("ANTHROPIC_API_KEY")
if not anthropic_key:
    raise RuntimeError(
        "ANTHROPIC_API_KEY is not set; "
        "define it in the environment or in the .env file."
    )

# Results for this provider go to results/classification/claude4.0/.
output_csv, intermediate_results_dir = create_model_folder("claude4.0")
anth_provider = AnthropicClassifier(api_key=anthropic_key, model="claude-sonnet-4-0")
pipeline = ClassificationPipeline(anth_provider,
                                  scanner=code_scanner,
                                  output_csv=output_csv,
                                  intermediate_dir=intermediate_results_dir)

print("\n=== Running Classification with Anthropic ===")
anthropic_results = pipeline.run_classification()

print("\n=== Classification Complete ===")
print(f"Results saved to: {output_csv}")
print(f"Intermediate results in: {intermediate_results_dir}")

Initialized Anthropic provider with model: claude-sonnet-4-0

=== Running Classification with Anthropic ===
Starting classification with provider: Anthropic-claude-sonnet-4-0
Scanning code files...
Scanned 6 files from PlantBasedPizza.LoyaltyPoints.Api.csproj
Scanned 7 files from PlantBasedPizza.LoyaltyPoints.Internal.csproj
Scanned 13 files from PlantBasedPizza.LoyaltyPoints.Shared.csproj
Scanned 7 files from PlantBasedPizza.LoyaltyPoints.Worker.csproj
Found 33 files to classify
Classifying files...
Classifying 1/33: PlantBasedPizza.LoyaltyPoints.Api\ObservabilityExtensions.cs
Classifying 2/33: PlantBasedPizza.LoyaltyPoints.Api\Program.cs
Classifying 3/33: PlantBasedPizza.LoyaltyPoints.Api\appsettings.Development.json
Classifying 4/33: PlantBasedPizza.LoyaltyPoints.Api\appsettings.json
Classifying 5/33: PlantBasedPizza.LoyaltyPoints.Api\bin\Debug\net9.0\appsettings.Development.json
Classifying 6/33: PlantBasedPizza.LoyaltyPoints.Api\bin\Debug\net9.0\appsettings.json
Classifying 7/33: 

In [None]:
from llms.ollama import OllamaClassifier

# Results for this provider go to results/classification/codellama/.
output_csv, intermediate_results_dir = create_model_folder("codellama")

# The Ollama provider is addressed by URL; no API key is read in this cell.
ollama_provider = OllamaClassifier(
    model="codellama:7b",
    base_url="http://localhost:11434",
)
pipeline = ClassificationPipeline(
    ollama_provider,
    scanner=code_scanner,
    output_csv=output_csv,
    intermediate_dir=intermediate_results_dir,
)

print("\n=== Running Classification with Ollama ===")
ollama_results = pipeline.run_classification()

# Summary of where this run wrote its outputs.
print(
    "\n=== Classification Complete ===",
    f"Results saved to: {output_csv}",
    f"Intermediate results in: {intermediate_results_dir}",
    sep="\n",
)

In [None]:
from llms.openai import OpenAIClassifier

# Stop early with a clear message when the key is missing, so the run does
# not start with api_key=None.
openai_key = os.getenv("OPENAI_API_KEY")
if not openai_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; "
        "define it in the environment or in the .env file."
    )

# Results for this provider go to results/classification/gpt4.1/.
output_csv, intermediate_results_dir = create_model_folder("gpt4.1")
openai_provider = OpenAIClassifier(api_key=openai_key, model="gpt-4.1-2025-04-14")
pipeline = ClassificationPipeline(openai_provider,
                                  scanner=code_scanner,
                                  output_csv=output_csv,
                                  intermediate_dir=intermediate_results_dir)

print("\n=== Running Classification with OpenAI ===")
openai_results = pipeline.run_classification()

print("\n=== Classification Complete ===")
print(f"Results saved to: {output_csv}")
print(f"Intermediate results in: {intermediate_results_dir}")