# Jigsaw - Agile Community Rules Classification
### https://www.kaggle.com/competitions/jigsaw-agile-community-rules

## Constrained Generation for Reddit Content Moderation: Binary Classification Using Logit Probabilities
Technical Overview
This notebook implements a constrained generation approach for automated content moderation using a Llama 3.2-1B model. Instead of relying on traditional text parsing, we employ logits processors to restrict the model's output vocabulary to only "True" and "False" tokens, then extract probabilistic confidence scores directly from the model's logit distributions.
A reference notebook from  https://www.kaggle.com/code/xbar19/jigsaw-llama3-1-8b-instruct-fine-tuned is appreciated.

## Install packages on Kaggle: Add-ons > Install Dependencies 

```bash
pip install pip3-autoremove
pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu124
pip install unsloth vllm
pip install scikit-learn
```

In [22]:
import kagglehub
import pandas as pd
import os
import glob

# Check if running on Kaggle
if 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
    # Running on Kaggle
    base_path = "/kaggle/input/jigsaw-agile-community-rules/"
    df_train = pd.read_csv(f"{base_path}train.csv")
    df_test = pd.read_csv(f"{base_path}test.csv")
else:
    # Running locally
    base_path = "./data/synthetic_generation/"
    
    # Find all train files
    train_files = glob.glob(f"{base_path}*train*.csv")
    if train_files:
        train_dfs = [pd.read_csv(file) for file in train_files]
        df_train = pd.concat(train_dfs, ignore_index=True)
        print(f"Concatenated {len(train_files)} train files: {train_files}")
    else:
        raise FileNotFoundError(f"No train files found in {base_path}")
    
    # Find all test files
    test_files = glob.glob(f"{base_path}*test*.csv")
    if test_files:
        test_dfs = [pd.read_csv(file) for file in test_files]
        df_test = pd.concat(test_dfs, ignore_index=True)
        print(f"Concatenated {len(test_files)} test files: {test_files}")
    else:
        raise FileNotFoundError(f"No test files found in {base_path}")

print(f"Using path: {base_path}")
print(f"Train shape: {df_train.shape}")
print(f"Test shape: {df_test.shape}")
df_train.head(2)

Concatenated 2 train files: ['./data/synthetic_generation/synthetic_data_train_2.csv', './data/synthetic_generation/synthetic_data_train_1.csv']
Concatenated 1 test files: ['./data/synthetic_generation/synthetic_data_test.csv']
Using path: ./data/synthetic_generation/
Train shape: (10000, 10)
Test shape: (1000, 10)


Unnamed: 0,subreddit,rule,raw_generated_data,error,positive_example_1,negative_example_1,positive_example_2,negative_example_2,test_comment,violates_rule
0,theater,"""No Piracy: Discussion or sharing of illegal d...","Positive Example 1: ""Hey everyone, does anyone...",,"""Hey everyone, does anyone know where I can fi...","""I'm researching the legal implications of fan...","""Guys, I just found a link to a free streaming...","""I'm so excited to see 'Hadestown' next month!...","""I just saw a great student production of 'Lit...",False
1,frugal,"""Provide realistic timelines and expectations ...","Positive Example 1: ""I can fix your car! Just ...",,"""I can fix your car! Just bring it to my place...","""I'm happy to take a look at your car issue. I...","""I can do your taxes super cheap! I'll get the...","""I'm a CPA and offer tax preparation services....","""Hey, I'm a freelance graphic designer. I can ...",False


In [23]:
df_test=df_test.dropna(subset=["violates_rule"])
print(df_test.shape)

(992, 10)


## Load LLM (llama-3.2-1B) model with vLLM (Suitable for batch inference) (logits output)

In [2]:
import multiprocessing as mp
mp.set_start_method('spawn', force=True)
import os
os.environ['VLLM_USE_V1'] = '0'  # Force V0 for logits processor support

import torch
import numpy as np
import pandas as pd
from vllm import LLM, SamplingParams
from transformers import LogitsProcessor
import math
from vllm.lora.request import LoRARequest


class TrueFalseLogitsProcessor(LogitsProcessor):
    """Forces model to only output True or False tokens"""
    def __init__(self, allowed_ids):
        self.allowed_ids = allowed_ids
        
    def __call__(self, input_ids, scores: torch.Tensor) -> torch.Tensor:
        # Create a mask that's -inf for all tokens except allowed ones
        mask = torch.full_like(scores, float('-inf'))
        mask[self.allowed_ids] = 0
        
        # Apply the mask to force only allowed tokens
        scores = scores + mask
        return scores

class LlamaClassifier:
    def __init__(self):
        # Model path selection
        if os.getenv('KAGGLE_KERNEL_RUN_TYPE'):
            self.model_path = "/kaggle/input/llama-3.2/transformers/1b-instruct/1"
        else:
            self.model_path = "unsloth/Llama-3.2-1B-Instruct"
        
        # Initialize model with LoRA support
        self.model = LLM(
            model=self.model_path,
            max_model_len=1024,
            gpu_memory_utilization=0.5,
            dtype="half",
            seed=123,
            enable_lora=True,  # Enable LoRA support
            max_lora_rank=64,  # Adjust based on your LoRA configuration
            max_loras=1,       # Maximum number of LoRA adapters to load
        )
        
        self.tokenizer = self.model.get_tokenizer()
        self.setup_token_constraints()
        
        # Sampling with constrained output
        logits_processors = [TrueFalseLogitsProcessor(self.KEEP)]
        self.sampling_params = SamplingParams(
            n=1,
            temperature=0,
            seed=777,
            skip_special_tokens=True,
            max_tokens=1,
            logits_processors=logits_processors,
            logprobs=2
        )
    
    def setup_token_constraints(self):
        """Get token IDs for 'False' and 'True'"""
        choices = ["False", "True"]
        self.KEEP = []
        for x in choices:
            c = self.tokenizer.encode(x, add_special_tokens=False)[0]
            self.KEEP.append(c)
        
        self.false_token_id = self.KEEP[0]
        self.true_token_id = self.KEEP[1]
        print(f"Constrained to tokens: {self.KEEP} = {choices}")

    def create_lora_request(self, lora_adapter_path, adapter_name="custom_adapter"):
        """Create LoRA request object"""
        return LoRARequest(
            lora_name=adapter_name,
            lora_int_id=1,
            lora_local_path=lora_adapter_path
        )
    
    def create_prompt(self, input_data: pd.Series):
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request.

### Instruction:
You are a really experienced moderator for the subreddit /r/{input_data['subreddit']}. 
Your job is to determine if the following reported comment violates the given rule.
Answer with only "True" or "False".

### Input:
Rule: {input_data['rule']}

Example 1:
{self.format_comment(input_data['positive_example_1'])}
Rule violation: True

Example 2:
{self.format_comment(input_data['negative_example_1'])}
Rule violation: False

Example 3:
{self.format_comment(input_data['positive_example_2'])}
Rule violation: True

Example 4:
{self.format_comment(input_data['negative_example_2'])}
Rule violation: False

Test sentence:
{self.format_comment(input_data['test_comment'])}

### Response:
Rule violation:"""
    
    def format_comment(self, comment):
        return "\n".join(["| " + line for line in comment.split('\n')])
    
    def predict_classification(self, input_data: pd.Series, lora_adapter_path=None):
        """Single prediction with optional LoRA"""
        prompt = self.create_prompt(input_data)
        
        # Add LoRA request if path provided
        generate_kwargs = {"use_tqdm": False}
        if lora_adapter_path:
            generate_kwargs["lora_request"] = self.create_lora_request(lora_adapter_path)
        
        responses = self.model.generate([prompt], self.sampling_params, **generate_kwargs)
        
        response = responses[0]
        predicted_text = response.outputs[0].text.strip()
        
        try:
            x = response.outputs[0].logprobs[0]
            
            # Extract probabilities for True/False tokens
            logprobs = []
            for k in self.KEEP:
                if k in x:
                    logprobs.append(math.exp(x[k].logprob))
                else:
                    logprobs.append(0)
            
            logprobs = np.array(logprobs)
            logprobs /= (logprobs.sum() + 1e-15)
            
            violation_probability = logprobs[1]  # True probability
            confidence = max(logprobs)
            
        except Exception as e:
            print(f"Error: {e}")
            violation_probability = 0.5
            confidence = 0.5
        
        return {
            'prediction': predicted_text,
            'is_violation': violation_probability > 0.5,
            'violation_probability': violation_probability,
            'confidence': confidence
        }
    
    def predict_batch(self, input_data_list, verbose=False, lora_adapter_path=None):
        """Batch predictions with optional LoRA"""
        prompts = [self.create_prompt(data) for data in input_data_list]
        
        # Add LoRA request if path provided
        generate_kwargs = {"use_tqdm": True}
        if lora_adapter_path:
            generate_kwargs["lora_request"] = self.create_lora_request(lora_adapter_path)
        
        responses = self.model.generate(prompts, self.sampling_params, **generate_kwargs)
        
        results = []
        for i, response in enumerate(responses):
            try:
                predicted_text = response.outputs[0].text.strip()
                x = response.outputs[0].logprobs[0]
                
                # Extract probabilities
                logprobs = []
                for k in self.KEEP:
                    if k in x:
                        logprobs.append(math.exp(x[k].logprob))
                    else:
                        logprobs.append(0)
                
                logprobs = np.array(logprobs)
                logprobs /= (logprobs.sum() + 1e-15)
                
                violation_probability = logprobs[1]
                confidence = max(logprobs)
                
            except Exception as e:
                print(f"Error {i}: {e}")
                violation_probability = 0.5
                confidence = 0.5
                predicted_text = "Error"
            
            result = {
                'prediction': predicted_text,
                'is_violation': violation_probability > 0.5,
                'violation_probability': violation_probability,
                'confidence': confidence,
                'sample_index': i
            }
            
            # if not verbose:
            #     print(f"Sample {i+1}: {predicted_text} (prob: {violation_probability:.4f})")
            
            results.append(result)
        
        return results

    # Convenience methods for LoRA usage
    def predict_with_lora(self, input_data: pd.Series, lora_adapter_path):
        """Single prediction using LoRA adapter"""
        return self.predict_classification(input_data, lora_adapter_path)
    
    def predict_batch_with_lora(self, input_data_list, lora_adapter_path, verbose=False):
        """Batch predictions using LoRA adapter"""
        return self.predict_batch(input_data_list, verbose, lora_adapter_path)

INFO 08-09 00:02:18 [__init__.py:235] Automatically detected platform cuda.


## Instantiate the model=vLLM

In [3]:
model=LlamaClassifier()

INFO 08-09 00:02:26 [config.py:1604] Using max model len 1024
INFO 08-09 00:02:26 [llm_engine.py:228] Initializing a V0 LLM engine (v0.10.0) with config: model='unsloth/Llama-3.2-1B-Instruct', speculative_config=None, tokenizer='unsloth/Llama-3.2-1B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=1024, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='xgrammar', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=123, served_model_name=unsloth/Llama-3.2

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 08-09 00:02:31 [default_loader.py:262] Loading weights took 0.81 seconds
INFO 08-09 00:02:31 [punica_selector.py:19] Using PunicaWrapperGPU.
INFO 08-09 00:02:31 [model_runner.py:1115] Model loading took 2.4357 GiB and 2.238438 seconds
INFO 08-09 00:02:32 [worker.py:295] Memory profiling takes 1.08 seconds
INFO 08-09 00:02:32 [worker.py:295] the current vLLM instance can use total_gpu_memory (15.69GiB) x gpu_memory_utilization (0.50) = 7.85GiB
INFO 08-09 00:02:32 [worker.py:295] model weights take 2.44GiB; non_torch_memory takes 0.05GiB; PyTorch activation peak memory takes 1.18GiB; the rest of the memory reserved for KV Cache is 4.18GiB.
INFO 08-09 00:02:32 [executor_base.py:113] # cuda blocks: 8559, # CPU blocks: 8192
INFO 08-09 00:02:32 [executor_base.py:118] Maximum concurrency for 1024 tokens per request: 133.73x
INFO 08-09 00:02:34 [model_runner.py:1385] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in

Capturing CUDA graph shapes:   0%|          | 0/35 [00:00<?, ?it/s]

INFO 08-09 00:02:46 [model_runner.py:1537] Graph capturing finished in 13 secs, took 0.24 GiB
INFO 08-09 00:02:46 [llm_engine.py:424] init engine (profile, create kv cache, warmup model) took 15.32 seconds
Constrained to tokens: [4139, 2575] = ['False', 'True']


## Prediction for test-dataset (batch)

In [24]:
# from tqdm import tqdm
# import numpy as np
# print(df_train.shape)
# def process_dataframe_in_batches(model, df, batch_size=12):
#     """Process dataframe using batch predictions with progress bar"""
    
#     # Calculate number of batches
#     num_batches = len(df) // batch_size + (1 if len(df) % batch_size > 0 else 0)
    
#     all_results = []
    
#     # Process in batches with progress bar
#     with tqdm(total=len(df), desc="Processing predictions") as pbar:
#         for i in range(0, len(df), batch_size):
#             # Get current batch
#             batch_df = df.iloc[i:i+batch_size]
            
#             # Convert batch to list of Series (input format for predict_batch)
#             batch_list = [row for _, row in batch_df.iterrows()]  # Fixed: removed asterisks and fixed variable name
            
#             # Get predictions for this batch
#             batch_results = model.predict_batch(batch_list)
            
#             # Add to results
#             all_results.extend(batch_results)
            
#             # Update progress bar
#             pbar.update(len(batch_df))
#             pbar.set_postfix({'Batch': f'{i//batch_size + 1}/{num_batches}'})
    
#     return all_results

# # Process in batches
# predictions = process_dataframe_in_batches(model, df_test, batch_size=12)
# df_test['predicted_rule_violation'] = [pred['prediction'] for pred in predictions] 

In [25]:
from tqdm import tqdm
import numpy as np

def process_dataframe_in_batches(model, df, batch_size=12):
    """Process dataframe using batch predictions with progress bar and error handling"""
    
    # Calculate number of batches
    num_batches = len(df) // batch_size + (1 if len(df) % batch_size > 0 else 0)
    
    all_results = []
    failed_batches = []
    failed_indices = []
    
    # Process in batches with progress bar
    with tqdm(total=len(df), desc="Processing predictions") as pbar:
        for i in range(0, len(df), batch_size):
            # Get current batch
            batch_df = df.iloc[i:i+batch_size]
            current_batch_num = i//batch_size + 1
            
            try:
                # Convert batch to list of Series (input format for predict_batch)
                batch_list = [row for idx, row in batch_df.iterrows()]  # Fixed syntax
                
                # Get predictions for this batch
                batch_results = model.predict_batch(batch_list)
                
                # Add batch indices to results for tracking
                for j, result in enumerate(batch_results):
                    result['original_index'] = batch_df.index[j]
                
                # Add to results
                all_results.extend(batch_results)
                
            except Exception as e:
                # Log the error and continue with next batch
                print(f"\nError processing batch {current_batch_num}: {str(e)}")
                failed_batches.append(current_batch_num)
                batch_indices = batch_df.index.tolist()
                failed_indices.extend(batch_indices)
                
                # Create error results for all items in failed batch
                for idx in batch_indices:
                    error_result = {
                        'prediction': 'Error',
                        'is_violation': False,  # Default to False for errors
                        'violation_probability': 0.0,
                        'confidence': 0.0,
                        'original_index': idx,
                        'error': f"Batch {current_batch_num} failed: {str(e)}",
                        'batch_error': True
                    }
                    all_results.append(error_result)
            
            # Update progress bar
            pbar.update(len(batch_df))
            pbar.set_postfix({
                'Batch': f'{current_batch_num}/{num_batches}',
                'Failed': len(failed_batches)
            })
    
    # Print summary
    if failed_batches:
        print(f"\nProcessing completed with {len(failed_batches)} failed batches.")
        print(f"Failed batch numbers: {failed_batches}")
        print(f"Total failed rows: {len(failed_indices)}")
    else:
        print(f"\nAll {num_batches} batches processed successfully!")
    
    return all_results, failed_batches, failed_indices

# Process in batches with error handling
predictions, failed_batches, failed_indices = process_dataframe_in_batches(model, df_test, batch_size=12)

# Extract predictions and handle errors
df_test['predicted_rule_violation'] = [pred['prediction'] for pred in predictions]
df_test['prediction_error'] = [pred.get('error', '') for pred in predictions]
df_test['batch_failed'] = [pred.get('batch_error', False) for pred in predictions]

# Optional: Check results
print(f"Total predictions: {len(predictions)}")
print(f"Failed batches: {len(failed_batches)}")
print(f"Failed rows: {len(failed_indices)}")
if len(predictions) > 0:
    success_rate = ((len(predictions) - len(failed_indices)) / len(predictions)) * 100
    print(f"Success rate: {success_rate:.2f}%")

(10000, 10)


Processing predictions:   0%|                           | 0/992 [00:00<?, ?it/s]

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:   1%| | 12/992 [00:00<00:12, 77.07it/s, Batch=1/83, Fail

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:   2%| | 24/992 [00:00<00:11, 81.19it/s, Batch=2/83, Fail

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:   4%| | 36/992 [00:00<00:11, 84.09it/s, Batch=3/83, Fail

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:   5%| | 48/992 [00:00<00:11, 82.17it/s, Batch=4/83, Fail

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:   6%| | 60/992 [00:00<00:11, 83.62it/s, Batch=5/83, Fail

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:   7%| | 72/992 [00:00<00:10, 84.56it/s, Batch=6/83, Fail

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:   8%| | 84/992 [00:00<00:10, 87.45it/s, Batch=7/83, Fail

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  10%| | 96/992 [00:01<00:10, 87.44it/s, Batch=8/83, Fail

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  11%| | 108/992 [00:01<00:10, 87.19it/s, Batch=9/83, Fai

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  12%| | 120/992 [00:01<00:10, 84.98it/s, Batch=10/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  13%|▏| 132/992 [00:01<00:09, 86.05it/s, Batch=11/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  15%|▏| 144/992 [00:01<00:09, 86.16it/s, Batch=12/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  16%|▏| 156/992 [00:01<00:09, 86.87it/s, Batch=13/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  17%|▏| 168/992 [00:01<00:09, 86.83it/s, Batch=14/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  18%|▏| 180/992 [00:02<00:09, 89.35it/s, Batch=15/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  19%|▏| 192/992 [00:02<00:09, 88.17it/s, Batch=16/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  21%|▏| 204/992 [00:02<00:08, 88.54it/s, Batch=17/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  22%|▏| 216/992 [00:02<00:08, 88.63it/s, Batch=18/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  23%|▏| 228/992 [00:02<00:08, 87.01it/s, Batch=19/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  24%|▏| 240/992 [00:02<00:08, 87.35it/s, Batch=20/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  25%|▎| 252/992 [00:02<00:08, 87.50it/s, Batch=21/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  27%|▎| 264/992 [00:03<00:08, 88.79it/s, Batch=22/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  28%|▎| 276/992 [00:03<00:08, 87.52it/s, Batch=23/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  29%|▎| 288/992 [00:03<00:08, 83.89it/s, Batch=24/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  30%|▎| 300/992 [00:03<00:08, 86.21it/s, Batch=25/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  31%|▎| 312/992 [00:03<00:07, 86.57it/s, Batch=26/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  33%|▎| 324/992 [00:03<00:07, 86.47it/s, Batch=27/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  34%|▎| 336/992 [00:03<00:07, 83.34it/s, Batch=28/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  35%|▎| 348/992 [00:04<00:07, 85.27it/s, Batch=29/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  36%|▎| 360/992 [00:04<00:07, 86.11it/s, Batch=30/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  38%|▍| 372/992 [00:04<00:07, 85.46it/s, Batch=31/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  39%|▍| 384/992 [00:04<00:07, 85.42it/s, Batch=32/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  40%|▍| 396/992 [00:04<00:06, 87.39it/s, Batch=33/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  41%|▍| 408/992 [00:04<00:06, 86.56it/s, Batch=34/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  42%|▍| 420/992 [00:04<00:06, 85.95it/s, Batch=35/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  44%|▍| 432/992 [00:05<00:06, 85.94it/s, Batch=36/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  45%|▍| 444/992 [00:05<00:06, 87.27it/s, Batch=37/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  46%|▍| 456/992 [00:05<00:06, 87.95it/s, Batch=38/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  47%|▍| 468/992 [00:05<00:05, 88.20it/s, Batch=39/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  48%|▍| 480/992 [00:05<00:05, 88.05it/s, Batch=40/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  50%|▍| 492/992 [00:05<00:05, 89.05it/s, Batch=41/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  51%|▌| 504/992 [00:05<00:05, 83.87it/s, Batch=42/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  52%|▌| 516/992 [00:05<00:05, 85.55it/s, Batch=43/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  53%|▌| 528/992 [00:06<00:05, 82.49it/s, Batch=44/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  54%|▌| 540/992 [00:06<00:05, 85.07it/s, Batch=45/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  56%|▌| 552/992 [00:06<00:05, 86.53it/s, Batch=46/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  58%|▌| 576/992 [00:06<00:04, 86.53it/s, Batch=48/83, Fa


Error processing batch 48: 'float' object has no attribute 'split'


Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  59%|▌| 588/992 [00:06<00:03, 112.79it/s, Batch=49/83, F

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  60%|▌| 600/992 [00:06<00:03, 105.68it/s, Batch=50/83, F

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  62%|▌| 612/992 [00:06<00:03, 100.46it/s, Batch=51/83, F

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  63%|▋| 624/992 [00:07<00:03, 96.86it/s, Batch=52/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  64%|▋| 636/992 [00:07<00:03, 93.84it/s, Batch=53/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  65%|▋| 648/992 [00:07<00:03, 92.25it/s, Batch=54/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  67%|▋| 660/992 [00:07<00:03, 92.11it/s, Batch=55/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  68%|▋| 672/992 [00:07<00:03, 88.24it/s, Batch=56/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  70%|▋| 696/992 [00:07<00:03, 87.63it/s, Batch=58/83, Fa


Error processing batch 58: 'float' object has no attribute 'split'


Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  71%|▋| 708/992 [00:07<00:02, 112.30it/s, Batch=59/83, F

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  73%|▋| 720/992 [00:08<00:02, 104.14it/s, Batch=60/83, F

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  74%|▋| 732/992 [00:08<00:02, 99.25it/s, Batch=61/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  75%|▊| 744/992 [00:08<00:02, 95.60it/s, Batch=62/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  76%|▊| 756/992 [00:08<00:02, 95.06it/s, Batch=63/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  77%|▊| 768/992 [00:08<00:02, 92.43it/s, Batch=64/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  79%|▊| 780/992 [00:08<00:02, 90.01it/s, Batch=65/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  80%|▊| 792/992 [00:08<00:02, 88.64it/s, Batch=66/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  81%|▊| 804/992 [00:09<00:02, 88.57it/s, Batch=67/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  82%|▊| 816/992 [00:09<00:01, 88.82it/s, Batch=68/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  83%|▊| 828/992 [00:09<00:01, 89.13it/s, Batch=69/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  85%|▊| 840/992 [00:09<00:01, 88.96it/s, Batch=70/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  86%|▊| 852/992 [00:09<00:01, 87.33it/s, Batch=71/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  87%|▊| 864/992 [00:09<00:01, 89.98it/s, Batch=72/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  88%|▉| 876/992 [00:09<00:01, 87.90it/s, Batch=73/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  90%|▉| 888/992 [00:09<00:01, 89.28it/s, Batch=74/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  91%|▉| 900/992 [00:10<00:01, 87.79it/s, Batch=75/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  92%|▉| 912/992 [00:10<00:00, 87.46it/s, Batch=76/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  93%|▉| 924/992 [00:10<00:00, 86.46it/s, Batch=77/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  94%|▉| 936/992 [00:10<00:00, 83.77it/s, Batch=78/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  96%|▉| 948/992 [00:10<00:00, 85.50it/s, Batch=79/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  97%|▉| 960/992 [00:10<00:00, 85.07it/s, Batch=80/83, Fa

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processing predictions:  98%|▉| 972/992 [00:10<00:00, 85.07it/s, Batch=81/83, Fa


Error processing batch 81: The decoder prompt (length 7901) is longer than the maximum model length of 1024. Make sure that `max_model_len` is no smaller than the number of text tokens.


Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/19 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing predictions:  99%|▉| 984/992 [00:11<00:00, 95.19it/s, Batch=82/83, Fa


Error processing batch 82: index 12 is out of bounds for axis 0 with size 12


Adding requests:   0%|          | 0/8 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, o

Processing predictions: 100%|█| 992/992 [00:11<00:00, 89.02it/s, Batch=83/83, Fa


Processing completed with 4 failed batches.
Failed batch numbers: [48, 58, 81, 82]
Total failed rows: 48
Total predictions: 992
Failed batches: 4
Failed rows: 48
Success rate: 95.16%

Sample of rows with errors:
    predicted_rule_violation  \
570                    Error   
571                    Error   
572                    Error   
573                    Error   
574                    Error   

                                      prediction_error  
570  Batch 48 failed: 'float' object has no attribu...  
571  Batch 48 failed: 'float' object has no attribu...  
572  Batch 48 failed: 'float' object has no attribu...  
573  Batch 48 failed: 'float' object has no attribu...  
574  Batch 48 failed: 'float' object has no attribu...  





## Prediction for test-dataset-LoRA (batch)

In [33]:
from tqdm import tqdm
import numpy as np

def process_dataframe_in_batches_lora(model, df, lora_adapter_path, batch_size=12):
    """Process dataframe using LoRA batch predictions with progress bar and error handling"""
    
    # Calculate number of batches
    num_batches = len(df) // batch_size + (1 if len(df) % batch_size > 0 else 0)
    
    all_results = []
    failed_batches = []
    failed_indices = []
    
    # Process in batches with progress bar
    with tqdm(total=len(df), desc="Processing LoRA predictions") as pbar:
        for i in range(0, len(df), batch_size):
            # Get current batch
            batch_df = df.iloc[i:i+batch_size]
            current_batch_num = i//batch_size + 1
            
            try:
                # Convert batch to list of Series (input format for predict_batch_with_lora)
                batch_list = [row for idx, row in batch_df.iterrows()]
                
                # Get LoRA predictions for this batch
                batch_results = model.predict_batch_with_lora(
                    batch_list, 
                    lora_adapter_path=lora_adapter_path,
                    verbose=False
                )
                
                # Add batch indices to results for tracking
                for j, result in enumerate(batch_results):
                    result['original_index'] = batch_df.index[j]
                
                # Add to results
                all_results.extend(batch_results)
                
            except Exception as e:
                # Log the error and continue with next batch
                print(f"\nError processing LoRA batch {current_batch_num}: {str(e)}")
                failed_batches.append(current_batch_num)
                batch_indices = batch_df.index.tolist()
                failed_indices.extend(batch_indices)
                
                # Create error results for all items in failed batch
                for idx in batch_indices:
                    error_result = {
                        'prediction': 'Error',
                        'is_violation': False,
                        'violation_probability': 0.0,
                        'confidence': 0.0,
                        'original_index': idx,
                        'error': f"LoRA batch {current_batch_num} failed: {str(e)}",
                        'batch_error': True
                    }
                    all_results.append(error_result)
            
            # Update progress bar
            pbar.update(len(batch_df))
            pbar.set_postfix({
                'Batch': f'{current_batch_num}/{num_batches}',
                'Failed': len(failed_batches),
                'LoRA': 'Active'
            })
    
    # Print summary
    if failed_batches:
        print(f"\nLoRA processing completed with {len(failed_batches)} failed batches.")
        print(f"Failed batch numbers: {failed_batches}")
        print(f"Total failed rows: {len(failed_indices)}")
    else:
        print(f"\nAll {num_batches} LoRA batches processed successfully!")
    
    return all_results, failed_batches, failed_indices

# Usage with LoRA adapter
lora_adapter_path = "./lora/Llama_32_1B_Instruct_lora_fp16_r64_s10000_e_3_msl1024"  # Set your LoRA adapter path

# Process in batches with LoRA and error handling
predictions, failed_batches, failed_indices = process_dataframe_in_batches_lora(
    model, 
    df_test, 
    lora_adapter_path=lora_adapter_path,
    batch_size=12
)

# Extract predictions and handle errors
df_test['predicted_rule_violation'] = [pred['prediction'] for pred in predictions]
df_test['prediction_error'] = [pred.get('error', '') for pred in predictions]
df_test['batch_failed'] = [pred.get('batch_error', False) for pred in predictions]

# Check results
print(f"Total predictions: {len(predictions)}")
print(f"Failed batches: {len(failed_batches)}")
print(f"Failed rows: {len(failed_indices)}")
if len(predictions) > 0:
    success_rate = ((len(predictions) - len(failed_indices)) / len(predictions)) * 100
    print(f"LoRA Success rate: {success_rate:.2f}%")

  return LoRARequest(


Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:   1%| | 12/992 [00:00<00:54, 18.06it/s, Batch=1/83,

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:   2%| | 24/992 [00:00<00:29, 32.35it/s, Batch=2/83,

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:   4%| | 36/992 [00:00<00:21, 43.50it/s, Batch=3/83,

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:   5%| | 48/992 [00:01<00:18, 49.82it/s, Batch=4/83,

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:   6%| | 60/992 [00:01<00:16, 56.50it/s, Batch=5/83,

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:   7%| | 72/992 [00:01<00:14, 61.74it/s, Batch=6/83,

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:   8%| | 84/992 [00:01<00:13, 67.02it/s, Batch=7/83,

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  10%| | 96/992 [00:01<00:12, 69.53it/s, Batch=8/83,

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  11%| | 108/992 [00:01<00:12, 71.90it/s, Batch=9/83

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  12%| | 120/992 [00:02<00:12, 72.08it/s, Batch=10/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  13%|▏| 132/992 [00:02<00:11, 72.62it/s, Batch=11/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  15%|▏| 144/992 [00:02<00:11, 72.85it/s, Batch=12/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  16%|▏| 156/992 [00:02<00:11, 73.76it/s, Batch=13/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  17%|▏| 168/992 [00:02<00:11, 73.75it/s, Batch=14/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  18%|▏| 180/992 [00:02<00:10, 75.76it/s, Batch=15/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  19%|▏| 192/992 [00:03<00:10, 75.17it/s, Batch=16/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  21%|▏| 204/992 [00:03<00:10, 75.25it/s, Batch=17/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  22%|▏| 216/992 [00:03<00:10, 74.27it/s, Batch=18/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  23%|▏| 228/992 [00:03<00:10, 71.26it/s, Batch=19/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  24%|▏| 240/992 [00:03<00:10, 72.63it/s, Batch=20/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  25%|▎| 252/992 [00:03<00:10, 72.99it/s, Batch=21/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  27%|▎| 264/992 [00:04<00:09, 74.01it/s, Batch=22/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  28%|▎| 276/992 [00:04<00:09, 74.69it/s, Batch=23/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  29%|▎| 288/992 [00:04<00:09, 71.83it/s, Batch=24/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  30%|▎| 300/992 [00:04<00:09, 74.12it/s, Batch=25/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  31%|▎| 312/992 [00:04<00:09, 74.91it/s, Batch=26/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  33%|▎| 324/992 [00:04<00:08, 75.80it/s, Batch=27/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  34%|▎| 336/992 [00:05<00:09, 71.78it/s, Batch=28/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  35%|▎| 348/992 [00:05<00:08, 72.58it/s, Batch=29/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  36%|▎| 360/992 [00:05<00:08, 73.93it/s, Batch=30/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  38%|▍| 372/992 [00:05<00:11, 53.48it/s, Batch=31/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  39%|▍| 384/992 [00:05<00:10, 57.56it/s, Batch=32/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  40%|▍| 396/992 [00:06<00:09, 62.98it/s, Batch=33/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  41%|▍| 408/992 [00:06<00:08, 66.56it/s, Batch=34/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  42%|▍| 420/992 [00:06<00:08, 69.50it/s, Batch=35/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  44%|▍| 432/992 [00:06<00:07, 71.72it/s, Batch=36/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  45%|▍| 444/992 [00:06<00:07, 74.23it/s, Batch=37/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  46%|▍| 456/992 [00:06<00:07, 75.45it/s, Batch=38/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  47%|▍| 468/992 [00:06<00:06, 76.08it/s, Batch=39/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  48%|▍| 480/992 [00:07<00:06, 75.87it/s, Batch=40/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  50%|▍| 492/992 [00:07<00:06, 75.34it/s, Batch=41/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  51%|▌| 504/992 [00:07<00:06, 71.04it/s, Batch=42/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  52%|▌| 516/992 [00:07<00:06, 72.13it/s, Batch=43/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  53%|▌| 528/992 [00:07<00:06, 69.11it/s, Batch=44/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  54%|▌| 540/992 [00:08<00:06, 71.38it/s, Batch=45/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  56%|▌| 552/992 [00:08<00:05, 73.61it/s, Batch=46/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  58%|▌| 576/992 [00:08<00:05, 74.45it/s, Batch=48/8


Error processing LoRA batch 48: 'float' object has no attribute 'split'


Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  59%|▌| 588/992 [00:08<00:04, 97.71it/s, Batch=49/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  60%|▌| 600/992 [00:08<00:04, 92.02it/s, Batch=50/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  62%|▌| 612/992 [00:08<00:04, 86.36it/s, Batch=51/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  63%|▋| 624/992 [00:08<00:04, 83.21it/s, Batch=52/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  64%|▋| 636/992 [00:09<00:04, 80.10it/s, Batch=53/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  65%|▋| 648/992 [00:09<00:04, 78.75it/s, Batch=54/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  67%|▋| 660/992 [00:09<00:04, 78.11it/s, Batch=55/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  68%|▋| 672/992 [00:09<00:04, 73.85it/s, Batch=56/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  70%|▋| 696/992 [00:09<00:03, 74.81it/s, Batch=58/8


Error processing LoRA batch 58: 'float' object has no attribute 'split'


Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  71%|▋| 708/992 [00:09<00:02, 94.92it/s, Batch=59/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  73%|▋| 720/992 [00:10<00:03, 89.81it/s, Batch=60/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  74%|▋| 732/992 [00:10<00:03, 86.19it/s, Batch=61/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  75%|▊| 744/992 [00:10<00:03, 80.82it/s, Batch=62/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  76%|▊| 756/992 [00:10<00:02, 81.96it/s, Batch=63/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  77%|▊| 768/992 [00:10<00:02, 80.78it/s, Batch=64/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  79%|▊| 780/992 [00:10<00:02, 79.75it/s, Batch=65/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  80%|▊| 792/992 [00:11<00:02, 78.63it/s, Batch=66/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  81%|▊| 804/992 [00:11<00:02, 77.73it/s, Batch=67/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  82%|▊| 816/992 [00:11<00:02, 78.05it/s, Batch=68/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  83%|▊| 828/992 [00:11<00:02, 78.31it/s, Batch=69/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  85%|▊| 840/992 [00:11<00:01, 76.63it/s, Batch=70/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  86%|▊| 852/992 [00:11<00:01, 75.19it/s, Batch=71/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  87%|▊| 864/992 [00:11<00:01, 76.65it/s, Batch=72/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  88%|▉| 876/992 [00:12<00:01, 75.53it/s, Batch=73/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  90%|▉| 888/992 [00:12<00:01, 76.26it/s, Batch=74/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  91%|▉| 900/992 [00:12<00:01, 75.77it/s, Batch=75/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  92%|▉| 912/992 [00:12<00:01, 75.08it/s, Batch=76/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  93%|▉| 924/992 [00:12<00:00, 74.66it/s, Batch=77/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  94%|▉| 936/992 [00:12<00:00, 72.49it/s, Batch=78/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  96%|▉| 948/992 [00:13<00:00, 73.62it/s, Batch=79/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/12 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  97%|▉| 960/992 [00:13<00:00, 73.49it/s, Batch=80/8

Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processing LoRA predictions:  98%|▉| 972/992 [00:13<00:00, 73.49it/s, Batch=81/8


Error processing LoRA batch 81: The decoder prompt (length 7901) is longer than the maximum model length of 1024. Make sure that `max_model_len` is no smaller than the number of text tokens.


Adding requests:   0%|          | 0/12 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/19 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

Processing LoRA predictions:  99%|▉| 984/992 [00:13<00:00, 80.59it/s, Batch=82/8


Error processing LoRA batch 82: index 12 is out of bounds for axis 0 with size 12


Adding requests:   0%|          | 0/8 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, o

Processing LoRA predictions: 100%|█| 992/992 [00:13<00:00, 72.54it/s, Batch=83/8


LoRA processing completed with 4 failed batches.
Failed batch numbers: [48, 58, 81, 82]
Total failed rows: 48
Total predictions: 992
Failed batches: 4
Failed rows: 48
LoRA Success rate: 95.16%





## Summary

In [34]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, classification_report

def convert_to_bool(series):
    """Simple boolean conversion"""
    if series.dtype == 'bool':
        return series
    
    # Convert to string then to boolean
    def to_bool(val):
        if pd.isna(val):
            return np.nan
        val_str = str(val).strip().lower()
        if val_str in ['true', '1', 'yes', 'y']:
            return True
        elif val_str in ['false', '0', 'no', 'n']:
            return False
        else:
            return np.nan
    
    return series.apply(to_bool)

# Convert both columns to boolean
df_test['violates_rule_bool'] = convert_to_bool(df_test['violates_rule'])
df_test['predicted_bool'] = convert_to_bool(df_test['predicted_rule_violation'])

# Filter valid data (no NaNs, no errors)
valid_mask = (
    df_test['violates_rule_bool'].notna() & 
    df_test['predicted_bool'].notna() &
    (df_test['predicted_rule_violation'] != 'Error')
)
df_clean = df_test[valid_mask]

print(f"Total rows: {len(df_test)}")
print(f"Valid rows: {len(df_clean)}")
print(f"Success rate: {len(df_clean)/len(df_test)*100:.2f}%\n")

if len(df_clean) > 0:
    # Convert to numpy boolean arrays for sklearn
    y_true = np.array(df_clean['violates_rule_bool'], dtype=bool)
    y_pred = np.array(df_clean['predicted_bool'], dtype=bool)
    
    # Calculate metrics
    f1 = f1_score(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    
    print(f"F1 Score: {f1:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}\n")
    
    print("Classification Report:")
    print(classification_report(y_true, y_pred, target_names=['No Violation', 'Violation']))
else:
    print("No valid data for evaluation.")

Total rows: 992
Valid rows: 944
Success rate: 95.16%

F1 Score: 0.8489
Accuracy: 0.8443
Precision: 0.8695
Recall: 0.8293

Classification Report:
              precision    recall  f1-score   support

No Violation       0.82      0.86      0.84       446
   Violation       0.87      0.83      0.85       498

    accuracy                           0.84       944
   macro avg       0.84      0.85      0.84       944
weighted avg       0.85      0.84      0.84       944

