In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from rouge_score import rouge_scorer
from datasets import load_dataset

# Load the fine-tuned checkpoint and its tokenizer from the same model id,
# then wrap both in a text-generation pipeline.
model_name = "mukulvyas99/LawChatTinyLlama2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Load the evaluation data. "path_to_your_dataset" is a placeholder: adjust
# the dataset path/source before running.
dataset = load_dataset("path_to_your_dataset")

def prepare_data(example):
    """Split one record into a generation prompt and its reference answer.

    The record's 'text' field is cut at every "[/INST]" marker. Everything
    before the first marker, followed by "[/INST]</s>", becomes the prompt;
    the segment right after the first marker (whitespace-stripped) becomes
    the reference. When the marker is absent the reference is empty.

    Args:
        example: Mapping with a string under the key 'text'.

    Returns:
        Dict with 'ac_text' (the prompt) and 'ac_result' (the reference).
    """
    head, *tail = example['text'].split("[/INST]")
    if tail:
        answer = tail[0].strip()
    else:
        # No marker found, so there is nothing to use as a reference.
        answer = ""
    return {'ac_text': head + "[/INST]</s>", 'ac_result': answer}

# Add the 'ac_text' (prompt) and 'ac_result' (reference) fields to every record.
dataset = dataset.map(prepare_data)

# Generate responses and compute ROUGE scores
predictions = []
references = []
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

for example in dataset['train']:
    # return_full_text=False: by default the pipeline returns the prompt
    # followed by the continuation, which would put the whole prompt into a
    # prediction that is scored against an answer-only reference.
    # max_new_tokens (instead of max_length): max_length also counts the
    # prompt tokens, so a long prompt would leave little or no room for the
    # generated answer.
    output = generator(
        example['ac_text'],
        max_new_tokens=200,
        return_full_text=False,
    )
    model_output = output[0]['generated_text']
    predictions.append(model_output)
    references.append(example['ac_result'])

# RougeScorer.score takes (target, prediction), hence reference first.
scores = [scorer.score(ref, pred) for ref, pred in zip(references, predictions)]

# Calculate and print average ROUGE scores
# Mean F-measure per metric over all scored examples. An empty `scores` list
# yields 0.0 for each metric instead of raising ZeroDivisionError.
average_scores = {
    key: (sum(score[key].fmeasure for score in scores) / len(scores)) if scores else 0.0
    for key in ['rouge1', 'rougeL']
}
print("Average ROUGE-1 and ROUGE-L Scores:", average_scores)
