# In [None]:
import os
import pandas as pd
from tqdm import tqdm
from together import Together
import json
import re

# Define the prompt creation function for summarizing cases
def generate_case_summary_prompt(case_text: str) -> str:
    """Build the LLM prompt that asks for a templated summary of one case.

    Args:
        case_text: Raw text of the case. It is interpolated verbatim, wrapped
            in double quotes, after the summary template.

    Returns:
        The prompt string: the fill-in-the-blank summary template, the quoted
        case text, and the requested output format.
    """
    return f"""

    Summarize the case text using this template as accurately as possible while
    maintaining correct English grammar. Do not add extra information:
    "The <active agent> did <action> to <passive agent> which led to
    <consequence>. The <active agent> had <good/bad/neutral> moral intention,
    however, the <action> violated <ethical principle> ethical principle which
    caused <ethical issue>."
    Case text is as follows: "{case_text}"

    Give the output in comma-separated format.

    """

# Function to extract summary text from the raw API response
def extract_summary_from_response(response_text):
    """Pull the templated summary out of a raw model response.

    The summary is taken to begin at the first line that starts with
    ``"The "`` and to run to the end of the response, so introductory lines
    the model may add before the summary (e.g. "Here is the summary:") are
    skipped instead of causing the whole response to be rejected.

    Args:
        response_text: Text returned by the model.

    Returns:
        The summary with surrounding whitespace stripped, or ``None`` when
        ``response_text`` is not a string or no line starts with ``"The "``.
        A diagnostic message is printed in both ``None`` cases.
    """
    if not isinstance(response_text, str):
        print(
            "Error extracting summary: expected str, got "
            f"{type(response_text).__name__}"
        )
        return None

    # MULTILINE lets ^ anchor at the start of any line, not only the start of
    # the string; DOTALL lets .* run across newlines so a multi-line summary
    # is captured in full.
    summary_match = re.search(
        r"^The .*", response_text, re.MULTILINE | re.DOTALL
    )
    if summary_match is None:
        print("No valid summary found in response.")
        return None
    return summary_match.group().strip()

# Function to run the agent and fetch a response
def run_agent(client, prompt, model, content):
    """Send one chat-completion request and return the extracted summary.

    Args:
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        prompt: User message carrying the summarization request.
        model: Model identifier forwarded to the API.
        content: Instruction describing the role the model should play. It is
            sent as the ``system`` message so it is treated as an instruction
            rather than as something the model previously said.

    Returns:
        The result of ``extract_summary_from_response`` on the reply text, or
        ``None`` when the API call fails or the reply has no usable text.
    """
    messages = [
        {"role": "system", "content": content},
        {"role": "user", "content": prompt},
    ]
    try:
        response = client.chat.completions.create(model=model, messages=messages)
        # Raises (and is reported below) when the reply has no choices or the
        # message content is None.
        response_text = response.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort boundary: a failed request is reported and the caller
        # records the case as invalid instead of aborting the whole run.
        print(f"Error in API call: {e}")
        return None

    print(f"Raw Response: {response_text}")  # Log raw response
    return extract_summary_from_response(response_text)

# Set API key for Together client.
# setdefault keeps a key that is already exported in the environment; the
# placeholder is only used when none is set and must then be replaced with a
# real key before running.
os.environ.setdefault("TOGETHER_API_KEY", "Your_API_Key")
client = Together(api_key=os.environ["TOGETHER_API_KEY"])

# Define the LLM model
llm_model = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"

# Load dataset (the processing loop reads its "selftext" column)
data = pd.read_json("practice.json")

# Initialize lists to collect generated summaries and invalid responses
all_summaries = []
invalid_responses = []

# Process each case in the dataset.
# Iterating the column with enumerate keeps the positional index that is
# stored as "case_index" while avoiding a per-row iloc lookup.
for i, case_text in tqdm(enumerate(data["selftext"]), total=len(data)):
    # A missing or blank text cannot be summarized; record it as invalid
    # without spending an API call on it.
    if not isinstance(case_text, str) or not case_text.strip():
        invalid_responses.append({"case_index": i, "case_text": case_text})
        continue

    # Generate a summary for the case text
    summary = run_agent(
        client,
        generate_case_summary_prompt(case_text),
        llm_model,
        "You are a legal domain expert generating case summaries.",
    )

    # run_agent returns None when the call failed or no summary was found
    if summary:
        all_summaries.append({"case_index": i, "summary": summary})
    else:
        invalid_responses.append({"case_index": i, "case_text": case_text})

# Save all generated summaries to a CSV file
if all_summaries:
    summary_df = pd.DataFrame(all_summaries)
    summary_output_file = "summary.csv"
    summary_df.to_csv(summary_output_file, index=False)
    print(f"Generated summaries saved as '{summary_output_file}'")
else:
    print("No valid summaries generated.")

# Save invalid responses for debugging
if invalid_responses:
    # Keep the Kaggle location when it exists; elsewhere fall back to the
    # current directory so the run does not end in FileNotFoundError after
    # every case has already been processed.
    kaggle_dir = "/kaggle/working"
    output_dir = kaggle_dir if os.path.isdir(kaggle_dir) else os.getcwd()
    invalid_output_file = os.path.join(output_dir, "invalid_responses.json")
    with open(invalid_output_file, "w", encoding="utf-8") as f:
        json.dump(invalid_responses, f, indent=4)
    print(f"Invalid responses saved as '{invalid_output_file}'")
else:
    print("No invalid responses.")