# Models explaining their classifications

## Importing libraries

In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import warnings
warnings.filterwarnings("ignore")

### Getting the project root (parent of the current working directory)
import os
import sys
from pathlib import Path

# Directory one level above the notebook's working directory, kept as a plain
# string because later cells build file paths from it with f-strings.
path = str(Path(os.getcwd()).parent)
print(path)

# Make modules under that directory importable from this notebook.
sys.path.insert(1, path)

/Users/saideepbunny/Projects/Email-Assistant-using-Generative-AI


## Setting up API keys

In [2]:
# Read the NVIDIA endpoint credentials file that lives under the project root.
load_dotenv(dotenv_path=f"{path}/config/nvidia_token.env")

# Model identifiers, numbered in the order gemma / mistral / llama.
model1, model2, model3 = (
    "google/gemma-3n-e4b-it",
    "mistralai/mistral-nemotron",
    "meta/llama-4-maverick-17b-128e-instruct",
)

# Matching API keys, looked up from the environment (None when a key is not set).
token1, token2, token3 = (
    os.getenv(key_name)
    for key_name in ("GEMMA_API_KEY", "MISTRAL_API_KEY", "LLAMA_API_KEY")
)

## Defining prompt

In [3]:
# Classification instructions (the "Prompt-2" variant) that are replayed to a
# model when it is asked to explain a label it assigned.
# NOTE(review): presumably this text must match the prompt that produced the
# stored "Prompt-2 results" predictions - confirm before editing the wording.
original_prompt = """You are an expert email classification system. Your task is to analyze the provided email and classify it into one of the three categories below. Your response must be **only** the category name and nothing else.

### **Categories**

**1. JOB**
Classify an email as `JOB` if it concerns the status of a specific job application. Recipient of the mail will be informed about the status of their application. This includes notifications that a candidate has:
*   Successfully applied for a job (only application confirmation).
*   Been invited for an assessment (online or offline).
*   Been shortlisted for a position.
*   Been invited to an interview (online or offline).
*   Been rejected for a position.

**2. MEET**
Classify an email as `MEET` if the sender is requesting to schedule an **online meeting**.
*   **Crucial Condition:** The request must be for an online/virtual meeting, NOT an in-person meeting.

**3. OTHER**
Classify an email as `OTHER` if it does not fit into the `JOB` or `MEET` categories. This includes, but is not limited to:
*   General job-related discussions that are not about application status (e.g., networking, asking about a role, advertisements or requesting for an interview).
*   Requests for an in-person or face-to-face meeting.
*   Newsletters, marketing emails, personal correspondence, etc.

**Important Rules:**
* If an email mentions a "meeting" but does not specify whether it is virtual or in-person, classify it as an online meeting.
* Any Job interview online meeting request must be classified as `JOB` category only.
* Only classify an email as JOB if it communicates a completed action related to the application status (e.g., application received, interview invitation, rejected, job offered). Do NOT classify emails with ongoing or vague updates (e.g., "Your application is under review") as JOB.


### **Output Format**

Your answer must be one of these three words exactly, with no additional text:
*   JOB
*   MEET
*   OTHER"""

## Error Explanation function

In [4]:
def explain_classification_error(model, token, email_content, classification_prompt, predicted_label):
    """
    Ask a model to justify a label it previously assigned to an email.

    The original classification exchange is replayed as chat history (the email
    followed by the classification instructions as the user turn, the predicted
    label as the assistant turn), and a final user turn asks for an explanation.

    Args:
        model (str): Model identifier passed to ChatNVIDIA
        token (str): API key passed to ChatNVIDIA
        email_content (str): The original email content
        classification_prompt (str): The classification instructions the label was produced under
        predicted_label (str): The model's prediction

    Returns:
        str: The model's explanation, or a message starting with
            "Error generating explanation: " if invoking the chain raised.
    """

    # Create the conversation template
    conversation_template = ChatPromptTemplate.from_messages([
        # Show the original classification task
        ("user", "{email_content}\n\n{classification_prompt}"),

        # Show the model's response
        ("assistant", "{predicted_label}"),

        # Ask for explanation of the classification
        ("user", """Provide an explanation for above classification:

Please analyze this email classification and provide:

**Why did you classify it as {predicted_label}?**
   - What specific parts of the email led to this decision?
   - Which keywords or phrases influenced your choice?
   - Recite the rule from the given instructions which lead to this decision

Please provide a detailed analysis in 2-3 short paragraphs.""")
    ])

    # Initialize the LLM
    llm = ChatNVIDIA(
        model=model,
        api_key=token,
        temperature=0.3,
        max_tokens=1024,
        streaming=False
    )

    # Create the chain
    chain = conversation_template | llm | StrOutputParser()

    # Generate explanation
    try:
        explanation = chain.invoke({
            "email_content": email_content,
            # Use the prompt supplied by the caller; previously the argument was
            # ignored and the module-level prompt was always substituted.
            "classification_prompt": classification_prompt,
            "predicted_label": predicted_label
        })
        return explanation
    except Exception as e:
        # Best effort: one failed request is reported in place of the
        # explanation so the remaining rows can still be processed.
        return f"Error generating explanation: {str(e)}"

## Loading and analyzing results

In [5]:
def display_error_summary(df, model_column, explanation_column):
    """
    Print every row whose prediction differs from its true label.

    Args:
        df (pd.DataFrame): Frame with 'email' and 'label' columns plus the two columns named below
        model_column (str): Column name with model predictions
        explanation_column (str): Column name with the explanation text

    Returns:
        None: matching rows are skipped, mismatches are written to stdout.
    """
    divider = "-"*90

    for position in range(len(df)):
        record = df.iloc[position]

        # Correctly classified rows are not reported.
        is_mismatch = record['label'] != record[model_column]
        if not is_mismatch:
            continue

        print(f"\nEMAIL :\n{record['email']}")
        print(f"\n\nTRUE LABEL : {record['label']}")
        print(f"\n\nPREDICTED LABEL : {record[model_column]}")
        print(f"\n\nEXPLANATION COLUMN :\n{record[explanation_column]}")

        # Blank line, divider, blank line between reported emails.
        print(f"\n{divider}\n")

In [6]:
def load_and_analyze_results(file_path, sheet_name="annotated_data", model_name='mistral'):
    """
    Load existing classification results, ask the model to explain each
    misclassified email, and print the error summary.

    Args:
        file_path (str): Path to Excel file with results
        sheet_name (str): Sheet name containing the data
        model_name (str): One of 'mistral', 'gemma' or 'llama'. Selects the
            model / API key pair and the '<model_name>_results' prediction column.

    Returns:
        None: the summary is printed by display_error_summary.

    Raises:
        ValueError: If model_name is not supported, or if the sheet lacks the
            'email', 'label' or '<model_name>_results' column.
    """
    # Fail fast on an unknown model name; otherwise no model/token would be
    # selected and the failure would only surface later as an unbound variable.
    supported_models = ("mistral", "gemma", "llama")
    if model_name not in supported_models:
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected one of {list(supported_models)}"
        )

    model_column = f"{model_name}_results"
    explanation_column = f"{model_name}_explanation"

    # Load the data
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    # Validate required columns
    required_columns = ['email', 'label', model_column]
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")

    # Pick the model identifier and API key configured for this model name
    model, token = {
        "mistral": (model2, token2),
        "gemma": (model1, token1),
        "llama": (model3, token3),
    }[model_name]

    def explain_if_misclassified(row):
        # Only mismatches are printed by display_error_summary, so correctly
        # classified rows are skipped instead of spending an API call on them.
        if row['label'] != row[model_column]:
            return explain_classification_error(
                model,
                token,
                email_content=row["email"],
                classification_prompt=original_prompt,
                predicted_label=row[model_column]
            )
        return None

    # Create Explanation column (a list keeps this well-defined for an empty sheet)
    df[explanation_column] = [explain_if_misclassified(row) for _, row in df.iterrows()]

    # Display comprehensive summary
    display_error_summary(df, model_column, explanation_column)

## Error Explanation

### Mistral error explanation

In [7]:
load_and_analyze_results(f"{path}/data/email_classification_annotated_data.xlsx", "Prompt-2 results", 'mistral')


EMAIL :
Hello Saideep,

Thank you for beginning the application for the job Data Scientist. We hope you’ll have time to return and complete it.  

If you decide not to apply, you can either delete the data or it will be automatically deleted after 30 days. We appreciate your interest and look forward to hearing from you soon!

Sincerely,
Ford Talent Acquisition Team


TRUE LABEL : OTHER


PREDICTED LABEL : JOB


EXPLANATION COLUMN :
The email was classified as **JOB** because it directly concerns the status of a job application. The sender acknowledges that the recipient (Saideep) has begun an application for the position of **Data Scientist** and encourages them to complete it. This falls under the **JOB** category because it involves an **application confirmation**—a key indicator of a job-related communication as defined in the instructions.

Key phrases that influenced this decision include:
- *"Thank you for beginning the application for the job Data Scientist"*
- *"We hope you’l

### Gemma error explanation

In [8]:
load_and_analyze_results(f"{path}/data/email_classification_annotated_data.xlsx", "Prompt-2 results", 'gemma')


EMAIL :
Hello Saideep,

Thank you for your interest in joining the Embark team!  We are always on the look-out for passionate and talented future pack members to join us on our mission to improve the life and health of dogs.  Our hiring team will review your application and be in touch if your skills and qualifications match our needs.  

Learn more about our fantastic team, the science and discoveries we are embarking on, and the products we have launched @Embark.    

Thanks again for applying and have a great day!

Embark Team


TRUE LABEL : JOB


PREDICTED LABEL : OTHER


EXPLANATION COLUMN :
**Why did you classify it as OTHER?**

I classified the email as `OTHER` because it is a general acknowledgment of an application submission, not a notification regarding the status of a specific job application. The email confirms receipt of the application and states that the hiring team will review it. This is a standard automated response and doesn't communicate a decision (acceptance, re

In [9]:
load_and_analyze_results(f"{path}/data/email_classification_annotated_data.xlsx", "Prompt-2 results", 'llama')


EMAIL :
Dear Robert,
My name is Patricia Gonzalez, and I'm the procurement director at Meridian Manufacturing. We're currently evaluating logistics partners for our East Coast distribution network, and Coastal Logistics came highly recommended by several industry contacts.
I'd appreciate the opportunity to discuss our requirements and learn more about your capabilities. Would you be available for a 45-minute introductory call? I'm flexible with timing but generally prefer meetings between 9 AM and 3 PM EST on weekdays.
Some topics I'd like to cover include your capacity for handling temperature-sensitive products, your technology integration capabilities, and your experience with automotive parts distribution.
Please let me know a few times that work for you, and I'll send over a calendar invite.
Thank you for your time.
Best regards,
Patricia Gonzalez
Procurement Director
Meridian Manufacturing


TRUE LABEL : MEET


PREDICTED LABEL : OTHER


EXPLANATION COLUMN :
The email was classif