# Testing prompts for Classifying emails

## Importing libraries

In [1]:
import os
import numpy as np
import requests
from dotenv import load_dotenv
from langsmith import traceable
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    precision_score, recall_score, f1_score,
    accuracy_score, confusion_matrix, hamming_loss
)
import re

import warnings
warnings.filterwarnings("ignore")

### Getting Path of current file
import os
import sys
from pathlib import Path

path = Path(os.path.dirname(os.getcwd()))
path = str(path)
print(path)
sys.path.insert(1, path)

/Users/saideepbunny/Projects/Email-Assistant-using-Generative-AI


## Setting API keys

In [2]:
load_dotenv(dotenv_path=f"{path}/config/nvidia_token.env")

model1 = "google/gemma-3n-e4b-it"
model2 = "mistralai/mistral-nemotron"
model3 = "moonshotai/kimi-k2-instruct"

token = os.getenv("NVIDIA_API_KEY")

## Classification function

In [3]:
def classify_email(model, token, prompt_msg, email_content: str, examples: list = None):
    """
    Classify emails using an NVIDIA LLM with optional few-shot examples.

    Args:
        model (str): NVIDIA LLM model name.
        token (str): API key for NVIDIA LLM.
        prompt_msg (str): Classification instruction message.
        email_content (str): Email text to classify.
        examples (list, optional): Few-shot examples in the form 
                                   [{"email": "example email", "label": "JOB"}, ...]

    Returns:
        str: Cleaned classification result (only capital letters).
    """

    # Build few-shot messages
    messages = []

    # Add few-shot examples if provided
    if examples:
        for ex in examples:
            messages.append(("user", ex["email"]))
            messages.append(("assistant", ex["label"]))

    # Add the actual email to classify
    messages.append((
        "user",
        f"""{{email_content}}\n\n{prompt_msg}"""
    ))

    # Create prompt template
    prompt = ChatPromptTemplate.from_messages(messages)

    # Use the NVIDIA LLM via LangChain
    llm = ChatNVIDIA(
        model=model,
        api_key=token,
        temperature=0.2,
        max_tokens=4096,
        streaming=False
    )

    # Define chain
    chain = prompt | llm | StrOutputParser()

    # Run it
    result = chain.invoke({"email_content": email_content})

    # Extract only capital letters (JOB, MEET, OTHER)
    clean_result = re.sub(r'[^A-Za-z,]+', '', result)
    final_result = clean_result.split(",")
    return np.array(final_result)


## Information extraction function

In [4]:
def extract_MEET_info(model, token, prompt_msg, email_content: str, examples: list = None):
    """
    Extract information from JOB emails using an NVIDIA LLM with optional few-shot examples.

    Args:
        model (str): NVIDIA LLM model name.
        token (str): API key for NVIDIA LLM.
        prompt_msg (str): Classification instruction message.
        email_content (str): Email text to classify.
        examples (list, optional): Few-shot examples in the form 
                                   [{"email": "example email", "label": "JOB"}, ...]

    Returns:
        str: Extracted information result.
    """

    # Build few-shot messages
    messages = []

    # Add few-shot examples if provided
    if examples:
        for ex in examples:
            messages.append(("user", ex["email"]))
            messages.append(("assistant", ex["label"]))

    # Add the actual email to classify
    messages.append((
        "user",
        f"""{prompt_msg}\n\n EMAIL TO ANALYZE: {{email_content}} \n\n PIPE-DELIMITED OUTPUT:"""
    ))

    # Create prompt template
    prompt = ChatPromptTemplate.from_messages(messages)

    # Use the NVIDIA LLM via LangChain
    llm = ChatNVIDIA(
        model=model,
        api_key=token,
        temperature=0.0,
        top_p=1.0,
        max_tokens=4096,
        streaming=False
    )

    # Define chain
    chain = prompt | llm | StrOutputParser()

    # Run it
    result = chain.invoke({"email_content": email_content})
    return np.array(result.split("|"))


def meet_information_extraction(row, prompt_msg, model, token, examples=None):
    if eval(row['mistral_MEET']) == True:
        result = extract_MEET_info(model, token, prompt_msg, row['email'], examples)
        if(len(result) != 6):
            print(result)
            return np.array(["Error"] * 6)
        return result

    else:
        return np.array([np.nan, np.nan, np.nan, np.nan, np.nan, np.nan])




In [5]:
df = pd.read_excel(f"{path}/data/email_classification_annotated_data.xlsx", sheet_name="updated_annotated_data")
df

Unnamed: 0,email,JOB,MEET,OTHER
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False


## Email Classification

In [6]:
prompt = """You are an expert email class identification system. Your task is to analyze the provided email and identify if the email falls it each category or not. Your response must be **only** the True/False for each category without any additional text or metadata.

### **Categories**

**1. JOB**
Identify an email as `JOB` if it concerns the status of a specific job application. Recipient of the mail will be informed about the status of their application. This includes notifications that a candidate has:
*   Successfully applied/in the process of applying for a job (application confirmation).
*   Been shortlisted for a position.
*   Been invited to an interview or assessment.
*   Been rejected for a position.
*   Job recommendations or suggestions or ads from job boards, recruiters, companies, etc., do not classify as JOB.

**2. MEET**
Identify an email as `MEET` if the sender is requesting to schedule a meeting, virtual or in-person.
*   Can be a virtual meeting via Zoom, Google Meet, Teams, Video call, etc.
*   Can be an in-person meeting at a specific location.
*   Can be an invitation to job interviews, assessments as well.
*   Cannot be classified as MEET unless it is mentioned explicitly in the email. Future steps of a process or a job application having a meeting do not classify as MEET.

**3. OTHER**
Identify an email as `OTHER` only if it does not fit into the `JOB` or `MEET` categories. This includes, but is not limited to:
*   General job-related discussions that are not about application status (e.g., networking, asking about a role, advertisements, job suggestions or requesting for an interview).
*   Newsletters, marketing emails, personal correspondence, etc.

**Important Rules:**
*   An email cannot be identified as any other class if it classifies as OTHER.
*   An email can be identified as either JOB or either MEET or both JOB and MEET.
*   Possible combinations of classes for email are:
    *   JOB, MEET
    *   JOB
    *   MEET
    *   OTHER

### **Output Format**

Your answer must be three True or False values, one for each JOB, MEET and OTHER in this exact order:
<is a JOB>, <is a MEET>, <is OTHER>"""

In [7]:
data_df = df.copy()
data_df[['mistral_JOB', 'mistral_MEET', 'mistral_OTHER']] = (
    data_df['email']
    .apply(lambda x: classify_email(model2, token, prompt, x))
    .apply(pd.Series)  # convert array to 3 separate columns
)

In [8]:
data_df

Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False


## Information extraction

### Prompt-1 test

In [9]:
info_extract_prompt1 = """You are an information extraction assistant.  
You will be given the full text of an email regarding a meeting request. It can be a virtual meeting, in-person meeting, audio/video call, a job interview request, etc.

Your task is to read the email carefully and extract the following details exactly as they appear in the email (or return empty string "" if the detail is missing):

1. Request sent by — The name of the person or body that is requesting the meeting.
2. Meet start time — start time of the meeting if specified in the email.
3. Meet end time — end time of the meeting if specified in the email.
4. Total duration — Total duration of the meeting if specified in the email.
5. Meeting requested date - The date when the meeting was requested/ scheduled if specified in the mail.
6. Reason for meeting - The reason for meeting if specified in the email. Make sure to summarize the reason in no more than 15 words.

** Important Rule:**
- The output must have exactly 6 values separated by a pipe (|).

### Output format:
<request_sent_by>|<meet_start_time>|<meet_end_time>|<total_duration>|<meeting_requested_date>|<reason_for_meeting>

Return the extracted information in the format above, with each detail separated by a pipe, without any additional text or metadata. If any detail is not present in the email, return an empty string for that detail (e.g., if the meet start time, end time and total duration is not mentioned, return <request_sent_by>||||<meeting_requested_date>|<reason_for_meeting>."""

In [10]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'meet_start_time', 'meet_end_time', 'total_duration', 'meeting_requested_date', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt1, model2, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Elena' '' '' '2023-03-08' 'Denver warehouse optimization idea']
['Michael Rodriguez' '' '' '30 minutes' '' ''
 'Discuss potential synergies and predictive modeling']
['David Park' '' '' '30 minutes' '' '' 'Discuss dissertation proposal']
['Rakshitha' '' '' '1 hour' '' ''
 'Grad Coop interview for Data Scientist/Engineer']


Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER,request_sent_by,meet_start_time,meet_end_time,total_duration,meeting_requested_date,reason_for_meeting
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False,,,,,,
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False,,,,,,
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False,,,,,,
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False,,,,,,
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False,,,,,,
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,,
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,,
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False,,,,,,
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False,,,,,,
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False,,,,,,


In [11]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'meet_start_time', 'meet_end_time', 'total_duration', 'meeting_requested_date', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt1, model3, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Michael Rodriguez' '' '' '' '30-minute' ''
 'Discuss potential synergies between Zenith Solutions and Brightwave Analytics']
['Alex' '' '9 AM EST' '' 'about an hour' 'this Thursday'
 'Review mobile app redesign wireframes']
['Patricia Gonzalez' '' '' '' '45-minute' ''
 'Evaluate logistics partnership for East Coast distribution']


Exception: [429] Too Many Requests
{'status': 429, 'title': 'Too Many Requests'}

### Prompt-2 test

In [12]:
info_extract_prompt2 = """
You are an information extraction assistant.  
You will receive the full text of an email containing a meeting request.  
This may be for a virtual meeting, in-person meeting, or job interview.

Your task:  
Read the email carefully and extract the following details exactly as they appear in the email.  
If a detail is not mentioned, return an empty string for that field.

Fields to extract (in order):  
1. Request sent by – Name of the person or organization requesting the meeting.  
2. Meet start time – Start time of the meeting.  
3. Meet end time – End time of the meeting.  
4. Total duration – Total duration of the meeting.  
5. Meeting requested date – Date the meeting is scheduled/requested.  
6. Reason for meeting – Summarize in ≤15 words.

**Output format:**  
<request_sent_by>|<meet_start_time>|<meet_end_time>|<total_duration>|<meeting_requested_date>|<reason_for_meeting>  

**Rules:**  
- There must be exactly 6 values separated by 5 pipe (|) characters.
- Use exactly the wording from the email for all fields except “Reason for meeting,” which should be a concise summary.  
- Do not add extra words, punctuation, or formatting outside the specified structure.  
- Maintain the order of fields exactly as shown.  
- If a value is missing, leave it blank but keep the pipe separators.  
- The output must be a single line only.

Example:  
If only the requester's name, requested date, and reason are present, return:  
John Smith||||2024-08-15|Discuss project timeline
"""


In [13]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'meet_start_time', 'meet_end_time', 'total_duration', 'meeting_requested_date', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt2, model2, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Elena' '' '2 PM PST' '' '' 'Wednesday'
 'Denver warehouse optimization idea']
['Michael Rodriguez' '' '' '30 minutes' '' ''
 'Discuss potential synergies']
['Patricia Gonzalez' '' '' '45 minutes' '' ''
 'Discuss logistics partnership']
['Samantha' '' '' '25 minutes' '' ''
 'Discuss gamifying employee onboarding']
['Michelle' '' '3 PM' '' '' 'today' 'Discuss client situation']
['David Park' '' '' '30 minutes' '' '' 'Dissertation proposal discussion']
['Rakshitha' '' '' '1 hour' '' '' 'Grad Coop interview']


Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER,request_sent_by,meet_start_time,meet_end_time,total_duration,meeting_requested_date,reason_for_meeting
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False,,,,,,
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False,,,,,,
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False,,,,,,
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False,,,,,,
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False,,,,,,
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,,
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,,
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False,,,,,,
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False,,,,,,
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False,,,,,,


In [14]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'meet_start_time', 'meet_end_time', 'total_duration', 'meeting_requested_date', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt2, model3, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Marcus' '' '' '' '' '' 'Review Morrison campaign focus-group findings']


Exception: [429] Too Many Requests
{'status': 429, 'title': 'Too Many Requests'}

In [15]:
info_extract_prompt3 = """You are an information extraction assistant.  
You will be given the full text of an email regarding a meeting request. It can be a virtual meeting, in-person meeting, audio/video call, a job interview request, etc.

Your task is to read the email carefully and extract the following details exactly as they appear in the email (or return empty string "" if the detail is missing):

1. Request sent by — The name of the person or body that is requesting the meeting.
2. Meet start time — start time of the meeting if specified in the email.
3. Meet end time — end time of the meeting if specified in the email.
4. Total duration — Total duration of the meeting if specified in the email.
5. Meeting requested date - The date when the meeting was requested/ scheduled if specified in the mail.
6. Reason for meeting - The reason for meeting if specified in the email. Make sure to summarize the reason in no more than 15 words.

** Important Rule:**
- The output must have exactly 6 values separated by a pipe (|).

### Output format:
<request_sent_by>|<meet_start_time>|<meet_end_time>|<total_duration>|<meeting_requested_date>|<reason_for_meeting>"""

In [16]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'meet_start_time', 'meet_end_time', 'total_duration', 'meeting_requested_date', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt3, model2, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Jake' '' '' '20 minutes' '' ''
 'Barcelona client contract expertise needed']
['David Park' '' '' '30 minutes' '' '' 'Discuss dissertation proposal']


Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER,request_sent_by,meet_start_time,meet_end_time,total_duration,meeting_requested_date,reason_for_meeting
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False,,,,,,
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False,,,,,,
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False,,,,,,
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False,,,,,,
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False,,,,,,
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,,
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,,
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False,,,,,,
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False,,,,,,
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False,,,,,,


In [17]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'meet_start_time', 'meet_end_time', 'total_duration', 'meeting_requested_date', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt3, model3, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Marcus' '' '30-minute' '' 'Tuesday through Thursday 10 AM-4 PM EST' ''
 'Review Morrison campaign focus group insights']


Exception: [429] Too Many Requests
{'status': 429, 'title': 'Too Many Requests'}

In [18]:
def extract_MEET_info(model, token, prompt_msg, email_content: str, examples: list = None):
    """
    Extract information from JOB emails using an NVIDIA LLM with optional few-shot examples.

    Args:
        model (str): NVIDIA LLM model name.
        token (str): API key for NVIDIA LLM.
        prompt_msg (str): Classification instruction message.
        email_content (str): Email text to classify.
        examples (list, optional): Few-shot examples in the form 
                                   [{"email": "example email", "label": "JOB"}, ...]

    Returns:
        str: Extracted information result.
    """

    # Build few-shot messages
    messages = []

    # Add few-shot examples if provided
    if examples:
        for ex in examples:
            messages.append(("user", ex["email"]))
            messages.append(("assistant", ex["label"]))

    # Add the actual email to classify
    messages.append((
        "user",
       f"""{prompt_msg}\n\n EMAIL TO ANALYZE: {{email_content}} \n\n PIPE-DELIMITED OUTPUT:"""
    ))

    # Create prompt template
    prompt = ChatPromptTemplate.from_messages(messages)

    # Use the NVIDIA LLM via LangChain
    llm = ChatNVIDIA(
        model=model,
        api_key=token,
        temperature=0.2,
        max_tokens=4096,
        streaming=False
    )

    # Define chain
    chain = prompt | llm | StrOutputParser()

    # Run it
    result = chain.invoke({"email_content": email_content})
    return np.array(result.strip().split("|"))


def meet_information_extraction(row, prompt_msg, model, token, examples=None):
    if eval(row['mistral_MEET']) == True:
        result = extract_MEET_info(model, token, prompt_msg, row['email'], examples)
        if(len(result) != 3):
            print(result)
            return np.array(["Error"] * 3)
        return result

    else:
        return np.array([np.nan, np.nan, np.nan])




### Prompt-3 test

In [19]:
info_extract_prompt3 = """You are an information extraction assistant.  
You will be given the full text of an email regarding a meeting request. It can be a virtual meeting, in-person meeting, audio/video call, a job interview request, etc.

Your task is to read the email carefully and extract the following details exactly as they appear in the email (or return empty string "" if the detail is missing):

1. Request sent by — The name of the person or body that is requesting the meeting.
2. Requested date and time - The date and time when the meeting is requested to be held. Make sure to return the exact content if specified.
6. Reason for meeting - The reason for meeting if specified in the email. Make sure to summarize the reason in no more than 15 words.

** Important Rule:**
- The output must have exactly 3 values separated by a pipe (|).

### Output format:
<request_sent_by>|<requested_date_time>|<reason_for_meeting>"""

In [20]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'requested_date_time', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt3, model1, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER,request_sent_by,requested_date_time,reason_for_meeting
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False,,,
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False,,,
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False,,,
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False,,,
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False,,,
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False,,,
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False,,,
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False,,,


In [21]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'requested_date_time', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt3, model2, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Alex' 'this week' '' '']


Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER,request_sent_by,requested_date_time,reason_for_meeting
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False,,,
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False,,,
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False,,,
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False,,,
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False,,,
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False,,,
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False,,,
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False,,,


In [22]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'requested_date_time', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt3, model3, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

Exception: [429] Too Many Requests
{'status': 429, 'title': 'Too Many Requests'}

### Prompt-4 test

In [23]:
def extract_MEET_info(model, token, prompt_msg, email_content: str, examples: list = None):
    """
    Extract information from JOB emails using an NVIDIA LLM with optional few-shot examples.

    Args:
        model (str): NVIDIA LLM model name.
        token (str): API key for NVIDIA LLM.
        prompt_msg (str): Classification instruction message.
        email_content (str): Email text to classify.
        examples (list, optional): Few-shot examples in the form 
                                   [{"email": "example email", "label": "JOB"}, ...]

    Returns:
        str: Extracted information result.
    """

    # Build few-shot messages
    messages = []

    # Add few-shot examples if provided
    if examples:
        for ex in examples:
            messages.append(("user", ex["email"]))
            messages.append(("assistant", ex["label"]))

    # Add the actual email to classify
    messages.append((
        "user",
        f"""{prompt_msg}\n\n EMAIL TO ANALYZE: {{email_content}} \n\n PIPE-DELIMITED OUTPUT:"""
    ))

    # Create prompt template
    prompt = ChatPromptTemplate.from_messages(messages)

    # Use the NVIDIA LLM via LangChain
    llm = ChatNVIDIA(
        model=model,
        api_key=token,
        temperature=0.0,
        top_p=1.0,
        max_tokens=4096,
        streaming=False
    )

    # Define chain
    chain = prompt | llm | StrOutputParser()

    # Run it
    result = chain.invoke({"email_content": email_content})
    return np.array(result.split("|"))


def meet_information_extraction(row, prompt_msg, model, token, examples=None):
    if eval(row['mistral_MEET']) == True:
        result = extract_MEET_info(model, token, prompt_msg, row['email'], examples)
        if(len(result) != 5):
            print(result)
            return np.array(["Error"] * 5)
        return result

    else:
        return np.array([np.nan, np.nan, np.nan, np.nan, np.nan])




In [24]:
info_extract_prompt4 = """You are an expert information extraction assistant.

You will be given the full text of an email about a meeting request. You will also be given the current date and time for context to resolve relative dates (e.g., "tomorrow").

Your task is to read the email carefully and extract the following details.

Fields to Extract (in order):
request_sent_by: The name of the person or organization requesting the meeting.

meeting_date: The full date or date range of the meeting. Convert relative dates into YYYY-MM-DD format. For a range, use YYYY-MM-DD to YYYY-MM-DD.

start_time: The meeting's start time, normalized to a 24-hour HH:MM format.

end_time: The meeting's end time, normalized to a 24-hour HH:MM format. If only a start time and duration are given (e.g., "a 1-hour meeting at 2 PM"), you must calculate and format the end time.

reason_for_meeting: A concise summary of the meeting's purpose in 15 words or less.

Output Format:
Provide the output as a single line of pipe-delimited (|) text. If a detail is not mentioned, leave the field blank.

<request_sent_by>|<meeting_date>|<start_time>|<end_time>|<reason_for_meeting>

Rules:
There must be exactly 5 values separated by 4 pipe (|) characters.

Time and Date Formatting is Strict:

meeting_date must be YYYY-MM-DD for a single day or YYYY-MM-DD to YYYY-MM-DD for a range.

start_time and end_time must be HH:MM (24-hour clock). For example, "3 PM" becomes "15:00".

Handle Ambiguity: If the email proposes a range of possible dates or a window of time, extract the date range into the meeting_date field. Leave start_time and end_time blank unless a specific, single meeting slot is confirmed.

If a value is missing, leave it blank but keep the pipe separators. For example: Jane Smith|2025-08-26|||Catch up on project status.

The output must be a single line only. Do not add any extra words or explanations.

Examples:
Current Context: Monday, August 25, 2025, 10:00 AM.

Example Email 1:
"Hi team, I'd like to schedule a meeting for tomorrow, August 26th, from 2:00 PM to 3:30 PM to review the quarterly budget. Let me know if that works. Best, Sarah Chen"

Correct Output 1:
Sarah Chen|2025-08-26|14:00|15:30|Review the quarterly budget

Example Email 2:
"Hello, This is a confirmation from Acme Corp for your interview on Wednesday at 11 AM. The interview will last for 45 minutes. We look forward to speaking with you."

Correct Output 2:
Acme Corp|2025-08-27|11:00|11:45|Interview confirmation

Example Email 3:
"Hi Pat, Following up on our call. Can we connect to discuss the project proposal? Let me know what time works for you. -Alex"

Correct Output 3:
Alex|||||Discuss the project proposal

Example Email 4 (Handling a Window):
"Hi Mark, Are you free to connect sometime this week to go over the new designs? I'm generally available from Wednesday to Friday between 9 AM and noon. Let me know what works. -Jen"

Correct Output 4:
Jen|2025-08-27 to 2025-08-29|||Go over the new designs"""

In [25]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by','meeting_requested_date', 'meet_start_time', 'meet_end_time', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt4, model1, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Alex' '' '' '' '' 'Catch up and share updates\n']


Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER,request_sent_by,meeting_requested_date,meet_start_time,meet_end_time,reason_for_meeting
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False,,,,,
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False,,,,,
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False,,,,,
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False,,,,,
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False,,,,,
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False,,,,,
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False,,,,,
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False,,,,,


In [26]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'meeting_requested_date', 'meet_start_time', 'meet_end_time', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt4, model2, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Patricia Gonzalez' '' '' '' ''
 'Discuss logistics partnership requirements']
['Alex' '' '' '' '' 'Catch up over coffee']
['Kahlil John Somera' '' '' '' '' 'Phone screen for internship']


Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER,request_sent_by,meeting_requested_date,meet_start_time,meet_end_time,reason_for_meeting
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False,,,,,
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False,,,,,
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False,,,,,
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False,,,,,
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False,,,,,
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False,,,,,
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False,,,,,
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False,,,,,


In [27]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by', 'meeting_requested_date', 'meet_start_time', 'meet_end_time', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt4, model3, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Jake' '2025-08-25' '' '' '' 'Discuss Barcelona client contract issue']
['Patricia Gonzalez' '' '' '' ''
 'Introductory call to discuss logistics partnership']


Exception: [429] Too Many Requests
{'status': 429, 'title': 'Too Many Requests'}

### Prompt-5 test

In [32]:
info_extract_prompt5 = """You are an expert information extraction assistant.

You will be given the full text of an email about a meeting request.

Your task is to read the email carefully and extract the following details.

Fields to Extract (in order):
request_sent_by: The name of the person or organization requesting the meeting.

meeting_date: The date or date range of the meeting, extracted exactly as it appears in the text (e.g., "tomorrow", "Wednesday", "August 26th", "Wednesday to Friday").

start_time: The meeting's start time, normalized to a 24-hour HH:MM format.

end_time: The meeting's end time, normalized to a 24-hour HH:MM format. If only a start time and duration are given (e.g., "a 1-hour meeting at 2 PM"), you must calculate and format the end time.

reason_for_meeting: A concise summary of the meeting's purpose in 15 words or less.

Output Format:
Provide the output as a single line of pipe-delimited (|) text. If a detail is not mentioned, leave the field blank.

<request_sent_by>|<meeting_date>|<start_time>|<end_time>|<reason_for_meeting>

Rules:
There must be exactly 5 values separated by 4 pipe (|) characters.

Time Formatting is Strict:

start_time and end_time must be HH:MM (24-hour clock). For example, "3 PM" becomes "15:00".

Handle Ambiguity: If the email proposes a range of possible dates or a window of time, extract the date range into the meeting_date field. Leave start_time and end_time blank unless a specific, single meeting slot is confirmed.

If a value is missing, leave it blank but keep the pipe separators. For example: Jane Smith|August 26th|||Catch up on project status.

The output must be a single line only. Do not add any extra words or explanations.

Examples:
Example Email 1:
"Hi team, I'd like to schedule a meeting for tomorrow, August 26th, from 2:00 PM to 3:30 PM to review the quarterly budget. Let me know if that works. Best, Sarah Chen"

Correct Output 1:
Sarah Chen|tomorrow, August 26th|14:00|15:30|Review the quarterly budget

Example Email 2:
"Hello, This is a confirmation from Acme Corp for your interview on Wednesday at 11 AM. The interview will last for 45 minutes. We look forward to speaking with you."

Correct Output 2:
Acme Corp|Wednesday|11:00|11:45|Interview confirmation

Example Email 3:
"Hi Pat, Following up on our call. Can we connect to discuss the project proposal? Let me know what time works for you. -Alex"

Correct Output 3:
Alex|||||Discuss the project proposal

Example Email 4 (Handling a Window):
"Hi Mark, Are you free to connect sometime this week to go over the new designs? I'm generally available from Wednesday to Friday between 9 AM and noon. Let me know what works. -Jen"

Correct Output 4:
Jen|Wednesday to Friday|||Go over the new designs"""

In [33]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by','meeting_requested_date', 'meet_start_time', 'meet_end_time', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt5, model1, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Alex' 'this week' '' '' '' 'Catch up and share updates\n']


Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER,request_sent_by,meeting_requested_date,meet_start_time,meet_end_time,reason_for_meeting
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False,,,,,
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False,,,,,
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False,,,,,
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False,,,,,
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False,,,,,
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False,,,,,
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False,,,,,
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False,,,,,


In [34]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by','meeting_requested_date', 'meet_start_time', 'meet_end_time', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt5, model2, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

['Patricia Gonzalez' '' '' '' ''
 'Discuss logistics partnership requirements']
['Maya' '' '' '' '' 'Discuss documentary collaboration']
['Alex' '' '' '' '' 'Catch up and discuss startup updates']
['Kahlil John Somera' '' '' '' '' 'Phone screen for internship']


Unnamed: 0,email,JOB,MEET,OTHER,mistral_JOB,mistral_MEET,mistral_OTHER,request_sent_by,meeting_requested_date,meet_start_time,meet_end_time,reason_for_meeting
0,"Hi Saideep,\n\nThanks for your interest in iSp...",True,False,False,True,False,False,,,,,
1,"Hello, Saideep,\n\nWe received your job applic...",True,False,False,True,False,False,,,,,
2,"Hi Saideep,\n\nWe're super-pumped that you sha...",True,False,False,True,False,False,,,,,
3,"Hi Saideep,\n\nThank you for applying to the S...",True,False,False,True,False,False,,,,,
4,"Hello Saideep,\n\nThank you for your interest ...",True,False,False,True,False,False,,,,,
5,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,
6,"Dear Saideep,\n\nThank you for your interest i...",True,False,False,True,False,False,,,,,
7,"Hi Saideep,\nThank you for your interest in th...",True,False,False,True,False,False,,,,,
8,"Hi Saideep,\n\nThanks for your interest in Red...",True,False,False,True,False,False,,,,,
9,"Dear Saideep,\n\nThank you for giving us the o...",True,False,False,True,False,False,,,,,


In [35]:
info_data_df = data_df.copy()
info_data_df[['request_sent_by','meeting_requested_date', 'meet_start_time', 'meet_end_time', 'reason_for_meeting']] = (
    data_df
    .apply(lambda row: meet_information_extraction(row, info_extract_prompt5, model3, token), axis=1)
    .apply(pd.Series)  # convert array to 3 separate columns
)
info_data_df

Exception: [429] Too Many Requests
{'status': 429, 'title': 'Too Many Requests'}

In [36]:
email4 = """Interview Invitation – Data Scientist Role

Hi Priya,

We’re pleased to inform you that you have successfully cleared the assessment for the Data Scientist position at Quantum Analytics. Our team was impressed with your performance and would like to proceed with the next stage of the hiring process.

We would like to schedule your interview on either Tuesday, August 12th or Thursday, August 14th, between 10:00 AM and 2:00 PM. Please let us know your preferred date and time, and we will confirm the slot accordingly.

The interview will be conducted virtually via Microsoft Teams, and further details will be shared once your schedule is confirmed.

Looking forward to your response.

Best regards,
Ananya Kapoor
Recruitment Coordinator
Quantum Analytics Pvt. Ltd.
ananya.kapoor@quantumanalytics.com
+1 (628) 555-4173"""


extract_MEET_info(model2, token, info_extract_prompt5, email4)

array(['Quantum Analytics Pvt. Ltd.',
       'Tuesday, August 12th or Thursday, August 14th', '', '',
       'Interview for Data Scientist role'], dtype='<U45')