In [1]:
import torch
from huggingface_hub import login
from transformers import (
    RobertaTokenizerFast,
    RobertaForSequenceClassification,
    TrainingArguments,
    Trainer,
    AutoConfig,
    pipeline
)
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
import groq
from dotenv import load_dotenv
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hugging Face Hub repository holding the fine-tuned email classifier.
model_id = "pulkitgautam/email-classifier"
# On-disk location of the persisted Chroma collection, relative to the notebook's
# working directory. Built with os.path.join so the path has no doubled
# separators and uses the platform's own separator.
persist_directory = os.path.join("..", "data", "document_embeddings")

In [3]:
# Human-readable names for the classifier's output indices.
id2label = {0: "student", 1: "corporate", 2: "researcher"}
# Keep the reverse mapping in sync; overriding only id2label would leave the
# config's label2id pointing at whatever names the checkpoint was saved with.
label2id = {label: idx for idx, label in id2label.items()}

config = AutoConfig.from_pretrained(model_id)
config.update({"id2label": id2label, "label2id": label2id})

# The pipeline reads config.id2label to turn predicted indices into label strings.
model = RobertaForSequenceClassification.from_pretrained(model_id, config=config)
tokenizer = RobertaTokenizerFast.from_pretrained(model_id)

In [4]:
# Sample incoming email used to exercise the pipeline end to end. The text is
# kept verbatim (including its typos) because it is fed to the classifier,
# the retriever and the LLM as-is.
text = (
    "\n"
    "I wanted to talk about my research in neuroscience, can we talk about the same? "
    "Can you suggest a suitable time for it, I am reseaching uder prof mayank."
    "\n"
)

In [5]:
# (model, tokenizer, pipeline) of the most recently built classifier pipeline.
_label_pipeline_state = None


def _get_label_pipeline():
    """Return a text-classification pipeline, rebuilding it only when `model` or `tokenizer` changed."""
    global _label_pipeline_state
    state = _label_pipeline_state
    if state is None or state[0] is not model or state[1] is not tokenizer:
        # Use the GPU when one is present, otherwise run on CPU instead of failing.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        pipe = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            device=device,
        )
        state = (model, tokenizer, pipe)
        _label_pipeline_state = state
    return state[2]


def getLabelRoBERTA(text):
    """Classify an email and return its predicted category label.

    Args:
        text: The email body to classify.

    Returns:
        The ``"label"`` field of the first prediction returned by the
        text-classification pipeline built from the notebook's ``model`` and
        ``tokenizer``.
    """
    # Building a pipeline is expensive, so it is reused across calls.
    result = _get_label_pipeline()(text)
    return result[0]["label"]

In [6]:
# Smoke test: classify the sample email; as the cell's last expression, the returned label is displayed.
getLabelRoBERTA(text)

'researcher'

In [7]:
# Run the sentence-embedding model on the GPU when available and on CPU otherwise,
# so the notebook still runs on machines without CUDA.
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}
embedding_function = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs=model_kwargs,
)

# Open the persisted Chroma collection, using the embedding function above for queries.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)

In [8]:
def getRelevantDocs(text, k=2):
    """Retrieve the documents most similar to ``text`` and format them for the prompt.

    Args:
        text: Query text (here, the email body) to search the vector store with.
        k: Number of documents to request from the vector store. Defaults to 2.

    Returns:
        A single string in which each retrieved document appears as a
        ``"Document No: <index>"`` header line followed by the document's
        ``page_content`` and a newline. Empty when nothing is retrieved.
    """
    resources = vectordb.similarity_search(text, k=k)
    return "".join(
        f"Document No: {i}\n{resource.page_content}\n"
        for i, resource in enumerate(resources)
    )


In [9]:
# Preview the formatted retrieval result for the sample email (printed so the embedded newlines render).
print(getRelevantDocs(text))

Document No: 0
Humanities and Social Sciences Courses of Study  2022-2023279of the course on research methods, the course will focus on different 
experimental paradigms such as eye-tracking, etc. The course will 
first summarize the basics of experimentation. Following this, we will 
focus on a particular experimental paradigm (e.g., Eye-tracking, EEG) 
where the key concepts related to the paradigm will be discussed. 
Some research papers that have used this paradigm in domains such 
as attention, language processing, etc. will be discussed. Advantages 
and challenges to the paradigm will be discussed. In addition, 
practical sessions will be conducted to get a hands-on experience 
on a particular paradigm.
HSL725 Advanced Qualitative Methods 
1.5 Credits (0.5-0-2)
Pre-requisite(s): HSL721
This course will introduce the philosophical foundations of qualitative 
paradigm and discuss how it complements rather than contradict 
with experimental and/or computational approach to research.

In [10]:
# Load variables from a .env file into the process environment, then read the
# Groq credential from there so the key never appears in the notebook itself.
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [11]:
# Groq-hosted chat model used to draft replies; a low temperature keeps output near-deterministic.
# NOTE(review): confirm this model id is still served by Groq before relying on it.
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0.1,
    groq_api_key=GROQ_API_KEY,
)

In [12]:
# System message for the reply generator: fixed rules on tone, escalation of
# sensitive/corporate mail, use of the supplied documents, and the fallback reply.
# NOTE(review): the rules name categories 'Corporate' and 'Students', while the
# classifier's id2label emits 'corporate' / 'student' / 'researcher' — confirm the
# model matches these reliably or align the spellings.
system = """
You are an advanced automated email reply tool designed to assist the Head of Department (HOD) at a university. Your role is to generate professional and concise email replies based on predefined categories and the guidelines below.

Guidelines:
1. **Professionalism**: Maintain a formal tone in all responses. Keep replies clear, concise, and relevant to the subject of the email.
2. **Sensitive or Confidential Data**: 
   - If the email contains any sensitive or confidential information (such as legal matters or confidential partnerships), simply respond with: "Forwarding to HOD" and nothing more.
3. **Corporate Emails**: 
   - For emails categorized as 'Corporate', respond with: "Forwarded to HOD for review" and nothing more.
4. **Other Categories**: 
   - For non-sensitive emails (other than 'Corporate'), draft a reply relevant to the content of the email and its category. Ensure it directly addresses the inquiry or request.
5. **Document Usage**: 
   - If the email asks for specific data or details, check the provided documents for reference. Do not invent or guess any information. Use only the data found in the documents.
6. **General Queries (Students)**: 
   - If the email is categorized as 'Students', you may respond on behalf of the HOD. Ensure the response answers their query appropriately.
7. **Insufficient Information**: 
   - If there isn’t enough information to formulate a response, reply with: "Not enough info, will get back to you."
   
Your task is to return only the body of the reply, with no additional text or comments."""

# Per-email human message template. The {email}, {category} and {documents}
# placeholders are filled in when the chain is invoked.
# NOTE(review): the 'Researchers' fallback here ("Will get back to you.") differs
# from the general insufficient-data reply ("Not enough info, will get back to
# you.") — confirm that the two wordings are intentional.
human = """
Email: {email}
Category: {category}
Useful Documents: 
{documents}

Instructions:
- Emails containing sensitive or confidential information (e.g., legal matters, private partnerships) should always be escalated by responding with "Forwarding to HOD."
- For emails categorized as 'Corporate', simply respond with "Forwarded to HOD for review."
- For emails from 'Researchers', first check the provided documents for relevant information. If you can find the required details, draft an appropriate reply. If not, respond with "Will get back to you."
- For 'Students' queries or general inquiries, draft a full response on behalf of the HOD. 
- If insufficient data is available, use the response: "Not enough info, will get back to you."
- Ensure the reply is professional, to the point, and based on the given information.
- Only return the email body as your response, nothing else."""


# Combine the system rules and the per-email template into one chat prompt.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", human),
    ]
)

# Pipe the rendered prompt into the LLM; invoking the chain runs both steps.
chain = prompt | llm

In [13]:
# End-to-end run on the sample email: classify it, retrieve supporting
# documents, and let the LLM draft the reply body.
answer = chain.invoke(
    {
        "email": text,
        "category": getLabelRoBERTA(text),
        "documents": getRelevantDocs(text),
    }
)
print(answer.content)

I'd be happy to discuss your research in neuroscience. However, I would like to schedule a meeting at a time that suits you. Would you please suggest a few dates and times that work for you? I will make sure to coordinate with Prof. Mayank and get back to you with a confirmation.
