In [1]:
import pandas as pd
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_core.documents import Document

# 컬럼 매핑 정의
COLUMN_MAPPING = {
    'text': 'SIT',
    'emotion': 'EMOT'
}

# Retriever Component
class Retriever:
    def __init__(self, csv_file, column_mapping=COLUMN_MAPPING):
        self.df = pd.read_csv(csv_file, sep='|')
        self.column_mapping = column_mapping
        self.documents = [
            Document(
                page_content=row[self.column_mapping['text']], 
                metadata={"emotion": row[self.column_mapping['emotion']]}
            ) for _, row in self.df.iterrows()
        ]
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        self.vector = FAISS.from_documents(self.documents, self.embeddings)
        self.retriever = self.vector.as_retriever()

    def retrieve(self, query):
        retrieved_docs = self.retriever.invoke(query)  
        return "\n".join([doc.page_content for doc in retrieved_docs])  

# Generator Component
class Generator:
    def __init__(self):
        self.model = ChatOpenAI(model_name='gpt-4o')
        self.prompt = ChatPromptTemplate.from_template("""You are an expert in emotion analysis. Given a text input, classify the emotion **strictly** as one of the following:  
        **'joy', 'fear', 'anger', 'sadness', 'disgust', 'shame', 'guilt'.**  
        **No other emotions are allowed.**  

        ## Instructions:
        1. **Read and understand** the input text carefully.
        2. **Strictly classify** the text as one of these **only**: 'joy', 'fear', 'anger', 'sadness', 'disgust', 'shame', or 'guilt'.
        
        <context>
        {context}
        </context>
        
        Text: {input}
        Emotion:""")

    def generate(self, context, text):
        document_chain = create_stuff_documents_chain(self.model, self.prompt)
        context_docs = [Document(page_content=context)]  
        response = document_chain.invoke({"context": context_docs, "input": text})
        return response

# Emotion Analysis Component for API Integration
class EmotionAnalyzer:
    def __init__(self, csv_file, column_mapping=COLUMN_MAPPING):
        self.retriever = Retriever(csv_file, column_mapping)
        self.generator = Generator()
        self.column_mapping = column_mapping
    
    def analyze_emotion(self, text):
        retrieved_context = self.retriever.retrieve(text)
        return self.generator.generate(retrieved_context, text)
    

In [None]:

# Client Component
csv_file = "isear.csv"


# Read the csv file
df = pd.read_csv(csv_file, sep='|', on_bad_lines='skip')


In [4]:
analyzer = EmotionAnalyzer(csv_file)

for index, row in df.head(5).iterrows():
    predicted_emotion = analyzer.analyze_emotion(row[COLUMN_MAPPING['text']])
    print(f"Actual Emotion: {row[COLUMN_MAPPING['emotion']]} Predicted Emotion: {predicted_emotion}")

ParserError: Error tokenizing data. C error: Expected 43 fields in line 199, saw 44


In [18]:

# Client Component
csv_file = "isear.csv"

# Read the csv file
df = pd.read_csv(csv_file,  on_bad_lines='skip')

ModuleNotFoundError: No module named 'numpy.rec'

In [6]:

# Client Component
csv_file = "isear.csv"
analyzer = EmotionAnalyzer(csv_file)

# Read the csv file
df = pd.read_csv(csv_file)
for index, row in df.head(5).iterrows():
    predicted_emotion = analyzer.analyze_emotion(row['text'])

    print(f"Actual Emotion: {row['emotion']} Predicted Emotion: {predicted_emotion}")

ParserError: Error tokenizing data. C error: Expected 2 fields in line 4, saw 3
