In [24]:
import csv
import traceback
from openai import OpenAI
import os
import re
from dotenv import load_dotenv

load_dotenv('/home/ubuntu/nepal_chatbot/.env')
open_ai_key = os.getenv("OPENAI_API_KEY")

try:
    if open_ai_key:
        print("OpenAI key is loaded")
    else:
        raise ValueError("OpenAI key is not set")
except Exception as e:
    print(f"Error loading OpenAI API key: {e}")

OpenAI key is loaded


In [3]:
# class ActionCaptureGrievanceText(Action):
#     def name(self) -> Text:
#         return "action_capture_grievance_text"

def load_classification_data(file_path):
    """Loads grievance classification data from CSV into a dictionary"""
    categories = []
    try:
        with open(file_path, "r", encoding="utf-8") as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                categories.append(row["Classification"].title() + " - " + row["Generic Grievance Name"].title())  # Normalize case
    except Exception as e:
        print(f"Error loading CSV file: {e}")
        traceback.print_exc()
    return list(set(categories))

cat_path = '/home/ubuntu/nepal_chatbot/resources/grievances_categorization_v1.csv'

print(load_classification_data(cat_path))

['Environmental - Drainage And Sewage Management', 'Destruction Of Agrarian Resources - Crop Destruction', 'Wildlife, Environmental - Wildlife Passage', 'Wildlife, Environmental - Wildlife Destruction', 'Relocation Of Public Utilities - Access To Electricity', 'Relocation Issues - Forced Relocation Issues', 'Relocation Issues - Lack Of Infrastructure Of The Resettlement Site', 'Environmental - Noise Pollution', 'Cultural, Social - Cultural Site Disturbances', 'Destruction Of Agrarian Resources - Soil Pollution', 'Environmental - Air Pollution', 'Malicious Behavior, Environmental - Fire Incidents', 'Economic, Social - Employment Opportunities', 'Relocation Issues - Poor Location Of The Resettlement Site', 'Malicious Behavior - Theft Of Crops', 'Safety - Road Safety Provisions', 'Destruction Of Agrarian Resources - Destruction Of Agrarian Soils', 'Economic, Social - Land Acquisition Issues', 'Relocation Of Public Utilities - Access To Water', 'Environmental, Social - Cutting Of Trees', '

In [None]:
# def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
#     grievance_details = tracker.latest_message.get("text")
#     print(f"User input: {grievance_details}")

def open_ai_categorization(grievance_details, csv_file_path):
    # Load CSV Data
    # csv_file_path = "/home/ubuntu/nepal_chatbot/resources/grievances_categorization_v1.csv"
    classification_data = load_classification_data(csv_file_path)


    # Step 1: use OpenAI but restrict the category choices
    predefined_categories = load_classification_data(csv_file_path)# Extract unique categories
    category_list_str = "\n".join(f"- {c}" for c in predefined_categories)  # Format as list

    client = OpenAI(api_key=open_ai_key)
        
    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are an assistant helping to categorize grievances."},
            {"role": "user", "content": f"""
                Step 1:
                Categorize this grievance: "{grievance_details}"
                Only choose from the following categories:
                {category_list_str}
                Do not create new categories.
                Reply only with the categories, if many categories apply just list them with the following format:
                Category 1: category, Category 2: category, Category 3: category etc when applicable - END Category
                
                Step 2: summarize the grievance with simple and direct words so they can be understood with people with limited litteracy.
                Provide your answer with the following format
                Grievance Summary: lorum ipsum etc - END Summary
                
            """}
        ],
        model="gpt-4",

    )

    result = response.choices[0].message.content.strip()
    print(result)
    #Step 2 : parse the results
    
    result_dict = parse_summary_and_category(result)
    
    n = 0
    print(result_dict)
    for k in result_dict.keys():
        n+=1 if "category" in k else n

    
    print(result_dict, n)

    # # Step 2: Validate category with the user
    # buttons = [
    #     {"title": "Yes", "payload": "/agree"},
    #     {"title": "No, choose another category", "payload": "/deny"},
    #     {"title": "Exit", "payload": "/exit_grievance_process"}
    # ]

    # # dispatcher.utter_message(
    # #     text=f"Here's the category I identified: '{best_category}'. Does this seem correct?",
    # #     buttons=buttons
    # # )
    # print(f"Here's the category I identified: '{best_category}'. Does this seem correct?")

    # # return [
    # #     SlotSet("grievance_details", grievance_details),
    # #     SlotSet("grievance_category", best_category)
    # # ]

def parse_summary_and_category(result: str):
    """
    Parse the result from OpenAI to extract the grievance summary and categories into a structured dictionary.
    """

    # Extract category using regex
    category_match = re.search(r'Category.*?- END Category', result, re.DOTALL)
    category_text = category_match.group(0).replace("- END Category", "").strip() if category_match else ""

    # Extract summary using regex
    summary_match = re.search(r'Grievance Summary: (.*?)- END Summary', result, re.DOTALL)
    grievance_summary = summary_match.group(1).strip() if summary_match else ""
    print(grievance_summary)

    # Initialize result dictionary
    result_dict = {"grievance_summary": grievance_summary}
    

    # Process category string dynamically
    if category_text:
        category_list = category_text.split("Category ")
        category_list = [i for i in category_list if len(i)> 0 and "Category" not in i]
        print(category_list)
        # idx = 1
        for idx, category in enumerate(category_list, start =1):
            print(category)
            result_dict[f"category_{idx}"] = category.split(": ")[1].strip().strip(',') # Extract category name

    return result_dict
    
grievance = "tehy killed my duck and stole my rice"

print(open_ai_categorization(grievance, cat_path))

Category 1: Destruction Of Agrarian Resources - Crop Destruction, Category 2: Malicious Behavior - Theft Of Crops, Category 3: Wildlife, Environmental - Wildlife Destruction - END Category

Grievance Summary: Someone killed my pet bird and took my food grain - END Summary
Someone killed my pet bird and took my food grain
['1: Destruction Of Agrarian Resources - Crop Destruction, ', '2: Malicious Behavior - Theft Of Crops, ', '3: Wildlife, Environmental - Wildlife Destruction']
1: Destruction Of Agrarian Resources - Crop Destruction, 
2: Malicious Behavior - Theft Of Crops, 
3: Wildlife, Environmental - Wildlife Destruction
{'grievance_summary': 'Someone killed my pet bird and took my food grain', 'category_1': 'Destruction Of Agrarian Resources - Crop Destruction', 'category_2': 'Malicious Behavior - Theft Of Crops', 'category_3': 'Wildlife, Environmental - Wildlife Destruction'}
{'grievance_summary': 'Someone killed my pet bird and took my food grain', 'category_1': 'Destruction Of Ag