In [92]:
"""
This script demonstrates a simple approach to:
1) Read and classify user messages from Data.csv
2) Implement a basic chatbot interface to filter messages
3) Provide simple conversational context handling

Prerequisites:
- pandas

Usage:
 python chatbot.py
"""

import openai
import os
import pandas as pd
import re
from datetime import datetime
openai.api_key = os.getenv("OPENAI_API_KEY")


In [93]:
# Updated category definitions with nine categories
CATEGORIES = {
    "deposit_issues": ["deposit", "funds", "wallet", "moonpay"],
    "withdrawal_issues": ["withdraw", "cashout", "payout"],
    "bonus_issues": ["bonus", "free spin", "freeplay", "freespin"],
    "game_issues": ["game", "slot", "spin", "bet"],
    "account_issues": ["account", "disabled", "password"],
    "login_issues": ["login", "sign in", "authenticate"],
    "payment_issues": ["payment", "transaction", "credit", "card"],
    "technical_issues": ["error", "bug", "crash", "freeze", "lag"],
    "other_issues": []  # Fallback for messages that don't match any keywords
}

def classify_message(message):
    """Naive rule-based approach to assign a category to a message."""
    msg_lower = message.lower()
    for category, keywords in CATEGORIES.items():
        # If there are any keywords defined, check them; otherwise, skip to fallback
        if keywords and any(k in msg_lower for k in keywords):
            return category
    return "other_issues"

# Load data

class Chatbot:
    """
    A very basic chatbot that:
    - Filters messages by category, source, and time range
    - Keeps track of user’s last query context
    """

    def __init__(self, dataframe):
        self.df = dataframe
        self.context = {}

    def filter_messages(self, category=None, source=None, since=None, until=None):
        """Filter messages based on params."""
        data = self.df
        if category:
            data = data[data["category"] == category]
        if source:
            data = data[data["source"].str.lower() == source.lower()]
        if since:
            data = data[data["timestamp"] >= since]
        if until:
            data = data[data["timestamp"] <= until]
        return data

    def parse_query(self, user_query):
        """
        Extremely simplified parser:
         - looks for category words
         - checks for 'livechat' or 'telegram'
         - checks for 'week'/'month' as relative date
        """
        parsed = {}
        lower_q = user_query.lower()

        # Detect categories
        for cat in CATEGORIES.keys():
            if cat.replace("_", " ") in lower_q or any(k in lower_q for k in CATEGORIES[cat]):
                parsed["category"] = cat

        # Detect source
        if "livechat" in lower_q:
            parsed["source"] = "livechat"
        elif "telegram" in lower_q:
            parsed["source"] = "telegram"

        # Detect timeframe (very naive: last month vs last week)
        now = datetime.now()
        if "month" in lower_q:
            # set range to last 30 days
            parsed["since"] = now.replace(day=1)
        if "week" in lower_q:
            # set range to last 7 days (naive)
            parsed["since"] = now - pd.Timedelta(days=7)

        return parsed

    def handle_query(self, user_query):
        """Use naive context approach: update existing context if user is refining."""
        new_context = self.parse_query(user_query)

        # Merge new context with old context
        self.context.update(new_context)

        # Filter data
        data = self.filter_messages(
            category=self.context.get("category"),
            source=self.context.get("source"),
            since=self.context.get("since"),
            until=self.context.get("until")
        )

        # Return simple stats
        count_msgs = len(data)
        unique_users = data["id_user"].nunique()
        return {
            "category": self.context.get("category", "not specified"),
            "source": self.context.get("source", "not specified"),
            "count": count_msgs,
            "unique_users": unique_users,
        }

def load_and_classify_data(csv_path="Data.csv"):
    """Load the CSV file into a DataFrame and classify each message."""
    df = pd.read_csv(csv_path)
    df["category"] = df["message"].apply(classify_message)
    # Convert timestamp to datetime if needed
    # Example assumption: the format is M/D/YYYY
    df["timestamp"] = pd.to_datetime(df["timestamp"], format="%m/%d/%Y", errors="coerce")
    #pritn timestamp
    print(df["timestamp"])
    return df

df = load_and_classify_data("Data.csv")
bot = Chatbot(df)

0      2024-11-01
1      2024-11-01
2      2024-11-01
3      2024-11-01
4      2024-11-01
          ...    
4522   2025-01-30
4523   2025-01-30
4524   2025-01-30
4525   2025-01-30
4526   2025-01-30
Name: timestamp, Length: 4527, dtype: datetime64[ns]


In [94]:
example_queries = [

    "Telegram other issues",
    "Livechat other issues",
    "Game issues via LiveChat in the last year",
    "Game issues via LiveChat in the last 15 year",
    "Login issues reported in the last 10 years"
]

for query in example_queries:
    print("User query:", query)
    response = bot.handle_query(query)
    print(response, "\n")

User query: Telegram other issues
{'category': 'other_issues', 'source': 'telegram', 'count': 203, 'unique_users': 147} 

User query: Livechat other issues
{'category': 'other_issues', 'source': 'livechat', 'count': 2463, 'unique_users': 2355} 

User query: Game issues via LiveChat in the last year
{'category': 'game_issues', 'source': 'livechat', 'count': 238, 'unique_users': 238} 

User query: Game issues via LiveChat in the last 15 year
{'category': 'game_issues', 'source': 'livechat', 'count': 238, 'unique_users': 238} 

User query: Login issues reported in the last 10 years
{'category': 'login_issues', 'source': 'livechat', 'count': 10, 'unique_users': 10} 

