In [None]:
#Če niso nameščene knjižnice, jih namestimo lahko s pomočjo ukaza
#%pip install _______

from datetime import datetime
import pandas as pd
from datetime import datetime, timedelta
import ipywidgets as widgets
from IPython.display import display
import plotly.express as px
import ipywidgets as widgets
from ipywidgets import interact
import openai
#Tukaj se vpiše API ključ


In [2]:
CATEGORIES = [
    "deposit_issues",
    "withdrawal_issues",
    "bonus_issues",
    "game_issues",
    "account_issues",
    "login_issues",
    "payment_issues",
    "technical_issues",
    "other_issues",
]

def classify_message(message_text: str) -> str:
    prompt = (
        "You are a message classifier for a casino support system. "
        "Given a user's message, classify it into one of these categories:\n"
        f"{', '.join(CATEGORIES)}.\n"
        "If it doesn't match any category well, classify it as other_issues.\n"
        "Message:\n"
        f"{message_text}\n"
        "Category:"
    )

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        temperature=0
    )

    category_guess = response.choices[0].message.content.strip().lower()

    # to je potrebno ker chat ni popolnoma zanesljiv
    if category_guess not in CATEGORIES:
        category_guess = "other_issues"
    return category_guess

# primer uporabe
test_message = "Čuj te,  zakaj denar še ni prišel na moj bančni račun"
predicted_category = classify_message(test_message)
print("Message:", test_message)
print("Predicted category:", predicted_category)

Message: Čuj te,  zakaj denar še ni prišel na moj bančni račun
Predicted category: withdrawal_issues


In [3]:
def load_and_classify_data(csv_path="Data.csv") -> pd.DataFrame:

    df = pd.read_csv(csv_path)
    
    df["timestamp"] = pd.to_datetime(df["timestamp"], format="%m/%d/%Y", errors="coerce")

    
    categories = []
    for msg in df["message"]:
        
        cat = classify_message(str(msg))
        categories.append(cat)
        print(msg,cat)
    df["category"] = categories
    return df

# Traja 37 minut in stane denar, zato sem zakomentiral
# tega ni potrebno runnat vsakič, ker je že narejen data_classified.csv file
''' 
df = load_and_classify_data("Data.csv")
df.to_csv('data_classified.csv', index=False)
'''

' \ndf = load_and_classify_data("Data.csv")\ndf.to_csv(\'data_classified.csv\', index=False)\n'

In [4]:


class Chatbot:

    def __init__(self, dataframe: pd.DataFrame):
        
        self.df = dataframe.copy()
        self.df["timestamp"] = pd.to_datetime(self.df["timestamp"], errors="coerce")
        self.context = {}
    
    def filter_messages(self, category=None, source=None, since=None, until=None) -> pd.DataFrame:
        """Filtrirajmo DataFrame na podlagi izbranih parametrov."""
        filtered = self.df
        if category:
            filtered = filtered[filtered["category"] == category]
        if source:
            filtered = filtered[filtered["source"].str.lower() == source.lower()]
        if since:
            filtered = filtered[filtered["timestamp"] >= since]
        if until:
            filtered = filtered[filtered["timestamp"] <= until]
        return filtered

    def parse_query(self, user_query: str) -> dict:
        """
        Iz povedi izlušči pomen. Kategorijo, vir in časovno obdobje.
        """
        parsed = {}
        lower_q = user_query.lower()

        
        for cat in CATEGORIES:
            if cat.replace("_", " ") in lower_q:
                parsed["category"] = cat

       
        if "livechat" in lower_q:
            parsed["source"] = "livechat"
        elif "telegram" in lower_q:
            parsed["source"] = "telegram"

       
        now = datetime.now()

        prompt = f"""
        Extract timeframe information from the following text. Analyze the text and output a JSON object.
        The JSON object must be one of two formats:
        1) For relative time, output {{ "amount": <number>, "unit": "<week|month|year>" }}.
        2) For an absolute date range, output {{ "since": "<YYYY-MM-DD>", "until": "<YYYY-MM-DD>" }}.
        If no timeframe is found, output an empty JSON object {{}}.
        Text: "{user_query}"
        For example, if the text is "last 2 weeks", return {{"amount": 2, "unit": "week"}}.
        If the text is "first 5 days of november 2024", return {{"since": "2024-11-01", "until": "2024-11-05"}}.
        """
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You extract timeframe information from text."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0,
            )
            reply = response.choices[0].message.content.strip()
            import json
            time_info = json.loads(reply)

            
            if "since" in time_info and "until" in time_info:
                try:
                    parsed["since"] = datetime.strptime(time_info["since"], "%Y-%m-%d")
                    parsed["until"] = datetime.strptime(time_info["until"], "%Y-%m-%d")
                except Exception as e:
                    
                    pass
            
            elif "amount" in time_info and "unit" in time_info:
                amount = int(time_info["amount"])
                unit = time_info["unit"].lower()
                if unit.startswith("week"):
                    parsed["since"] = now - timedelta(weeks=amount)
                elif unit.startswith("month"):
                    parsed["since"] = now - timedelta(days=30 * amount)
                elif unit.startswith("year"):
                    parsed["since"] = now.replace(year=now.year - amount)
            
        except Exception as e:
            
            pass

        return parsed

    def handle_query(self, user_query: str) -> dict:
        """
        tukaj se vse začne in konča
        """
        lower_q = user_query.lower()
        new_context = self.parse_query(user_query)
        
        
        if "telegram" in lower_q and "livechat" in lower_q:
            
            base_context = new_context.copy()
            
            telegram_context = base_context.copy()
            telegram_context["source"] = "telegram"
            telegram_data = self.filter_messages(
                category=self.context.get("category", telegram_context.get("category")),
                source=telegram_context.get("source"),
                since=self.context.get("since", telegram_context.get("since")),
                until=self.context.get("until", telegram_context.get("until"))
            )
            telegram_response = {
                "source": "telegram",
                "count": len(telegram_data),
                "unique_users": telegram_data["id_user"].nunique()
            }

            
            livechat_context = base_context.copy()
            livechat_context["source"] = "livechat"
            livechat_data = self.filter_messages(
                category=self.context.get("category", livechat_context.get("category")),
                source=livechat_context.get("source"),
                since=self.context.get("since", livechat_context.get("since")),
                until=self.context.get("until", livechat_context.get("until"))
            )
            livechat_response = {
                "source": "livechat",
                "count": len(livechat_data),
                "unique_users": livechat_data["id_user"].nunique()
            }
            
            return {"telegram": telegram_response, "livechat": livechat_response}
        else:
            
            self.context.update(new_context)
            data = self.filter_messages(
                category=self.context.get("category"),
                source=self.context.get("source"),
                since=self.context.get("since"),
                until=self.context.get("until")
            )
            return {
                "category": self.context.get("category", "not specified"),
                "source": self.context.get("source", "not specified"),
                "count": len(data),
                "unique_users": data["id_user"].nunique()
            }

In [5]:
df = pd.read_csv('data_classified.csv')
df.head()

Unnamed: 0,id_user,timestamp,source,message,category
0,4844,2024-11-01,livechat,"""What time is it where u are""",other_issues
1,3985,2024-11-01,livechat,"""What happened to the Cashback piggy?""",game_issues
2,2578,2024-11-01,livechat,"""Deactivated""",other_issues
3,1040,2024-11-01,telegram,"""It says access issues detected""",technical_issues
4,4288,2024-11-01,livechat,"""Im in arizona""",other_issues


In [6]:


SteveTheBot = Chatbot(df)


example_queries = [
    "Telegram deposit issues in the first 1 day of November 2024",
    "in the first 11 days of November 2024",
    "What about LiveChat?",
    "Now show me game issues", 
    "Actually, restrict to just the last month",
    "Switch to login issues reported in the last year",
    "Switch to telegram and livechat"
]

# Zalaufa chatbot in izpise odgovore na zgornje primere
for query in example_queries:
    print("User query:", query)
    response = SteveTheBot.handle_query(query)
    print(response, "\n")
    


User query: Telegram deposit issues in the first 1 day of November 2024
{'category': 'deposit_issues', 'source': 'telegram', 'count': 1, 'unique_users': 1} 

User query: in the first 11 days of November 2024
{'category': 'deposit_issues', 'source': 'telegram', 'count': 9, 'unique_users': 9} 

User query: What about LiveChat?
{'category': 'deposit_issues', 'source': 'livechat', 'count': 24, 'unique_users': 24} 

User query: Now show me game issues
{'category': 'game_issues', 'source': 'livechat', 'count': 41, 'unique_users': 41} 

User query: Actually, restrict to just the last month
{'category': 'game_issues', 'source': 'livechat', 'count': 0, 'unique_users': 0} 

User query: Switch to login issues reported in the last year
{'category': 'login_issues', 'source': 'livechat', 'count': 6, 'unique_users': 6} 

User query: Switch to telegram and livechat
{'telegram': {'source': 'telegram', 'count': 1, 'unique_users': 1}, 'livechat': {'source': 'livechat', 'count': 6, 'unique_users': 6}} 



In [7]:

query_input = widgets.Text(
    placeholder="Enter your query...",
    description="Query:",
    layout=widgets.Layout(width='70%')
)
send_button = widgets.Button(description="Send")
output_area = widgets.Output(layout={'border': '1px solid black'})

def on_send_click(b):
    user_query = query_input.value
    response = SteveTheBot.handle_query(user_query)
    with output_area:
        output_area.clear_output()
        print(f"User query: {user_query}")
        print("Response:", response)

send_button.on_click(on_send_click)
display(query_input, send_button, output_area)


Text(value='', description='Query:', layout=Layout(width='70%'), placeholder='Enter your query...')

Button(description='Send', style=ButtonStyle())

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…

In [8]:


df = pd.read_csv('data_classified.csv')
df.columns = ["id_user", "timestamp", "source", "message", "category"]

df['timestamp'] = pd.to_datetime(df['timestamp'], errors='raise')
df['date'] = df['timestamp'].dt.date

category_counts = df.groupby(['date', 'category']).size().unstack(fill_value=0)

categories = list(category_counts.columns)

def interactive_plot(target_category):
    fig = px.line(category_counts, x=category_counts.index, y=target_category, markers=True,
                  title=f"Daily Count of '{target_category}'")
    
    count_data = category_counts[target_category]
    mean_val = count_data.mean()
    std_val = count_data.std()
    threshold = mean_val + 2 * std_val
    
    spike_dates = count_data[count_data > threshold].index.tolist()
    spike_values = count_data[count_data > threshold].tolist()
    
    fig.add_scatter(x=spike_dates, y=spike_values, mode="markers",
                    marker=dict(color="red", size=10),
                    name="Spike (Mean + 2*Std)")
    fig.update_layout(xaxis_title="Date", yaxis_title="Number of Issues")
    fig.show()

interact(interactive_plot, target_category=widgets.Dropdown(options=categories, description="Category:"))

interactive(children=(Dropdown(description='Category:', options=('account_issues', 'bonus_issues', 'deposit_is…

<function __main__.interactive_plot(target_category)>