In [16]:
import json
import pandas as pd
import dotenv
from tqdm import tqdm
from langchain_openai import ChatOpenAI
from chatsky_llm_autoconfig.graph import Graph
from chatsky_llm_autoconfig.dialogue import Dialogue
import chatsky_llm_autoconfig.metrics.automatic_metrics as auto_metrics
import chatsky_llm_autoconfig.metrics.llm_metrics as llm_metrics
from chatsky_llm_autoconfig.algorithms.dialogue_generation import DialoguePathSampler, DialogueSampler

# Load key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API credentials/endpoint that the
# ChatOpenAI client picks up later — confirm which variables are expected.
dotenv.load_dotenv()

True

In [17]:
# Read the evaluation cases from data.json and report how many were loaded.
with open('data.json', encoding="utf-8") as data_file:
    raw_text = data_file.read()
data = json.loads(raw_text)
print(len(data))

10


The data follows this schema:

```json
[
    {
        "topic": str,
        "graph": dict,
        "dialogues": [
            {
                "messages": list,
                "topic": str
            }
        ]
    }
]
```

In [18]:
# Chat model used by the LLM-based metrics.
# The model id must be passed as `model` (alias of `model_name`); `name` only
# labels the runnable and would leave the client on its default model.
model = ChatOpenAI(model="gpt-4o-mini")

In [15]:
# Eyeball the raw dialogue records attached to every case.
# Each record carries a 'messages' list, which is what
# Dialogue(messages=...) is built from when the metrics are computed.
for sample in tqdm(data):
    case_dialogues = sample['dialogues']
    print(case_dialogues)

100%|██████████| 10/10 [00:00<00:00, 12066.47it/s]

[{'messages': [{'text': 'Hello! This is tech support. How can I help you today?', 'participant': 'assistant'}, {'text': 'The website loading takes too long', 'participant': 'user'}, {'text': 'Try to reload your device.', 'participant': 'assistant'}, {'text': 'Sure, I will try that', 'participant': 'user'}, {'text': 'Has this resolved the issue for you?', 'participant': 'assistant'}, {'text': 'Yes, that fixed everything', 'participant': 'user'}, {'text': 'Thank you for contacting us, have a nice day.', 'participant': 'assistant'}], 'topic': ''}, {'messages': [{'text': 'Hello! This is tech support. How can I help you today?', 'participant': 'assistant'}, {'text': 'The website loading takes too long', 'participant': 'user'}, {'text': 'Try to reload your device.', 'participant': 'assistant'}, {'text': 'Sure, I will try that', 'participant': 'user'}, {'text': 'Has this resolved the issue for you?', 'participant': 'assistant'}, {'text': 'No, it did not work ', 'participant': 'user'}, {'text'




In [23]:
# Score every case with the LLM-based and automatic metrics; one row per case.
results = []
for case in tqdm(data):
    # Build the graph once per case and share it across all three metrics
    # instead of re-wrapping the same dict for every call.
    graph = Graph(case['graph'])

    # LLM-judged metrics: each returns a dict with 'value' and 'description'.
    triplets = llm_metrics.are_triplets_valid(G=graph, model=model)
    themes = llm_metrics.is_theme_valid(G=graph, topic=case['topic'], model=model)

    # Automatic metric: checked against the dialogues stored with the case.
    dialogues = [Dialogue(messages=x['messages']) for x in case['dialogues']]
    utterances_present = auto_metrics.all_utterances_present(G=graph, dialogues=dialogues)

    # Key order defines the column order of the resulting frame / CSV.
    results.append({
        "topic": case["topic"],
        "are_triplets_valid": triplets['value'],
        "are_triplets_valid_details": triplets['description'],
        "is_theme_valid": themes['value'],
        "is_theme_valid_details": themes['description'],
        "all_utterances_present": utterances_present,
    })

# Collect the per-case rows into a frame, show it, and persist it for later use.
df = pd.DataFrame(results)
print(df)
df.to_csv('results.csv')

  0%|          | 0/10 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.vsegpt.ru/v1/chat/completions "HTTP/1.1 200 OK"
 10%|█         | 1/10 [00:18<02:43, 18.20s/it]INFO:httpx:HTTP 

                             topic  are_triplets_valid  \
0                     Tech Support                True   
1                    food delivery                True   
2      Medical Appointment Booking                True   
3                          library                True   
4                        auto care                True   
5             booking a hotel room                True   
6                abstract purchase                True   
7  chatting with a smart assistant                True   
8          taking a loan in a bank                True   
9                      coffee shop                True   

   are_triplets_valid_details  is_theme_valid  \
0  All transitions are valid.           False   
1  All transitions are valid.            True   
2  All transitions are valid.            True   
3  All transitions are valid.            True   
4  All transitions are valid.            True   
5  All transitions are valid.           False   
6  All transitions




In [24]:
# Report the mean of each metric column across all evaluated cases.
for metric in ('are_triplets_valid', 'is_theme_valid', 'all_utterances_present'):
    print(f"{metric}: {df[metric].mean()}")

are_triplets_valid: 1.0
is_theme_valid: 0.7
all_utterances_present: 0.8
