## Imports

In [None]:
# General imports
import time
import json
import random
import os

# Intent recognition imports
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# ChatGPT imports
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")
from gpt import GPT
from intent_model import IntentModel

## Load data

In [None]:
# Load the intent definitions from the JSON file. The encoding is pinned to
# UTF-8 so the parsed text does not depend on the platform's locale default.
with open("../intent_recognition_v2.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Flatten every intent entry into (question_text, intent_label) pairs:
# "train_questions" feed the training set, "test_questions" the validation set.
train_data = []
validation_data = []
for datum in data:
    label = datum["intent"]
    train_data.extend((text, label) for text in datum["train_questions"])
    validation_data.extend((text, label) for text in datum["test_questions"])

# Define the mapping between intent labels and integer class ids.
# The labels are sorted before being enumerated: iterating a bare set of
# strings yields an arbitrary order that changes between interpreter sessions
# (string hash randomisation), which made the ids non-reproducible and let the
# train and validation maps disagree even when they hold the same labels.
# Assumes the intent labels are mutually comparable (e.g. all strings).
# NOTE(review): if the validation set covers only a subset of the training
# intents the two maps will still assign different ids -- confirm how
# IntentModel consumes validation_label_map.
train_label_map = {
    label: index
    for index, label in enumerate(sorted({label for _, label in train_data}))
}
validation_label_map = {
    label: index
    for index, label in enumerate(sorted({label for _, label in validation_data}))
}

# Convert the string labels of each split to integer tensors using the
# corresponding label map.
train_labels = torch.tensor([train_label_map[label] for _, label in train_data])
validation_labels = torch.tensor(
    [validation_label_map[label] for _, label in validation_data]
)
    

## Load models

In [None]:
# Instantiate the project-local IntentModel with the flattened question/label
# pairs, the label->id maps and the label tensors of both splits. The arguments
# are passed positionally, in the same order as before.
intent_model = IntentModel(
    train_data,
    validation_data,
    train_label_map,
    train_labels,
    validation_label_map,
    validation_labels,
)

## Check data completeness

In [None]:
from collections import defaultdict

# Group the intent names by their category. An intent that neither uses GPT
# nor has any canned responses is flagged so the missing answers stand out.
categories = defaultdict(list)

for record in data:
    display_name = record['intent']
    lacks_answers = not (record['use_gpt'] or record['responses'])
    if lacks_answers:
        display_name = display_name + ' (Needs answers)'
    categories[record['category']].append(display_name)

# Print one section per category, followed by a blank separator.
# (To hide categories such as 'privacy_policy' or 'legal_statement', add a
# `continue` guard at the top of this loop.)
for category in categories:
    print(f'Category: {category}')
    for intent in categories[category]:
        print(f'  Intent: {intent}')
    print('\n')

## Get evaluation data

In [None]:
# Path of the cached evaluation results.
evaluation_file = "./evaluation_results.json"

# Evaluate the model only when no cached results file exists; otherwise reuse
# the stored results.
# NOTE(review): nothing in this cell writes the results back to
# evaluation_file -- presumably evaluate_model persists them; confirm.
if not os.path.isfile(evaluation_file):
    accuracies = intent_model.evaluate_model(data)
else:
    with open(evaluation_file, "r") as cached_results:
        accuracies = json.load(cached_results)

## Check data correctness

In [None]:
# Overall score first.
print("total correctness", accuracies["total_correctness"])

# Reference figures noted alongside this cell (presumably from an earlier run):
#   "total_correct_counts": 608,
#   "total_counts": 663,

# Only intents that did not reach 100% are worth reporting in detail.
imperfect_results = (
    entry for entry in accuracies["results"] if entry["total_correctness"] != 100
)

for evaluation in imperfect_results:
    print(f"Intent: {evaluation['correct_intent']}")
    print(f"Train question results: {evaluation['train_questions_results']}")

    # The test and advanced-test sections are optional: they are printed only
    # when the key is present and its value is non-empty.
    test_results = evaluation.get('test_questions_results')
    advanced_results = evaluation.get('test_questions_advanced_results')
    if test_results:
        print(f"Test question results: {test_results}")
    if advanced_results:
        print(f"Advanced test question results: {advanced_results}")

    print(f"Total correctness for all questions: {evaluation['total_correctness']}%")
    print("\n")
