## Imports

In [109]:
# General imports
import time
import json
import random
import os

# Intent recognition imports
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# ChatGPT imports
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")
from gpt import GPT
from intent_model import IntentModel

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load data

In [110]:
# Load the intent definitions from the JSON file
with open("../intent_recognition.json", "r") as f:
    data = json.load(f)

# Flatten the dataset into (question, intent) pairs, one per training question
train_data = []
for datum in data:
    texts = datum["train_questions"]
    label = datum["intent"]
    for text in texts:
        train_data.append((text, label))

# Define the mapping between intent labels and integer ids.
# dict.fromkeys() de-duplicates while keeping first-seen order, so every label
# gets the same id on every run. Enumerating a set() of strings instead would
# follow the per-process randomized string hashes and reshuffle the ids after
# each kernel restart, making ids incomparable between runs.
label_map = {
    intent: index
    for index, intent in enumerate(dict.fromkeys(intent for _, intent in train_data))
}

# Convert the training data labels to integers using the label_map
labels = torch.tensor([label_map[intent] for _, intent in train_data])
    

## Load models

In [111]:
# Build the intent classifier from the (question, intent) pairs, the
# label -> id mapping and the tensor of label ids prepared above.
# NOTE(review): the recorded cell output logs 25 epochs of loss, so the
# constructor appears to fine-tune the model as well — confirm in intent_model.py.
intent_model = IntentModel(train_data, label_map, labels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 1, Loss: 4.0627923011779785
Epoch 2, Loss: 4.076840400695801
Epoch 3, Loss: 4.011010646820068
Epoch 4, Loss: 3.991832971572876
Epoch 5, Loss: 3.96270751953125
Epoch 6, Loss: 3.976637125015259
Epoch 7, Loss: 3.8393263816833496
Epoch 8, Loss: 3.820101022720337
Epoch 9, Loss: 3.8908259868621826
Epoch 10, Loss: 3.456650495529175
Epoch 11, Loss: 3.200932264328003
Epoch 12, Loss: 2.9399046897888184
Epoch 13, Loss: 2.7963833808898926
Epoch 14, Loss: 2.3329925537109375
Epoch 15, Loss: 2.121279001235962
Epoch 16, Loss: 1.878674864768982
Epoch 17, Loss: 1.6221086978912354
Epoch 18, Loss: 1.3446102142333984
Epoch 19, Loss: 1.153952956199646
Epoch 20, Loss: 0.9768708944320679
Epoch 21, Loss: 0.811667799949646
Epoch 22, Loss: 0.6792150735855103
Epoch 23, Loss: 0.5603086352348328
Epoch 24, Loss: 0.4657444953918457
Epoch 25, Loss: 0.38105008006095886


## Check data completeness

In [112]:
from collections import defaultdict

# Group every intent name under its category. An intent that neither uses GPT
# nor has any responses is tagged so it stands out in the listing.
categories = defaultdict(list)

for item in data:
    intent_text = item['intent']
    has_answer_source = item['use_gpt'] or item['responses']
    if not has_answer_source:
        intent_text = intent_text + ' (Needs answers)'
    categories[item['category']].append(intent_text)

# Print the overview per category, leaving out the two policy/legal categories.
for category, intents in categories.items():
    if category in ('privacy_policy', 'legal_statement'):
        continue

    print(f'Category: {category}')
    for intent in intents:
        print(f'  Intent: {intent}')
    print('\n')

Category: platform_settings
  Intent: location_password_change
  Intent: reset_password_change
  Intent: update_password_change
  Intent: problem_update_password_change


Category: My_visibility_settings
  Intent: visibility_publish_profile_status
  Intent: visibility_publish_profile_how_to


Category: system notifications
  Intent: change_system_critical_notification_setting
  Intent: change_iYYU_news_notification_setting
  Intent: change_update_notification_setting


Category: connect
  Intent: change_note_follower_notification_setting
  Intent: change_connection_response_notification_setting
  Intent: change_connection_request_notification_setting


Category: interact
  Intent: change_followed_user_note_notification_setting
  Intent: change_message_notification_preference


Category: generic
  Intent: yes_response
  Intent: no_response




## Get evaluation data

In [113]:
evaluation_file = "./evaluation_results.json"

# Evaluate the model only when no stored results exist; otherwise reuse the file.
# NOTE(review): nothing in this cell writes evaluation_file, and stored results
# are reused even after retraining — confirm evaluate_model() saves them and
# delete the file when a fresh evaluation is wanted.
if not os.path.isfile(evaluation_file):
    accuracies = intent_model.evaluate_model(data)
else:
    with open(evaluation_file, "r") as file:
        accuracies = json.load(file)

## Check data correctness

In [115]:
# Overall score first, then a per-intent breakdown.
print("total correctness", accuracies["total_correctness"])

# Figures noted by hand (origin not shown here — verify before relying on them):
# "total_correct_counts": 608,
# "total_counts": 663,

for evaluation in accuracies["results"]:
    # Only intents that were not classified perfectly are reported.
    if evaluation["total_correctness"] != 100:
        print(f"Intent: {evaluation['correct_intent']}")
        print(f"Train question results: {evaluation['train_questions_results']}")

        # The two test sections are optional; print each only when present and non-empty.
        test_results = evaluation.get('test_questions_results')
        if test_results:
            print(f"Test question results: {test_results}")

        advanced_results = evaluation.get('test_questions_advanced_results')
        if advanced_results:
            print(f"Advanced test question results: {advanced_results}")

        print(f"Total correctness for all questions: {evaluation['total_correctness']}%")
        print("\n")


total correctness 86.58
Intent: summarization_privacy
Train question results: 5 out of 6 correct
Total correctness for all questions: 83.33%


Intent: third_parties
Train question results: 0 out of 6 correct
Total correctness for all questions: 0.0%


Intent: personal_information
Train question results: 3 out of 10 correct
Total correctness for all questions: 30.0%


Intent: personal_information_rights
Train question results: 0 out of 10 correct
Total correctness for all questions: 0.0%


Intent: terms_of_service_legal_statement
Train question results: 4 out of 10 correct
Total correctness for all questions: 40.0%


Intent: use_restrictions_legal_statement
Train question results: 9 out of 10 correct
Total correctness for all questions: 90.0%


Intent: location_password_change
Train question results: 1 out of 10 correct
Total correctness for all questions: 10.0%


Intent: reset_password_change
Train question results: 3 out of 10 correct
Total correctness for all questions: 30.0%


Inten