## Imports

In [1]:
# General imports
import time
import json
import random
import os

# Intent recognition imports
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# ChatGPT imports
%load_ext autoreload
%autoreload 2
from gpt import GPT
from settings import Settings
from intent_model import IntentModel

# Chat demo
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


# Load data 

In [2]:
# Load the data from JSON file
with open("intent_recognition_v2.json", "r") as f:
    data = json.load(f)

validation_data = []
train_data = []
for datum in data:
    texts = datum["train_questions"]
    label = datum["intent"]
    validation = datum["test_questions"]
    for text in texts:
        train_data.append((text, label))
    for text in validation:
        validation_data.append((text, label))

# Define the mapping between top-level labels and integers
train_label_map = {label: i for i, label in enumerate(set([data[1] for data in train_data]))}
validation_label_map = {label: i for i, label in enumerate(set([data[1] for data in validation_data]))} 

# Convert the training data labels to integers using the label_map
train_labels = torch.tensor([train_label_map[data[1]] for data in train_data])
validation_labels = torch.tensor([validation_label_map[data[1]] for data in validation_data])
    

# Intent recognition model

In [3]:
intent_model = IntentModel(train_data, validation_data, train_label_map, train_labels, validation_label_map, validation_labels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 1, Training Loss: 3.117727041244507, Validation Loss: 3.0433967113494873, Accuracy: 5
Epoch 2, Training Loss: 3.0595016479492188, Validation Loss: 3.170370578765869, Accuracy: 0
Epoch 3, Training Loss: 3.031751871109009, Validation Loss: 3.1771719455718994, Accuracy: 0
Epoch 4, Training Loss: 3.029236316680908, Validation Loss: 2.7707626819610596, Accuracy: 11
Epoch 5, Training Loss: 3.079627275466919, Validation Loss: 2.9536750316619873, Accuracy: 0
Epoch 6, Training Loss: 3.0354180335998535, Validation Loss: 2.907620906829834, Accuracy: 0
Epoch 7, Training Loss: 3.0357699394226074, Validation Loss: 2.8966922760009766, Accuracy: 0
Epoch 8, Training Loss: 3.0091331005096436, Validation Loss: 2.924370527267456, Accuracy: 0
Epoch 9, Training Loss: 2.9524598121643066, Validation Loss: 3.1711690425872803, Accuracy: 0
Epoch 10, Training Loss: 2.6657209396362305, Validation Loss: 2.804082155227661, Accuracy: 0
Epoch 11, Training Loss: 3.008416175842285, Validation Loss: 3.2723648548126

In [5]:
intent_model = IntentModel(train_data, validation_data, train_label_map, train_labels, validation_label_map, validation_labels).test_best_model()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 1, Training Loss: 3.117727041244507, Validation Loss: 2.9810636043548584, Accuracy: 0


KeyboardInterrupt: 

In [4]:
import pandas as pd

# test the intent model
predicted_intent, confidence_scores = intent_model.get_intent("Can you give me a summary of the privacy policy?")

# Retrieve the intent label and its corresponding confidence score
highest_confidence_intent = predicted_intent

print("Highest confidence intent:", highest_confidence_intent)
print("Highest confidence score:", confidence_scores)
print(len(confidence_scores))
print(len(intent_model.train_label_map.keys()))

pd.DataFrame({
    'keys': intent_model.train_label_map.keys(),
    'confidences': confidence_scores
})

Highest confidence intent: privacy_policy_updates
Highest confidence score: [0.008940329775214195, 0.10988102108240128, 0.07036087661981583, 0.07391127198934555, 0.01764173060655594, 0.005276218522340059, 0.008469056338071823, 0.0872868150472641, 0.07654278725385666, 0.13838869333267212, 0.008889921009540558, 0.1184031292796135, 0.008590993471443653, 0.010456804186105728, 0.00536392442882061, 0.06878825277090073, 0.05223017558455467, 0.08321867883205414, 0.011111143045127392, 0.005497409496456385, 0.030750731006264687]
21
21


Unnamed: 0,keys,confidences
0,refunds_legal_statement,0.00894
1,access_personal_data,0.109881
2,data_breaches,0.070361
3,summarization_privacy_policy,0.073911
4,summarization_legal_statement,0.017642
5,hello_response,0.005276
6,transfer_legal_statement,0.008469
7,data_collection,0.087287
8,data_sharing_opt_out,0.076543
9,privacy_policy_updates,0.138389


## ChatGPT model

In [9]:
# GPT model here
gpt_model = GPT()

# gpt_model = False

# Test ChatGPT model
# gpt_model.answer_question(question='What is the most important thing I need to know about your privacy statement?')


KeyboardInterrupt: 

# Load Settings class

In [None]:
settings_model = Settings()

# Test the settings model
# settings_model.update_user_setting("1", "profile_published", False)
settings_model.set_user_calling_card_visibility(user_id = "0", individual_user_id = "2", profile_card_component = "profile_card_component_3_visible", component_state = True)



## Get response using intent

In [None]:
def get_object_by_intent(intent):
    for object in data:
        if object['intent'] == intent:
            return object
    return None

# Set up gradio chat for demo

In [None]:
theme = gr.themes.Soft(
    primary_hue="orange",
    secondary_hue="orange",
).set(
    body_text_color_dark='*neutral_800',
    background_fill_primary_dark='*neutral_50',
    background_fill_secondary_dark='*neutral_50',
    border_color_accent_dark='*primary_300',
    border_color_primary_dark='*neutral_200',
    color_accent_soft_dark='*primary_50',
    link_text_color_dark='*secondary_600',
    link_text_color_active_dark='*secondary_600',
    link_text_color_hover_dark='*secondary_700',
    link_text_color_visited_dark='*secondary_500',
    block_background_fill='*neutral_100',
    block_background_fill_dark='*neutral_100',
    block_label_background_fill='*primary_400',
    block_label_background_fill_dark='*primary_400',
    block_label_text_color='*neutral_50',
    block_label_text_color_dark='*neutral_50',
    block_title_text_color='*neutral_50',
    block_title_text_color_dark='*neutral_50',
    checkbox_background_color_dark='*background_fill_primary',
    checkbox_background_color_selected='*primary_500',
    checkbox_background_color_selected_dark='*primary_500',
    checkbox_border_color_dark='*neutral_100',
    checkbox_border_color_focus='*primary_300',
    checkbox_border_color_focus_dark='*primary_300',
    checkbox_border_color_hover_dark='*neutral_300',
    checkbox_border_color_selected='*primary_500',
    checkbox_border_color_selected_dark='*primary_500',
    checkbox_border_width_dark='1px',
    checkbox_label_background_fill_selected_dark='*primary_500',
    checkbox_label_text_color_selected_dark='white',
    error_background_fill_dark='#fee2e2',
    error_border_color_dark='#fecaca',
    input_background_fill_dark='white',
    input_background_fill_focus_dark='*secondary_500',
    input_border_color_dark='*neutral_50',
    input_border_color_focus_dark='*secondary_300',
    input_placeholder_color_dark='*neutral_400',
    slider_color_dark='*primary_500',
    stat_background_fill_dark='*primary_300',
    table_border_color_dark='*neutral_300',
    table_even_background_fill_dark='white',
    table_odd_background_fill_dark='*neutral_50',
    button_primary_background_fill_dark='*primary_500',
    button_primary_background_fill_hover_dark='*primary_400',
    button_primary_border_color_dark='*primary_200',
    button_secondary_background_fill_dark='white',
    button_secondary_background_fill_hover_dark='*neutral_100',
    button_secondary_border_color_dark='*neutral_200',
    button_secondary_text_color_dark='*neutral_800'
)

with gr.Blocks(theme=theme, css="chat/chat.css") as demo:
    gr.Image("https://iyyu.com/_nuxt/img/navbar_logoW@2x.79eba99.png", interactive=False,
             tool="image", show_label=False, elem_classes="logo").style(width=200)
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    submit = gr.Button("Submit")

    def user(user_message, history):
        return "", history + [[user_message, None]]

    def bot(history):
        user_message = history[-1][0]

        intent, confidence_score = intent_model.get_intent(question=user_message)
        print('intent:', intent)
        print('confidence:', confidence_score)

        confidence_score = max(confidence_score)

        intent = get_object_by_intent(intent)


        # generating a response with GPT if the main intent was 'privacy_policy' or 'legal_statement'
        use_gpt = intent['use_gpt']
        generate_context = True

        # if the intent is 'privacy_policy' or 'legal_statement', use GPT to generate a response
        response = ('(ChatGPT - intent found: {}): \n{}'.format(intent['intent'], gpt_model.answer_question(question=user_message))
                    if use_gpt else
                    '<em><strong>(intent found ({}): {}):</strong></em> \n{}'.format(confidence_score, intent['intent'], intent['responses'][0]))
        # response = '(ChatGPT - intent found: {}): \n{}'.format(intent['intent'], gpt_model.answer_question(question=user_message)) if use_gpt else '(intent found: {}): \n{}'.format(intent['intent'], intent['responses'][0])

        # response = random.choice(response_map[intent])
        history[-1][1] = response
        # The sleep is to simulate a more natural conversation
        if not use_gpt:
            time.sleep(1)
        return history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

demo.launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


