# Sentiment analysis

In [13]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

import json

# Load the data from JSON file
with open("sentiment_analysis.json", "r") as f:
    data = json.load(f)

# Define the training data
train_data = []
sub_labels = {}
for top_level_label, sub_level_labels in data.items():
    for sub_level_label_data in sub_level_labels:
        text = sub_level_label_data["text"]
        sub_level_label = sub_level_label_data["sub_level_label"]
        train_data.append((text, top_level_label, sub_level_label))
        if top_level_label not in sub_labels:
            sub_labels[top_level_label] = []
        sub_labels[top_level_label].append(sub_level_label)

# Define the mapping between top-level labels and integers
top_level_label_map = {label: i for i, label in enumerate(set([data[1] for data in train_data]))}

# Define the mapping between sub-level labels and integers
sub_level_label_map = {sub_label: i for i, sub_label in enumerate(set([sub_label for sub_labels_list in sub_labels.values() for sub_label in sub_labels_list]))}

# Convert the training data labels to integers using the label_map and sub_label_map
# A tensor is a multi-dimensional array that looks like a numpy array, it's used for neural networks
top_level_labels = torch.tensor([top_level_label_map[data[1]] for data in train_data])
sub_level_labels = torch.tensor([sub_level_label_map[sub_label] for data in train_data for sub_label in sub_labels[data[1]]])

# Load the pre-trained BERT model and tokenizer
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(top_level_label_map))
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Tokenize the training data and convert to tensors
inputs = tokenizer.batch_encode_plus([data[0] for data in train_data], padding=True, truncation=True, return_tensors="pt")

# Fine-tune the model on the training data
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
loss_fn = torch.nn.CrossEntropyLoss()
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(inputs["input_ids"], attention_mask=inputs["attention_mask"], labels=top_level_labels)
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    

    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

    # Evaluate the model on the training data
    predictions = outputs.logits.argmax(axis=1)
    accuracy = (predictions == top_level_labels).sum()


Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Epoch 1, Loss: 1.461476445198059
Epoch 2, Loss: 1.285217046737671
Epoch 3, Loss: 1.1575936079025269
Epoch 4, Loss: 1.0369764566421509
Epoch 5, Loss: 0.9474220275878906
Epoch 6, Loss: 0.8697894811630249
Epoch 7, Loss: 0.8198726773262024
Epoch 8, Loss: 0.7558162212371826
Epoch 9, Loss: 0.7613324522972107
Epoch 10, Loss: 0.7010558843612671


## Sentiment analysis query function

In [14]:
# Test the model
text = "Where can I change my account password?"

def predict_intent(text):
    top_level_inputs = tokenizer.encode_plus(text, padding=True, truncation=True, return_tensors="pt")
    top_level_outputs = model(top_level_inputs["input_ids"], attention_mask=top_level_inputs["attention_mask"])
    top_level_predicted_labels = torch.argsort(top_level_outputs.logits, descending=True).tolist()[0]
    top_level_predicted_intents = [k for k, v in top_level_label_map.items() if v in top_level_predicted_labels]
    
    sub_level_inputs = tokenizer.encode_plus(text, padding=True, truncation=True, return_tensors="pt")
    sub_level_outputs = model(sub_level_inputs["input_ids"], attention_mask=sub_level_inputs["attention_mask"])
    sub_level_predicted_labels = torch.argsort(sub_level_outputs.logits, descending=True).tolist()[0]
    sub_level_predicted_intents = [k for k, v in sub_level_label_map.items() if v in sub_level_predicted_labels]
    
    return top_level_predicted_intents, sub_level_predicted_intents



top_level_intents, sub_level_intents = predict_intent(text)
print(top_level_intents, sub_level_intents)


['platform_settings', 'privacy_policy', 'external_platform_settings', 'legal_statement'] ['question_answering', 'summarization']


## GPT

In [10]:
# GPT model herel 
%load_ext autoreload
%autoreload 2

from gpt import GPT

model = GPT()

model.answer_question(question='What is the most important thing I need to know about your privacy statement?')


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  serie = serie.str.replace('\\n', ' ')


'We are committed to securing your personal information, but we cannot guarantee that unauthorized third parties will never be able to access it. You have the right to ask for an overview of the information we have about you, correct or delete certain data, transfer some of this information to other organizations, withdraw your consent, and object to and restrict certain processing of your information.'

In [None]:
# GPT query function here


In [15]:
import gradio as gr
import time
import random

# load the pre-trained intent analysis model
# nlp = spacy.load("en_trf_bertbaseuncased_lg")

response_map = {
    ("security", "security_relating_to"): ["Our security measures include...", "We take security very seriously and have implemented..."],
    ("security", "security_concerns"): ["We understand your security concerns and have taken steps to address them.", "You can trust that your information is safe with us."],
    ("ordering", "ordering_type_of_food"): ["Our menu features a variety of Italian dishes, including pizza and pasta.", "We also offer a selection of salads and appetizers."],
    ("ordering", "ordering_delivery"): ["You can place a delivery order on our website or by calling our delivery hotline.", "Delivery is available within a 10-mile radius of our store."],
    ("information", "information_about"): ["Our store offers a variety of products, including...", "We also have a rewards program that allows you to earn points on your purchases."],
    ("information", "information_schedule"): ["We are open from 9am to 10pm, 7 days a week.", "Our business hours are 9am to 5pm, Monday to Friday."],
    ("help", "help_with_finding"): ["Here are some hotels near the airport:...", "I can help you find a hotel that meets your needs."],
    ("help", "help_with_booking"): ["You can book a room on our website or by calling our reservation hotline.", "We also offer a loyalty program that gives you discounts on future bookings."],
    ("information", "ordering"): ["You can place an order on our website or by calling our order hotline.", "We also offer a loyalty program that gives you discounts on future orders."]
}

# Example usage
intent = "information"
sub_intent = "ordering_delivery"

try:
    response = random.choice(response_map[(intent, sub_intent)])
except KeyError:
    response = "I'm sorry, I don't have a response for that."

print(response)


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        return "", history + [[user_message, None]]

    def bot(history):
        user_message = history[-1][0]
        predicted_intent = predict_intent(user_message)
        # Random choice randomly chooses one of the options that matches the intent
     
        response = random.choice(response_map[predicted_intent])
        history[-1][1] = response
        # The sleep is to simulate a more natural conversation
        time.sleep(1)
        return history



    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()


I'm sorry, I don't have a response for that.


AttributeError: 'dict' object has no attribute 'then'