# Sentiment analysis

In [5]:
# import torch
# from transformers import BertTokenizer, BertForSequenceClassification

# import json

# # Load the data from JSON file
# with open("sentiment_analysis.json", "r") as f:
#     data = json.load(f)

# print(data.items)

# # Define the training data
# train_data = []
# sub_labels = {}
# for top_level_label, sub_level_labels in data.items():
#     for sub_level_label_data in sub_level_labels:
#         text = sub_level_label_data["text"]
#         sub_level_label = sub_level_label_data["sub_level_label"]
#         train_data.append((text, top_level_label, sub_level_label))
#         if top_level_label not in sub_labels:
#             sub_labels[top_level_label] = []
#         sub_labels[top_level_label].append(sub_level_label)



# # Define the mapping between top-level labels and integers
# top_level_label_map = {label: i for i, label in enumerate(set([data[1] for data in train_data]))}

# # Define the mapping between sub-level labels and integers
# sub_level_label_map = {sub_label: i for i, sub_label in enumerate(set([sub_label for sub_labels_list in sub_labels.values() for sub_label in sub_labels_list]))}

# # Convert the training data labels to integers using the label_map and sub_label_map
# # A tensor is a multi-dimensional array that looks like a numpy array, it's used for neural networks
# top_level_labels = torch.tensor([top_level_label_map[data[1]] for data in train_data])
# sub_level_labels = torch.tensor([sub_level_label_map[sub_label] for data in train_data for sub_label in sub_labels[data[1]]])

# # Load the pre-trained BERT model and tokenizer
# model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(top_level_label_map))
# tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# # Tokenize the training data and convert to tensors
# inputs = tokenizer.batch_encode_plus([data[0] for data in train_data], padding=True, truncation=True, return_tensors="pt")

# # Fine-tune the model on the training data
# optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
# loss_fn = torch.nn.CrossEntropyLoss()
# for epoch in range(10):
#     optimizer.zero_grad()
#     outputs = model(inputs["input_ids"], attention_mask=inputs["attention_mask"], labels=top_level_labels)
#     loss = outputs.loss
#     loss.backward()
#     optimizer.step()
    

#     print(f"Epoch {epoch+1}, Loss: {loss.item()}")

#     # Evaluate the model on the training data
#     predictions = outputs.logits.argmax(axis=1)
#     accuracy = (predictions == top_level_labels).sum()


In [6]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import json

# Load the data from JSON file
with open("sentiment_analysis_v2.json", "r") as f:
    data = json.load(f)

# Define the training data
train_data = []
for datum in data:
    texts = datum["text"]
    label = datum["intent"]
    for text in texts:
        train_data.append((text, label))

# Define the mapping between top-level labels and integers
label_map = {label: i for i, label in enumerate(set([data[1] for data in train_data]))}

# Convert the training data labels to integers using the label_map
labels = torch.tensor([label_map[data[1]] for data in train_data])

# Load the pre-trained BERT model and tokenizer
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(label_map))
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Tokenize the training data and convert to tensors
inputs = tokenizer.batch_encode_plus([data[0] for data in train_data], padding=True, truncation=True, return_tensors="pt")

# Fine-tune the model on the training data
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
loss_fn = torch.nn.CrossEntropyLoss()
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(inputs["input_ids"], attention_mask=inputs["attention_mask"], labels=labels)
    loss = outputs.loss
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

    # Evaluate the model on the training data
    predictions = outputs.logits.argmax(axis=1)
    accuracy = (predictions == labels).sum()



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 1, Loss: 3.880333423614502
Epoch 2, Loss: 3.780472993850708
Epoch 3, Loss: 3.7233712673187256
Epoch 4, Loss: 3.644914150238037
Epoch 5, Loss: 3.5763819217681885
Epoch 6, Loss: 3.5722906589508057
Epoch 7, Loss: 3.4905059337615967
Epoch 8, Loss: 3.4626595973968506
Epoch 9, Loss: 3.412302017211914
Epoch 10, Loss: 3.344963788986206


In [7]:
# Sample test question
test_question = "Why can't I change my password?"

# Tokenize the test question and convert to tensors
inputs = tokenizer.encode_plus(test_question, padding=True, truncation=True, return_tensors="pt")

# Get the model's prediction for the test question
with torch.no_grad():
    outputs = model(inputs["input_ids"], attention_mask=inputs["attention_mask"])
predictions = outputs.logits.argmax(axis=1)
predicted_label = list(label_map.keys())[list(label_map.values()).index(predictions[0].item())]

print(f"Predicted intent: {predicted_label}")


Predicted intent: delete_personal_information


## Sentiment analysis query function

In [8]:
# Test the model

# def predict_intent(text):
#     top_level_inputs = tokenizer.encode_plus(text, padding=True, truncation=True, return_tensors="pt")
#     top_level_outputs = model(top_level_inputs["input_ids"], attention_mask=top_level_inputs["attention_mask"])
#     # top_level_predicted_labels = torch.argsort(top_level_outputs.logits, descending=True).tolist()[0]
#     top_level_predicted_label = torch.argmax(top_level_outputs.logits).item()
#     top_level_predicted_intent = [k for k, v in top_level_label_map.items() if v == top_level_predicted_label]
    
#     sub_level_inputs = tokenizer.encode_plus(text, padding=True, truncation=True, return_tensors="pt")
#     sub_level_outputs = model(sub_level_inputs["input_ids"], attention_mask=sub_level_inputs["attention_mask"])
#     sub_level_predicted_label = torch.argmax(sub_level_outputs.logits).item()
#     # sub_level_predicted_intents = [k for k, v in sub_level_label_map.items() if v in sub_level_predicted_labels]
#     sub_level_predicted_intent = [k for k, v in sub_level_label_map.items() if v == sub_level_predicted_label]
    
#     return top_level_predicted_intent[0] if top_level_predicted_intent else None, sub_level_predicted_intent[0] if sub_level_predicted_intent else None



# top_level_intents, sub_level_intents = predict_intent("How can i change my password")
# print(top_level_intents, sub_level_intents)


## GPT

In [9]:
# GPT model herel 
%load_ext autoreload
%autoreload 2

from gpt import GPT

gpt_model = GPT()

#gpt_model.answer_question(question='What is the most important thing I need to know about your privacy statement?')


  serie = serie.str.replace('\\n', ' ')


In [10]:
# GPT query function here


In [34]:
import gradio as gr
import time
import random

# load the pre-trained intent analysis model
# nlp = spacy.load("en_trf_bertbaseuncased_lg")

response_map = {
    ("security", "security_relating_to"): ["Our security measures include...", "We take security very seriously and have implemented..."],
    ("security", "security_concerns"): ["We understand your security concerns and have taken steps to address them.", "You can trust that your information is safe with us."],
    ("information", "information_about"): ["Our store offers a variety of products, including...", "We also have a rewards program that allows you to earn points on your purchases."],
    ("information", "information_schedule"): ["We are open from 9am to 10pm, 7 days a week.", "Our business hours are 9am to 5pm, Monday to Friday."],
    ("help", "help_with_finding"): ["Here are some hotels near the airport:...", "I can help you find a hotel that meets your needs."],
    ("help", "help_with_booking"): ["You can book a room on our website or by calling our reservation hotline.", "We also offer a loyalty program that gives you discounts on future bookings."],
    ("information", "ordering"): ["You can place an order on our website or by calling our order hotline.", "We also offer a loyalty program that gives you discounts on future orders."]
}

import gradio as gr

import gradio as gr

import gradio as gr

import gradio as gr

theme = gr.themes.Soft(
    primary_hue="orange",
    secondary_hue="orange",
).set(
    body_text_color_dark='*neutral_800',
    background_fill_primary_dark='*neutral_50',
    background_fill_secondary_dark='*neutral_50',
    border_color_accent_dark='*primary_300',
    border_color_primary_dark='*neutral_200',
    color_accent_soft_dark='*primary_50',
    link_text_color_dark='*secondary_600',
    link_text_color_active_dark='*secondary_600',
    link_text_color_hover_dark='*secondary_700',
    link_text_color_visited_dark='*secondary_500',
    block_background_fill='*neutral_100',
    block_background_fill_dark='*neutral_100',
    block_label_background_fill='*primary_400',
    block_label_background_fill_dark='*primary_400',
    block_label_text_color='*neutral_50',
    block_label_text_color_dark='*neutral_50',
    block_title_text_color='*neutral_50',
    block_title_text_color_dark='*neutral_50',
    checkbox_background_color_dark='*background_fill_primary',
    checkbox_background_color_selected='*primary_500',
    checkbox_background_color_selected_dark='*primary_500',
    checkbox_border_color_dark='*neutral_100',
    checkbox_border_color_focus='*primary_300',
    checkbox_border_color_focus_dark='*primary_300',
    checkbox_border_color_hover_dark='*neutral_300',
    checkbox_border_color_selected='*primary_500',
    checkbox_border_color_selected_dark='*primary_500',
    checkbox_border_width_dark='1px',
    checkbox_label_background_fill_selected_dark='*primary_500',
    checkbox_label_text_color_selected_dark='white',
    error_background_fill_dark='#fee2e2',
    error_border_color_dark='#fecaca',
    input_background_fill_dark='white',
    input_background_fill_focus_dark='*secondary_500',
    input_border_color_dark='*neutral_50',
    input_border_color_focus_dark='*secondary_300',
    input_placeholder_color_dark='*neutral_400',
    slider_color_dark='*primary_500',
    stat_background_fill_dark='*primary_300',
    table_border_color_dark='*neutral_300',
    table_even_background_fill_dark='white',
    table_odd_background_fill_dark='*neutral_50',
    button_primary_background_fill_dark='*primary_500',
    button_primary_background_fill_hover_dark='*primary_400',
    button_primary_border_color_dark='*primary_200',
    button_secondary_background_fill_dark='white',
    button_secondary_background_fill_hover_dark='*neutral_100',
    button_secondary_border_color_dark='*neutral_200',
    button_secondary_text_color_dark='*neutral_800'
)

with gr.Blocks(theme=theme, css="chat/chat.css") as demo:
    gr.Image("https://iyyu.com/_nuxt/img/navbar_logoW@2x.79eba99.png", interactive=False, tool="image", show_label=False, elem_classes="logo").style(width=200)
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        return "", history + [[user_message, None]]

    def bot(history):
        user_message = history[-1][0]

        # intent = predict_intent(text=user_message)
        intent = "hello"
        print('intent:', intent)
        # Random choice randomly chooses one of the options that matches the intent
     
        # generating a response with GPT if the main intent was 'privacy_policy' or 'legal_statement'
        use_gpt = intent[0] == 'privacy_policy' or intent[0] == 'legal_statement'

        response = gpt_model.answer_question(question=user_message) if use_gpt else 'No idea, bitch'

        # response = random.choice(response_map[intent])
        history[-1][1] = response
        # The sleep is to simulate a more natural conversation
        time.sleep(1)
        return history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()


Running on local URL:  http://127.0.0.1:7883

To create a public link, set `share=True` in `launch()`.




intent: hello
