In [1]:
# Install dependencies with the %pip magic so they are installed into the
# environment of the running kernel; a `!pip` shell call uses whichever pip is
# first on PATH, which may belong to a different Python installation.
%pip install transformers torch



[notice] A new release of pip is available: 24.1.1 -> 24.1.2
[notice] To update, run: C:\Users\sujat\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip




In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import re

# The tokenizer and the question-answering model are loaded from the same
# pretrained checkpoint (tokenizer first, then the model weights).
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForQuestionAnswering.from_pretrained(model_name),
)

# Function to read the content of a text file
def read_text_file(file_path, encoding='latin-1'):
    """Return the entire contents of a text file as a single string.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to
            'latin-1', the value that used to be hard-coded here. latin-1
            accepts every byte value, so decoding never fails, but a UTF-8
            file read this way has its multi-byte characters garbled; pass
            'utf-8' for UTF-8 files.

    Returns:
        The decoded file contents.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file's bytes are invalid for ``encoding``.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()

# Function to extract customer care numbers from the context
def extract_customer_care_numbers(context):
    """Find ``label: phone number`` pairs in free text.

    A pair is one word immediately followed by a colon, optional whitespace,
    and then a phone number: an optional leading ``+``, a digit, at least
    seven further digits, hyphens or same-line whitespace characters, and a
    closing digit. Only the single word directly before the colon is captured
    as the label.

    Args:
        context: Text to scan. ``None`` or an empty string yields no matches.

    Returns:
        A list of ``(label, number)`` string tuples in order of appearance;
        an empty list when nothing matches.
    """
    if not context:
        return []
    # Inside the number, whitespace is limited to non-line-break characters
    # ([^\S\r\n]). A plain \s also matches newlines, which let a number at the
    # end of one line absorb digits from the start of the next line.
    return re.findall(r'(\w+):\s*(\+?\d(?:[\d-]|[^\S\r\n]){7,}\d)', context)

# Function to generate a response using the model
def generate_response(context, question, max_length=512):
    """Answer ``question`` from ``context`` with the extractive QA model.

    The question and context are encoded together as one pair. The model
    scores every token as a possible answer start and answer end, and the
    highest-scoring start and end positions delimit the answer span. When
    that span holds no real text (the end falls before the start, or the span
    covers only special tokens such as ``[CLS]``/``[SEP]``), the customer care
    numbers found in ``context`` are returned instead.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        context: Text to search for the answer.
        question: The user's question.
        max_length: Maximum number of tokens in the encoded question+context
            pair. Longer input is truncated, so text beyond the limit is not
            seen by the model. Defaults to 512.

    Returns:
        The answer text, or a fallback message that lists the customer care
        numbers (or states that none are available).
    """
    # Truncate so the encoded pair stays within the model's length limit.
    inputs = tokenizer(
        question,
        context,
        add_special_tokens=True,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    input_ids = inputs["input_ids"][0].tolist()

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # Most likely first token of the answer, and one past the most likely
    # last token (so the pair can be used directly as a slice).
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1

    # Drop special tokens so that a span landing on [CLS]/[SEP] decodes to an
    # empty string and triggers the fallback, instead of the marker text
    # itself being returned to the user.
    answer_tokens = tokenizer.convert_ids_to_tokens(
        input_ids[answer_start:answer_end], skip_special_tokens=True
    )
    answer = tokenizer.convert_tokens_to_string(answer_tokens)
    if answer.strip():
        return answer

    # No usable answer: fall back to the customer care numbers in the context.
    numbers = extract_customer_care_numbers(context)
    if not numbers:
        return "I couldn't find the answer to your question and there are no customer care numbers available."
    header = "I couldn't find the answer to your question. Here are the customer care numbers:\n"
    return header + "".join(f"{name}: {number}\n" for name, number in numbers)

# Read the reference text once; it is the context that questions are answered from.
text_file_path = 'responses.txt'  # Replace with your text file path
context = read_text_file(file_path=text_file_path)

# Main chatbot loop
def chatbot():
    """Run an interactive question-answering loop on standard input/output.

    Each line typed at the ``You: `` prompt is answered by calling
    ``generate_response`` with the module-level ``context``. The loop ends
    when the user types ``exit`` (case-insensitive, surrounding whitespace
    ignored) or when input is closed or interrupted. Blank lines are skipped
    rather than sent to the model.
    """
    print("Chatbot is ready to answer your questions. Type 'exit' to end.")
    while True:
        try:
            question = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the user interrupted the prompt: leave
            # cleanly instead of surfacing a traceback.
            print("\nChatbot: Goodbye!")
            break
        if not question:
            # Nothing was asked; prompt again without running the model.
            continue
        if question.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break
        answer = generate_response(context, question)
        print(f"Chatbot: {answer}")

# Start the interactive session. This call blocks, waiting on input(), until
# the loop inside chatbot() ends (the user types 'exit').
chatbot()



Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Chatbot is ready to answer your questions. Type 'exit' to end.


You:  What is the name of the Organisation?


Chatbot: intel corporation


You:  What will be your estimated market price in 5 years?


Chatbot: $ 150 billion


You:  exit


Chatbot: Goodbye!
