In [3]:
# pip install transformers

In [4]:
# pip install torch

In [5]:
from transformers import BertForQuestionAnswering
from transformers import BertTokenizer
import torch
from torch.optim import AdamW
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

In [8]:
model.to(device)
model.train()

BertForQuestionAnswering(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-23): 24 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1024,), ep

In [9]:
# pip install datasets


In [10]:
from datasets import load_dataset

# Load the SQuAD dataset
dataset = load_dataset("squad", "plain_text", split="train")


Found cached dataset squad (/Users/shashwatbindal/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


In [11]:
train_features = []

for example in dataset:
    start_positions = []
    end_positions = []

    context = example["context"]
    questions = example["question"]
    answers = example["answers"]
    answer_start=0
    answer_end=0
    tokenized_context = tokenizer.tokenize(context)
    tokenized_question = tokenizer.tokenize(questions)
    tokenized_input = ["[CLS]"] + tokenized_question + ["[SEP]"] + tokenized_context + ["[SEP]"]
    inputs = tokenizer.convert_tokens_to_ids(tokenized_input)
    char_to_token = []
    token_index = 1  # Skip the [CLS] token
    for char_index, char in enumerate(context):
            if char != " " and char != "\n":
                char_to_token.append(token_index)
            if token_index < len(tokenized_input) - 1 and char_index + 1 < len(tokenized_input[token_index]):
                token_index += 1

    for answer in answers["answer_start"]:
        answer_start = answer
        token_start = char_to_token[min(answer_start, len(char_to_token) - 1)]
        token_start = min(token_start, len(inputs) - 1)
        start_positions.append(token_start)



    for text in answers["text"]:
        answer_end = answer_start + len(text) - 1
        token_end = char_to_token[min(answer_end, len(char_to_token) - 1)]
        token_end = min(token_end, len(inputs) - 1)
        end_positions.append(token_end)

    


In [12]:
train_features.append(
            {
                "input_ids": inputs,
                "attention_mask": [1] * len(inputs),
                "start_positions": start_positions,
                "end_positions": end_positions,
            }
        )

In [14]:
# pip install wrapt


In [15]:
import torch
from transformers import BertForQuestionAnswering, AdamW
from torch.utils.data import DataLoader, RandomSampler
from transformers import AdamW
import torch.optim as optim

# Define the batch size and number of training epochs
batch_size = 4
num_epochs = 50

# Convert the train_features list to a PyTorch DataLoader
train_dataset = torch.utils.data.TensorDataset(
    torch.tensor([f["input_ids"] for f in train_features], dtype=torch.long),
    torch.tensor([f["attention_mask"] for f in train_features], dtype=torch.long),
    torch.tensor([f["start_positions"] for f in train_features], dtype=torch.long),
    torch.tensor([f["end_positions"] for f in train_features], dtype=torch.long),
)
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)

# Load the pre-trained BERT model for question answering
model = BertForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

# Set the model to training mode
model.train()

# Define the optimizer and learning rate
# optimizer = AdamW(model.parameters(), lr=3e-6)
optimizer = optim.Adamax(model.parameters(), lr=3e-6)


In [16]:
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for batch in train_dataloader:
        batch = tuple(t.to(device) for t in batch)
        input_ids, attention_mask, start_positions, end_positions = batch
        
        optimizer.zero_grad()
        
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            start_positions=start_positions,
            end_positions=end_positions
        )
        
        loss = outputs.loss
        loss.backward()
        optimizer.step()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [17]:
save_path = "/Users/shashwatbindal/Fine_Tuned_model"

# Save the fine-tuned model
model.save_pretrained(save_path)

In [38]:
from transformers import BertForQuestionAnswering, BertTokenizer

# Load the fine-tuned BERT model
model = BertForQuestionAnswering.from_pretrained(save_path)
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

# Prepare the input
question = "what are dogs?"
context = """dogs are animals"""
inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt")

# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

start_logits = outputs.start_logits
end_logits = outputs.end_logits

# Process the logits to obtain the predicted answer span
start_index = torch.argmax(start_logits)
end_index = torch.argmax(end_logits)

# Decode the predicted answer span
all_tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
answer = tokenizer.convert_tokens_to_string(all_tokens[start_index:end_index+1])

# Print the predicted answer
print("Question:", question)
print("Predicted Answer:", answer)


In [44]:
def bert_question_answer(question, passage, max_len=512):
    # Tokenize input question and passage
    # Add special tokens - [CLS] and [SEP]
    input_ids = tokenizer.encode(question, passage, max_length=max_len, truncation=True)

    # Getting number of tokens in 1st sentence (question) and 2nd sentence (passage that contains answer)
    sep_index = input_ids.index(102)
    len_question = sep_index + 1
    len_passage = len(input_ids) - len_question

    # Need to separate question and passage
    # Segment ids will be 0 for question and 1 for passage
    segment_ids = [0] * len_question + [1] * len_passage

    # Converting token ids to tokens
    tokens = tokenizer.convert_ids_to_tokens(input_ids)

    # Getting start and end scores for answer
    # Converting input arrays to torch tensors before passing to the model
    start_token_scores = model(torch.tensor([input_ids]), token_type_ids=torch.tensor([segment_ids]))[0]
    end_token_scores = model(torch.tensor([input_ids]), token_type_ids=torch.tensor([segment_ids]))[1]

    # Converting scores tensors to numpy arrays
    start_token_scores = start_token_scores.detach().numpy().flatten()
    end_token_scores = end_token_scores.detach().numpy().flatten()

    # Getting start and end index of answer based on highest scores
    answer_start_index = np.argmax(start_token_scores)
    answer_end_index = np.argmax(end_token_scores)

    # Getting scores for start and end token of the answer
    start_token_score = np.round(start_token_scores[answer_start_index], 2)
    end_token_score = np.round(end_token_scores[answer_end_index], 2)

    # Combining subwords starting with ## and get full words in output.
    # It is because tokenizer breaks words which are not in its vocab.
    answer = tokens[answer_start_index]
    for i in range(answer_start_index + 1, answer_end_index + 1):
        if tokens[i][0:2] == '##':
            answer += tokens[i][2:]
        else:
            answer += ' ' + tokens[i]

    # If the answer didn't find in the passage
    if (answer_start_index == 0) or (start_token_score < 0) or (answer == '[SEP]') or (answer_end_index < answer_start_index):
        answer = "Sorry!, I could not find an answer in the passage."

    return (answer_start_index, answer_end_index, start_token_score, end_token_score, answer)

def split_passage_and_process(question, passage, max_len=10000, chunk_size=500):
    # Split the passage into smaller chunks
    passage_chunks = [passage[i:i+chunk_size] for i in range(0, len(passage), chunk_size)]
    answers = []
    # Process each chunk separately
    for chunk in passage_chunks:
        answer = bert_question_answer(question, chunk, max_len=max_len)
        if answer[-1] != "Sorry!, I could not find an answer in the passage.":
            answers.append(answer[-1])

    # Combine the answers from all chunks
    combined_answer = " ".join(answers)
    return combined_answer

In [46]:
split_passage_and_process("dogs are animals","whata are dogs?")

'animals'

In [None]:
from transformers import BertForQuestionAnswering, BertTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
import torch

# Load the fine-tuned BERT model
model = BertForQuestionAnswering.from_pretrained(save_path)
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

# Load the SQuAD validation dataset
datasetv = load_dataset("squad", "plain_text", split="validation[:500]")


Found cached dataset squad (/Users/shashwatbindal/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


In [None]:
from transformers import BertForQuestionAnswering, BertTokenizer

# Load the fine-tuned BERT model
model = BertForQuestionAnswering.from_pretrained(save_path)
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

# # Prepare the input

# inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt")

# # Perform inference
# with torch.no_grad():
#     outputs = model(**inputs)

# start_logits = outputs.start_logits
# end_logits = outputs.end_logits

# # Process the logits to obtain the predicted answer span
# start_index = torch.argmax(start_logits)
# end_index = torch.argmax(end_logits)

# # Decode the predicted answer span
# all_tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
# answer = tokenizer.convert_tokens_to_string(all_tokens[start_index:end_index+1])
def generate_predictions(dataset, tokenizer, model):
    predictions = []
    for example in dataset:
        inputs = tokenizer.encode_plus(example["question"], example["context"], add_special_tokens=True, return_tensors="pt")
        input_ids = inputs["input_ids"].to(model.device)
        attention_mask = inputs["attention_mask"].to(model.device)

        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            start_logits = outputs.start_logits
            end_logits = outputs.end_logits

        start_index = torch.argmax(start_logits)
        end_index = torch.argmax(end_logits)

        answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[0][start_index:end_index + 1]))
        predictions.append(answer)

    return predictions


In [None]:
# predictions = generate_predictions(datasetv, tokenizer, model)


In [None]:
predictions

['denver broncos',
 'carolina panthers',
 'san francisco bay area at santa clara , california',
 'denver broncos',
 'gold',
 'golden anniversary',
 'february 7 , 2016',
 'american football conference',
 'golden anniversary',
 'american football conference',
 'february 7 , 2016',
 'denver broncos',
 "levi ' s stadium",
 'santa clara , california',
 'super bowl l',
 '2015',
 '2016',
 'santa clara , california',
 "levi ' s stadium",
 '24 – 10',
 'february 7 , 2016',
 '2015',
 'denver broncos',
 'carolina panthers',
 'denver broncos',
 '2015',
 'denver broncos',
 "levi ' s stadium in the san francisco bay area at santa clara , california",
 'super bowl 50',
 'denver broncos',
 'cam newton',
 'eight',
 '1995',
 'arizona cardinals',
 'new england patriots',
 'arizona cardinals',
 'new england patriots',
 'new england patriots',
 'four',
 'cam newton',
 '15 – 1',
 'cam newton',
 '12 – 4 record',
 'four',
 'new england patriots',
 'cam newton',
 'arizona cardinals',
 'eight',
 'new england pat

In [None]:
# true_answers=[]
# for ans in datasetv:
#     true_answers.append(ans["answers"]["text"][0])
#     # print(ans["answers"]["text"])


In [47]:
# true_answers

In [48]:
# predictions[5:10],true_answers[5:10]

In [51]:
# from sklearn.metrics import f1_score

# def exact_match(pred, true):
#     return int(pred == true)

# def evaluate(predictions, true_answers):
#     em = 0  # Exact Match
#     f1 = 0  # F1 Score

#     for pred, true in zip(predictions, true_answers):
#         em += exact_match(pred, true)
#         f1 += f1_score([true], [pred], average="micro")

#     total_examples = len(predictions)
#     em /= total_examples
#     f1 /= total_examples

#     return em, f1



# em_score, f1_score = evaluate(predictions, true_answers)
# print(f"Exact Match: {em_score}")
# print(f"F1 Score: {f1_score}")

In [52]:
# def token_overlap_similarity(pred, true):
#     pred_tokens = set(pred.lower().split())
#     true_tokens = set(true.lower().split())
    
#     intersection = pred_tokens.intersection(true_tokens)
#     union = pred_tokens.union(true_tokens)
    
#     if len(union) == 0:
#         return 0.0
    
#     similarity = len(intersection) / len(union)
#     return similarity

# def evaluate(predictions, true_answers):
#     similarity_scores = []

#     for pred, true in zip(predictions, true_answers):
#         similarity = token_overlap_similarity(pred, true)
#         similarity_scores.append(similarity)

#     average_similarity = sum(similarity_scores) / len(similarity_scores)

#     return average_similarity



# similarity_score = evaluate(predictions, true_answers)
# print(f"Similarity Score: {similarity_score}")


In [53]:
import sounddevice as sd
import soundfile as sf

# Set the audio settings
sample_rate = 44100
duration = 5 # Duration in seconds
output_file = "audio.wav"

# Record audio from the laptop's microphone
# print("Recording audio...")
# audio = sd.rec(int(sample_rate * duration), samplerate=sample_rate, channels=1)
# sd.wait()

# # Save the audio to a file
# sf.write(output_file, audio, sample_rate)

# print(f"Audio saved to {output_file}")

In [54]:

import speech_recognition as sr

# Create a recognizer object
# r = sr.Recognizer()

# # Path to the audio file
# audio_file = "audio.wav"

# # Load the audio file
# with sr.AudioFile(audio_file) as source:
#     # Read the audio data
#     audio = r.record(source)

# try:
#     # Recognize speech from the audio
#     text = r.recognize_google(audio)
#     print("Transcription:", text)
# except sr.UnknownValueError:
#     print("Speech recognition could not understand audio")
# except sr.RequestError as e:
#     print("Could not request results from Google Speech Recognition service; {0}".format(e))


In [55]:
def askquestion(audio_file,output_file):
    print("Recording audio...")
    audio = sd.rec(int(sample_rate * duration), samplerate=sample_rate, channels=1)
    sd.wait()

    # Save the audio to a file
    sf.write(output_file, audio, sample_rate)

    print(f"Audio saved to {output_file}")
    r = sr.Recognizer()

    # Path to the audio file

    # Load the audio file
    with sr.AudioFile(audio_file) as source:
        # Read the audio data
        audio = r.record(source)
    text=""    

    try:
        # Recognize speech from the audio
        text = r.recognize_google(audio)
        print("Transcription:", text)
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))

    return text
    

In [56]:
from gtts import gTTS

def text_to_speech(text, output_file):
    # Create a gTTS object with the text and desired language
    tts = gTTS(text=text, lang='en')

    # Save the audio to a file
    tts.save(output_file)

# Example usage

output_file = "output.mp3"

text_to_speech(text, output_file)

In [57]:
import pygame

def play_mp3(file_path):
    pygame.mixer.init()
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()

pygame 2.4.0 (SDL 2.26.4, Python 3.10.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [58]:
def Passageready():

    output_file2="passage.mp3"
    text_to_speech("Give the passage you want to ask questions from", output_file2)
    play_mp3(output_file2)
    passage=input("Enter the Passage: ")
    return passage


In [59]:
def QuestionAnswer(passage):
    
    output_file="Ask.mp3"
    text_to_speech("Ask the question", output_file)
    play_mp3(output_file)
    question=askquestion("question.wav","question.wav")
    ans  = split_passage_and_process( question, passage)
    output_file3="answer.mp3"
    text_to_speech("The Answer to the question....: "+question+"..."+"is....:"+ans, output_file3)
    play_mp3(output_file3)
    return ans


In [60]:
passage=Passageready()

In [61]:
ans=QuestionAnswer(passage)

Recording audio...
Audio saved to question.wav
Transcription: ask question what doing


In [62]:
ans

'c + +'