In [None]:
pip install transformers

Collecting transformers
  Downloading transformers-4.30.1-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting numpy>=1.17
  Downloading numpy-1.24.3-cp310-cp310-macosx_11_0_arm64.whl (13.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.9/13.9 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl (3.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.9/3.9 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting regex!=2019.12.17
  Downloading regex-2023.6.3-cp310-cp310-macosx_11_0_arm64.whl (288 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.0/289.0 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.14.1
  Using cached 

In [None]:
pip install torch

Collecting torch
  Downloading torch-2.0.1-cp310-none-macosx_11_0_arm64.whl (55.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.8/55.8 MB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting sympy
  Using cached sympy-1.12-py3-none-any.whl (5.7 MB)
Collecting networkx
  Using cached networkx-3.1-py3-none-any.whl (2.1 MB)
Collecting mpmath>=0.19
  Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)
Installing collected packages: mpmath, sympy, networkx, torch
Successfully installed mpmath-1.3.0 networkx-3.1 sympy-1.12 torch-2.0.1
Note: you may need to restart the kernel to use updated packages.


In [12]:
from transformers import BertForQuestionAnswering
from transformers import BertTokenizer
import torch

import numpy as np




In [13]:
# Checkpoint identifier shared by the model and its tokenizer. Keeping it in a
# single constant guarantees both are loaded from the same checkpoint (the
# original repeated the literal in each call).
BERT_QA_CHECKPOINT = 'bert-large-uncased-whole-word-masking-finetuned-squad'

# Question-answering model used by bert_question_answer().
model = BertForQuestionAnswering.from_pretrained(BERT_QA_CHECKPOINT)

# Matching tokenizer used by bert_question_answer() to encode question/passage pairs.
tokenizer_for_bert = BertTokenizer.from_pretrained(BERT_QA_CHECKPOINT)


In [14]:
def bert_question_answer(question, passage, max_len=500):
    """Extract the answer to ``question`` from ``passage`` with the loaded BERT model.

    Uses the module-level ``tokenizer_for_bert`` and ``model``.

    Args:
        question: Question text.
        passage: Text in which to look for the answer.
        max_len: Maximum number of token ids produced by the tokenizer; the
            encoded pair is truncated to this length.

    Returns:
        A 5-tuple ``(answer_start_index, answer_end_index, start_token_score,
        end_token_score, answer)``. The indices are the positions of the
        highest start/end scores, the scores are those maxima rounded to two
        decimals, and ``answer`` is the reconstructed answer text, or an
        apology message when no usable answer span was found.
    """
    # Tokenize the question/passage pair; the tokenizer adds [CLS] and [SEP].
    input_ids = tokenizer_for_bert.encode(question, passage, max_length=max_len, truncation=True)

    # Everything up to and including the first [SEP] belongs to the question.
    # Ask the tokenizer for the [SEP] id instead of hard-coding 102.
    sep_index = input_ids.index(tokenizer_for_bert.sep_token_id)
    len_question = sep_index + 1
    len_passage = len(input_ids) - len_question

    # Segment ids: 0 for the question part, 1 for the passage part.
    segment_ids = [0] * len_question + [1] * len_passage

    # Token strings, used to rebuild the answer text.
    tokens = tokenizer_for_bert.convert_ids_to_tokens(input_ids)

    # Run the model ONCE and read both outputs from the same forward pass
    # (the original ran an identical forward pass twice). no_grad() skips
    # building the autograd graph, which is not needed for inference.
    with torch.no_grad():
        outputs = model(torch.tensor([input_ids]), token_type_ids=torch.tensor([segment_ids]))

    # Convert score tensors to flat numpy arrays.
    start_token_scores = outputs[0].detach().numpy().flatten()
    end_token_scores = outputs[1].detach().numpy().flatten()

    # Start and end of the answer are the positions with the highest scores.
    answer_start_index = np.argmax(start_token_scores)
    answer_end_index = np.argmax(end_token_scores)

    # Scores of the chosen start and end tokens.
    start_token_score = np.round(start_token_scores[answer_start_index], 2)
    end_token_score = np.round(end_token_scores[answer_end_index], 2)

    # Rebuild the answer text. Tokens starting with '##' are word-piece
    # continuations and are glued to the previous token without a space.
    answer = tokens[answer_start_index]
    for i in range(answer_start_index + 1, answer_end_index + 1):
        if tokens[i][0:2] == '##':
            answer += tokens[i][2:]
        else:
            answer += ' ' + tokens[i]

    # Treat the result as "no answer found" when the best start is position 0,
    # the start score is negative, the answer is just [SEP], or the span is inverted.
    if (answer_start_index == 0) or (start_token_score < 0) or (answer == '[SEP]') or (answer_end_index < answer_start_index):
        answer = "Sorry!, I could not find an answer in the passage."

    return (answer_start_index, answer_end_index, start_token_score, end_token_score, answer)

#Testing function
bert_question_answer("What is the name of YouTube Channel", "Watch complete playlist of Natural Language Processing. Don't forget to like, share and subscribe my channel IG Tech Team ")

(31, 34, 1.59, 1.43, 'ig tech team')

In [43]:
# # Let me define one passage
# passage = """Hello, I am Ishwar. My friend name is Ajay. He is the son of Kristen. I spend most of the time with Ajay. 
# He always call me by my nick name. Ajay call me programmer. Except Ajay, my other friend call me by my original name. 
# Bijay is also my friend. """

# print (f'Length of the passage: {len(passage.split())} words')

# question1 ="What is my name" 
# print ('\nQuestion 1:\n', question1)
# _, _ , _ , _, ans  = bert_question_answer( question1, passage)
# print('\nAnswer from BERT: ', ans ,  '\n')


# question2 ="Who is the father of Ajay"
# print ('\nQuestion 2:\n', question2)
# _, _ , _ , _, ans  = bert_question_answer( question2, passage)
# print('\nAnswer from BERT: ', ans ,  '\n')

# question3 ="With whom Ishwar spend most of the time" 
# print ('\nQuestion 3:\n', question3)
# _, _ , _ , _, ans  = bert_question_answer( question3, passage)
# print('\nAnswer from BERT: ', ans ,  '\n')

# # Let me define another passage
# passage= """NLP is a subfield of computer science and artificial intelligence concerned with interactions between 
# computers and human (natural) languages. It is used to apply machine learning algorithms to text and speech. For 
# example, we can use NLP to create systems like speech recognition, document summarization, machine translation, spam 
# detection, named entity recognition, question answering, autocomplete, predictive typing and so on. Nowadays, most of 
# us have smartphones that have speech recognition. These smartphones use NLP to understand what is said. Also, many 
# people use laptops which operating system has a built-in speech recognition. NLTK (Natural Language Toolkit) is a 
# leading platform for building Python programs to work with human language data. It provides easy-to-use interfaces 
# to many corpora and lexical resources. Also, it contains a suite of text processing libraries for classification, 
# tokenization, stemming, tagging, parsing, and semantic reasoning. Best of all, NLTK is a free, open source, 
# community-driven project. We’ll use this toolkit to show some basics of the natural language processing field. For 
# the examples below, I’ll assume that we have imported the NLTK toolkit. We can do this like this: import nltk. 
# Sentence tokenization (also called sentence segmentation) is the problem of dividing a string of written language into 
# its component sentences. The idea here looks very simple. Word tokenization (also called word segmentation)
# is the problem of dividing a string of written language into its component words. In English and many other languages
# using some form of Latin alphabet, space is a good approximation of a word divider. However, we still can have problems
# we only split by space to achieve the wanted results. Some English compound nouns are variably written and sometimes
# they contain a space. In most cases, we use a library to achieve the wanted results, so again don’t worry too much 
# for the details. Stop words are words which are filtered out before or after processing of text. When applying machine
# learning to text, these words can add a lot of noise. That’s why we want to remove these irrelevant words.
# Stop words usually refer to the most common words such as “and”, “the”, “a” in a language, but there is no single
# universal list of stopwords. The list of the stop words can change depending on your application. The NLTK tool has
# a predefined list of stopwords that refers to the most common words. If you use it for your first time, you need to
# download the stop words using this code: nltk.download(“stopwords”). Once we complete the downloading, we can load
# the stopwords package from the nltk.corpus and use it to load the stop words."""

# print (f'Length of the passage: {len(passage.split())} words')


# question ="What is full form of NLTK"
# print ('\nQuestion 1:\n', question)
# _, _ , _ , _, ans  = bert_question_answer( question, passage)
# print('\nAnswer from BERT: ', ans ,  '\n')

# question ="What are stop words "
# print ('\nQuestion 2:\n', question)
# _, _ , _ , _, ans  = bert_question_answer( question, passage)
# print('\nAnswer from BERT: ', ans ,  '\n')

# # question ="What is NLP "
# # print ('\nQuestion 3:\n', question)
# # _, _ , _ , _, ans  = bert_question_answer( question, passage)
# # print('\nAnswer from BERT: ', ans ,  '\n')

# # question ="How to download stop words from nltk"
# # print ('\nQuestion 4:\n', question)
# # _, _ , _ , _, ans  = bert_question_answer( question, passage)
# # print('\nAnswer from BERT: ', ans ,  '\n')

# # question ="What do smartphones use to understand speech recognition "
# # print ('\nQuestion 5:\n', question)
# # _, _ , _ , _, ans  = bert_question_answer( question, passage)
# # print('\nAnswer from BERT: ', ans ,  '\n')

# # question ="What is Computer vision"
# # print ('\nQuestion 6:\n', question)
# # _, _ , _ , _, ans  = bert_question_answer( question, passage)
# # print('\nAnswer from BERT: ', ans ,  '\n')

# # question ="What is supervised learning"

# # print ('\nQuestion 7:\n', question)
# # _, _ , _ , _, ans  = bert_question_answer( question, passage)
# # print('\nAnswer from Shashwat: ', ans ,  '\n')


In [5]:
pip install soundfile

Note: you may need to restart the kernel to use updated packages.


In [17]:
import sounddevice as sd
import soundfile as sf

# Audio settings. `sample_rate` and `duration` are read as module-level globals
# by askquestion() when it records from the microphone, so this cell must be
# run before askquestion() is called.
sample_rate = 44100  # passed to sd.rec(...) and sf.write(...) as the sampling rate
duration = 10  # Duration in seconds
output_file = "audio.wav"  # reassigned by several later cells

# Record audio from the laptop's microphone
# print("Recording audio...")
# audio = sd.rec(int(sample_rate * duration), samplerate=sample_rate, channels=1)
# sd.wait()

# # Save the audio to a file
# sf.write(output_file, audio, sample_rate)

# print(f"Audio saved to {output_file}")

In [None]:
# brew install flac

In [None]:
pip install pyttsx3


In [None]:
pip install comtypes


In [None]:
pip install gtts


In [None]:
pip install python-vlc


In [22]:

import speech_recognition as sr

# Create a recognizer object
# r = sr.Recognizer()

# # Path to the audio file
# audio_file = "audio.wav"

# # Load the audio file
# with sr.AudioFile(audio_file) as source:
#     # Read the audio data
#     audio = r.record(source)

# try:
#     # Recognize speech from the audio
#     text = r.recognize_google(audio)
#     print("Transcription:", text)
# except sr.UnknownValueError:
#     print("Speech recognition could not understand audio")
# except sr.RequestError as e:
#     print("Could not request results from Google Speech Recognition service; {0}".format(e))


In [23]:
def askquestion(audio_file,output_file):
    """Record from the microphone, save the recording, then transcribe a WAV file.

    The recording length and sampling rate come from the module-level
    ``duration`` and ``sample_rate`` settings.

    Args:
        audio_file: Path of the audio file that is loaded and sent to
            Google speech recognition.
        output_file: Path the fresh microphone recording is written to.
            Callers in this notebook pass the same path for both arguments.

    Returns:
        The recognized text, or an empty string when recognition fails
        (a message describing the failure is printed in that case).
    """
    print("Recording audio...")
    frame_count = int(sample_rate * duration)
    recording = sd.rec(frame_count, samplerate=sample_rate, channels=1)
    sd.wait()

    # Persist the recording to disk.
    sf.write(output_file, recording, sample_rate)
    print(f"Audio saved to {output_file}")

    # Load the audio file to transcribe.
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as wav_source:
        captured = recognizer.record(wav_source)

    try:
        transcript = recognizer.recognize_google(captured)
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio")
        return ""
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
        return ""

    print("Transcription:", transcript)
    return transcript
    

In [24]:
text=askquestion("text.wav","text.wav")
text

Recording audio...
Audio saved to text.wav
Transcription: hello how are you want good what to do when I hope you are nice and miss you will miss you


'hello how are you want good what to do when I hope you are nice and miss you will miss you'

In [11]:
text




'what do you mean by elephant'

In [None]:
pip install gtts


Collecting gtts
  Using cached gTTS-2.3.2-py3-none-any.whl (28 kB)
Installing collected packages: gtts
Successfully installed gtts-2.3.2
Note: you may need to restart the kernel to use updated packages.


In [25]:
from gtts import gTTS

def text_to_speech(text, output_file):
    """Synthesize ``text`` as English speech with gTTS and save it to ``output_file``.

    Args:
        text: The text to speak.
        output_file: Path of the audio file to write.
    """
    gTTS(text=text, lang='en').save(output_file)

# Example usage: synthesize the most recent transcription to an MP3 file.
# `text` is not defined in this cell; it is the value returned by the earlier
# askquestion(...) cell, so that cell must have been run first.

output_file = "output.mp3"

text_to_speech(text, output_file)

In [None]:
pip install pygame


Collecting pygame
  Downloading pygame-2.4.0-cp310-cp310-macosx_11_0_arm64.whl (12.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.2/12.2 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pygame
Successfully installed pygame-2.4.0
Note: you may need to restart the kernel to use updated packages.


In [26]:
import pygame

def play_mp3(file_path):
    """Load ``file_path`` into the pygame music mixer and start playing it.

    The function contains no wait loop: it returns right after issuing the
    play call rather than waiting here for playback to finish.

    Args:
        file_path: Path of the audio file to play.
    """
    mixer = pygame.mixer
    mixer.init()
    music = mixer.music
    music.load(file_path)
    music.play()



# Example usage: play "output.mp3", the file name the text_to_speech example
# cell above writes to (that cell must have been run so the file exists).
mp3_file = "output.mp3"
play_mp3(mp3_file)

pygame 2.4.0 (SDL 2.26.4, Python 3.10.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [14]:
# Speak a prompt asking the user for the passage. The same steps are wrapped
# by Passageready() further down, which also reads the passage from input().
output_file="passage.mp3"
text_to_speech("Give the passage you want to ask questions from", output_file)
play_mp3(output_file)



In [15]:
passage=input("Enter the Passage: ")

In [16]:
# Speak a prompt asking the user to ask a question. QuestionAnswer() further
# down performs the same prompt step before recording and answering.
output_file="question.mp3"
text_to_speech("Ask the question", output_file)
play_mp3(output_file)

In [17]:
question=askquestion("question.wav","question.wav")
question

Recording audio...
Audio saved to question.wav
Transcription: what do you mean by elephant


'what do you mean by elephant'

In [18]:
_, _ , _ , _, ans  = bert_question_answer( question, passage)


In [20]:
print('\nAnswer from BERT: ', ans ,  '\n')



Answer from BERT:  a large animal 



In [27]:
def Passageready():
    """Speak a prompt asking for the passage, then read the passage from input().

    Returns:
        The text typed at the "Enter the Passage: " prompt.
    """
    prompt_mp3 = "passage.mp3"
    text_to_speech("Give the passage you want to ask questions from", prompt_mp3)
    play_mp3(prompt_mp3)
    return input("Enter the Passage: ")


In [28]:
def QuestionAnswer(passage):
    """Prompt for a spoken question, answer it from ``passage``, and speak the answer.

    Steps: speak "Ask the question", record and transcribe the user's
    question with askquestion(), run bert_question_answer() on it, then
    synthesize and play the answer.

    Args:
        passage: Text in which bert_question_answer() looks for the answer.

    Returns:
        None. The result is delivered as audio ("answer.mp3" is written and played).
    """
    import time  # local import: only needed for the playback wait below

    output_file="Ask.mp3"
    text_to_speech("Ask the question", output_file)
    play_mp3(output_file)

    # play_mp3() does not wait for playback, and askquestion() starts recording
    # immediately. Wait for the prompt to finish so the microphone does not
    # capture the prompt itself as part of the question.
    while pygame.mixer.music.get_busy():
        time.sleep(0.05)

    question=askquestion("question.wav","question.wav")
    output_file3="answer.mp3"

    # askquestion() returns "" when transcription failed; running BERT on an
    # empty question would only produce a meaningless spoken answer.
    if not question:
        text_to_speech("Sorry, I could not understand the question.", output_file3)
        play_mp3(output_file3)
        return

    _, _ , _ , _, ans  = bert_question_answer( question, passage)
    text_to_speech("The Answer to the question....: "+question+"..."+"is....:"+ans, output_file3)
    play_mp3(output_file3)

In [29]:
passage=Passageready()


In [30]:
QuestionAnswer(passage)

Recording audio...
Audio saved to question.wav
Transcription: why is Shashwat a good boy
