#  Setting Up Environment and Libraries

In [1]:
# Install required libraries
# !pip install nltk speechrecognition pyttsx3

# Import necessary libraries
import speech_recognition as sr
import pyttsx3
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import random


# Data Preprocessing and NLP Setup

In [2]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\RAHULSRI\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\RAHULSRI\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\RAHULSRI\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Preprocessing functions
def preprocess_text(text):
    text = re.sub(r'\[[0-9]*\]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    text = text.lower()
    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'\d', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text

# Lemmatization
def perform_lemmatization(text):
    lemmatizer = WordNetLemmatizer()
    tokens = word_tokenize(text)
    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(lemmatized_tokens)


# TF-IDF Vectorization and Stop Words Removal

In [4]:
# TF-IDF Vectorization
def vectorize_text(corpus):
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
    return tfidf_matrix

# Stop words removal
stop_words = set(stopwords.words('english'))


# Voice Recognition Integration

In [5]:
def recognize_speech():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)

    try:
        print("Recognizing...")
        query = recognizer.recognize_google(audio)
        print(f"You said: {query}")
        return query
    except sr.UnknownValueError:
        print("Sorry, could not understand the audio.")
        return ""
    except sr.RequestError:
        print("Sorry, cannot access Google Speech Recognition service.")
        return ""


# Intent-Based Query Processing

In [6]:
def process_query(query):
    # Process the query using NLP techniques
    query = preprocess_text(query)
    query = perform_lemmatization(query)
    query_vectorized = vectorize_text([query])

    # Find the most similar URL based on cosine similarity
    similarities = cosine_similarity(query_vectorized, tfidf_matrix)
    similar_urls = [(similarity, url) for similarity, url in zip(similarities[0], urls)]
    similar_urls.sort(reverse=True)
    return similar_urls[0][1]


# Chatbot Implementation

In [7]:
def chat():
    print("Voice-Activated Search Assistant: Listening...")
    while True:
        query = recognize_speech()
        if query.lower() == 'exit':
            print("Exiting the chatbot.")
            break
        else:
            similar_url = process_query(query)
            print("Here is the most relevant URL:", similar_url)


# Main Function

In [9]:
# Main function
if __name__ == "__main__":
    urls = ['https://bhuvan.nrsc.gov.in',
            'https://bhuvan-app3.nrsc.gov.in/aadhaar/',
            'https://bhuvan-app2.nrsc.gov.in/mgnrega/mgnrega_phase2.php',
            'https://bhuvan-app3.nrsc.gov.in/data/',
            'https://bhuvan-app1.nrsc.gov.in/bhuvan2d/bhuvan/bhuvan2d.php',
            'https://bhuvan.nrsc.gov.in/home/index.php',
            'https://bhuvan-app1.nrsc.gov.in/api/',
            'https://bhuvan-app1.nrsc.gov.in/hfa/housing_for_all.php',
            'https://bhuvan-app1.nrsc.gov.in/apshcl',
            'https://bhuvan-app1.nrsc.gov.in/ntr',
            'https://bhuvan.nrsc.gov.in/forum',
            'https://bhuvan-wbis.nrsc.gov.in/',
            'https://bhuvan.nrsc.gov.in/geonetwork/',
            'https://bhuvan-app2.nrsc.gov.in/planner/',
            'https://bhuvan-app1.nrsc.gov.in/globe/3d.php',
            'https://bhuvan-app1.nrsc.gov.in/mhrd_rusa/',
            'https://bhuvan-app1.nrsc.gov.in/geographicalindication/index.php',
            'https://bhuvan-app1.nrsc.gov.in/flycatchers/flycatchers.php',
            'https://bhuvan-app1.nrsc.gov.in/mhrd_ncert',
            'https://bhuvan-app1.nrsc.gov.in/nabard',
            'https://bhuvan-app1.nrsc.gov.in/iwmp',
            'https://bhuvan-app1.nrsc.gov.in/tourism/tourism.php',
            'https://bhuvan-app1.nrsc.gov.in/hp_forest',
            'https://bhuvan-app1.nrsc.gov.in/disaster/disaster.php',
            'https://bhuvan-app1.nrsc.gov.in/thematic/thematic/index.php',
            'https://bhuvan-app1.nrsc.gov.in/imd/',
            'https://bhuvan-app1.nrsc.gov.in/pdmc/',
            'https://bhuvan-app1.nrsc.gov.in/heatwave/',
            'https://bhuvan-app1.nrsc.gov.in/mowr_ganga/',
            'https://bhuvan-app1.nrsc.gov.in/ts_forest/',
            'https://bhuvan-app1.nrsc.gov.in/pb_forest/',
            'https://bhuvan-app1.nrsc.gov.in/narl/',
            'https://bhuvan-app1.nrsc.gov.in/ka_forest',
            'https://bhuvan-app1.nrsc.gov.in/state/HR',
            'https://bhuvan-app1.nrsc.gov.in/saraswati/',
            'https://bhuvan-app1.nrsc.gov.in/web_view/index.php',
            'https://bhuvan-app1.nrsc.gov.in/pmjvk',
            'https://bhuvan-app1.nrsc.gov.in/anganwadi/',
            'https://bhuvan-app2.nrsc.gov.in/mgnregatpv/',
            'https://bhuvan-app3.nrsc.gov.in/corona/',
            'https://bhuvan-app1.nrsc.gov.in/moef_cris',
            'https://bhuvan-app1.nrsc.gov.in/2dresources/bhuvanstore.php',
            'https://bhuvan-app1.nrsc.gov.in/rbi',
            'https://bhuvan-app1.nrsc.gov.in/sitemap',
            'https://bhuvan-app1.nrsc.gov.in/mhrd_ncert/sb/sb.php',
            'https://bhuvan-app1.nrsc.gov.in/tourism/tourism.php?tourismid=1',
            'https://bhuvan-app1.nrsc.gov.in/tr_forest/',
            'https://bhuvan-app1.nrsc.gov.in/municipal/municipal.php',
            'https://bhuvan-app1.nrsc.gov.in/toll/morth_nhai.php',
            'https://bhuvan-app1.nrsc.gov.in/walamtari/walamtari.php',
            'https://bhuvan-app1.nrsc.gov.in/mines/mines.php',
            'https://bhuvan-app1.nrsc.gov.in/agriculture/agri.php',
            'https://bhuvan-app1.nrsc.gov.in/deltas/index.php',
            'https://bhuvan-app1.nrsc.gov.in/cdma/index.php',
            'https://bhuvan-app1.nrsc.gov.in/state/AP',
            'https://bhuvan-mapper1.nrsc.gov.in/twris',
            'https://bhuvan-app1.nrsc.gov.in/multilingual',
            'https://bhuvan-app1.nrsc.gov.in/disaster/disaster.php?id=fire',
            'https://bhuvan-app1.nrsc.gov.in/shipping',
            'https://bhuvan-app1.nrsc.gov.in/rkvy/index.php',
            'https://bhuvan-app1.nrsc.gov.in/aibp/aibp.php',
            'https://bhuvan-app1.nrsc.gov.in/mwrds/index.php',
            'https://bhuvan-app1.nrsc.gov.in/pmgsy/home/index.php',
            'https://bhuvan-app1.nrsc.gov.in/collaboration/']

    # Corpus preparation
    corpus = [preprocess_text(url) for url in urls]
    corpus = [perform_lemmatization(text) for text in corpus]

    # Vectorize the corpus
    tfidf_matrix = vectorize_text(corpus)

    # Start the chat
    chat()


Voice-Activated Search Assistant: Listening...
Listening...
Recognizing...
You said: forest around Bangalore


ValueError: Incompatible dimension for X and Y matrices: X.shape[1] == 3 while Y.shape[1] == 78