In [None]:
!pip install nltk googletrans==3.1.0a0 scikit-learn textblob flask

Collecting googletrans==3.1.0a0
  Downloading googletrans-3.1.0a0.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==3.1.0a0)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==3.1.0a0)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==3.1.0a0)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==3.1.0a0)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==3.1.0a0)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==3.1.0a0)
  Downloading httpcore-0.9.1-py3-none-any.whl.metadata (4.6 kB)
Collecting h11<0.10,>=0.8 (from httpcore==0.9.*->httpx==0.13.3->googletrans==3.1.0a0

In [None]:
# Import libraries
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from googletrans import Translator
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from textblob import TextBlob
import pandas as pd
import numpy as np
import pickle
from flask import Flask, request, jsonify
import threading

In [None]:
# Download NLTK resources
# Tokenizer data; presumably what word_tokenize() loads at runtime — TODO confirm which of
# 'punkt' / 'punkt_tab' the installed NLTK version actually requires.
nltk.download('punkt')
# Stopword lists, read via stopwords.words('english') in TextPreprocessor.
nltk.download('stopwords')
# Last expression of the cell, so its return value is what the cell displays.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
# 1. Text Preprocessing Module
class TextPreprocessor:
    """Turns raw text into a space-joined string of lower-cased, stemmed tokens."""

    def __init__(self):
        self.ps = PorterStemmer()
        # Not used by preprocess(); translation is performed by LanguageHandler.
        self.translator = Translator()
        self.stopwords_en = set(stopwords.words('english'))

    def preprocess(self, text, language='en'):
        """Tokenize, filter and stem ``text``.

        Args:
            text: Input string; it is lower-cased before tokenization.
            language: Language code. Only ``'en'`` triggers stopword removal;
                any other value keeps every token.

        Returns:
            The stemmed tokens joined by single spaces.
        """
        words = word_tokenize(text.lower())
        # English is the only language with a stopword list available here.
        if language == 'en':
            words = (word for word in words if word not in self.stopwords_en)
        return ' '.join(map(self.ps.stem, words))

In [None]:
# 2. Multilingual Translation Module
class LanguageHandler:
    """Language detection and translation on top of a ``Translator`` instance."""

    def __init__(self):
        self.translator = Translator()
        # Informational mapping of language code -> name; no method here consults it.
        self.supported_langs = {'en': 'english', 'mr': 'marathi', 'hi': 'hindi'}

    def detect_language(self, text):
        """Return the ``lang`` attribute of the translator's detection result for ``text``."""
        detection = self.translator.detect(text)
        return detection.lang

    def translate(self, text, src_lang, dest_lang='en'):
        """Translate ``text`` from ``src_lang`` to ``dest_lang``.

        When source and destination are the same code the text is returned
        unchanged and the translator is not called.
        """
        if src_lang != dest_lang:
            translated = self.translator.translate(text, src=src_lang, dest=dest_lang)
            return translated.text
        return text

In [None]:
# 3. Feature Extraction Module
class FeatureExtractor:
    """Owns a ``TfidfVectorizer`` (built with ``max_features=5000``) for text features."""

    def __init__(self):
        self.tfidf = TfidfVectorizer(max_features=5000)

    def extract_features(self, texts):
        """Fit the vectorizer on ``texts`` and return their TF-IDF features."""
        fitted_features = self.tfidf.fit_transform(texts)
        return fitted_features

    def transform(self, texts):
        """Vectorize ``texts`` with the already-fitted vectorizer (no refitting)."""
        vectorizer = self.tfidf
        return vectorizer.transform(texts)

In [None]:
# 4. Intent Recognition Module
class IntentClassifier:
    """Logistic-regression intent model fed by its own ``FeatureExtractor``."""

    def __init__(self):
        self.model = LogisticRegression()
        self.feature_extractor = FeatureExtractor()

    def train(self, texts, labels):
        """Fit the feature extractor on ``texts``, then fit the model on the result."""
        features = self.feature_extractor.extract_features(texts)
        self.model.fit(features, labels)

    def predict(self, text):
        """Return the predicted intent label for a single ``text``."""
        # The extractor takes a collection, so wrap the single text and unwrap the result.
        features = self.feature_extractor.transform([text])
        predictions = self.model.predict(features)
        return predictions[0]

In [None]:
# 5. Sentiment Analysis Module
class SentimentAnalyzer:
    """Buckets TextBlob polarity into a three-way sentiment label."""

    def analyze(self, text):
        """Return ``'positive'``, ``'negative'`` or ``'neutral'`` for ``text``.

        The label follows the sign of ``TextBlob(text).sentiment.polarity``;
        anything that is neither above nor below zero is ``'neutral'``.
        """
        score = TextBlob(text).sentiment.polarity
        if score < 0:
            return 'negative'
        return 'positive' if score > 0 else 'neutral'

In [None]:
# 6. Fake News Detection Module
class FakeNewsDetector:
    """Binary real-vs-fake text classifier (label 0 = real, label 1 = fake)."""

    # Label value the model is trained to emit for genuine (non-fake) text.
    REAL_LABEL = 0

    def __init__(self):
        self.model = LogisticRegression()
        self.feature_extractor = FeatureExtractor()

    def train(self, texts, labels):
        """Train the detector.

        Args:
            texts: Training documents.
            labels: One label per document; 0 for real, 1 for fake.
        """
        X = self.feature_extractor.extract_features(texts)
        self.model.fit(X, labels)

    def predict(self, text):
        """Return True when ``text`` is classified as REAL, False when classified as fake.

        Note the polarity: despite the class name, a True result means the text
        passed the check, which is how the chatbot uses it (as ``is_valid``).

        Returns:
            A plain Python ``bool``.
        """
        X = self.feature_extractor.transform([text])
        # bool() converts whatever comparison result the model's label type yields
        # into a plain Python bool.
        return bool(self.model.predict(X)[0] == self.REAL_LABEL)

In [None]:
# 7. Main Chatbot Class
class MaharashtraChatbot:
    """Multilingual chatbot: detect language, translate to English, classify, reply.

    The intent and fake-news models are created untrained; they must be trained
    before ``process_input`` is called with non-blank text.
    """

    # Reply used for blank input and for intents missing from the knowledge base.
    FALLBACK_RESPONSE = "I'm sorry, I couldn't understand your request. Please try again."

    def __init__(self):
        self.preprocessor = TextPreprocessor()
        self.lang_handler = LanguageHandler()
        self.intent_classifier = IntentClassifier()
        self.sentiment_analyzer = SentimentAnalyzer()
        self.fake_detector = FakeNewsDetector()

        # Sample knowledge base: intent label -> canned English reply
        self.knowledge_base = {
            'schemes': 'Available schemes: 1. Farmer Support 2. Education Grant',
            'grievance': 'Please provide details of your grievance',
            'services': 'Services: 1. Birth Certificate 2. Tax Payment'
        }

    def process_input(self, text, user_lang='en'):
        """Process user input and generate a response in ``user_lang``.

        Args:
            text: The user's message, in any language the translator can detect.
            user_lang: Language code the reply is translated into.

        Returns:
            The reply string. Blank or non-string input yields the fallback reply
            without running language detection or any classifier.
        """
        # There is nothing to detect or classify in blank input, so answer right away
        # instead of handing an empty string to the translator and the models.
        if not isinstance(text, str) or not text.strip():
            return self.lang_handler.translate(self.FALLBACK_RESPONSE, 'en', user_lang)

        detected_lang = self.lang_handler.detect_language(text)
        eng_text = self.lang_handler.translate(text, detected_lang, 'en')
        processed_text = self.preprocessor.preprocess(eng_text)
        # Sentiment and the fake-news check read the un-stemmed English text;
        # only the intent classifier is given the preprocessed form.
        sentiment = self.sentiment_analyzer.analyze(eng_text)
        intent = self.intent_classifier.predict(processed_text)
        # True means the detector classified the text as real (it passed the check).
        is_valid = self.fake_detector.predict(eng_text)

        response = self._generate_response(intent, sentiment, is_valid)
        return self.lang_handler.translate(response, 'en', user_lang)

    def _generate_response(self, intent, sentiment, is_valid):
        """Pick the English reply for a classified message.

        Args:
            intent: Predicted intent label, looked up in ``knowledge_base``.
            sentiment: Sentiment label; ``'negative'`` adds an apology prefix.
            is_valid: False when the text failed the fake-news check.

        Returns:
            A warning when ``is_valid`` is false, the knowledge-base entry for a
            known intent, or the fallback reply otherwise.
        """
        if not is_valid:
            return "Warning: Information could not be verified"

        if intent not in self.knowledge_base:
            return self.FALLBACK_RESPONSE

        response = self.knowledge_base[intent]
        if sentiment == 'negative':
            response = f"I'm sorry to hear you're upset. {response}"
        return response

In [None]:
# 8. Flask Application (adapted for Colab)
# Module-level singletons. `app` is the Flask application the /chatbot route is
# registered on; `chatbot` is the shared instance used by both the route handler and
# initialize_models(). Its models stay untrained until initialize_models() is called.
app = Flask(__name__)
chatbot = MaharashtraChatbot()

In [None]:
# Sample training data
def initialize_models():
    """Train the global ``chatbot``'s intent and fake-news models on tiny sample data.

    Intent texts are passed through ``chatbot.preprocessor`` before training so the
    model's vocabulary matches what ``process_input`` feeds it at prediction time.
    """
    intents = {
        'texts': ['government schemes', 'file a complaint', 'available services'],
        'labels': ['schemes', 'grievance', 'services']
    }
    news = {
        'texts': ['real government update', 'fake maharashtra news'],
        'labels': [0, 1]
    }

    # process_input() classifies the *preprocessed* (stopword-filtered, stemmed) text.
    # Training on raw text would leave the query with no vocabulary overlap
    # (e.g. "govern" vs "government"), making the predicted intent arbitrary.
    intent_texts = [chatbot.preprocessor.preprocess(text) for text in intents['texts']]
    chatbot.intent_classifier.train(intent_texts, intents['labels'])

    # The fake-news detector is queried with the un-preprocessed English text,
    # so it is trained on raw text as well.
    chatbot.fake_detector.train(news['texts'], news['labels'])

@app.route('/chatbot', methods=['POST'])
def chatbot_endpoint():
    """POST /chatbot: answer a user message.

    Expects a JSON object with ``input`` (non-empty string) and an optional
    ``language`` code (defaults to ``'en'``).

    Returns:
        200 with ``{'response', 'timestamp'}`` on success, or 400 with
        ``{'error'}`` when the body is not a JSON object or ``input`` is missing/blank.
    """
    # silent=True returns None for a missing or malformed JSON body instead of raising,
    # so bad requests get the JSON 400 below rather than an unhandled error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    user_input = data.get('input', '')
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({'error': "Field 'input' must be a non-empty string"}), 400
    # An explicit null language falls back to English as well.
    user_lang = data.get('language') or 'en'

    response = chatbot.process_input(user_input, user_lang)
    return jsonify({
        'response': response,
        'timestamp': pd.Timestamp.now().isoformat()
    })

In [None]:
# 9. Function to run Flask in Colab
def run_flask(host='0.0.0.0', port=5050):
    """Run the Flask development server (blocks until the server stops).

    Args:
        host: Interface to bind; the default ``'0.0.0.0'`` listens on all addresses.
        port: TCP port to listen on.
    """
    app.run(host=host, port=port)

# Main execution in Colab
if __name__ == "__main__":
    # Train the sample models before anything queries the chatbot
    initialize_models()

    # run_flask() blocks, so the server gets its own thread and this cell can carry on
    flask_thread = threading.Thread(target=run_flask)
    flask_thread.start()

    # Call the chatbot in-process (no HTTP round trip) as a smoke test: English first
    print("Testing chatbot directly:")
    test_input = "What are government schemes?"
    response = chatbot.process_input(test_input, 'en')
    print(f"Input: {test_input}", f"Response: {response}", sep="\n")

    # Same question in Marathi, with the reply requested in Marathi
    test_input_marathi = "सरकारी योजना काय आहेत?"  # "What are government schemes?" in Marathi
    response_marathi = chatbot.process_input(test_input_marathi, 'mr')
    print(
        f"\nInput (Marathi): {test_input_marathi}",
        f"Response (Marathi): {response_marathi}",
        sep="\n",
    )

Testing chatbot directly:
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5050
 * Running on http://172.28.0.12:5050
INFO:werkzeug:[33mPress CTRL+C to quit[0m


Input: What are government schemes?
Response: Please provide details of your grievance

Input (Marathi): सरकारी योजना काय आहेत?
Response (Marathi): कृपया आपल्या तक्रारीचा तपशील प्रदान करा


In [None]:
from google.colab import output
# Expose the port the Flask server actually listens on (run_flask binds 5050, not 5000)
output.serve_kernel_port_as_iframe(5050)

# Smoke-test the HTTP endpoint from inside the kernel
import requests
import json

payload = json.dumps({"input": "What are government schemes?", "language": "en"})
headers = {'Content-Type': 'application/json'}
# The timeout keeps the cell from hanging forever if the server thread is not up.
response = requests.post('http://localhost:5050/chatbot', data=payload, headers=headers, timeout=30)
# Surface HTTP errors directly instead of failing later while decoding the body.
response.raise_for_status()
print(response.json())

<IPython.core.display.Javascript object>

INFO:werkzeug:127.0.0.1 - - [19/Mar/2025 17:19:40] "POST /chatbot HTTP/1.1" 200 -


{'response': 'Please provide details of your grievance', 'timestamp': '2025-03-19T17:19:40.933780'}
