# Model Training on the Urdu Dataset


In [None]:
import pandas as pd
import re
import nltk
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.linear_model import LogisticRegression

# NLTK resources used by word_tokenize and stopwords; uncomment on first run to download them
# nltk.download('punkt')
# nltk.download('stopwords')

# Shared output area: the button callbacks in this cell render their messages and plots here.
output = widgets.Output()

# Module-level Urdu text box; it is created here and only displayed by test_model.
text_input = widgets.Text(
    placeholder='Type a sentence in Urdu and press Enter',
    description="Enter Urdu text:",
)

# ✅ Urdu Preprocessing Function
# Stop-word set is read from the NLTK corpus at most once; None means "not loaded yet".
_URDU_STOPWORDS = None


def _get_urdu_stopwords():
    """Return the Urdu stop-word set, reading the NLTK corpus only on first use.

    Returns an empty set when the installed stopwords corpus has no 'urdu' file.
    Any error raised by NLTK propagates and leaves the cache unset, so the next
    call retries.
    """
    global _URDU_STOPWORDS
    if _URDU_STOPWORDS is None:
        if 'urdu' in stopwords.fileids():
            _URDU_STOPWORDS = frozenset(stopwords.words('urdu'))
        else:
            _URDU_STOPWORDS = frozenset()
    return _URDU_STOPWORDS


def urdu_preprocessor(text):
    """Normalise one Urdu sentence for TF-IDF vectorisation.

    Steps: coerce to ``str``, replace the listed punctuation characters with
    spaces, collapse whitespace, tokenize with NLTK and drop Urdu stop words.

    Args:
        text: The raw cell value; anything accepted by ``str()``.

    Returns:
        The cleaned, space-joined tokens. If tokenization or the stop-word
        lookup fails, the punctuation-stripped, whitespace-normalised text is
        returned instead (best-effort fallback).
    """
    # NOTE(review): the class also contains ء (hamza), which is a letter rather
    # than punctuation. It is kept so already-trained models see the same
    # features -- confirm whether stripping it is intended.
    text = re.sub(r'[۔،؛؟!٭ء]', ' ', str(text))  # Remove Urdu punctuation
    text = re.sub(r'\s+', ' ', text).strip()
    try:
        tokens = word_tokenize(text)
        # Cached lookup: previously the corpus was re-read for every row.
        urdu_stopwords = _get_urdu_stopwords()
    except Exception:
        # Deliberate best-effort: without usable NLTK data, fall back to the
        # cleaned but untokenized text rather than failing the whole run.
        return text
    return ' '.join(token for token in tokens if token not in urdu_stopwords)

# 📚 Train function
def train_model(b):
    """Train, evaluate and save the Urdu sentiment model (button-click callback).

    Reads the 'Data_Set(Sentiment Analysis)' sheet of the project workbook,
    cleans the 'Urdu Sentiment' column with ``urdu_preprocessor``, fits a
    TfidfVectorizer and a LogisticRegression on an 80/20 split, prints the
    classification report and writes these files to the working directory:

    * ``urdu_confusion_matrix.png``
    * ``urdu_sentiment_nb_classifier.pkl`` and ``urdu_tfidf_vectorizer.pkl``
    * ``urdu_predictions_with_actual_predicated.xlsx``

    All messages and the plot are rendered inside the module-level ``output``
    widget.

    Args:
        b: The clicked button (unused).
    """
    with output:
        clear_output(wait=True)
        print("🔄 Training started...")

        # 📁 Load dataset
        try:
            df = pd.read_excel("/content/drive/My Drive/NLP_Project/2022-CS-622.xlsx", sheet_name='Data_Set(Sentiment Analysis)')
            df.columns = df.columns.str.strip()
            df = df.dropna(subset=['Urdu Sentiment', 'Sentiment'])
        except Exception as e:
            print(f"❌ Error loading data: {e}")
            return

        # Without this guard an empty sheet surfaces as a train_test_split traceback.
        if df.empty:
            print("❌ Error loading data: no rows with both 'Urdu Sentiment' and 'Sentiment' values.")
            return

        # 🧹 Preprocess Urdu
        df['cleaned_text'] = df['Urdu Sentiment'].apply(urdu_preprocessor)

        # 🧪 Split data
        X_train, X_test, y_train, y_test = train_test_split(df['cleaned_text'], df['Sentiment'], test_size=0.2, random_state=42)

        # 🔤 TF-IDF
        vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_df=0.9, min_df=5)
        X_train_vec = vectorizer.fit_transform(X_train)
        X_test_vec = vectorizer.transform(X_test)

        # 🧠 Train model
        model = LogisticRegression(max_iter=1000)
        model.fit(X_train_vec, y_train)
        print("✅ Model trained!")

        # 📊 Evaluation
        y_pred = model.predict(X_test_vec)
        print("📊 Classification Report:")
        print(classification_report(y_test, y_pred))

        # 🔥 Confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(6, 4))
        sns.heatmap(cm, annot=True, fmt='d', cmap='YlGnBu', xticklabels=model.classes_, yticklabels=model.classes_)
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        # The classifier above is LogisticRegression; the title used to say "Naive Bayes".
        plt.title("Confusion Matrix (Logistic Regression - Urdu)")
        plt.tight_layout()
        plt.savefig("urdu_confusion_matrix.png")
        plt.show()

        # 💾 Save model and vectorizer
        # The file name keeps its "nb" tag because test_model opens this exact path.
        with open("urdu_sentiment_nb_classifier.pkl", "wb") as f:
            pickle.dump(model, f)
        print("✅ Saved: urdu_sentiment_nb_classifier.pkl")

        with open("urdu_tfidf_vectorizer.pkl", "wb") as f:
            pickle.dump(vectorizer, f)
        print("✅ Saved: urdu_tfidf_vectorizer.pkl")

        # 💾 Save predictions to Excel
        # X_test holds the *cleaned* text; look the untouched sentences up by
        # index so the "Original Sentence" column really is the original.
        results_df = pd.DataFrame({
            'Original Sentence': df.loc[X_test.index, 'Urdu Sentiment'].values,
            'Actual Sentiment': y_test.values,
            'Predicted Sentiment': y_pred
        })
        results_df.to_excel("urdu_predictions_with_actual_predicated.xlsx", index=False)
        print("📁 Saved predictions to 'urdu_predictions_with_actual_predicated.xlsx'")

# 🔍 Test function
def test_model(b):
    """Load the saved Urdu model and show a text box for interactive prediction.

    Button-click callback. Loads ``urdu_sentiment_nb_classifier.pkl`` and
    ``urdu_tfidf_vectorizer.pkl`` from the working directory, registers a
    submit handler on the module-level ``text_input`` and displays that
    widget inside the module-level ``output`` widget.

    Args:
        b: The clicked button (unused).
    """
    with output:
        clear_output(wait=True)
        print("🔍 Testing loaded model...")

        try:
            # NOTE: unpickling can execute arbitrary code; only load files that
            # were written by this notebook's train_model.
            with open("urdu_sentiment_nb_classifier.pkl", "rb") as f:
                model = pickle.load(f)
            with open("urdu_tfidf_vectorizer.pkl", "rb") as f:
                vectorizer = pickle.load(f)
            print("✅ Model and Vectorizer loaded!")
        except Exception as e:
            print(f"❌ Error loading model/vectorizer: {e}")
            return

        # Define the submit function
        def on_submit(sender):
            """Predict the sentiment of the current text box content."""
            with output:
                # Clearing here also removes the text box shown by display()
                # below, since it lives in the same output widget.
                clear_output(wait=True)
                print("📝 Enter key pressed, processing input...")
                input_text = text_input.value.strip()
                if input_text:
                    try:
                        # Same cleaning as at training time, so features line up.
                        processed_input = urdu_preprocessor(input_text)
                        input_vector = vectorizer.transform([processed_input])
                        prediction = model.predict(input_vector)[0]
                        print(f"💡 Predicted Sentiment: {prediction}")
                    except Exception as e:
                        print(f"❌ Error predicting sentiment: {e}")
                else:
                    print("ℹ️ Please enter text to get a sentiment prediction.")

        # Submit callbacks are kept separately from trait observers, so the
        # former unobserve_all() call never removed them and every click on the
        # test button stacked one more handler (each holding its own model).
        # Remove the handler registered by the previous click explicitly.
        previous_handler = getattr(text_input, "_urdu_submit_handler", None)
        if previous_handler is not None:
            text_input.on_submit(previous_handler, remove=True)
        text_input.on_submit(on_submit)
        text_input._urdu_submit_handler = on_submit

        # ✅ Show the input box only during testing
        display(text_input)

# 🔘 Create buttons with layout (both share one fixed-width layout)
button_layout = widgets.Layout(width='300px')
train_button = widgets.Button(
    description="Train Urdu_Sentence Model",
    button_style='success',
    layout=button_layout
)
test_button = widgets.Button(
    # The label now names the action, like the train button; it used to read
    # just "Urdu_Sentence Model".
    description="Test Urdu_Sentence Model",
    button_style='info',
    layout=button_layout
)

# Wire each button to its callback defined above.
train_button.on_click(train_model)
test_button.on_click(test_model)

# 📺 Display the UI: buttons in one row, shared output area underneath
display(widgets.HBox([train_button, test_button]))
display(output)


HBox(children=(Button(button_style='success', description='Train Urdu_Sentence Model', layout=Layout(width='30…

Output()

# Model Training on the English Dataset

In [None]:
import pandas as pd
import pickle
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

# Module-level widgets shared by the callbacks below:
# a wide text box for the sentence to classify, and the area that shows results.
text_input = widgets.Text(
    layout=widgets.Layout(width="600px"),
    placeholder="Type a sentence and press Enter",
    description="Enter text:",
)
output = widgets.Output()

# TRAIN MODEL FUNCTION
def train_model(b):
    """Train, evaluate and save the English sentiment model (button-click callback).

    Reads the 'Data_Set(Sentiment Analysis)' sheet of the project workbook,
    fits a TfidfVectorizer and a MultinomialNB on an 80/20 split of the
    'Sentence' / 'Sentiment' columns, prints the classification report and
    writes these files to the working directory:

    * ``english_predictions_with_actual_predicative.xlsx``
    * ``naive_bayes_classifier.pkl`` and ``tfidf_vectorizer.pkl``

    All messages are rendered inside the module-level ``output`` widget.

    Args:
        b: The clicked button (unused).
    """
    with output:
        clear_output(wait=True)
        print("🔄 Training started...")

        # Load dataset
        try:
            df = pd.read_excel("/content/drive/My Drive/NLP_Project/2022-CS-622.xlsx",
                               sheet_name="Data_Set(Sentiment Analysis)")
            df.columns = df.columns.str.strip()
            df.dropna(subset=["Sentence", "Sentiment"], inplace=True)
        except Exception as e:
            print(f"❌ Error loading data: {e}")
            return

        # Without this guard an empty sheet surfaces as a train_test_split traceback.
        if df.empty:
            print("❌ Error loading data: no rows with both 'Sentence' and 'Sentiment' values.")
            return

        # Excel cells holding only a number or date come back as non-strings,
        # which TfidfVectorizer cannot lowercase; coerce them like the Urdu
        # preprocessing path does with str().
        sentences = df["Sentence"].astype(str)

        # Train/test split
        X_train, X_test, y_train, y_test = train_test_split(
            sentences, df["Sentiment"], test_size=0.2, random_state=42)

        # Vectorizer
        vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_df=0.9, min_df=5)
        X_train_vec = vectorizer.fit_transform(X_train)
        X_test_vec = vectorizer.transform(X_test)

        # Model training
        model = MultinomialNB()
        model.fit(X_train_vec, y_train)
        print("✅ Model trained!")

        # Evaluation
        y_pred = model.predict(X_test_vec)
        print("📊 Evaluation:\n", classification_report(y_test, y_pred))

        # Save results
        results = pd.DataFrame({
            "Original Sentence": X_test.values,
            "Actual Sentiment": y_test.values,
            "Predicted Sentiment": y_pred
        })
        results.to_excel("english_predictions_with_actual_predicative.xlsx", index=False)
        print("📁 Predictions saved to 'english_predictions_with_actual_predicative.xlsx'")

        # Save model/vectorizer (on_text_submit loads these exact file names)
        with open("naive_bayes_classifier.pkl", "wb") as f:
            pickle.dump(model, f)
        with open("tfidf_vectorizer.pkl", "wb") as f:
            pickle.dump(vectorizer, f)
        print("💾 Model and vectorizer saved.")

# TEXT SUBMIT HANDLER
def on_text_submit(sender):
    """Classify the sentence currently in ``text_input`` and print the result.

    Submit callback for the text box. The saved classifier and vectorizer are
    re-loaded from disk on every submission; if loading fails the error is
    printed and the text box is left untouched. Otherwise the text box is
    emptied after the prediction (or the "please type" hint) has been printed.

    Args:
        sender: The widget that fired the submit event (unused).
    """
    with output:
        clear_output(wait=True)
        print("🔍 Loading model for prediction...")

        # Both artifacts must load; bail out before touching the input otherwise.
        try:
            with open("naive_bayes_classifier.pkl", "rb") as model_file:
                classifier = pickle.load(model_file)
            with open("tfidf_vectorizer.pkl", "rb") as vectorizer_file:
                tfidf = pickle.load(vectorizer_file)
        except Exception as load_error:
            print(f"❌ Error loading model/vectorizer: {load_error}")
            return

        sentence = text_input.value.strip()
        if not sentence:
            print("ℹ️ Please type something.")
        else:
            print("📝 Input received:", sentence)
            try:
                features = tfidf.transform([sentence])
                predicted_labels = classifier.predict(features)
                print(f"💡 Predicted Sentiment: {predicted_labels[0]}")
            except Exception as predict_error:
                print(f"❌ Prediction failed: {predict_error}")

        # Empty the box so the next sentence can be typed straight away
        text_input.value = ""

# Attach handler ONCE (prevent duplication)
text_input.on_submit(on_text_submit)


def show_test_hint(b):
    """Tell the user how to run a prediction (click callback of the test button).

    The hint is printed inside the module-level ``output`` widget, like every
    other callback in this cell. Output printed by a widget callback outside
    an Output context is not displayed on some front ends.

    Args:
        b: The clicked button (unused).
    """
    with output:
        print("📢 Type a sentence above and press Enter to test.")


# BUTTONS
train_button = widgets.Button(
    description="Train English_Sentence Model",
    button_style="success",
    layout=widgets.Layout(width="300px")
)
test_button = widgets.Button(
    description="Test English Sentence Model",
    button_style="info",
    layout=widgets.Layout(width="300px")
)

train_button.on_click(train_model)
test_button.on_click(show_test_hint)

# DISPLAY UI: buttons in one row, then the text box, then the output area
display(widgets.HBox([train_button, test_button]))
display(text_input)
display(output)


HBox(children=(Button(button_style='success', description='Train English_Sentence Model', layout=Layout(width=…

Text(value='', description='Enter text:', layout=Layout(width='600px'), placeholder='Type a sentence and press…

Output()

📢 Type a sentence above and press Enter to test.
