In [13]:
import sys
import os
import gradio as gr
import joblib
import numpy as np
import spacy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model
from helpers.processing_helpers import clean_texts_spacy
from PIL import Image


In [14]:
# spaCy pipeline loaded from the 'en_core_web_lg' package
nlp = spacy.load('en_core_web_lg')

# Bar colours for plots, one hex code per entry
custom_palette = [
    '#32d99c',
    '#28b683',
    '#1e956a',
    '#46e5aa',
    '#5bf1b8',
]

In [15]:
# Deserialize the persisted TF-IDF and bag-of-words vectorizers (in that order)
tfidf_vectorizer, bow_vectorizer = (
    joblib.load('outputs/models/tfidf_vectorizer.joblib'),
    joblib.load('outputs/models/bow_vectorizer.joblib'),
)

In [16]:
# Registry of persisted models: models[model name][embedding name] -> loaded model.
# Random Forest entries are read with joblib, Neural Network entries with Keras' load_model.
models = {
    'Random Forest': dict(
        TFIDF=joblib.load('outputs/models/Random_Forest_TFIDF.joblib'),
        BOW=joblib.load('outputs/models/Random_Forest_BOW.joblib'),
        SPACY=joblib.load('outputs/models/Random_Forest_SPACY.joblib'),
    ),
    'Neural Network': dict(
        TFIDF=load_model('outputs/models/Neural_Network_TFIDF.keras'),
        BOW=load_model('outputs/models/Neural_Network_BOW.keras'),
        SPACY=load_model('outputs/models/Neural_Network_SPACY.keras'),
    ),
}

In [17]:
def predict_review(model_name, embedding_name, review_text):
    """Predict a rating for one review and plot the per-class confidence.

    Args:
        model_name: Top-level key into ``models`` ('Random Forest' or
            'Neural Network').
        embedding_name: Embedding to use: 'TFIDF', 'BOW' or 'SPACY'.
        review_text: Raw review text.

    Returns:
        A ``(message, plot_path)`` tuple. On success ``message`` holds the
        predicted rating and its confidence, and ``plot_path`` is the path of
        the saved bar chart. On any failure ``message`` describes the problem
        and ``plot_path`` is ``None``, so callers can always unpack two values.
    """
    # Nothing to classify: say so instead of predicting on blank input.
    if review_text is None or not str(review_text).strip():
        return "Please enter a review text.", None

    # Preprocess the review text
    try:
        # clean_texts_spacy is given a Series, so wrap the single review in one
        cleaned_text = clean_texts_spacy(pd.Series([review_text]))[0]
    except Exception as e:
        return f"Error in preprocessing input: {str(e)}", None

    # Vectorize the input text
    try:
        if embedding_name == 'TFIDF':
            X_input = tfidf_vectorizer.transform([cleaned_text])
        elif embedding_name == 'BOW':
            X_input = bow_vectorizer.transform([cleaned_text])
        elif embedding_name == 'SPACY':
            # One row holding the document vector
            X_input = np.array([nlp(cleaned_text).vector])
        else:
            return "Invalid embedding method selected.", None
    except Exception as e:
        return f"Error in vectorizing input: {str(e)}", None

    # Get the selected model
    model = models.get(model_name, {}).get(embedding_name)
    if model is None:
        return f"Model not available for {model_name} with {embedding_name} embedding.", None

    # Make prediction
    fig = None
    try:
        if model_name == 'Neural Network':
            if embedding_name in ['TFIDF', 'BOW']:
                # Densify the vectorizer output (presumably a SciPy sparse
                # matrix) before handing it to the network
                X_input = X_input.toarray()
            probabilities = model.predict(X_input)
            # argmax is a 0-based index; ratings are reported as 1-5
            predicted_class = np.argmax(probabilities, axis=1)[0] + 1
        else:
            probabilities = model.predict_proba(X_input)
            predicted_class = model.predict(X_input)[0]

        class_probabilities = np.asarray(probabilities).flatten()
        confidence = float(np.max(class_probabilities))

        # Label the bars with the model's own classes when it exposes them
        # (scikit-learn orders predict_proba columns by classes_); otherwise
        # fall back to 1..n so the axis always matches the number of bars.
        class_labels = getattr(model, 'classes_', None)
        if class_labels is None or len(class_labels) != len(class_probabilities):
            class_labels = range(1, len(class_probabilities) + 1)

        # Generate confidence plot
        fig, ax = plt.subplots(figsize=(5, 3))
        ax.bar(list(class_labels), class_probabilities, color=custom_palette)
        ax.set_xlabel('Ratings', fontsize=14)
        ax.set_ylabel('Confidence', fontsize=14)
        ax.set_title('Model Confidence for Each Rating', fontsize=16)
        fig.tight_layout()

        # NOTE: every call overwrites this one file, so concurrent requests
        # can race on it; the path is kept as part of the existing output layout.
        confidence_plot_path = 'outputs/figures/confidence_plot.png'
        os.makedirs(os.path.dirname(confidence_plot_path), exist_ok=True)
        fig.savefig(confidence_plot_path)

        return f"Predicted Rating: {predicted_class}\nConfidence: {confidence:.2f}", confidence_plot_path
    except Exception as e:
        return f"Error in making prediction: {str(e)}", None
    finally:
        # Close exactly the figure created here, including when plotting or
        # saving raised, so figures do not pile up across requests.
        if fig is not None:
            plt.close(fig)

In [None]:
def gradio_interface(model_name, embedding_name, review_text):
    """Gradio callback: run ``predict_review`` and return one value per output.

    Args:
        model_name: Model chosen in the first dropdown.
        embedding_name: Embedding chosen in the second dropdown.
        review_text: Text typed into the review box.

    Returns:
        A ``(prediction_text, confidence_plot_path)`` pair.
        ``confidence_plot_path`` is ``None`` when only a message is available.
    """
    result = predict_review(model_name, embedding_name, review_text)
    # predict_review may hand back a bare message string instead of a
    # (text, path) pair; unpacking that directly would raise, so normalise it.
    if isinstance(result, str):
        return result, None
    prediction, confidence_plot_path = result
    return prediction, confidence_plot_path

# Input widgets, listed in the positional order of gradio_interface's parameters
input_components = [
    gr.Dropdown(label="Select Model", choices=['Random Forest', 'Neural Network']),
    gr.Dropdown(label="Select Embedding Technique", choices=['TFIDF', 'BOW', 'SPACY']),
    gr.Textbox(label="Review Text", placeholder="Enter your review here...", lines=5),
]

# Output widgets: the text summary first, then the chart referenced by file path
output_components = [
    gr.Textbox(label="Prediction"),
    gr.Image(label="Confidence Plot", type='filepath'),
]

iface = gr.Interface(
    title="Hotel Review Rating Prediction",
    description="Select a model and embedding technique, enter a hotel review, and see the predicted rating and confidence, along with visual analysis.",
    fn=gradio_interface,
    inputs=input_components,
    outputs=output_components,
)

# Start serving the interface
iface.launch()


* Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
