## Creating an interface for predicting reviews

Installing the required packages

In [None]:
# Install the app's runtime dependencies into the notebook environment
# (the leading "!" hands the line to the shell instead of Python).
!pip install streamlit pyngrok joblib nltk scikit-learn dill
from pyngrok import ngrok

# NOTE(review): "#" looks like a placeholder -- substitute a real ngrok auth
# token before running, and avoid committing the real value.
ngrok.set_auth_token("#") # Set authorization token from ngrok



Writing the "app.py" file

In [None]:
%%writefile app.py
import streamlit as st
import joblib
import string
import nltk
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer


import joblib
import streamlit as st
import joblib
import string
import nltk
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# Fetch the NLTK "stopwords" corpus when the app starts.
# NOTE(review): nothing in the visible script reads the corpus directly --
# presumably the pickled pipeline needs it; confirm before removing.
nltk.download("stopwords")

def whitespace_split(text):
    """
    Tokenize `text` by calling its no-argument ``split()``.

    NOTE(review): this is defined before the model is loaded and is not
    called anywhere in the visible script -- presumably the pickled
    vectorizer references it by name, so keep the name and location; confirm.
    """
    tokens = text.split()
    return tokens

model_path = "/content/stack_2_models_optimized.pkl"


@st.cache_resource
def _load_artifacts(path):
    """
    Load the pickled (vectorizer, tfidf_transformer, stacking_model) triple.

    Streamlit re-executes this script on every widget interaction; caching
    the result as a resource means the pickle is read from disk once per
    server process instead of on every rerun.
    """
    # NOTE: joblib.load unpickles arbitrary objects -- only point this at a
    # model file you trust.
    return joblib.load(path)


vectorizer, tfidf_transformer, stacking_model = _load_artifacts(model_path)


def predict_review_probability(review, fake_threshold=0.7):
    """
    Score one review with the loaded pipeline and label it.

    The text is run through the module-level count vectorizer and TF-IDF
    transformer, then through the stacking model's ``predict_proba``.
    Column 0 of the result is read as the "fake" probability and column 1
    as the "real" probability (assumes the model's class order is
    [fake, real] -- TODO confirm against the training code).

    Returns ``(label, (real_probability, fake_probability))`` where the label
    is "Fake Review" when the fake probability is at least `fake_threshold`,
    otherwise "Real Review".
    """
    features = tfidf_transformer.transform(vectorizer.transform([review]))
    class_probs = stacking_model.predict_proba(features)[0]

    real_score = class_probs[1]
    fake_score = class_probs[0]

    if fake_score >= fake_threshold:
        label = "Fake Review"
    else:
        label = "Real Review"
    return label, (real_score, fake_score)

def predict_csv_file(uploaded_file):
    """
    Score every review in a CSV and return the mean real/fake probabilities.

    The file is parsed with ``pd.read_csv`` (so its first row is treated as a
    header) and only the first column is used as review text; a Streamlit
    warning is shown when more columns are present.

    Returns:
        ``(avg_real_prob, avg_fake_prob)`` -- the means of column 1 and
        column 0 of the model's ``predict_proba`` output (assumes the model's
        class order is [fake, real] -- TODO confirm).

    Raises:
        ValueError: if the first column contains no non-missing entries.
    """
    df = pd.read_csv(uploaded_file)

    # Ensure CSV has only reviews (first column assumed to contain reviews)
    if df.shape[1] > 1:
        st.warning("⚠️ The uploaded CSV has multiple columns. Only the first column will be considered as reviews.")

    # Drop missing cells before converting: astype(str) would otherwise turn
    # NaN into the literal text "nan" and score it as if it were a review.
    reviews = df.iloc[:, 0].dropna().astype(str).tolist()
    if not reviews:
        raise ValueError("The uploaded CSV contains no reviews in its first column.")

    # Transform reviews using the loaded vectorizer & TF-IDF transformer
    review_counts = vectorizer.transform(reviews)
    review_tfidf = tfidf_transformer.transform(review_counts)

    # Get probabilities for all reviews
    probas = stacking_model.predict_proba(review_tfidf)

    # Compute overall averages (index 1 is read as real, index 0 as fake)
    avg_real_prob = probas[:, 1].mean()
    avg_fake_prob = probas[:, 0].mean()

    return avg_real_prob, avg_fake_prob

# ========================== STREAMLIT UI ==========================

st.title("🛍️ Fake Review Detector")
st.markdown("**Check if a review or a batch of reviews is Real or Fake.**")

# Option 1: Single Review Input
st.subheader("📌 Test a Single Review")
review_input = st.text_area("Enter a product review:", height=150)

if st.button("Analyze Review"):
    if not review_input.strip():
        st.error("❌ Please enter a review.")
    else:
        prediction, (real_prob, fake_prob) = predict_review_probability(review_input, fake_threshold=0.7)
        st.markdown(f"**🟢 Probability of being Real:** {real_prob:.2%}")
        st.markdown(f"**🔴 Probability of being Fake:** {fake_prob:.2%}")
        st.markdown(f"### 🎯 Prediction: {prediction}")
        # st.progress takes an integer percentage here
        st.progress(int(fake_prob * 100))

# Option 2: CSV Upload
st.subheader("📂 Upload a CSV File")
uploaded_file = st.file_uploader("Upload a CSV file with reviews (one review per row)", type=["csv"])

if uploaded_file is not None:
    if st.button("Analyze CSV Reviews"):
        try:
            avg_real_prob, avg_fake_prob = predict_csv_file(uploaded_file)
        except ValueError as exc:
            # pandas' EmptyDataError / ParserError and decoding errors all
            # derive from ValueError; report them in the page instead of
            # letting a raw traceback reach the user.
            st.error(f"❌ Could not analyze the CSV file: {exc}")
        else:
            st.markdown(f"**🟢 Average Probability of Reviews Being Real:** {avg_real_prob:.2%}")
            st.markdown(f"**🔴 Average Probability of Reviews Being Fake:** {avg_fake_prob:.2%}")

            # The batch verdict compares the two averages directly; it does
            # not apply the 0.7 threshold used for single reviews.
            if avg_fake_prob > avg_real_prob:
                st.markdown("### ⚠️ Most of these reviews seem **Fake**!")
            else:
                st.markdown("### ✅ Most of these reviews seem **Real**!")




Writing app.py


Launching the Streamlit interface

In [None]:

from pyngrok import ngrok
import subprocess
import time

# Kill any pre-existing ngrok processes
!pkill ngrok

port = 8501

public_url = ngrok.connect(port).public_url
print("Your Streamlit app is available at:", public_url)

# Run the Streamlit app in a background process
process = subprocess.Popen(["streamlit", "run", "app.py"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# Optional: Wait a few seconds to let the app start properly
time.sleep(5)
