In [6]:
!pip install streamlit pyngrok transformers torch xgboost tweepy emoji


Collecting emoji
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.14.1-py3-none-any.whl (590 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m590.6/590.6 kB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.14.1


In [15]:
%%writefile app.py
import streamlit as st
from pyngrok import ngrok
import joblib
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
import tweepy
import re
import emoji

def clean_text(text):
    """Normalise a tweet before it is embedded.

    Steps, in order: repair Latin-1/UTF-8 mojibake when that is possible
    without losing characters, replace emoji with their textual ``:name:``
    form, lowercase, remove ``@mentions``, remove URLs, and collapse runs of
    whitespace.

    Args:
        text: Raw tweet text as a ``str``.

    Returns:
        The cleaned string (lowercase, single-spaced, stripped).
    """
    # UTF-8 text that was wrongly decoded as Latin-1 ("cafÃ©") can be repaired
    # by reversing the mistake. The round trip is applied only when it is
    # lossless: the previous errors='ignore' variant silently deleted every
    # emoji (so demojize below never saw one) and every correctly-encoded
    # accented letter.
    # NOTE(review): if the classifier was trained on text cleaned with the old
    # lossy behaviour, confirm that keeping these characters does not shift
    # predictions.
    try:
        text = text.encode('latin-1').decode('utf-8')
    except UnicodeError:
        pass  # not mojibake: keep the text exactly as received

    text = emoji.demojize(text)
    text = text.lower()
    text = re.sub(r"@\w+", "", text)
    text = re.sub(r"http\S+|www\S+", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

@st.cache_resource
def load_pipeline():
    """Load the serialized detection pipeline from the working directory.

    The function is decorated with ``st.cache_resource``. ``main`` reads the
    keys ``'xgb_model'``, ``'roberta_tokenizer'`` and ``'roberta_model'``
    from the returned object.

    Returns:
        Whatever object was stored in ``racism_detection_pipeline.pkl``.
    """
    # joblib.load unpickles the file, so only load pipeline files you trust.
    pipeline_path = 'racism_detection_pipeline.pkl'
    loaded_pipeline = joblib.load(pipeline_path)
    return loaded_pipeline

def get_embedding(text, tokenizer, model):
    """Embed ``text`` and return the first-position hidden state as NumPy.

    Args:
        text: String to embed (callers pass the output of ``clean_text``).
        tokenizer: Callable invoked with ``return_tensors="pt"``,
            ``truncation=True``, ``padding=True`` and ``max_length=128``; it
            must return a mapping of tensors.
        model: Callable that accepts the tokenizer output as keyword
            arguments and returns an object exposing ``last_hidden_state``.

    Returns:
        ``numpy.ndarray`` taken from ``last_hidden_state[:, 0, :]``, i.e. the
        hidden state at index 0 of the second axis for every row
        (presumably the ``<s>``/CLS token of the RoBERTa model; confirm
        against the training code).
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # The tokenizer produces CPU tensors. If the model exposes a device
    # (e.g. it was restored onto a GPU), move the inputs there so the forward
    # pass does not fail with a device mismatch.
    device = getattr(model, "device", None)
    if device is not None:
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # .numpy() only works on CPU tensors; .cpu() is a no-op when already there.
    return outputs.last_hidden_state[:, 0, :].cpu().numpy()

def _classify_text(text, xgb_model, tokenizer, model):
    """Clean, embed and classify one piece of text.

    Args:
        text: Raw text to classify.
        xgb_model: Classifier exposing ``predict`` and ``predict_proba``;
            label ``1`` is treated as "racist" and column 1 of
            ``predict_proba`` as the probability of that label.
        tokenizer: Tokenizer forwarded to ``get_embedding``.
        model: Embedding model forwarded to ``get_embedding``.

    Returns:
        ``(is_racist, confidence)``: ``is_racist`` is ``True`` when the
        predicted label is ``1``; ``confidence`` is the probability (0-1) of
        the label that was actually predicted.
    """
    embedding = get_embedding(clean_text(text), tokenizer, model)
    is_racist = bool(xgb_model.predict(embedding)[0] == 1)
    racist_proba = float(xgb_model.predict_proba(embedding)[0][1])
    # Report confidence in the *predicted* label. Showing the racist-class
    # probability next to a "Clean" verdict made confident clean predictions
    # look like low-confidence ones.
    confidence = racist_proba if is_racist else 1.0 - racist_proba
    return is_racist, confidence


def main():
    """Render the Streamlit UI for the racism-detection demo.

    Two modes are selectable from the sidebar:

    * "Single Tweet Check": classify text typed or pasted by the user.
    * "Fetch Twitter Posts": search tweets through the Twitter API with
      user-supplied credentials and classify each returned tweet.

    Twitter and unexpected errors are shown in the page via ``st.error``
    rather than raised.
    """
    st.title("Racism Detection in Tweets 🕵️")
    pipeline = load_pipeline()
    xgb_model = pipeline['xgb_model']
    tokenizer = pipeline['roberta_tokenizer']
    model = pipeline['roberta_model']

    option = st.sidebar.selectbox("Select Option", ["Single Tweet Check", "Fetch Twitter Posts"])

    if option == "Single Tweet Check":
        st.header("Analyze Text Input")
        user_input = st.text_area("Enter/Paste Tweet Text:")

        if st.button("Check for Racism"):
            # Whitespace-only input is treated as empty, like the search box below.
            if user_input and user_input.strip():
                is_racist, confidence = _classify_text(user_input, xgb_model, tokenizer, model)

                if is_racist:
                    st.error(f"🚩 Potential Racist Content (Confidence: {confidence*100:.2f}%)")
                else:
                    st.success(f"✅ Clean Content (Confidence: {confidence*100:.2f}%)")
            else:
                st.warning("Please enter some text to analyze")

    else:
        st.header("Analyze Live Tweets")

        # Twitter API credentials are entered per session in masked fields.
        api_key = st.text_input("Twitter API Key", type="password")
        api_secret = st.text_input("Twitter API Secret", type="password")
        access_token = st.text_input("Twitter Access Token", type="password")
        access_secret = st.text_input("Twitter Access Secret", type="password")

        # Keyword used as the search query for the Twitter API.
        search_query = st.text_input("Enter keyword to search tweets")

        if st.button("Fetch and Analyze Tweets"):
            if not api_key or not api_secret or not access_token or not access_secret:
                st.warning("Please enter all Twitter API credentials.")
            elif not search_query.strip():
                st.warning("Please enter a search keyword.")
            else:
                try:
                    auth = tweepy.OAuthHandler(api_key, api_secret)
                    auth.set_access_token(access_token, access_secret)
                    api = tweepy.API(auth, wait_on_rate_limit=True)

                    # tweet_mode='extended' makes the text available as `full_text`.
                    tweets = api.search_tweets(q=search_query, count=10, tweet_mode='extended')

                    results = []
                    for tweet in tweets:
                        text = tweet.full_text
                        is_racist, confidence = _classify_text(text, xgb_model, tokenizer, model)
                        results.append({
                            "Tweet": text,
                            "Prediction": "Racist" if is_racist else "Clean",
                            "Confidence (%)": round(confidence * 100, 2)
                        })

                    if results:
                        st.dataframe(pd.DataFrame(results))
                    else:
                        # An empty table gives no hint that the search itself worked.
                        st.info("No tweets found for this search.")

                except tweepy.TweepyException as e:
                    st.error(f"Twitter API Error: {e}")
                except Exception as e:
                    # Last-resort handler so a failure is shown in the page.
                    st.error(f"Error: {e}")

# Entry-point guard: build the UI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Overwriting app.py


In [13]:
# SECURITY: never hardcode the ngrok authtoken in the notebook. A token that
# was previously committed in this cell must be considered leaked and should
# be revoked in the ngrok dashboard.
# The token is read from the NGROK_AUTHTOKEN environment variable, falling
# back to a prompt that does not echo the value.
import os
from getpass import getpass

from pyngrok import ngrok

ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [16]:
import os
import threading
import time
import subprocess
from pyngrok import ngrok

# Kill any previous ngrok processes (to avoid conflicts)
!killall ngrok > /dev/null 2>&1

# Start Streamlit App in the background
def run_streamlit():
    os.system("streamlit run app.py --server.port 8501")

threading.Thread(target=run_streamlit).start()

# Wait for Streamlit to start
time.sleep(5)

# Open an ngrok tunnel to the Streamlit app
public_url = ngrok.connect(8501, "http")
print(f"Streamlit is running at {public_url}")


Streamlit is running at NgrokTunnel: "https://396f-34-124-192-111.ngrok-free.app" -> "http://localhost:8501"
