# ***Real-Time Sentiment Analysis***

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install firebase-admin
!pip install dash dash-bootstrap-components firebase-admin

Collecting dash
  Downloading dash-3.0.4-py3-none-any.whl.metadata (10 kB)
Collecting dash-bootstrap-components
  Downloading dash_bootstrap_components-2.0.3-py3-none-any.whl.metadata (18 kB)
Collecting Flask<3.1,>=1.0.4 (from dash)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-3.0.4-py3-none-any.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m75.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_bootstrap_components-2.0.3-py3-none-any.whl (203 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.7/203.7 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading flask-3.0.3-py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m9.0 M

# 🔥 Batch Deletion Script for Firebase Firestore ("tweet_sentiments" Collection)


In [4]:
import firebase_admin
from firebase_admin import credentials, firestore
import time

# Initialize Firebase
if not firebase_admin._apps:
    cred = credentials.Certificate("/content/real-time-sentiment-anal-adf49-firebase-adminsdk-fbsvc-6bd22a44f2.json")
    firebase_admin.initialize_app(cred)

db = firestore.client()

# Safe delete with batch limit
def delete_limited_documents(collection_name, limit=100, delay=2):
    docs = db.collection(collection_name).limit(limit).stream()
    count = 0
    for doc in docs:
        db.collection(collection_name).document(doc.id).delete()
        count += 1
        print(f"Deleted: {doc.id}")
        time.sleep(delay)  # slow down to avoid quota spikes
    print(f"Deleted {count} documents (limit {limit})")

# Run this multiple times manually or in a loop if needed
delete_limited_documents("tweet_sentiments")


Deleted: 01J0bTJseHnGWP13XCw1
Deleted: 01YyeM2bzn1YDeohh9no
Deleted: 01eTxdS0sPcH3LPjMPrZ
Deleted: 01uzJxGv99LfWCaEwD8m
Deleted: 01wxVWtxTbhRWi5200sq
Deleted: 01xUwOJYjE68Ox28m1hi
Deleted: 01zNY1xNI6CSn0X76U7x
Deleted: 02IgdMQJ2Ue8zF5qYd8v
Deleted: 02bHc8L8GSGQ6vYugR23
Deleted: 02cn6rOpVqqaHGAEpOS0
Deleted: 02l5BgbmPf19VkCFyA4S
Deleted: 02p6oDDuy5Ea14RKtKXY
Deleted: 039c086a6ccbaad968d04ce99202c5074cb2244250257341aca17d195b1e927a
Deleted: 03SmhSbvCaNremfW7zqa
Deleted: 03UOLcnJMz5t29KoFwvN
Deleted: 03W7JNxstWMrtXBVOSu5
Deleted: 03a6453221380077d596bd0e06b302eaf2a227af999f2897b9606de4ed63381f
Deleted: 03f2dd66a618bc1b5f1ae958b8d8636d80629868f276a9fb0eb532e44941feda
Deleted: 03f3e2bacf836305f2b6889b2e5020109dd4a6d7a5be91d55c7c7446734b48c8
Deleted: 03f9e664b4dd0199c6c85e391c285ef0fd1a1c7973b14f17d4605acabe603f48
Deleted: 03fcb4018f5989c90998d3c67a54f166060b157c7b130f61be795e080d7b983b
Deleted: 03kriv29CtvLFzvt9R6P
Deleted: 03lTgfOVuyq43zJTbW6X
Deleted: 04024ef370883b91d24c5e169d68e47cc968d

# 💬 Real-Time Tweet Sentiment Analysis with PySpark, HuggingFace Transformers, and Firebase
This Part performs real-time sentiment analysis on tweets using the following workflow:
1. Loads a CSV dataset of tweets using PySpark for distributed processing.
2. Cleans and preprocesses the tweet text with a custom UDF.
3. Applies a HuggingFace Transformers sentiment analysis model to classify each tweet.
4. Saves the results (original text, cleaned text, sentiment label, confidence score) to Firebase Firestore.
5. Optionally, saves the output locally as a CSV for offline use or reporting.


In [6]:
from pyspark.sql import SparkSession
import pandas as pd
import re
from transformers import pipeline
from pyspark.sql.functions import col, udf
from pyspark.sql.types import StringType
import firebase_admin
from firebase_admin import credentials, firestore

# ✅ 1. Initialize Spark session
spark = SparkSession.builder.appName("RealTimeTweetSentiment").getOrCreate()

# ✅ 2. Load tweet dataset
df_spark = spark.read.csv("/content/Tweets.csv", header=True)

# ✅ 3. Define cleaning function and register UDF
def clean_tweet(text):
    text = re.sub(r"http\S+", "", text or "")
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    return text.lower().strip()

clean_udf = udf(clean_tweet, StringType())
df_cleaned = df_spark.withColumn("cleaned_text", clean_udf(col("text")))

# ✅ 4. Filter out empty cleaned_text rows
df_cleaned = df_cleaned.filter((col("cleaned_text").isNotNull()) & (col("cleaned_text") != ""))

# ✅ 5. Convert to Pandas DataFrame
df_pandas = df_cleaned.select("text", "cleaned_text").toPandas()

# ✅ 6. Limit to the first 14641 tweets
df_pandas = df_pandas.head(14641)

# ✅ 7. Load HuggingFace sentiment pipeline
sentiment_model = pipeline("sentiment-analysis")

# ✅ 8. Run sentiment prediction in batch
results = sentiment_model(df_pandas["cleaned_text"].tolist(), batch_size=32, truncation=True)

# ✅ 9. Add results to DataFrame
df_pandas["sentiment"] = [r["label"] for r in results]
df_pandas["confidence"] = [r["score"] for r in results]

# ✅ 10. Initialize Firebase only once
if not firebase_admin._apps:
    cred = credentials.Certificate("/content/real-time-sentiment-anal-adf49-firebase-adminsdk-fbsvc-6bd22a44f2.json")
    firebase_admin.initialize_app(cred)

db = firestore.client()

# ✅ 11. Push each tweet to Firestore
for i, row in df_pandas.iterrows():
    db.collection("tweet_sentiments").add({
        "text": row["text"],
        "cleaned_text": row["cleaned_text"],
        "sentiment": row["sentiment"],
        "confidence": float(row["confidence"]),
        "timestamp": firestore.SERVER_TIMESTAMP
    })
    print(f"✅ Uploaded tweet {i+1}/{len(df_pandas)}")

# ✅ 12. Optional: Save locally
df_pandas.to_csv("/content/sentiment_results_14641.csv", index=False)


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
✅ Uploaded tweet 9633/14632
✅ Uploaded tweet 9634/14632
✅ Uploaded tweet 9635/14632
✅ Uploaded tweet 9636/14632
✅ Uploaded tweet 9637/14632
✅ Uploaded tweet 9638/14632
✅ Uploaded tweet 9639/14632
✅ Uploaded tweet 9640/14632
✅ Uploaded tweet 9641/14632
✅ Uploaded tweet 9642/14632
✅ Uploaded tweet 9643/14632
✅ Uploaded tweet 9644/14632
✅ Uploaded tweet 9645/14632
✅ Uploaded tweet 9646/14632
✅ Uploaded tweet 9647/14632
✅ Uploaded tweet 9648/14632
✅ Uploaded tweet 9649/14632
✅ Uploaded tweet 9650/14632
✅ Uploaded tweet 9651/14632
✅ Uploaded tweet 9652/14632
✅ Uploaded tweet 9653/14632
✅ Uploaded tweet 9654/14632
✅ Uploaded tweet 9655/14632
✅ Uploaded tweet 9656/14632
✅ Uploaded tweet 9657/14632
✅ Uploaded tweet 9658/14632
✅ Uploaded tweet 9659/14632
✅ Uploaded tweet 9660/14632
✅ Uploaded tweet 9661/14632
✅ Uploaded tweet 9662/14632
✅ Uploaded tweet 9663/14632
✅ Uploaded tweet 9664/14632
✅ Uploaded tweet 9665/14632
✅ Uploaded 

# 📈 Real-Time Sentiment Analysis Dashboard with Firebase and HuggingFace
# This Dash web app displays real-time sentiment analysis of tweets using:
🔹 Firebase Firestore for data storage and retrieval.
🔹 HuggingFace Transformers for accurate sentiment classification.
🔹 PyDash and Plotly for interactive visualizations and UI.
Features:
✅ Live refresh with sentiment filtering and dynamic chart selection (Pie/Bar).
✅ Search and analyze custom user-inputted tweets instantly.
✅ Secure, cloud-connected, and ready for real-time deployment.
Designed for deployment in real-time analytics systems, especially for social media monitoring.


In [14]:
import dash
from dash import html, dcc, dash_table
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
from firebase_admin import credentials, firestore, initialize_app
import firebase_admin
from dash.dependencies import Input, Output, State
from transformers import pipeline
import time

# ✅ Firebase Init
if not firebase_admin._apps:
    cred = credentials.Certificate("real-time-sentiment-anal-adf49-firebase-adminsdk-fbsvc-d2c025579f.json")
    initialize_app(cred)

db = firestore.client()

# ✅ HuggingFace Sentiment Pipeline
sentiment_pipeline = pipeline("sentiment-analysis")

# ✅ Fetch Firestore Tweets
def fetch_data():
    tweets_ref = db.collection("tweet_sentiments").order_by("timestamp", direction=firestore.Query.DESCENDING).limit(500)
    docs = tweets_ref.stream()
    rows = [doc.to_dict() for doc in docs]
    return pd.DataFrame(rows)

# ✅ Classify New Tweet
def classify_tweet(text):
    result = sentiment_pipeline(text[:512])[0]
    return result["label"], float(result["score"])

# ✅ Dash App Setup
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Tweet Sentiment Dashboard"

# ✅ Layout
app.layout = dbc.Container([
    html.H2("📊 Real-Time Tweet Sentiment Dashboard"),

    dbc.Row([
        dbc.Col(dcc.Dropdown(
            id='sentiment-filter',
            options=[{'label': s, 'value': s} for s in ['All', 'POSITIVE', 'NEGATIVE']],
            value='All',
            clearable=False
        ), md=4),

        dbc.Col(dcc.RadioItems(
            id='chart-type',
            options=[
                {'label': 'Pie Chart', 'value': 'pie'},
                {'label': 'Bar Chart', 'value': 'bar'}
            ],
            value='pie',
            inline=True
        ), md=4),

        dbc.Col(html.Div(id='last-refresh', style={'textAlign': 'right'}), md=4)
    ]),

    html.Br(),

    dcc.Interval(id="interval-refresh", interval=30 * 1000, n_intervals=0),

    dcc.Graph(id='sentiment-chart'),

    dash_table.DataTable(
        id='tweet-table',
        columns=[{"name": i, "id": i} for i in ['text', 'cleaned_text', 'sentiment', 'confidence']],
        style_table={'overflowX': 'auto'},
        style_cell={'textAlign': 'left'},
        page_size=10
    ),

    html.Hr(),

    html.H4("📝 Analyze Your Own Tweet"),
    dcc.Input(id='user-input', type='text', placeholder='Type your tweet here...', style={'width': '80%'}),
    html.Button('Analyze', id='analyze-btn', n_clicks=0),
    html.Div(id='prediction-output', style={'marginTop': '10px', 'fontWeight': 'bold', 'color': 'blue'})

], fluid=True)

# ✅ Callback: Main Table & Chart Refresh
@app.callback(
    [Output('tweet-table', 'data'),
     Output('sentiment-chart', 'figure'),
     Output('last-refresh', 'children')],
    [Input('sentiment-filter', 'value'),
     Input('chart-type', 'value'),
     Input('interval-refresh', 'n_intervals')]
)
def update_dashboard(selected_sentiment, chart_type, n):
    df = fetch_data()
    timestamp = f"Last updated: {time.strftime('%H:%M:%S')}"

    if df.empty:
        return [], {}, timestamp

    if selected_sentiment != 'All':
        df = df[df['sentiment'] == selected_sentiment]

    sentiment_counts = df['sentiment'].value_counts().reset_index()
    sentiment_counts.columns = ['sentiment', 'count']

    # Define custom colors
    color_map = {'POSITIVE': 'green', 'NEGATIVE': 'red', 'NEUTRAL': 'gray'}

    if chart_type == 'pie':
        fig = px.pie(
            sentiment_counts,
            values='count',
            names='sentiment',
            title='Sentiment Distribution',
            color='sentiment',
            color_discrete_map=color_map
        )
    else:
        fig = px.bar(
            sentiment_counts,
            x='sentiment',
            y='count',
            title='Sentiment Distribution',
            color='sentiment',
            color_discrete_map=color_map
        )

    return df[['text', 'cleaned_text', 'sentiment', 'confidence']].to_dict('records'), fig, timestamp

# ✅ Callback: Analyze User Input
@app.callback(
    Output('prediction-output', 'children'),
    [Input('analyze-btn', 'n_clicks')],
    [State('user-input', 'value')]
)
def predict_sentiment(n_clicks, user_text):
    if n_clicks > 0 and user_text:
        label, score = classify_tweet(user_text)
        return f"Prediction: {label} ({score:.2%} confidence)"
    return ""

# ✅ Run App
if __name__ == '__main__':
    app.run(debug=False, port=8501)



No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


<IPython.core.display.Javascript object>

# 📦 Installing Neo4j Driver to Connect to Aura from Python


In [16]:
!pip install neo4j



# 🔄 Uploading Sentiment Analysis Results to Neo4j Aura

This code block connects to Neo4j Aura using secure credentials, verifies the connection,
and uploads tweets along with their sentiment and confidence scores as graph nodes and relationships.


In [17]:
from neo4j import GraphDatabase

# ✅ Step 1: Neo4j Aura Credentials
NEO4J_URI = "neo4j+s://2e83b9f5.databases.neo4j.io"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "l3VCuoEgZYELmkgkZXgw3u5wn36QjYa1Kk4CG6IDOY0"  # ← REPLACE THIS with your real password from the "Reset Password" button

# ✅ Step 2: Create Driver and Verify Connection
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
driver.verify_connectivity()
print("✅ Connected to Neo4j Aura!")

# ✅ Step 3: Function to Upload Tweets to Neo4j
def upload_to_neo4j(df):
    with driver.session() as session:
        for i, row in df.iterrows():
            session.run("""
                MERGE (t:Tweet {text: $text})
                SET t.cleaned_text = $cleaned_text,
                    t.sentiment = $sentiment,
                    t.confidence = $confidence

                MERGE (s:Sentiment {type: $sentiment})
                MERGE (t)-[:HAS_SENTIMENT]->(s)
            """, {
                "text": row["text"],
                "cleaned_text": row["cleaned_text"],
                "sentiment": row["sentiment"],
                "confidence": float(row["confidence"])
            })
        print(f"✅ Uploaded {len(df)} tweets to Neo4j")

# ✅ Step 4: Run It
upload_to_neo4j(df_pandas)  # assuming df_pandas is your sentiment DataFrame


✅ Connected to Neo4j Aura!
✅ Uploaded 14632 tweets to Neo4j


In [18]:
!pip install dash dash-bootstrap-components neo4j pandas plotly



# 📊 Real-Time Tweet Sentiment Dashboard using Neo4j Aura and Dash
This dashboard connects to Neo4j Aura to visualize sentiment analysis results from tweets.
It supports:
- 🔍 Live keyword-based tweet search
- 📈 Sentiment type counts
- 🌐 An interactive graph view of tweets and their associated sentiments

Built with Python, Dash, Plotly, and Neo4j Aura.


In [19]:
import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
from neo4j import GraphDatabase
import pandas as pd
import plotly.graph_objs as go
import networkx as nx

# ✅ Neo4j Aura connection
driver = GraphDatabase.driver(
    "neo4j+s://2e83b9f5.databases.neo4j.io",
    auth=("neo4j", "l3VCuoEgZYELmkgkZXgw3u5wn36QjYa1Kk4CG6IDOY0")
)

# ✅ Dash app setup
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

app.layout = dbc.Container([
    html.H2("Neo4j Tweet Sentiment Dashboard", className="text-center my-4"),

    dbc.Row([
        dbc.Col([
            html.Label("Search tweets by keyword:"),
            dcc.Input(id="keyword", type="text", value="", placeholder="e.g. happy", className="form-control"),
        ], width=6),
        dbc.Col([
            html.Label("Sentiment Count:"),
            html.Div(id="sentiment-counts", className="mt-2")
        ])
    ], className="mb-4"),

    dbc.Row([
        dbc.Col([
            html.H5("Matching Tweets"),
            html.Div(id="tweet-results", style={"maxHeight": "300px", "overflowY": "scroll"})
        ])
    ]),

    dbc.Row([
        dbc.Col([
            html.H5("Neo4j Graph View"),
            dcc.Graph(id="graph-view")
        ])
    ])
])

# ✅ Neo4j queries
def get_sentiment_counts():
    with driver.session() as session:
        result = session.run("""
            MATCH (:Tweet)-[:HAS_SENTIMENT]->(s:Sentiment)
            RETURN s.type AS sentiment, COUNT(*) AS count
        """)
        return pd.DataFrame([dict(r) for r in result])

def search_tweets(keyword):
    with driver.session() as session:
        result = session.run("""
            MATCH (t:Tweet)-[:HAS_SENTIMENT]->(s:Sentiment)
            WHERE toLower(t.cleaned_text) CONTAINS toLower($kw)
            RETURN t.text AS text, t.cleaned_text AS cleaned, t.sentiment AS sentiment, t.confidence AS confidence
            LIMIT 50
        """, {"kw": keyword})
        return pd.DataFrame([dict(r) for r in result])

def fetch_graph_data():
    with driver.session() as session:
        result = session.run("""
            MATCH (t:Tweet)-[:HAS_SENTIMENT]->(s:Sentiment)
            RETURN t.text AS tweet, s.type AS sentiment
            LIMIT 100
        """)
        return pd.DataFrame([dict(r) for r in result])

# ✅ Build plotly graph
def build_graph(df):
    nodes = []
    edges = []
    seen = set()

    for sentiment in df['sentiment'].unique():
        nodes.append(dict(id=sentiment, label=sentiment, color='blue'))
        seen.add(sentiment)

    for _, row in df.iterrows():
        tweet = row["tweet"]
        sentiment = row["sentiment"]
        if tweet not in seen:
            nodes.append(dict(id=tweet, label=tweet[:40] + "...", color='purple'))
            seen.add(tweet)
        edges.append((tweet, sentiment))

    G = nx.Graph()
    for n in nodes:
        G.add_node(n["id"], label=n["label"], color=n["color"])
    for s, t in edges:
        G.add_edge(s, t)

    pos = nx.spring_layout(G, k=0.5, iterations=50)

    edge_x, edge_y = [], []
    for s, t in G.edges():
        x0, y0 = pos[s]
        x1, y1 = pos[t]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    node_x, node_y, node_text, node_color = [], [], [], []
    for node, data in G.nodes(data=True):
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(data['label'])
        node_color.append(data['color'])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y, line=dict(width=1, color='#888'),
        hoverinfo='none', mode='lines'
    )

    node_trace = go.Scatter(
        x=node_x, y=node_y, mode='markers+text', text=node_text,
        textposition='top center',
        hoverinfo='text',
        marker=dict(color=node_color, size=10, line_width=2)
    )

    return go.Figure(data=[edge_trace, node_trace],
                     layout=go.Layout(
                         showlegend=False,
                         margin=dict(l=10, r=10, t=10, b=10),
                         hovermode='closest'
                     ))

# ✅ Callback
@app.callback(
    Output("sentiment-counts", "children"),
    Output("tweet-results", "children"),
    Output("graph-view", "figure"),
    Input("keyword", "value")
)
def update_dashboard(keyword):
    counts_df = get_sentiment_counts()
    tweets_df = search_tweets(keyword) if keyword else pd.DataFrame()
    graph_df = fetch_graph_data()

    count_list = html.Ul([html.Li(f"{row['sentiment']}: {row['count']}") for _, row in counts_df.iterrows()])
    tweet_list = html.Ul([
        html.Li([
            html.Strong(f"[{row['sentiment']}] "),
            html.Span(row['text']),
            html.Span(f" (conf: {row['confidence']:.2f})", style={"color": "gray", "fontSize": "0.9em"})
        ]) for _, row in tweets_df.iterrows()
    ]) if not tweets_df.empty else "No results."

    return count_list, tweet_list, build_graph(graph_df)

# ✅ Run server (correct version for Dash 2.0+)
if __name__ == "__main__":
    app.run(debug=True)


<IPython.core.display.Javascript object>