In [28]:
# CELL 1: Install required packages
!pip install streamlit pyngrok pandas numpy plotly wordcloud matplotlib seaborn --quiet
print("All packages installed successfully!")

All packages installed successfully!


In [29]:
# CELL 2: Set up ngrok authentication
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: the authtoken is a credential and must never be stored in the
# notebook. It is read from the NGROK_AUTHTOKEN environment variable when set,
# otherwise requested through a hidden prompt. Any token that was previously
# saved in this notebook should be revoked in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")
if not ngrok_auth_token:
    raise ValueError("An ngrok authtoken is required to open a tunnel.")
ngrok.set_auth_token(ngrok_auth_token)
print("✅ Ngrok authentication set!")

✅ Ngrok authentication set!


In [30]:

%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from wordcloud import WordCloud , STOPWORDS
import matplotlib.pyplot as plt

# The title and description are shown both on the main page and in the sidebar;
# keeping each text in one constant stops the two copies from drifting apart
# (the main-page description previously read "stramlit").
APP_TITLE = "Sentiment Analysis of Tweets about US Airlines"
APP_DESCRIPTION = "This application is a streamlit dashboard to analyze the sentiment of Tweets 🐦"

st.title(APP_TITLE)
st.sidebar.title(APP_TITLE)

st.markdown(APP_DESCRIPTION)
st.sidebar.markdown(APP_DESCRIPTION)

# Path of the tweets CSV that load_data() reads.
DATA_URL=("/content/Tweets.csv")

@st.cache_data(persist=True)
def load_data():
    """Read the CSV at DATA_URL and parse its ``tweet_created`` column as datetimes.

    Returns
    -------
    pandas.DataFrame
        The loaded rows, with ``tweet_created`` converted by ``pd.to_datetime``.
    """
    raw_tweets = pd.read_csv(DATA_URL)
    return raw_tweets.assign(tweet_created=pd.to_datetime(raw_tweets['tweet_created']))

data = load_data()

# Sidebar: show one randomly sampled tweet text for the chosen sentiment.
st.sidebar.subheader("Show random tweet")
random_tweet = st.sidebar.radio('Sentiment', ('positive', 'neutral', 'negative'))
# The query string refers to the `random_tweet` variable by name, so that name must stay.
matching_texts = data.query('airline_sentiment==@random_tweet')[["text"]]
sampled_text = matching_texts.sample(n=1).iat[0, 0]
st.sidebar.markdown(sampled_text)

st.sidebar.markdown("### Number of tweets by sentiment")
# Label typo fixed ("Visualizatiom" -> "Visualization"); the widget key is unchanged.
select=st.sidebar.selectbox('Visualization type',['Histogram','Pie chart'],key='1')

# One row per sentiment value with the number of tweets carrying it.
sentiment_count=data['airline_sentiment'].value_counts()
sentiment_count=pd.DataFrame({'Sentiment':sentiment_count.index, 'Tweets':sentiment_count.values})
# The checkbox is ticked by default, so the chart stays hidden until it is unticked.
if not st.sidebar.checkbox("Hide",True):
    st.markdown("### Number of tweets by sentiment")
    if select=="Histogram":
        fig=px.bar(sentiment_count,x='Sentiment', y='Tweets', color='Tweets', height=500)
    else:
        fig=px.pie(sentiment_count, values='Tweets',names='Sentiment')
    # Both chart types are rendered by the same call.
    st.plotly_chart(fig)

def extract_tweet_coordinates(tweets):
    """Return the plottable positions found in the ``tweet_coord`` column.

    Each usable cell is expected to be text of the form ``"[40.74, -73.99]"``.
    NOTE(review): the pair is read as [latitude, longitude] -- confirm against
    the dataset before relying on the map.

    Parameters
    ----------
    tweets : pandas.DataFrame
        Tweets to scan; the ``tweet_coord`` column may be absent or all-missing.

    Returns
    -------
    pandas.DataFrame
        Float columns ``lat`` and ``lon``, one row per tweet whose coordinates
        could be parsed. Empty when nothing can be plotted.
    """
    no_coordinates = pd.DataFrame({'lat': [], 'lon': []}, dtype=float)
    if 'tweet_coord' not in tweets.columns:
        return no_coordinates
    raw = tweets['tweet_coord'].dropna().astype(str)
    if raw.empty:
        return no_coordinates

    number = r'(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)'
    pattern = r'\[\s*' + number + r'\s*,\s*' + number + r'\s*\]'
    # Cells that do not match the pattern become NaN and are dropped below.
    coords = raw.str.extract(pattern).astype(float)
    coords.columns = ['lat', 'lon']
    coords = coords.dropna()

    in_range = coords['lat'].between(-90, 90) & coords['lon'].between(-180, 180)
    # (0, 0) is treated as a "no position" placeholder rather than a real location.
    placeholder = (coords['lat'] == 0) & (coords['lon'] == 0)
    return coords[in_range & ~placeholder]


st.sidebar.subheader("When and where users tweeting from?")
hour=st.sidebar.slider("Hour of day",0,23)
modified_data=data[data['tweet_created'].dt.hour==hour]
# The checkbox is ticked by default, so this section stays closed until it is unticked.
if not st.sidebar.checkbox("Close", True, key='close1'):
    st.markdown("### Tweets locations based on the time of day")
    st.markdown("%i tweets between %i:00 and %i:00" % (len(modified_data),hour,(hour+1)%24))

    # Plot the tweets' own coordinates instead of hard-coded sample points.
    tweet_locations = extract_tweet_coordinates(modified_data)
    if not tweet_locations.empty:
        st.map(tweet_locations)
        st.info("📍 %i of these tweets include coordinates" % len(tweet_locations))
    else:
        st.warning("📍 Location data not available for the selected hour")
        # Show user timezones instead
        if 'user_timezone' in modified_data.columns:
            st.subheader("User Timezones")
            timezone_count = modified_data['user_timezone'].value_counts().head(10)
            # Explicit column names keep the axis titles independent of how
            # pandas names the value_counts() result.
            timezone_frame = pd.DataFrame({'Time Zone': timezone_count.index,
                                           'Number of Tweets': timezone_count.values})
            fig_tz = px.bar(timezone_frame, x='Time Zone', y='Number of Tweets')
            st.plotly_chart(fig_tz)

    if st.sidebar.checkbox("Show raw data",False):
        st.write(modified_data)

st.sidebar.subheader("Breakdown airline by sentiment")
choice = st.sidebar.multiselect(
    'Pick airlines',
    ('US Airways', 'United', 'American', 'Southwest', 'Delta', 'Virgin America'),
    key='0',
)

# Nothing is drawn until at least one airline has been picked.
if choice:
    picked_rows = data[data['airline'].isin(choice)]
    # One facet per sentiment, counting tweets per selected airline.
    fig_choice = px.histogram(
        picked_rows,
        x='airline',
        y='airline_sentiment',
        histfunc='count',
        color='airline_sentiment',
        facet_col='airline_sentiment',
        labels={'airline_sentiment': 'tweets'},
        height=600,
        width=800,
    )
    st.plotly_chart(fig_choice)

st.sidebar.header("Word Cloud")
word_sentiment = st.sidebar.radio(
    'Display word cloud for what sentiment?',
    ('positive', 'neutral', 'negative'),
)

# The checkbox is ticked by default, so the cloud is only built once it is unticked.
if not st.sidebar.checkbox("Close", True, key='3'):
    st.header('Word cloud for %s sentiment' % (word_sentiment))
    sentiment_tweets = data[data['airline_sentiment'] == word_sentiment]
    all_tokens = ' '.join(sentiment_tweets['text']).split()
    # Drop links, @mentions and the retweet marker before building the cloud.
    kept_tokens = [
        token for token in all_tokens
        if not ('http' in token or token.startswith('@') or token == 'RT')
    ]
    processed_words = ' '.join(kept_tokens)

    # An explicit figure is created and handed to st.pyplot() below.
    fig, ax = plt.subplots(figsize=(10, 5))
    cloud = WordCloud(
        stopwords=STOPWORDS,
        background_color='white',
        height=400,
        width=800,
    ).generate(processed_words)
    ax.imshow(cloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(f'Word Cloud for {word_sentiment.capitalize()} Sentiment', fontsize=14)

    st.pyplot(fig)

    # Release the figure once it has been rendered.
    plt.close(fig)

# Additional: optional summary of the loaded dataset (off by default).
show_dataset_info = st.sidebar.checkbox("Show Dataset Info", False)
if show_dataset_info:
    st.subheader("Dataset Information")
    total_tweets = len(data)
    column_names = list(data.columns)
    st.write(f"Total tweets: {total_tweets}")
    st.write(f"Columns: {column_names}")
    st.write("Sample of data:")
    # First three rows only.
    st.write(data.head(3))

Overwriting app.py


In [31]:
# CELL 4: Upload Tweets.csv file (if not already uploaded)
from google.colab import files
import os

# Only prompt for an upload when the CSV is not already at the expected path.
if os.path.exists('/content/Tweets.csv'):
    print("Tweets.csv already exists!")
else:
    print(" Please upload your Tweets.csv file:")
    uploaded = files.upload()
    # The upload result is checked for an entry named exactly 'Tweets.csv'.
    upload_message = (
        "Tweets.csv uploaded successfully!"
        if 'Tweets.csv' in uploaded
        else " Please make sure to upload Tweets.csv file"
    )
    print(upload_message)

Tweets.csv already exists!


In [32]:
# CELL 5: START YOUR FIXED STREAMLIT APP
import time

print(" Stopping any existing processes...")
!pkill -f streamlit 2>/dev/null || true
!pkill -f ngrok 2>/dev/null || true
time.sleep(3)

print(" Starting your FIXED Streamlit App...")

# Start Streamlit with proper configuration
get_ipython().system_raw('streamlit run /content/app.py --server.port 8501 --server.address=0.0.0.0 --server.headless=true &')

print(" Waiting for Streamlit to start (10 seconds)...")
time.sleep(10)

print(" Creating ngrok tunnel...")
public_url = ngrok.connect(8501)

print("\n" + "="*70)
print(" YOUR FIXED STREAMLIT APP IS LIVE!")
print("="*70)
print(f" CLICK THIS LINK: {public_url}")
print("="*70)

print("\n Streamlit process status:")
!ps aux | grep streamlit | grep -v grep | head -2

print(f"\n Open this URL in your browser: {public_url}")
print("Your sentiment analysis dashboard is ready! (Map issue fixed)")

 Stopping any existing processes...




^C
^C
 Starting your FIXED Streamlit App...
 Waiting for Streamlit to start (10 seconds)...
 Creating ngrok tunnel...

 YOUR FIXED STREAMLIT APP IS LIVE!
 CLICK THIS LINK: NgrokTunnel: "https://disharmonious-jodi-intrudingly.ngrok-free.dev" -> "http://localhost:8501"

 Streamlit process status:
root        9367  7.7  0.4  81964 66340 ?        S    06:07   0:00 /usr/bin/python3 /usr/local/bin/streamlit run /content/app.py --server.port 8501 --server.address=0.0.0.0 --server.headless=true

 Open this URL in your browser: NgrokTunnel: "https://disharmonious-jodi-intrudingly.ngrok-free.dev" -> "http://localhost:8501"
Your sentiment analysis dashboard is ready! (Map issue fixed)
