In [None]:
import pandas as pd
from sqlalchemy import create_engine
import os
import sys
import nltk
import re
import textblob
from nltk.sentiment import SentimentIntensityAnalyzer

In [52]:
# Move to the project root so the relative paths used below (./backend/.env,
# sentiment.csv) resolve. Set the TWEET_APP_ROOT environment variable to point
# at another checkout instead of editing this machine-specific default.
PROJECT_ROOT = os.environ.get(
    'TWEET_APP_ROOT',
    'C:/Users/ashay/OneDrive/Desktop/MastersPrep/Coursework/DatabaseInformationSystemsCIS 5500/Project/tweet-analysis-app',
)
os.chdir(PROJECT_ROOT)

In [53]:
# load_dotenv comes from python-dotenv; without this import the call below
# raises NameError on a fresh kernel.
from dotenv import load_dotenv

# Load the variables defined in ./backend/.env (path is relative to the
# current working directory) so later cells can read them with os.getenv.
load_dotenv(dotenv_path="./backend/.env")

True

In [55]:
# Database connection details
# The URL is read from the environment; fail here with a clear message when it
# is missing rather than letting create_engine(None) raise an obscure error.
database_url = os.getenv("POSTGRES_DATABASE_URL")
if not database_url:
    raise RuntimeError(
        "POSTGRES_DATABASE_URL is not set; check that the .env file was loaded"
    )
engine = create_engine(database_url)


In [13]:
# SQL text selecting the id, raw text, and subject label of every row in the
# tweets table. Built from adjacent literals; the resulting string is the same
# as the triple-quoted form, including the leading and trailing newlines.
query = (
    "\n"
    "    SELECT tweet_id, tweet, tweet_about\n"
    "    FROM tweets;\n"
)

In [14]:
# Run the query on a short-lived connection and read the full result set into
# a DataFrame; the context manager closes the connection afterwards.
with engine.connect() as connection:
    df = pd.read_sql(sql=query, con=connection)

In [15]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,tweet_id,tweet,tweet_about
0,1325117119755849728,We celebrating over here on Capitol Hill! Con...,Biden
1,1321057306868699136,@realDonaldTrump the reason you are hated by t...,Trump
2,1322581932996239360,@EternalMLcrisis @AnandWrites That and the gun...,Trump
3,1324098829361291264,Just #Biden our time until this election is ce...,Biden
4,1324724286272250112,Happy days are here again.\n#Biden https://t.c...,Biden


In [20]:
def preprocess_text(text):
    """Normalize a raw tweet into lowercase plain text.

    Steps, in order: drop URLs (``http...`` tokens and scheme-less ``www.``
    links), drop @mentions and #hashtags, drop every character that is
    neither a word character nor whitespace, collapse whitespace runs into a
    single space, then lowercase and strip.

    Digits and underscores count as word characters, so they are kept.

    Args:
        text: Raw tweet text. Anything that is not a ``str`` (for example
            ``None`` or a NaN standing in for a missing value) is treated as
            empty.

    Returns:
        The cleaned string, or ``''`` when the input was not a string or
        nothing is left after cleaning.
    """
    if not isinstance(text, str):
        return ''

    # Remove URLs and hyperlinks, including scheme-less "www." links
    text = re.sub(r'http\S+|\bwww\.\S+', '', text)

    # Remove mentions and hashtags
    text = re.sub(r'@\w+|#\w+', '', text)

    # Remove punctuation and other symbols (digits and underscores are \w and stay)
    text = re.sub(r'[^\w\s]', '', text)

    # Removed tokens leave gaps behind; collapse them (and newlines) to one space
    text = re.sub(r'\s+', ' ', text)

    return text.lower().strip()

In [21]:
# Keep only the rows that actually have tweet text; the index labels of the
# surviving rows are left as they were.
has_tweet = df['tweet'].notna()
df = df[has_tweet]

In [22]:
# Add a column holding the normalized text of each tweet.
df['cleaned_tweet'] = df['tweet'].map(preprocess_text)

In [34]:
# Fetch the lexicon resource that the sentiment analyzer created below needs.
vader_resource = 'vader_lexicon'
nltk.download(vader_resource)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\ashay\AppData\Roaming\nltk_data...


True

In [35]:
# Shared analyzer instance; analyze_sentiment() looks it up as a global.
sia = nltk.sentiment.SentimentIntensityAnalyzer()

In [36]:
def analyze_sentiment(text, threshold=0.05, analyzer=None):
    """Label a piece of text as 'positive', 'negative', or 'neutral'.

    The label is derived from the ``'compound'`` entry of the analyzer's
    ``polarity_scores`` result. The cut-offs are inclusive, so a score of
    exactly ``threshold`` is 'positive' and exactly ``-threshold`` is
    'negative'; the previous strict comparisons labelled both as 'neutral'.

    Args:
        text: The text to score.
        threshold: Positive cut-off applied symmetrically around zero.
            Defaults to 0.05.
        analyzer: Object exposing ``polarity_scores(text)`` that returns a
            mapping with a ``'compound'`` value. Defaults to the
            notebook-level ``sia`` instance.

    Returns:
        One of the strings ``'positive'``, ``'negative'``, ``'neutral'``.
    """
    # Resolve the global lazily so the function still works when only the
    # default analyzer exists, and can be exercised with a stand-in otherwise.
    scorer = sia if analyzer is None else analyzer
    compound = scorer.polarity_scores(text)['compound']

    if compound >= threshold:
        return 'positive'
    if compound <= -threshold:
        return 'negative'
    return 'neutral'

In [37]:
# Label every cleaned tweet as positive / negative / neutral.
# NOTE(review): the scored text is already lowercased and stripped of
# punctuation; confirm that is intended for this analyzer.
df['sentiment'] = df['cleaned_tweet'].map(analyze_sentiment)

In [39]:
# Preview the first five rows, now including the cleaned text and its label.
df.head(n=5)

Unnamed: 0,tweet_id,tweet,tweet_about,cleaned_tweet,sentiment
0,1325117119755849728,We celebrating over here on Capitol Hill! Con...,Biden,we celebrating over here on capitol hill cong...,positive
1,1321057306868699136,@realDonaldTrump the reason you are hated by t...,Trump,the reason you are hated by the media and demo...,negative
2,1322581932996239360,@EternalMLcrisis @AnandWrites That and the gun...,Trump,that and the gun at their heads named is givi...,negative
3,1324098829361291264,Just #Biden our time until this election is ce...,Biden,just our time until this election is certified,neutral
4,1324724286272250112,Happy days are here again.\n#Biden https://t.c...,Biden,happy days are here again,positive


In [42]:
# Keep only the columns to be exported: the tweet id and its sentiment label.
export_columns = ['tweet_id', 'sentiment']
df_filtered = df.loc[:, export_columns]

In [43]:
# Preview the first five rows of the export frame.
df_filtered.head(n=5)

Unnamed: 0,tweet_id,sentiment
0,1325117119755849728,positive
1,1321057306868699136,negative
2,1322581932996239360,negative
3,1324098829361291264,neutral
4,1324724286272250112,positive


In [None]:
# Write the id/sentiment pairs to sentiment.csv in the current working
# directory, leaving out the DataFrame index.
df_filtered.to_csv(path_or_buf='sentiment.csv', index=False)