### Sentiment Analysis

In [55]:
# Import Libraries
# All notebook-wide imports live in this cell, together with the one-off
# downloads of the NLTK data packages the analysis relies on.

from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
# Lexicon data for the VADER-based SentimentIntensityAnalyzer.
nltk.download("vader_lexicon")
# Tokenizer models. NOTE(review): no tokenizer call appears in the cells
# visible here - confirm this download is still needed.
nltk.download('punkt')
# Stop-word lists, read via stopwords.words('english') when word
# frequencies are computed.
nltk.download('stopwords')
import numpy as np
import pandas as pd
import statistics
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import dash
from dash import dcc, html
import plotly.express as px
from nltk.corpus import stopwords
from collections import Counter
import string



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\aad.sray\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\aad.sray\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [71]:
# Import data and pre-process data

df = pd.read_csv('db2023_cleandf.csv')

# Number the comments 1..N from the actual row count. A hard-coded
# range(1, 480) raises a length-mismatch ValueError as soon as the CSV
# holds anything other than exactly 479 rows.
df['ID'] = range(1, len(df) + 1)

# The timestamp column is not used by the sentiment analysis below.
# Reassigning (instead of inplace=True) keeps the cell easy to reason about.
df = df.drop(columns=['Time'])

df.head()


Unnamed: 0,Comment,Score,ID
0,Posting a bit early so people know where to go...,1,1
1,20% fare reduction on public transport continu...,39,2
2,This is good.,3,3
3,"Aaaaaahhhhh, what have the Greens done for us!",2,4
4,"€67 million to provide 16,000 craft apprentice...",25,5


In [72]:
# Sanity check: confirm that the object loaded with pd.read_csv is a DataFrame.
type(df)

pandas.core.frame.DataFrame

In [73]:
import statistics

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Fetch the NLTK data packages this notebook needs (same order as before,
# so the download log is unchanged).
for _nltk_resource in ('punkt', 'vader_lexicon'):
    nltk.download(_nltk_resource)

# Module-level VADER analyzer instance
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\aad.sray\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\aad.sray\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


### Calculate sentiment scores and word frequencies

In [74]:
# Function to calculate sentiment scores
def calculate_sentiment_scores(comment):
    """Return the VADER compound polarity score for one comment.

    The analyzer is created on first use and cached on the function object.
    This function is applied once per row via ``Series.apply``, so building
    a fresh ``SentimentIntensityAnalyzer`` on every call would repeat the
    same setup work for each comment.

    Parameters
    ----------
    comment : str
        Text to score.

    Returns
    -------
    float
        The ``'compound'`` entry of ``polarity_scores(comment)``.
    """
    analyzer = getattr(calculate_sentiment_scores, '_analyzer', None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        calculate_sentiment_scores._analyzer = analyzer
    return analyzer.polarity_scores(comment)['compound']

# Function to calculate word frequencies and per-word sentiment
def process_comments_and_get_metrics(df):
    """Build a per-word table of frequency and average comment sentiment.

    Each comment is lower-cased, stripped of ``string.punctuation`` and split
    on whitespace; English stop words are discarded. For every remaining
    word the function reports how often it occurs across all comments and
    the mean compound sentiment of the comments it occurs in (weighted by
    the number of occurrences).

    Previously the i-th distinct word was paired with the i-th comment's
    sentiment and both lists were truncated to a common length, so the
    ``Sentiment`` value had no relation to the word next to it and words
    beyond the number of comments were silently dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'Comment'`` column. Missing comments are treated as
        empty text.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Word'``, ``'Frequency'`` and ``'Sentiment'``, one row per
        distinct non-stop word, in order of first appearance.

    Side effects
    ------------
    Adds (or overwrites) a ``'Sentiment'`` column on ``df`` holding the
    compound score of each comment.
    """
    # Coerce to text so a single missing value cannot abort the whole run.
    comments = df['Comment'].fillna('').astype(str)

    # Calculate sentiment scores for each comment (stored on df as before)
    df['Sentiment'] = comments.apply(calculate_sentiment_scores)

    stop_words = set(stopwords.words('english'))
    strip_punctuation = str.maketrans('', '', string.punctuation)

    word_freq = Counter()
    sentiment_totals = {}
    for comment, score in zip(comments, df['Sentiment']):
        for word in comment.lower().translate(strip_punctuation).split():
            if word in stop_words:
                continue
            word_freq[word] += 1
            sentiment_totals[word] = sentiment_totals.get(word, 0.0) + score

    # Counter preserves insertion order, so all three columns stay aligned.
    words = list(word_freq)
    return pd.DataFrame({
        'Word': words,
        'Frequency': [word_freq[word] for word in words],
        'Sentiment': [sentiment_totals[word] / word_freq[word] for word in words],
    })

### Visualisations for Word Frequency and Sentiment Analysis

In [77]:
# Word Frequency and Sentiment analysis
# Builds a one-page Dash app showing, for each word, its frequency (x axis
# and colour) against its sentiment value (y axis).


# Create Dash App
app = dash.Dash(__name__)


# Get df metrics
df_metrics = process_comments_and_get_metrics(df)

# Layout App
app.layout = html.Div(children=[
    html.H1("Word Sentiment Analysis - Sentiment and Frequency"),

    # Plotly Graph
    dcc.Graph(
        id='scatter-plot',
        figure=px.scatter(
            df_metrics,
            x='Frequency',
            y='Sentiment',
            text='Word',
            title='Sentiment and Frequency Scatter Plot',
            color='Frequency',  # Set color based on Frequency
            color_continuous_scale='plasma',  # Set color scale to 'plasma'
        # Markers only: the per-point text labels are not drawn on the plot.
        ).update_traces(mode='markers', marker=dict(size=10))
    )
])

# Run the app
if __name__ == '__main__':
    # Dash 3 removed ``run_server`` in favour of ``run``. Prefer ``run`` and
    # fall back to ``run_server`` only on older releases that lack it.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True, port=8062)
