## Import Packages and Load Data

In [1]:
#%pip install vaderSentiment

In [6]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
import pandas as pd
import os
import glob
from pathlib import Path
from tqdm import tqdm
tqdm.pandas()

In [3]:
df = pd.read_csv('data/bertopic_results.csv')

## VADER Sentiment Analysis

In [8]:
def vader_analysis(text):
    if pd.isna(text) or text == '' or text == 'nan':
        return 'neutral', 0.0
    
    text = str(text)

    analyzer = SentimentIntensityAnalyzer()
    
    try:
        scores = analyzer.polarity_scores(text)
        compound_score = scores['compound']
        
        if compound_score >= 0.05:
            sentiment = 'positive'
        elif compound_score <= -0.05:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'
            
        return sentiment, compound_score
            
    except Exception as e:
        print(f"Error processing text: {e}")
        return 'neutral', 0.0

## Add VADER Sentiment and Scores to BERTopic Results DataFrame

In [9]:
df[['sentiment', 'score']] = df['text'].progress_apply(
    lambda x: pd.Series(vader_analysis(' '.join(x) if isinstance(x, list) else str(x))))


100%|██████████████████████████████████| 402415/402415 [38:27<00:00, 174.36it/s]


In [10]:
print(df.head())

                                            filename source_type  \
0  data/podcasts/armchair-expert-with-dax-shepard...     podcast   
1  data/podcasts/armchair-expert-with-dax-shepard...     podcast   
2  data/podcasts/armchair-expert-with-dax-shepard...     podcast   
3  data/podcasts/armchair-expert-with-dax-shepard...     podcast   
4  data/podcasts/armchair-expert-with-dax-shepard...     podcast   

                        source_name  \
0  Armchair Expert With Dax Shepard   
1  Armchair Expert With Dax Shepard   
2  Armchair Expert With Dax Shepard   
3  Armchair Expert With Dax Shepard   
4  Armchair Expert With Dax Shepard   

                                               title publish_date  chunk_id  \
0  Acquired Podcast on the NFL (with Ben Gilbert ...   2025-02-05         0   
1  Acquired Podcast on the NFL (with Ben Gilbert ...   2025-02-05         1   
2  Acquired Podcast on the NFL (with Ben Gilbert ...   2025-02-05         2   
3  Acquired Podcast on the NFL (with Ben

In [11]:
sentiment_vader = df['sentiment'].value_counts()

print(sentiment_vader)


positive    320039
negative     80844
neutral       1532
Name: sentiment, dtype: int64


## Save to CSV

In [12]:
df.to_csv('data/sentiment_analysis_vader.csv', index=False)