## Install Packages and Load Data

In [None]:
# One-time setup: uncomment the next line to install TextBlob into the kernel's environment.
#%pip install textblob

In [2]:
from textblob import TextBlob
import pandas as pd
import os
import glob
from pathlib import Path
from tqdm import tqdm
# Register tqdm with pandas so that `progress_apply` (used for the sentiment pass below) is available.
tqdm.pandas()

In [4]:
# Load the BERTopic results CSV; the sentiment step below reads its 'text' column.
df = pd.read_csv('data/bertopic_results.csv')

## TextBlob Sentiment Analysis

In [6]:
def textblob_sentiment(text):
    """Classify the sentiment and subjectivity of a piece of text with TextBlob.

    Parameters
    ----------
    text : str, list/tuple of tokens, or missing value
        The text to analyse. Lists and tuples are joined with single spaces;
        any other non-string value is converted with ``str`` before analysis.

    Returns
    -------
    tuple
        ``(sentiment, polarity, subjectivity, subjectivity_level)`` where

        * ``sentiment`` is ``'positive'`` when polarity > 0.05, ``'negative'``
          when polarity < -0.05, and ``'neutral'`` otherwise;
        * ``polarity`` and ``subjectivity`` are the scores reported by TextBlob;
        * ``subjectivity_level`` is ``'high'`` when subjectivity >= 0.5,
          otherwise ``'low'``.

        Missing values, blank strings, the literal string ``'nan'`` and any
        text TextBlob fails on yield ``('neutral', 0.0, 0.0, 'low')``.
    """
    neutral_result = ('neutral', 0.0, 0.0, 'low')

    if isinstance(text, (list, tuple)):
        # pd.isna() returns an element-wise array for sequences, which cannot
        # be used as an `if` condition, so join token sequences up front.
        text = ' '.join(str(token) for token in text)
    elif pd.isna(text):
        return neutral_result

    text_str = str(text)

    # 'nan' is what str() produces for a missing float, e.g. when the caller
    # stringifies a NaN cell before passing it in.
    if not text_str.strip() or text_str == 'nan':
        return neutral_result

    try:
        # Analyse the normalised string, not the raw argument: TextBlob only
        # accepts strings, so passing e.g. a number would raise here and the
        # row would silently be reported as neutral.
        scores = TextBlob(text_str).sentiment
        polarity = scores.polarity
        subjectivity = scores.subjectivity
    except Exception as e:
        # Best effort: one bad row must not abort a long batch run.
        print(f"Error processing text: {e}")
        return neutral_result

    if polarity > 0.05:
        sentiment = 'positive'
    elif polarity < -0.05:
        sentiment = 'negative'
    else:
        sentiment = 'neutral'

    subjectivity_level = 'high' if subjectivity >= 0.5 else 'low'

    return sentiment, polarity, subjectivity, subjectivity_level

## Add Sentiment, Sentiment Score, Subjectivity and Subjectivity Score to BERTopic Results DataFrame

In [7]:
# Score every row of 'text' (with a tqdm progress bar). Wrapping the returned
# tuple in a Series expands it into the four target columns, in the same order
# the function returns them: label, polarity, subjectivity, subjectivity level.
# Non-list values are stringified first, so a missing cell arrives as 'nan'.
df[['sentiment', 'sentiment_score', 'subjectivity_score', 'subjectivity']] = (
    df['text'].progress_apply(
        lambda raw_text: pd.Series(
            textblob_sentiment(
                ' '.join(raw_text) if isinstance(raw_text, list) else str(raw_text)
            )
        )
    )
)


100%|█████████████████████████████████| 402415/402415 [04:27<00:00, 1507.15it/s]


In [8]:
# Show the first five rows as a quick check of the frame after the sentiment columns were added.
print(df.head())

                                            filename source_type  \
0  data/podcasts/armchair-expert-with-dax-shepard...     podcast   
1  data/podcasts/armchair-expert-with-dax-shepard...     podcast   
2  data/podcasts/armchair-expert-with-dax-shepard...     podcast   
3  data/podcasts/armchair-expert-with-dax-shepard...     podcast   
4  data/podcasts/armchair-expert-with-dax-shepard...     podcast   

                        source_name  \
0  Armchair Expert With Dax Shepard   
1  Armchair Expert With Dax Shepard   
2  Armchair Expert With Dax Shepard   
3  Armchair Expert With Dax Shepard   
4  Armchair Expert With Dax Shepard   

                                               title publish_date  chunk_id  \
0  Acquired Podcast on the NFL (with Ben Gilbert ...   2025-02-05         0   
1  Acquired Podcast on the NFL (with Ben Gilbert ...   2025-02-05         1   
2  Acquired Podcast on the NFL (with Ben Gilbert ...   2025-02-05         2   
3  Acquired Podcast on the NFL (with Ben

In [9]:
# Count rows per sentiment label. The result gets its own name so it does not
# overwrite the `textblob_sentiment` function; rebinding that name to a Series
# would make the scoring cell fail if it were re-run in the same kernel session.
sentiment_counts = df['sentiment'].value_counts()

print(sentiment_counts)


positive    289236
neutral      84576
negative     28603
Name: sentiment, dtype: int64


## Save DataFrame to CSV

In [10]:
# Write the frame, including the added sentiment columns, to CSV; index=False leaves out the row index.
df.to_csv('data/sentiment_analysis_textblob.csv', index=False)