In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import re, unicodedata
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification 
from scipy.special import softmax

In [2]:
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

In [3]:
reddit_posts = pd.read_csv('data/enha_reddit_posts.csv', sep=",", encoding='utf-8')
reddit_posts = reddit_posts.dropna(subset=['selftext'])
reddit_posts

Unnamed: 0,sub,id,created_date,title,selftext,score,num_comments
23,kpop,1lgundi,2025-06-21 13:45:33,"This Week In KPOP - June 21, 2025",Welcome to [This Week In KPOP](https://i.imgur...,4,1
40,kpop,1l3px4g,2025-06-05 06:07:33,ENHYPEN - DESIRE : UNLEASH (6th Mini Album),#[ENHYPEN - DESIRE : UNLEASH](https://preview....,206,70
47,kpop,1lbfqb2,2025-06-14 20:37:03,"This Week In KPOP - June 14, 2025",Welcome to [This Week In KPOP](https://i.imgur...,32,7
63,kpop,1l5l6zu,2025-06-07 15:45:13,"This Week In KPOP - June 7, 2025",Welcome to [This Week In KPOP](https://i.imgur...,23,1
112,kpop,1kaxjqi,2025-04-29 22:07:08,"The ""2024 in B-sides"" Rate!",[Hello everyone!](https://i.imgur.com/4Hb7mUH....,35,2
...,...,...,...,...,...,...,...
3045,Enhypenthoughts,1hk80lp,2024-12-22 22:19:54,romance untold daydream album pulls!!,IM SO HAPPY WITH EVERYTHING I GOT LIKE AHHHHHH...,92,15
3046,Enhypenthoughts,1hjwcke,2024-12-22 11:58:21,Support??💗,Guys i recently opened a bandlab and i cover s...,32,0
3047,Enhypenthoughts,1hjw9jx,2024-12-22 11:51:40,Fave kpop comeback this year?,This year definitely had its up and downs(more...,19,31
3048,Enhypenthoughts,1hjkpy8,2024-12-21 23:22:47,Enhypens Sunoo,Guys i really don’t know what to say except i ...,175,22


In [4]:
def remove_URL(text):
    """Remove URLs from a sample string"""
    return re.sub(r"http\S+", "", text)

def remove_emoji(text):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)

def remove_punctuation(string):
    return re.sub(r'[^\w\s]', '', string)

reddit_posts['processed_text'] = reddit_posts['selftext'].apply(remove_URL)
reddit_posts['processed_text'] = reddit_posts['processed_text'].apply(remove_emoji)
reddit_posts['processed_text'] = reddit_posts['processed_text'].apply(remove_punctuation)
reddit_posts

Unnamed: 0,sub,id,created_date,title,selftext,score,num_comments,processed_text
23,kpop,1lgundi,2025-06-21 13:45:33,"This Week In KPOP - June 21, 2025",Welcome to [This Week In KPOP](https://i.imgur...,4,1,Welcome to This Week In KPOP a collection of e...
40,kpop,1l3px4g,2025-06-05 06:07:33,ENHYPEN - DESIRE : UNLEASH (6th Mini Album),#[ENHYPEN - DESIRE : UNLEASH](https://preview....,206,70,ENHYPEN DESIRE UNLEASH\n\nRelease Date 5 Jun...
47,kpop,1lbfqb2,2025-06-14 20:37:03,"This Week In KPOP - June 14, 2025",Welcome to [This Week In KPOP](https://i.imgur...,32,7,Welcome to This Week In KPOP a collection of e...
63,kpop,1l5l6zu,2025-06-07 15:45:13,"This Week In KPOP - June 7, 2025",Welcome to [This Week In KPOP](https://i.imgur...,23,1,Welcome to This Week In KPOP a collection of e...
112,kpop,1kaxjqi,2025-04-29 22:07:08,"The ""2024 in B-sides"" Rate!",[Hello everyone!](https://i.imgur.com/4Hb7mUH....,35,2,Hello everyone Weve explored the singles of KP...
...,...,...,...,...,...,...,...,...
3045,Enhypenthoughts,1hk80lp,2024-12-22 22:19:54,romance untold daydream album pulls!!,IM SO HAPPY WITH EVERYTHING I GOT LIKE AHHHHHH...,92,15,IM SO HAPPY WITH EVERYTHING I GOT LIKE AHHHHHH...
3046,Enhypenthoughts,1hjwcke,2024-12-22 11:58:21,Support??💗,Guys i recently opened a bandlab and i cover s...,32,0,Guys i recently opened a bandlab and i cover s...
3047,Enhypenthoughts,1hjw9jx,2024-12-22 11:51:40,Fave kpop comeback this year?,This year definitely had its up and downs(more...,19,31,This year definitely had its up and downsmore ...
3048,Enhypenthoughts,1hjkpy8,2024-12-21 23:22:47,Enhypens Sunoo,Guys i really don’t know what to say except i ...,175,22,Guys i really dont know what to say except i l...


In [5]:
example = reddit_posts['processed_text'].iloc[1]
print(example)

ENHYPEN  DESIRE  UNLEASH

Release Date 5 June 2025



Track  Lyrics  Composed  Arranged by

01 Flashover  Tyler Spry Akil worldwidefresh King Kareen Lomax Joel Corry ARMADILLO RANGA Lee Ji Won MUMW Oh Hyun Sun lalala studio Cho Yoon Kyung Maryjane lalala studio Shizu danke Bae Hyun Sung MUMW
02 Bad Desire With or Without You  Audio  Cirkut JBACH Jessica Agombar hitman bang Slow Rabbit ARMADILLO Jang Yeo Jin lalala studio Peridot
03 Outside  Supreme Boi Hiss noise Slush Puppy martin Pink Slip Anthony Watts Gino The Ghost 329 PNP Lee Green lalala studio Lee Hyung Seok PNP Lim Su Ran lalala studio  MUMW
04 Loose Korean Ver  Tony Esterly Josh Allen Soaky Siren Anthony Watts ARMADILLO Shizu Cha Yu Bin danke Jang Jung Won Jamfactory Kim In Hyung SSAC MUMW Jang Seung Min lalala studio Kim Soo Ji lalala studio Jinri Full8loom Lee Ji Yeon lalala studio Yoon Ye Ji PNP
05 Helium  FRANTS Jay Edwin Honoret Jeremiah Daly Jesse Thomas  MUMW hitman bang ARMADILLO Peridot Shizu bay 153Joombas Choi May 

In [6]:
def polarity_scores_roberta(example):
    encoded_text = tokenizer(
        example,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512
    )
    output = model(**encoded_text)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    scores_dict = {
        "roberta_neg" : scores[0],
        "roberta_neu" : scores[1],
        "roberta_pos" : scores[2]
    }
    return scores_dict

In [7]:
res = {}
for _, row in reddit_posts.iterrows(): 
    try :
        text = row['processed_text']
        myID = row['id']
        roberta_result = polarity_scores_roberta(text)
        res[myID] = roberta_result
    except RuntimeError :
        print(f"Broke for {myID}")

In [8]:
roberta_df = pd.DataFrame(res).T
roberta_df.reset_index(inplace=True)
roberta_df.rename(columns={"index": "id"}, inplace=True)

# Merge with reddit_posts if needed
reddit_posts = reddit_posts.merge(roberta_df, on="id", how="inner")

In [10]:
reddit_posts.to_csv('roberta_results.csv', sep = '\t')

In [12]:
roberta_results = pd.read_csv('roberta_results.csv', sep="\t", encoding='utf-8')

In [13]:
roberta_results

Unnamed: 0.1,Unnamed: 0,sub,id,created_date,title,selftext,score,num_comments,processed_text,roberta_neg,roberta_neu,roberta_pos
0,0,kpop,1lgundi,2025-06-21 13:45:33,"This Week In KPOP - June 21, 2025",Welcome to [This Week In KPOP](https://i.imgur...,4,1,Welcome to This Week In KPOP a collection of e...,0.225868,0.711950,0.062182
1,1,kpop,1l3px4g,2025-06-05 06:07:33,ENHYPEN - DESIRE : UNLEASH (6th Mini Album),#[ENHYPEN - DESIRE : UNLEASH](https://preview....,206,70,ENHYPEN DESIRE UNLEASH\n\nRelease Date 5 Jun...,0.083048,0.873363,0.043589
2,2,kpop,1lbfqb2,2025-06-14 20:37:03,"This Week In KPOP - June 14, 2025",Welcome to [This Week In KPOP](https://i.imgur...,32,7,Welcome to This Week In KPOP a collection of e...,0.176815,0.747407,0.075778
3,3,kpop,1l5l6zu,2025-06-07 15:45:13,"This Week In KPOP - June 7, 2025",Welcome to [This Week In KPOP](https://i.imgur...,23,1,Welcome to This Week In KPOP a collection of e...,0.161751,0.765426,0.072823
4,4,kpop,1kaxjqi,2025-04-29 22:07:08,"The ""2024 in B-sides"" Rate!",[Hello everyone!](https://i.imgur.com/4Hb7mUH....,35,2,Hello everyone Weve explored the singles of KP...,0.037346,0.735257,0.227397
...,...,...,...,...,...,...,...,...,...,...,...,...
2462,2462,Enhypenthoughts,1hk80lp,2024-12-22 22:19:54,romance untold daydream album pulls!!,IM SO HAPPY WITH EVERYTHING I GOT LIKE AHHHHHH...,92,15,IM SO HAPPY WITH EVERYTHING I GOT LIKE AHHHHHH...,0.001658,0.007019,0.991322
2463,2463,Enhypenthoughts,1hjwcke,2024-12-22 11:58:21,Support??💗,Guys i recently opened a bandlab and i cover s...,32,0,Guys i recently opened a bandlab and i cover s...,0.001091,0.020677,0.978233
2464,2464,Enhypenthoughts,1hjw9jx,2024-12-22 11:51:40,Fave kpop comeback this year?,This year definitely had its up and downs(more...,19,31,This year definitely had its up and downsmore ...,0.007349,0.114040,0.878611
2465,2465,Enhypenthoughts,1hjkpy8,2024-12-21 23:22:47,Enhypens Sunoo,Guys i really don’t know what to say except i ...,175,22,Guys i really dont know what to say except i l...,0.004122,0.015525,0.980354
