# Sentiment Labelling using VADER

In [None]:
import torch

# Report the visible CUDA devices and choose the compute device.
print("Number of GPU: ", torch.cuda.device_count())
if torch.cuda.is_available():
    # get_device_name() initialises CUDA and raises when no usable GPU is
    # present, so only query it when CUDA is actually available; otherwise
    # the CPU fallback below would never be reached.
    print("GPU Name: ", torch.cuda.get_device_name())


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

In [None]:
import pandas as pd

# Load the preprocessed review CSV into a DataFrame and preview its first rows.
df = pd.read_csv('../data/preprocessed/review_tiktok_dataset_preprocessed.csv')
df.head()

In [None]:
from googletrans import Translator
from tqdm import tqdm

# Single googletrans client shared by every translate_comment call.
translator = Translator()
# Register tqdm's pandas integration so that Series.progress_apply exists.
tqdm.pandas()

def translate_comment(comment):
    """Translate one Indonesian review to English, falling back to the input.

    Args:
        comment: Raw cell value. Anything that is not a non-blank string
            (e.g. NaN, None, an empty or whitespace-only string) is returned
            untouched.

    Returns:
        The translated text, or ``comment`` unchanged when it is not
        translatable text or when the translation request fails.
    """
    if not isinstance(comment, str) or not comment.strip():
        return comment

    # NOTE(review): newer googletrans releases reportedly make translate() a
    # coroutine; this code assumes a synchronous client -- confirm against the
    # installed version.
    try:
        return translator.translate(comment, src='id', dest='en').text
    except Exception as e:
        # Deliberately best-effort: one failed request must not abort the
        # whole pass. tqdm.write is used instead of print so the message does
        # not break the progress bar drawn by progress_apply.
        tqdm.write(f"Translation failed: {e}")
        return comment


# Translate every value of 'content' (src='id' -> dest='en') with a progress
# bar; values that cannot be translated are kept as they are.
df['translated_content'] = df['content'].progress_apply(translate_comment)

In [None]:
# Save the DataFrame, including 'translated_content', without the index column.
df.to_csv('../data/preprocessed/review_tiktok_dataset_translated.csv', index=False)

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from tqdm import tqdm

# Single VADER analyzer shared by every get_sentiment_score call.
analyzer = SentimentIntensityAnalyzer()
# Register tqdm's pandas integration so that Series.progress_apply exists.
tqdm.pandas()

def get_sentiment_score(comment):
    """Return VADER's compound polarity score for ``comment``.

    Values that are not strings, and strings that are empty or contain only
    whitespace, are scored as 0.0 without calling the analyzer.
    """
    has_text = isinstance(comment, str) and bool(comment.strip())
    if not has_text:
        return 0.0

    scores = analyzer.polarity_scores(comment)
    return scores['compound']

# Score every translated review with the compound polarity, showing progress.
df['sentiment_score'] = df['translated_content'].progress_apply(get_sentiment_score)

In [None]:
def sentiment_label(sentiment_score: float, *, threshold: float = 0.05) -> str:
    """Map a compound polarity score to a sentiment class.

    Args:
        sentiment_score: Compound score to classify.
        threshold: Non-negative cutoff. Scores at or above ``threshold`` are
            positive, scores at or below ``-threshold`` are negative.
            Defaults to 0.05, the value previously hard-coded here.

    Returns:
        ``'positive'``, ``'negative'`` or ``'neutral'``. A NaN score fails
        both comparisons and is therefore labelled ``'neutral'``.

    Raises:
        ValueError: If ``threshold`` is negative, which would make the
            positive and negative ranges overlap.
    """
    if threshold < 0:
        raise ValueError(f"threshold must be non-negative, got {threshold}")

    if sentiment_score >= threshold:
        return 'positive'
    if sentiment_score <= -threshold:
        return 'negative'
    return 'neutral'
    
# Turn each compound score into a 'positive' / 'negative' / 'neutral' label.
df['sentiment_label'] = df['sentiment_score'].apply(sentiment_label)

In [None]:
# Preview the first rows, now including the score and label columns.
df.head()

In [None]:
# Save the DataFrame with sentiment scores and labels, without the index column.
df.to_csv('../data/preprocessed/review_tiktok_dataset_vader_sentiment.csv', index=False)