In [1]:
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [2]:
# Make sure the VADER lexicon required by SentimentIntensityAnalyzer is available
nltk.download('vader_lexicon')

# Location of the raw reviews file.
# NOTE(review): absolute path, presumably a Colab working directory; adjust when running elsewhere.
REVIEWS_CSV = "/content/Reviews.csv"

# Source column -> name used in the rest of the notebook.
# The dict order also fixes the column order of the resulting frame.
COLUMN_RENAMES = {'Time': 'Timestamp', 'Score': 'Rating', 'Text': 'Review'}
source_columns = list(COLUMN_RENAMES)

# Parse only the columns that are kept, instead of loading every column and
# discarding most of them afterwards. `usecols` does not control column order,
# so re-select explicitly to keep the Time, Score, Text ordering.
df = pd.read_csv(REVIEWS_CSV, usecols=source_columns)[source_columns]
df = df.rename(columns=COLUMN_RENAMES)

# The raw time values are interpreted as epoch seconds (unit='s')
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

# Analyzer used by classify_sentiment below
sia = SentimentIntensityAnalyzer()

# Function to classify sentiment
def classify_sentiment(review, analyzer=None, threshold=0.05):
    """Label a review 'Positive', 'Negative' or 'Neutral' from its compound score.

    Args:
        review: Review text. Non-string values are converted with ``str``;
            missing values (``None`` or a float NaN) are scored as empty text
            rather than as the literal words "None" / "nan".
        analyzer: Object exposing ``polarity_scores(text)`` that returns a
            mapping with a ``'compound'`` entry. Defaults to the notebook-level
            ``sia`` analyzer when omitted.
        threshold: Magnitude the compound score must reach to count as
            non-neutral. Defaults to 0.05.

    Returns:
        ``'Positive'`` when compound >= threshold, ``'Negative'`` when
        compound <= -threshold, otherwise ``'Neutral'``.
    """
    if analyzer is None:
        analyzer = sia

    # NaN is the only float that is not equal to itself.
    is_missing = review is None or (isinstance(review, float) and review != review)
    text = "" if is_missing else str(review)

    compound = analyzer.polarity_scores(text)['compound']

    # Inclusive bounds: a score of exactly +/-threshold is not neutral,
    # following the thresholds recommended in the VADER documentation.
    if compound >= threshold:
        return 'Positive'
    if compound <= -threshold:
        return 'Negative'
    return 'Neutral'

# Label every review with its sentiment class
sentiment_labels = df['Review'].map(classify_sentiment)
df['Sentiment'] = sentiment_labels

# Persist the labelled frame; the row index is not written
df.to_csv("cleaned_reviews.csv", index=False)

print("done'cleaned_reviews.csv'.")


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


done'cleaned_reviews.csv'.
