In [9]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Read the raw review data; the CSV is decoded as Latin-1
df = pd.read_csv(filepath_or_buffer='vadarfull.csv', encoding='latin1')

# Keep the rows that have no 'review' text in a separate frame for inspection
missing_reviews = df.loc[df['review'].isnull()]

# Remove those rows from df itself (in place, so df is modified directly)
df.dropna(axis=0, subset=['review'], inplace=True)

# Create the VADER analyzer instance used by classify_sentiment
sia = SentimentIntensityAnalyzer()

# Function to classify sentiment
def classify_sentiment(text, pos_threshold=0.05, neg_threshold=-0.05, analyzer=None):
    """Label a piece of text as 'positive', 'negative' or 'neutral'.

    The label is derived from the 'compound' entry of the dictionary
    returned by the analyzer's ``polarity_scores`` method.

    Parameters
    ----------
    text : object
        The text to score. Values that are not ``str`` (for example numbers
        that pandas parsed from a mixed-type CSV column) are converted with
        ``str()`` first, because the analyzer expects string input.
    pos_threshold : float, default 0.05
        Compound scores greater than or equal to this are 'positive'.
    neg_threshold : float, default -0.05
        Compound scores less than or equal to this are 'negative'.
    analyzer : object, optional
        Any object exposing ``polarity_scores(text) -> dict`` with a
        'compound' key. When omitted, the notebook-level ``sia`` instance
        is used.

    Returns
    -------
    str
        One of 'positive', 'negative' or 'neutral'.
    """
    if analyzer is None:
        # Fall back to the analyzer created at notebook level.
        analyzer = sia

    if not isinstance(text, str):
        # Coerce up front instead of letting the analyzer fail on non-text input.
        text = str(text)

    compound_score = analyzer.polarity_scores(text)['compound']

    if compound_score >= pos_threshold:
        return 'positive'
    if compound_score <= neg_threshold:
        return 'negative'
    return 'neutral'

# Label every review by passing its text through classify_sentiment
df['Sentiment'] = df['review'].map(classify_sentiment)

# Build one dataframe per sentiment label
positive_reviews = df.loc[df['Sentiment'].eq('positive')]
negative_reviews = df.loc[df['Sentiment'].eq('negative')]
neutral_reviews = df.loc[df['Sentiment'].eq('neutral')]

# Write each subset to its own CSV file, leaving out the index column
positive_reviews.to_csv(path_or_buf='positive_reviews.csv', index=False)
negative_reviews.to_csv(path_or_buf='negative_reviews_vadar1.csv', index=False)
neutral_reviews.to_csv(path_or_buf='neutral_reviews.csv', index=False)


In [4]:
# Diagnostic cell: print the list of directories NLTK searches for its data
# resources. Presumably used to check where the VADER lexicon needs to be
# installed for SentimentIntensityAnalyzer to load — TODO confirm.
import nltk.data
print(nltk.data.path)


['/Users/nekdilkhan/nltk_data', '/Users/nekdilkhan/miniforge3/nltk_data', '/Users/nekdilkhan/miniforge3/share/nltk_data', '/Users/nekdilkhan/miniforge3/lib/nltk_data', '/usr/share/nltk_data', '/usr/local/share/nltk_data', '/usr/lib/nltk_data', '/usr/local/lib/nltk_data']
