In [4]:
# Import necessary libraries
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [5]:
!pip install nltk



In [6]:
# Fetch the VADER lexicon; it must be present before the analyzer is built
nltk.download('vader_lexicon')

# Create a single analyzer instance that the scoring function reuses
sia = SentimentIntensityAnalyzer()

# Read the dataset and keep a handle on the column holding the headline text
data = pd.read_csv('aggregate.csv')
headlines = data['headline']

# Function to calculate sentiment scores
def get_sentiment_scores(text):
    """Return the VADER compound sentiment score for a single text.

    Args:
        text: The headline to score. Expected to be a string; any other
            value (for example the float NaN pandas uses for an empty
            CSV cell, or None) is treated as having no sentiment.

    Returns:
        The 'compound' entry of ``sia.polarity_scores(text)`` for string
        input, or 0.0 for non-string input.
    """
    # Guard against missing/non-text cells so that one bad row does not
    # abort scoring of the whole column; 0.0 falls in the neutral band.
    if not isinstance(text, str):
        return 0.0
    # Use the compound score as the single overall sentiment value
    return sia.polarity_scores(text)['compound']

# Score every headline element-wise and store the result as a new column
headline_scores = data['headline'].map(get_sentiment_scores)
data['sentiment_score'] = headline_scores

# Categorize sentiment based on the compound score
def categorize_sentiment(score, threshold=0.05):
    """Map a compound sentiment score to a categorical label.

    Args:
        score: Compound sentiment score to classify.
        threshold: Magnitude at or beyond which a score is considered
            non-neutral. Defaults to 0.05.

    Returns:
        'positive' if ``score >= threshold``, 'negative' if
        ``score <= -threshold``, otherwise 'neutral'. A NaN score fails
        both comparisons and is therefore labelled 'neutral'.
    """
    # The cut-offs are inclusive, matching the conventional VADER bands
    # (positive: >= 0.05, negative: <= -0.05, neutral: strictly between).
    if score >= threshold:
        return 'positive'
    if score <= -threshold:
        return 'negative'
    return 'neutral'

# Derive a label for each row from its compound score
sentiment_labels = data['sentiment_score'].map(categorize_sentiment)
data['sentiment_category'] = sentiment_labels

# Write the scored dataset to disk, leaving out the index column
data.to_csv('sentiment_scored_VADER.csv', index=False)

# Show the first few rows as a quick sanity check
preview = data.head()
print(preview)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\sridharv2010\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


                                            headline stock        date  \
0  AI Daily: Analyst sees Apple  Alibaba partners...  aapl  2025-03-03   
1  Apple’s iPhone 16e Is Likely to Underwhelm  Sa...  aapl  2025-03-03   
2  Apple CEO teases ‘something in the Air’ this week  aapl  2025-03-03   
3  Apple’s iPhone ceded market share in China  Eu...  aapl  2025-03-03   
4  Apple (AAPL): New Buy Recommendation for This ...  aapl  2025-03-03   

     open   close  sentiment_score sentiment_category  
0  241.79  238.03           0.3818           positive  
1  241.79  238.03           0.0000            neutral  
2  241.79  238.03          -0.2960           negative  
3  241.79  238.03           0.2960           positive  
4  241.79  238.03           0.0000            neutral  
