In [1]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
df=pd.read_csv("./Datasets/Headlines/News_headlines.csv", parse_dates=["Date"])
df

Unnamed: 0,Date,Headlines
0,2015-01-01,What Can We Expect From Apple Inc. In 2015? 1 ...
1,2015-02-01,What to expect from Apple in 2015 beyond its s...
2,2015-03-01,Mountie: An inexpensive and innovative way to ...
3,2015-04-01,"Donald Yacktman on the Sources of Moats, His C..."
4,2015-05-01,"SIM-free iPhone 6, 6 Plus reportedly debuting ..."
...,...,...
2360,2021-06-19,Best pre-Prime Day smartwatch deals on Apple W...
2361,2021-06-20,iPhone 13 rumors: As Apple's fall event gets c...
2362,2021-06-21,Apple is shortening new free TV Plus trials fr...
2363,2021-06-22,Google likely to soon face antitrust claims ov...


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2365 entries, 0 to 2364
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       2365 non-null   datetime64[ns]
 1   Headlines  2365 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 37.1+ KB


In [4]:
vader = SentimentIntensityAnalyzer()

scores = df['Headlines'].apply(vader.polarity_scores).tolist()

scores_df = pd.DataFrame(scores)

df = df.join(scores_df, rsuffix='_right')

df.head()

Unnamed: 0,Date,Headlines,neg,neu,pos,compound
0,2015-01-01,What Can We Expect From Apple Inc. In 2015? 1 ...,0.069,0.82,0.111,0.2263
1,2015-02-01,What to expect from Apple in 2015 beyond its s...,0.041,0.849,0.11,0.9881
2,2015-03-01,Mountie: An inexpensive and innovative way to ...,0.124,0.763,0.112,0.1381
3,2015-04-01,"Donald Yacktman on the Sources of Moats, His C...",0.074,0.874,0.052,-0.533
4,2015-05-01,"SIM-free iPhone 6, 6 Plus reportedly debuting ...",0.022,0.843,0.134,0.9973


In [5]:
def get_vader_score(sentence): 
    compound=vader.polarity_scores(sentence)['compound']
    if compound > 0.05: 
        return 1
    elif (compound >= -0.05) and (compound <=0.05): 
        return None
    else: 
        return 0
df['Vader_label']=df.apply(lambda x: get_vader_score(x['Headlines']), axis=1)

In [6]:
df.iloc[600:620]

Unnamed: 0,Date,Headlines,neg,neu,pos,compound,Vader_label
600,2016-08-23,Halliburton Silently Drops With Oil Will Apple...,0.058,0.833,0.109,0.989,1.0
601,2016-08-24,How iPhone Can Make or Break Apple Stock Forme...,0.053,0.842,0.105,0.9927,1.0
602,2016-08-25,UPDATE 1-Apple fixes security flaw after UAE d...,0.06,0.815,0.125,0.9954,1.0
603,2016-08-26,Apple CEO Tim Cook Sells $36 Million From Bloc...,0.108,0.813,0.08,-0.9508,0.0
604,2016-08-27,Report: Apple Inc. May Tap Intel Corporation f...,0.035,0.867,0.097,0.9042,1.0
605,2016-08-28,Apple&quot;s MacBook offers great battery life...,0.124,0.732,0.144,0.4871,1.0
606,2016-08-29,A Sneak Peek at the Apple Inc. iPhone 7 and 7 ...,0.056,0.884,0.06,0.077,1.0
607,2016-08-30,"Here&quot;s How Apple, Inc. Pays Taxes Oversea...",0.087,0.832,0.081,0.9481,1.0
608,2016-08-31,Did Apple Reveal iPhone 7 Plus Camera Secret? ...,0.065,0.875,0.061,-0.6631,0.0
609,2016-01-09,Apple&quot;s VR Ambitions Revealed Ahead Of Ri...,0.074,0.86,0.066,-0.9456,0.0


In [7]:
df.isna().sum()

Date            0
Headlines       0
neg             0
neu             0
pos             0
compound        0
Vader_label    14
dtype: int64

In [8]:
df = df.dropna()

In [9]:
df.isna().sum()

Date           0
Headlines      0
neg            0
neu            0
pos            0
compound       0
Vader_label    0
dtype: int64

In [10]:
df['Vader_label'] = df['Vader_label'].astype(int)

In [11]:
df

Unnamed: 0,Date,Headlines,neg,neu,pos,compound,Vader_label
0,2015-01-01,What Can We Expect From Apple Inc. In 2015? 1 ...,0.069,0.820,0.111,0.2263,1
1,2015-02-01,What to expect from Apple in 2015 beyond its s...,0.041,0.849,0.110,0.9881,1
2,2015-03-01,Mountie: An inexpensive and innovative way to ...,0.124,0.763,0.112,0.1381,1
3,2015-04-01,"Donald Yacktman on the Sources of Moats, His C...",0.074,0.874,0.052,-0.5330,0
4,2015-05-01,"SIM-free iPhone 6, 6 Plus reportedly debuting ...",0.022,0.843,0.134,0.9973,1
...,...,...,...,...,...,...,...
2360,2021-06-19,Best pre-Prime Day smartwatch deals on Apple W...,0.030,0.804,0.167,0.9949,1
2361,2021-06-20,iPhone 13 rumors: As Apple's fall event gets c...,0.019,0.841,0.140,0.9951,1
2362,2021-06-21,Apple is shortening new free TV Plus trials fr...,0.040,0.789,0.172,0.9994,1
2363,2021-06-22,Google likely to soon face antitrust claims ov...,0.028,0.837,0.135,0.9979,1


In [None]:
# df1 = df[['Date', 'Vader_label', 'Headlines']].copy()