In [1]:
import pandas as pd
import numpy as np

In [3]:
fname = 'dataset/nasdaq/overnight_sentiments.csv'

data = pd.read_csv( fname, index_col = 0)
data.head(3)

Unnamed: 0,datetime,stockcode,source,headline,article,urls,dt,IsMarketOpen,TradeDate,summary,_relevance,_sentiment
0,"February 07, 2019, 09:11:00 PM EDT",AMZN,RTTNews,Amazon CEO Jeff Bezos Accuses National Enquire...,\n\n\nShutterstock photo\n\n@media screen and ...,https://www.nasdaq.com/article/amazon-ceo-jeff...,2019-02-07 21:11:00-05:00,False,2019-02-08,Shutterstock photo@media screen and (Amazon CE...,11,0.717833
2,"February 07, 2019, 06:55:00 PM EDT",AMZN,Reuters,Amazon's Bezos says National Enquirer owner tr...,\n\n\nReuters\n\n@media screen and (max-device...,https://www.nasdaq.com/article/amazons-bezos-s...,2019-02-07 18:55:00-05:00,False,2019-02-08,"Jeff Bezos, chief executive of Amazon.com Inc,...",5,0.34276
3,"February 07, 2019, 06:26:00 PM EDT",AMZN,Reuters,Amazon's Bezos says National Enquirer tried to...,\n\n\nReuters\n\n@media screen and (max-device...,https://www.nasdaq.com/article/amazons-bezos-s...,2019-02-07 18:26:00-05:00,False,2019-02-08,"Jeff Bezos, chief executive of Amazon.com Inc,...",5,0.45036


In [26]:
def GetAvgSentiment( l_sentiments, l_relevances, threshold = 1, method = 'simple'):
    '''
    Given a list of sentiments and corresponding relevance score, this function returns...
    1) "simple": equally weighted average sentiment score for sentiments' with relevance >= "threshold"
    2) "weighted": weighted average sentiment score
    '''
    
    if method == 'simple':
        # --- Method 1 ---
        # simple average sentiment with threhold cut off
    
        l_senti = []

        for i, s_ in enumerate(l_sentiments):
            if l_relevances[i] > threshold:
                l_senti.append(s_)

        avg_score = np.mean(l_senti)
        return avg_score
    
    elif method == 'weighted':
        # --- Method 2 ---
        # weighted average senti w.r.t. relevance
        
        if sum(l_relevances) == 0:
            return np.nan
        
        weights = [rel/ sum(l_relevances) for rel in l_relevances]
        avg_score = 0

        for i, s_ in enumerate(l_sentiments):
            avg_score += s_ * weights[i]
        
        return avg_score
    else:
        print(f'Method = {method} is not available')
        return None

In [29]:
l_tickers = data['stockcode'].unique()
l_to_df = []

for stock in l_tickers:
    print(f'Calculating Daily Sentiments for {stock}...')
    idf = data[data['stockcode'] == stock]
    l_td = idf['TradeDate'].unique()
    
    for idate in l_td:
        inews = idf[idf['TradeDate']== idate]
        inews = inews.reset_index()
        
        ss = GetAvgSentiment(inews['_sentiment'], inews['_relevance'], threshold = 1, method = 'simple')
        l_to_df.append(
            [stock, idate, ss]
        )

Calculating Daily Sentiments for AMZN...
Calculating Daily Sentiments for GOOGL...


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Calculating Daily Sentiments for FB...
Calculating Daily Sentiments for NFLX...


In [17]:
data[
    data['stockcode'] == 'AMZN' 
][ data['TradeDate'] == '2018-11-20']

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,datetime,stockcode,source,headline,article,urls,dt,IsMarketOpen,TradeDate,summary,_relevance,_sentiment
1629,"November 20, 2018, 09:11:00 AM EDT",AMZN,Reuters,"US STOCKS-Retail gloom, tech weakness pin down...",\n\n\nShutterstock photo\n\n@media screen and ...,https://www.nasdaq.com/article/us-stocksretail...,2018-11-20 09:11:00-05:00,False,2018-11-20,Apple Inc'sAPPL.O 3.8 percent fall added to th...,0,0.22222
1630,"November 20, 2018, 09:11:00 AM EDT",AMZN,Reuters,"Retail gloom, tech weakness pin down Wall Street",\n\n\nReuters\n\n@media screen and (max-device...,https://www.nasdaq.com/article/retail-gloom-te...,2018-11-20 09:11:00-05:00,False,2018-11-20,Home improvement chain Lowe's Cos Inc fell 2....,0,-0.002
1631,"November 20, 2018, 07:29:00 AM EDT",AMZN,Reuters,"Best Buy boosts profit forecast, cautious on h...",\n\n\nReuters\n\n@media screen and (max-device...,https://www.nasdaq.com/article/best-buy-boosts...,2018-11-20 07:29:00-05:00,False,2018-11-20,Best Buy Co Inc raised its full-year earnings ...,0,0.806525
1632,"November 20, 2018, 07:29:00 AM EDT",AMZN,Reuters,Best Buy boosts profit forecast after sales beat,\n\n\nReuters\n\n@media screen and (max-device...,https://www.nasdaq.com/article/best-buy-boosts...,2018-11-20 07:29:00-05:00,False,2018-11-20,Best Buy Co Inc raised its full-year earnings ...,0,0.806525
1633,"November 20, 2018, 07:25:00 AM EDT",AMZN,Reuters,"Target 3rd-qtr comparable sales, earnings miss...",\n\n\nReuters\n\n@media screen and (max-device...,https://www.nasdaq.com/article/target-3rdqtr-c...,2018-11-20 07:25:00-05:00,False,2018-11-20,Target Corp said on Tuesday that third-quarte...,0,0.4296
1634,"November 20, 2018, 07:18:00 AM EDT",AMZN,Reuters,US STOCKS-Futures drop as Apple-led tech slide...,\n\n\nShutterstock photo\n\n@media screen and ...,https://www.nasdaq.com/article/us-stocksfuture...,2018-11-20 07:18:00-05:00,False,2018-11-20,U.S. stock futures dropped by up to 1percent o...,0,0.0877
1635,"November 20, 2018, 07:18:00 AM EDT",AMZN,Reuters,Futures drop as Apple-led tech slide deepens,\n\n\nReuters\n\n@media screen and (max-device...,https://www.nasdaq.com/article/futures-drop-as...,2018-11-20 07:18:00-05:00,False,2018-11-20,U.S. stock futures dropped by up to 1 percent ...,0,-0.0198
1636,"November 20, 2018, 07:10:00 AM EDT",AMZN,Reuters,"Kohl's tops same-store sales estimates, raises...",\n\n\nReuters\n\n@media screen and (max-device...,https://www.nasdaq.com/article/kohls-tops-same...,2018-11-20 07:10:00-05:00,False,2018-11-20,"especially on any results less than stellar,"" ...",0,0.23625
1637,"November 19, 2018, 04:39:00 PM EDT",AMZN,Reuters,"US STOCKS-Wall St tumbles as Apple, internet s...",\n\n\nShutterstock photo\n\n@media screen and ...,https://www.nasdaq.com/article/us-stockswall-s...,2018-11-19 16:39:00-05:00,False,2018-11-20,"Shares of FacebookFB.O were down 5.7 percent,A...",0,-0.17508
1638,"November 19, 2018, 04:08:00 PM EDT",AMZN,Reuters,"Wall St tumbles as Apple, internet stocks swoon",\n\n\nReuters\n\n@media screen and (max-device...,https://www.nasdaq.com/article/wall-st-tumbles...,2018-11-19 16:08:00-05:00,False,2018-11-20,U.S. stocks dropped and the Nasdaq fell 3 perc...,0,0.50815


In [30]:
df_csv = pd.DataFrame(data = l_to_df, columns = ['stockcode','trade_date', 'sentiment_score'])
df_csv.describe()

Unnamed: 0,sentiment_score
count,288.0
mean,0.449314
std,0.14718
min,-0.118733
25%,0.364988
50%,0.44518
75%,0.529565
max,0.9579


In [31]:
output_fname = 'dataset/nasdaq/daily_sentiment.csv'
df_csv.to_csv(output_fname)