**Description:**

This notebook uses the `textblob` library to run sentiment analysis on the news headlines of multiple stocks (headlines are fetched with `pyfinviz`). The objective is to rank the stocks by the average sentiment of their headlines.





In [1]:
# Uncomment and run once if these third-party packages are not installed yet.
# !pip install pyfinviz
# !pip install textblob

In [2]:
from textblob import TextBlob
import numpy as np
from pyfinviz.quote import Quote
import pandas as pd
import yfinance as yf

In [3]:
# Ticker symbols that make up the universe scored in this notebook.
universe = [
    "SPY", "V", "PYPL", "VZ", "T", "VIV", "AMX", "VOD", "TU", "TEF",
    "TSLA", "GM", "F", "STLA", "HMC", "NIO", "PG", "KO", "PEP", "ABEV",
    "NKE", "PM", "UL", "BUD", "MDLZ", "MO", "DIS", "CMCSA", "SBUX", "WBD",
    "BSX", "HLN", "AAPL", "NVDA", "TSM", "CSCO", "QCOM", "RTX", "AMD", "INTC",
    "MU", "NOK", "XOM", "PBR", "CVX", "SHEL", "COP", "TTE", "BP", "EQNR",
    "CNQ", "SU", "UBS", "JPM", "BAC", "ITUB", "WFC", "MS", "HSBC", "SCHW",
    "BBD", "JNJ", "MRK", "ABBV", "PFE", "NVO", "NVS", "AZN", "ABT", "BMY",
    "SNY", "ENB", "SLB", "EPD", "KMI", "ET", "VALE", "BHP", "RIO", "FCX",
    "GOLD", "SUZ", "AMCR", "GE", "ABB", "AMZN", "WMT", "HD", "BABA", "PDD",
    "CVS", "JD", "TJX", "CPNG", "MSFT", "GOOG", "GOOGL", "META", "ORCL", "BEKE",
    "SAP", "INFY", "WIT", "NU", "CSX", "UBER", "GRAB", "NEE", "SO", "PCG",
    "MCD",
]

In [4]:
# Holdings to inspect more closely; every one of them must belong to `universe`.
portfolio = [
    "AAPL", "KO", "MCD", "PFE", "MSFT",
    "TSM", "TSLA", "MRK", "PM", "JNJ",
]

# Fail early on a ticker that is not part of the universe.
for holding in portfolio:
    assert holding in universe, f"{holding} is not in the universe"

In [5]:
# Average headline sentiment per ticker, keyed by ticker symbol.
# Tickers whose headlines cannot be fetched are reported and left out of the dict.
sentiment_all = {}

for position, ticker in enumerate(universe, start=1):

    try:
        # TextBlob polarity is a sentiment score from -1 to 1.
        polarities = (
            TextBlob(headline).sentiment.polarity
            for headline in Quote(ticker).outer_news_df["Headline"]
        )
        # Headlines scored exactly 0 (neutral) are excluded from the average.
        scored = [polarity for polarity in polarities if polarity != 0]

        # With no non-neutral headline the average is undefined: store NaN
        # explicitly instead of letting np.average() warn on an empty list.
        sentiment_all[ticker] = np.average(scored) if scored else np.nan

    except Exception as err:
        # Best-effort scrape: report the failure and carry on with the next ticker.
        print("Error: {} at ticker {}".format(err, ticker))

    # enumerate() supplies the counter, so no list search is needed per ticker.
    # '\r' keeps the progress report on a single, overwritten line.
    print("Progress: {}/{}".format(position, len(universe)), end='\r')



Error: 'Quote' object has no attribute 'outer_news_df' at ticker ABB
Progress: 111/111

In [6]:
# One row per ticker, holding its average headline sentiment.
df_sentiment = pd.DataFrame.from_dict(
    sentiment_all,
    orient='index',
    columns=['Avg. Sentiment'],
)

# Display the tickers ranked from most to least positive, shaded by value.
(
    df_sentiment
    .sort_values(by='Avg. Sentiment', ascending=False)
    .style
    .background_gradient()
)

Unnamed: 0,Avg. Sentiment
NOK,0.556382
SHEL,0.516758
ABEV,0.42687
PG,0.416806
NU,0.414193
ITUB,0.409635
PM,0.39106
COP,0.379285
JNJ,0.37395
EPD,0.372626


In [7]:
# Sentiment ranking restricted to the portfolio holdings.
# reindex() is used instead of .loc[]: a holding whose headlines could not be
# scored appears as NaN (sorted last) instead of raising KeyError.
df_sentiment.reindex(portfolio).sort_values('Avg. Sentiment', ascending=False).style.background_gradient()

Unnamed: 0,Avg. Sentiment
PM,0.39106
JNJ,0.37395
MRK,0.340928
KO,0.338643
MCD,0.288698
PFE,0.261939
TSM,0.256066
AAPL,0.250538
MSFT,0.168897
TSLA,0.114737


In [9]:
# Get 1 year of daily adjusted close prices for each ticker.
# auto_adjust=False is passed explicitly so the 'Adj Close' column is present
# whatever default the installed yfinance version uses.
data = yf.download(
    universe,
    start="2020-01-01",
    end="2021-01-01",
    auto_adjust=False,
)['Adj Close']
# Drop every ticker that has at least one missing price in the window
# (e.g. symbols whose download failed).
data = data.dropna(axis=1)
data

[*********************100%***********************]  111 of 111 completed

ERROR 
4 Failed downloads:
ERROR ['ABB']: Exception('ABB: No timezone found, symbol may be delisted')
ERROR ['NU']: Exception("NU: Data doesn't exist for startDate = 1577854800, endDate = 1609477200")
ERROR ['CPNG']: Exception("CPNG: Data doesn't exist for startDate = 1577854800, endDate = 1609477200")
ERROR ['HLN']: Exception("HLN: Data doesn't exist for startDate = 1577854800, endDate = 1609477200")





Unnamed: 0_level_0,AAPL,ABBV,ABEV,ABT,AMCR,AMD,AMX,AMZN,AZN,BABA,...,V,VALE,VIV,VOD,VZ,WBD,WFC,WIT,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,73.249031,75.656311,4.183811,81.730209,9.064963,49.099998,14.912305,94.900497,45.610382,219.770004,...,186.293243,9.837492,11.860083,15.216392,50.476467,32.220001,48.727352,3.691714,112.085129,58.054886
2020-01-03,72.536888,74.938187,4.130740,80.733826,8.945575,48.599998,14.893963,93.748497,45.338840,217.000000,...,184.811661,9.720466,11.809793,15.106755,49.939041,32.029999,48.428181,3.721169,111.095657,57.588154
2020-01-06,73.114883,75.529602,4.086514,81.156822,8.885881,48.389999,14.820593,95.143997,45.148762,216.639999,...,184.411972,9.610756,11.692448,15.145909,49.831558,31.959999,48.138084,3.780079,110.869476,58.030338
2020-01-07,72.771019,75.098709,4.095359,80.705635,8.809132,48.250000,14.600484,95.343002,45.320736,217.630005,...,183.924637,9.676581,11.675685,15.044100,49.277592,32.070000,47.739204,3.780079,109.842293,57.555405
2020-01-08,73.941635,75.630966,4.104204,81.034622,8.843241,47.830002,14.545457,94.598503,45.212124,218.000000,...,187.073059,9.669269,11.650543,15.114583,49.368542,32.110001,47.884262,3.789898,109.465355,56.687450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-24,129.850571,92.068710,2.768984,103.451172,10.368696,91.809998,13.438074,158.634506,45.170944,222.000000,...,204.681427,12.882509,7.376836,13.881310,50.777046,28.570000,28.139160,5.518682,137.487122,36.783619
2020-12-28,134.494781,92.238121,2.750827,102.916489,10.324158,91.599998,13.503993,164.197998,45.971577,222.360001,...,208.535797,12.760470,7.351428,13.914540,50.897861,29.450001,28.224030,5.498973,139.135025,36.907417
2020-12-29,132.703995,93.352646,2.832535,103.432060,10.333065,90.620003,13.777086,166.100006,46.455688,236.259995,...,210.242279,12.844373,7.334487,13.823160,50.751160,29.690001,28.082581,5.508828,138.253586,36.491829
2020-12-30,131.572479,93.860870,2.768984,103.537102,10.457774,92.290001,13.871256,164.292496,46.716358,238.389999,...,214.155457,12.928273,7.512344,13.831468,50.172974,29.809999,28.054289,5.558102,138.138611,36.783619


In [11]:
# Average daily percentage return ("drift") per ticker over the price window.
# pct_change() leaves NaN in the first row; mean() skips it by default.
drift = data.pct_change().mean().to_dict()

df_drift = pd.DataFrame.from_dict(drift, orient='index', columns=['Avg. Drift'])
# Align sentiment to the drift table by ticker. A ticker that has prices but
# no sentiment score gets NaN here instead of raising KeyError.
df_drift["Sentiment"] = df_sentiment["Avg. Sentiment"].reindex(df_drift.index)
df_drift.sort_values('Avg. Drift', ascending=False).style.background_gradient()

Unnamed: 0,Avg. Drift,Sentiment
NIO,0.012249,0.238257
TSLA,0.009973,0.114737
PDD,0.006704,0.322541
JD,0.003824,0.138232
NVDA,0.003765,0.194959
FCX,0.003725,0.22633
PYPL,0.003516,0.216857
AMD,0.00321,0.225116
UBER,0.003043,0.268623
TSM,0.002886,0.256066
