In [187]:
import os
from pathlib import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv
load_dotenv()
analyzer = SentimentIntensityAnalyzer()

In [188]:
# news api for collecting data
api_key = os.getenv("news_api")
newsapi = NewsApiClient(api_key=api_key)

In [189]:
# import our initial batch of 20 companies (10 bankrupt, 10 healthy)
filepath = Path("../Project_2/Stocks_MasterList_Test.csv")
stocks_df = pd.read_csv(filepath, parse_dates=True, infer_datetime_format=True)
stocks_df.head(41)

Unnamed: 0,Symbol,Company Name,Price Performance (4 Weeks),Sector,Market Capitalization,Binary
0,RHHBF,Roche Holding AG,-5.28026,Health Care,$2.39T,1


In [190]:
company_info_df = stocks_df[['Symbol', 'Company Name']].copy()
company_info_df.head()

Unnamed: 0,Symbol,Company Name
0,RHHBF,Roche Holding AG


In [191]:
search_list = []

for index, row in company_info_df.iterrows():
    ticker = row['Symbol']
    company_name = row['Company Name']
    search_string = ticker + ' ' + "AND" + ' ' + company_name
    search_list.append(search_string)

print(search_list)

['RHHBF AND Roche Holding AG']


In [193]:
# GME,AAPL,TSLA,FB,PFE,PLTR,AMC,AMZN,GOOGL,NFLX,MSFT,NVDA,T,SBUX,CRM,CMG,WFC,MRNA,BYND,NKLA

# used this list to test stocks, given stocks only work with newsapi if they are common
search_list = ['facebook', 
               'apple', 
               'tesla', 
               'google', 
               'palantir', 
               'amc', 
               'amazon', 
               'netflix', 
               'at&t', 
               'nvidia', 
               'pfizer', 
               'moderna', 
               'nikola', 
               'beyond meat', 
               'gamestop', 
               'starbucks', 
               'wells fargo', 
               'salesforce', 
               'microsoft',
               'verizon']

# begin the actual function for NLP
final_sentiment_df = pd.DataFrame(columns=['name', 'compound', 'positive', 'negative', 'neutral'])
company_sentiments = []

for search_string in search_list:
    company_headlines = newsapi.get_everything(
        q = search_string, 
        language = "en", 
        page_size = 100,
        sort_by = "relevancy")

    for article in company_headlines["articles"]:
        try: 
            text = article["content"]
            date = article["publishedAt"][:10] 
            sentiment = analyzer.polarity_scores(text)
            compound = sentiment["compound"]
            pos = sentiment["pos"]
            neu = sentiment["neu"]
            neg = sentiment["neg"]
        
            company_sentiments.append({
                "text": text,
                "date": date,
                "compound": compound,
                "positive": pos,
                "negative": neg,
                "neutral": neu})
        except AttributeError:
            pass
            
    sentiment_df = pd.DataFrame(company_sentiments) 
    cols = ["date", "text", "compound", "positive", "negative", "neutral"]
    sentiment_df = sentiment_df[cols]
    sentiment_stats_df = sentiment_df.describe()
    sentiment_stats_df.reset_index(drop=True, inplace=True)
    
    not_final_sentiment_df = pd.DataFrame()
    not_final_sentiment_df["name"] = [search_string]
    not_final_sentiment_df["compound"] = [sentiment_stats_df.loc[1]['compound']]
    not_final_sentiment_df["positive"] = [sentiment_stats_df.loc[1]['positive']]
    not_final_sentiment_df["negative"] = [sentiment_stats_df.loc[1]['negative']]
    not_final_sentiment_df["neutral"] = [sentiment_stats_df.loc[1]['neutral']]
    
    final_sentiment_df = final_sentiment_df.append(not_final_sentiment_df, ignore_index=True)
    
print(final_sentiment_df)

           name  compound  positive  negative   neutral
0      facebook  0.057596  0.055582  0.038847  0.905592
1         apple  0.129631  0.062559  0.031287  0.906169
2         tesla  0.135232  0.061614  0.029526  0.905457
3        google  0.158823  0.065476  0.028667  0.903321
4      palantir  0.170719  0.068208  0.028148  0.901574
5           amc  0.179199  0.070910  0.029000  0.898372
6        amazon  0.204182  0.079820  0.030685  0.888019
7       netflix  0.224505  0.084184  0.030235  0.884294
8          at&t  0.213393  0.081505  0.029987  0.887369
9        nvidia  0.215711  0.081997  0.030307  0.886668
10       pfizer  0.205034  0.078043  0.029287  0.891735
11      moderna  0.197311  0.075021  0.028075  0.896047
12       nikola  0.192558  0.073933  0.028195  0.897081
13  beyond meat  0.191310  0.073452  0.028245  0.897564
14     gamestop  0.193367  0.073162  0.027607  0.898540
15    starbucks  0.192852  0.073118  0.027627  0.898603
16  wells fargo  0.196081  0.073805  0.027400  0