In [187]:
import os
from pathlib import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv
load_dotenv()
analyzer = SentimentIntensityAnalyzer()

In [188]:
# news api for collecting data
api_key = os.getenv("NEWS_API_KEY")
newsapi = NewsApiClient(api_key=api_key)

In [189]:
# import our initial batch of 20 companies (10 bankrupt, 10 healthy)
filepath = Path("../Project_2/Stocks_MasterList_Test.csv")
stocks_df = pd.read_csv(filepath, parse_dates=True, infer_datetime_format=True)
stocks_df.head(41)

Unnamed: 0,Symbol,Company Name,Price Performance (4 Weeks),Sector,Market Capitalization,Binary
0,RHHBF,Roche Holding AG,-5.28026,Health Care,$2.39T,1


In [190]:
company_info_df = stocks_df[['Symbol', 'Company Name']].copy()
company_info_df.head()

Unnamed: 0,Symbol,Company Name
0,RHHBF,Roche Holding AG


In [191]:
search_list = []

for index, row in company_info_df.iterrows():
    ticker = row['Symbol']
    company_name = row['Company Name']
    search_string = ticker + ' ' + "AND" + ' ' + company_name
    search_list.append(search_string)

print(search_list)

['RHHBF AND Roche Holding AG']


In [1]:
# GME,AAPL,TSLA,FB,PFE,PLTR,AMC,AMZN,GOOGL,NFLX,MSFT,NVDA,T,SBUX,CRM,CMG,WFC,MRNA,BYND,NKLA

# used this list to test stocks, given stocks only work with newsapi if they are common
search_list = ['facebook', 
               'apple', 
               'tesla', 
               'google', 
               'palantir', 
               'amc', 
               'amazon', 
               'netflix', 
               'at&t', 
               'nvidia', 
               'pfizer', 
               'moderna', 
               'nikola', 
               'beyond meat', 
               'gamestop', 
               'starbucks', 
               'wells fargo', 
               'salesforce', 
               'microsoft',
               'verizon',
              'Carnival Cruise',
              'Macys',
              'Nordstrom',
              'Teladoc',
              'Norwegian Cruise',
              'Peloton',
              'Lululemon',
              'Wayfair',
              'Chevron',
              'Royal Caribbean Cruise',
              'Softbank',
              'AstraZeneca',
              'Johnson & Johnson',
              'Twitter',
              'Costco',
              'Zillow',
              'Gilead',
              'Boeing',
              'Okta',
              'Nike',
              'Home Depot',
              'Shopify',
              'Uber',
              'Lyft',
              'Doordash',
              'Airbnb',
              'PayPal',
              'Snowflake',
              'Quantumscape',
              'Activision',
              'DraftKings',
              'Lordstown',
              'Walgreens',
              'Spotify',
              'Fubotv',
              'Northrop Grumman',
              'Berkshire Hathaway',
              'Cisco',
              'Ebay',
              'Qualcomm',
              'Corning',
              'Oracle',
              'Intel',
              'Seagate',
              'Nextera',
              'Zscaler',
              'Crowdstrike',
              'Redfin',
              'Roblox',
              'Citigroup',
              'Docusign',
              'Amgen',
              'Regeneron',
              'Medtronic',
              'Microsoft',
              'ArcelorMittal',
              'Mattel',
              'Schlumberger',
              'Ford',
              'Intuit',
              'Pepsi',
              'Stryker',
              'Walmart',
              'Hasbro',
              'Novartis',
              'Toyota',
              'Mastercard',
              'Fortinet',
              'Groupon',
              'Autodesk',
              'ServiceNow',
              'Fireeye',
              'Proofpoint',
              'Qualys',
              'Fastly']

# begin the actual function for NLP
final_sentiment_df = pd.DataFrame(columns=['name', 'compound', 'positive', 'negative', 'neutral'])
company_sentiments = []

for search_string in search_list:
    company_headlines = newsapi.get_everything(
        q = search_string, 
        language = "en", 
        page_size = 100,
        sort_by = "relevancy")

    for article in company_headlines["articles"]:
        try: 
            text = article["content"]
            date = article["publishedAt"][:10] 
            sentiment = analyzer.polarity_scores(text)
            compound = sentiment["compound"]
            pos = sentiment["pos"]
            neu = sentiment["neu"]
            neg = sentiment["neg"]
        
            company_sentiments.append({
                "text": text,
                "date": date,
                "compound": compound,
                "positive": pos,
                "negative": neg,
                "neutral": neu})
        except AttributeError:
            pass
            
    sentiment_df = pd.DataFrame(company_sentiments) 
    cols = ["date", "text", "compound", "positive", "negative", "neutral"]
    sentiment_df = sentiment_df[cols]
    sentiment_stats_df = sentiment_df.describe()
    sentiment_stats_df.reset_index(drop=True, inplace=True)
    
    not_final_sentiment_df = pd.DataFrame()
    not_final_sentiment_df["name"] = [search_string]
    not_final_sentiment_df["compound"] = [sentiment_stats_df.loc[1]['compound']]
    not_final_sentiment_df["positive"] = [sentiment_stats_df.loc[1]['positive']]
    not_final_sentiment_df["negative"] = [sentiment_stats_df.loc[1]['negative']]
    not_final_sentiment_df["neutral"] = [sentiment_stats_df.loc[1]['neutral']]
    
    final_sentiment_df = final_sentiment_df.append(not_final_sentiment_df, ignore_index=True)
    
print(final_sentiment_df)

NameError: name 'pd' is not defined