# Sentiment prediction of stock performance
#### Takes in a list of webpages resulting from a stock ticker search, calculates the overall sentiment and the sentiment volatility, and gives a bullish/bearish estimate

In [123]:
import boto3
import statistics
import json
from collections import Counter
import numpy as np
import tensorflow as tf # Neural Network model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt
from arch import arch_model  # GARCH model
import requests
from bs4 import BeautifulSoup

In [124]:
# Module-level AWS Comprehend client; sentiment() below calls
# client.detect_sentiment on it for every webpage it scores.
# NOTE(review): presumably region and credentials come from the ambient boto3
# configuration -- confirm before running outside the original environment.
client = boto3.client('comprehend')


class SentimentMarker:
    """Pair a sentiment label with the score that accompanies it.

    Attributes:
        sentiment: The sentiment label exactly as supplied by the caller.
        sentiment_score: The score supplied alongside the label.
    """

    def __init__(self, sentiment, sentiment_score):
        self.sentiment = sentiment
        self.sentiment_score = sentiment_score

    def __repr__(self):
        # A readable repr keeps instances inspectable in notebook output and
        # logs instead of showing an opaque object address.
        return (
            f"{type(self).__name__}(sentiment={self.sentiment!r}, "
            f"sentiment_score={self.sentiment_score!r})"
        )

In [246]:
# sentiment() takes in a list of webpage dicts (each with a 'bodyText' entry) from webscraping and returns the sentiment analysis; utf8len() is its byte-size helper
def utf8len(s):
    """Return the number of bytes *s* occupies once encoded as UTF-8."""
    encoded = s.encode('utf-8')
    return len(encoded)

def sentiment(webpage_list):
    """Score a batch of scraped webpages with AWS Comprehend.

    Each page's ``bodyText`` is sent to ``client.detect_sentiment``. The label
    returned most often (see ``overall_sent``) becomes the overall sentiment,
    and the second return value is the mean of that label's ``SentimentScore``
    entry across all analysed pages.

    Args:
        webpage_list: Iterable of dicts, each holding the page text under the
            ``'bodyText'`` key.

    Returns:
        Tuple ``(overall_sentiment, uncertainty)``: the capitalized winning
        label (e.g. ``'Neutral'``) and the mean score for that label.

    Raises:
        ValueError: If no page was analysed, i.e. the input was empty or
            every page exceeded the 5000-byte size cut-off.
    """
    responses = []
    for web_page in webpage_list:
        body_text = web_page['bodyText']
        # Pages larger than 5000 UTF-8 bytes are skipped rather than sent.
        # NOTE(review): presumably this mirrors the DetectSentiment request
        # size limit -- confirm against the Comprehend API reference.
        if utf8len(body_text) > 5000:
            continue
        responses.append(
            client.detect_sentiment(Text=body_text, LanguageCode='en')
        )

    # Without this guard an empty batch failed deep inside overall_sent with
    # an IndexError that said nothing about the actual cause.
    if not responses:
        raise ValueError(
            "No webpages could be analysed: the list was empty or every "
            "page exceeded 5000 bytes"
        )

    labels = [response['Sentiment'] for response in responses]
    # capitalize() already lowercases everything after the first character,
    # turning e.g. 'NEUTRAL' into the 'Neutral' key used in SentimentScore.
    overall_sentiment = overall_sent(labels).capitalize()  # plurality label
    print(overall_sentiment)

    scores = [
        response['SentimentScore'][overall_sentiment] for response in responses
    ]
    uncertainty = statistics.mean(scores)  # mean score of the winning label
    return overall_sentiment, uncertainty


def overall_sent(sentiment_markers):
    """Return the label that occurs most often in *sentiment_markers*."""
    ranked = Counter(sentiment_markers).most_common()
    top_label, _top_count = ranked[0]
    return top_label

In [247]:
# Load the sample scraping results and print their aggregate sentiment.
# The file is decoded as UTF-8 explicitly so the result does not depend on
# the platform's default text encoding.
with open('sample.json', 'r', encoding='utf-8') as openfile:
    # Reading from json file
    json_object = json.load(openfile)

print(sentiment(json_object))

Neutral
('Neutral', 0.7165432870388031)


In [83]:
def result_classification(overall_sentiment, uncertainty, stock_ticker):
    """Build the human-readable summary sentences for one ticker.

    Args:
        overall_sentiment: Sentiment label to report, e.g. ``'Neutral'``.
        uncertainty: Certainty figure for the header sentence. A number is
            treated as a fraction and rendered as a percentage (sentiment()
            returns values such as 0.7165); any other value is inserted
            verbatim followed by ``%``.
        stock_ticker: Ticker symbol the summary is about.

    Returns:
        List of three strings: the header, the beta explanation and the
        growth explanation, in that order.
    """
    beta = get_beta(stock_ticker)
    volatility_class = get_volatility(beta)
    bear_bull_class = get_bullish(overall_sentiment, uncertainty)
    growth_class = get_growth(bear_bull_class, volatility_class)

    # Numeric scores are fractions, so format them as a percentage instead of
    # printing e.g. "0.7165% certain".
    if isinstance(uncertainty, (int, float)):
        certainty_text = f"{uncertainty:.1%}"
    else:
        certainty_text = f"{uncertainty}%"

    # f-strings are used because the helper results are not all strings (the
    # classifier stubs return ints or None); "+" concatenation raised
    # TypeError on those values.
    header = (
        f"Our model is {certainty_text} certain that the overall mood on "
        f"{stock_ticker} is {overall_sentiment}"
    )
    beta_explanation = (
        f"Since the beta of this company is {beta},it is {volatility_class}"
    )
    growth_explanation = (
        f"Given that the market is {bear_bull_class}, our model expects "
        f"{growth_class}"
    )
    # This entire section may be replaced with a LLM wrapper (AWS offers a variety of them)
    # Once we have growth prediction, we can inform user of stock's historical correlation
    # between its performance and market sentiment
    return [header, beta_explanation, growth_explanation]


# TODO: Use web scraper or online database to find what a stock's beta is
def get_beta(ticker):
    """Scrape a ticker's beta from its Yahoo Finance summary page.

    Args:
        ticker: Stock ticker symbol, e.g. ``'MSFT'``.

    Returns:
        On success, the text of the first matching ``<span>`` (a string, not
        a float). On failure a human-readable message string is returned
        instead of raising: one message for a non-200 response, another when
        no matching element is found.

    Raises:
        requests.exceptions.RequestException: If the request itself fails,
            including when the timeout expires.
    """
    # Construct the URL for the stock's summary page on Yahoo Finance
    url = f'https://finance.yahoo.com/quote/{ticker}?p={ticker}'

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, timeout=10)

    if response.status_code != 200:
        return f'Failed to retrieve data. Status code: {response.status_code}'

    # Parse the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # find() yields a single Tag (or None). The previous find_all() call
    # returned a list-like ResultSet, which has no .text attribute, so the
    # success path raised AttributeError.
    # NOTE(review): "svelte-tx3nkj" looks like a build-generated class name,
    # and the first span carrying it may not be the beta figure -- confirm
    # the selector against the live page.
    beta_tag = soup.find('span', {'class': "svelte-tx3nkj"})
    if beta_tag is None:
        return f'Beta value not found for {ticker} on Yahoo Finance.'

    return beta_tag.text

# TODO: Determine some function to output stock's movement nature from beta value
def get_volatility(beta):
    """Placeholder volatility classifier.

    Returns 0 when *beta* is below 0.9 and None for every other value.
    """
    return 0 if beta < 0.9 else None

# TODO: Use overall sentiment and uncertainty metric to determine if/how much the market is bullish or bearish
# This task may involve a generative LLM
def get_bullish(overall_sentiment, uncertainty):
    """Placeholder bull/bear classifier: ignores both arguments, returns 0."""
    placeholder_class = 0
    return placeholder_class
# TODO: Uses bear/bull classification and volatility estimate to predict growth nature of stock in near future
# This task may involve an LLM
def get_growth(bear_bull_class, volatility_class):
    """Placeholder growth estimate: ignores both arguments, returns 0."""
    placeholder_growth = 0
    return placeholder_growth

# Ad-hoc check of get_beta for MSFT; as a bare notebook cell expression its
# return value is what appears as the cell output.
get_beta('MSFT')

'Beta value not found for MSFT on Yahoo Finance.'

In [70]:
def garch_model(stock_prices, p=15, q=15, horizon=30):
    """Fit a zero-mean GARCH(p, q) model and forecast its variance.

    Args:
        stock_prices: Series handed to ``arch_model`` as the data to fit.
            NOTE(review): GARCH models are usually fitted to returns rather
            than raw prices -- confirm what callers are expected to pass.
        p: Lag order passed to ``arch_model`` as ``p`` (default 15, the
            previously hard-coded value).
        q: Lag order passed to ``arch_model`` as ``q`` (default 15, the
            previously hard-coded value).
        horizon: Number of steps to forecast (default 30, which the original
            comment describes as up to one month ahead).

    Returns:
        The last-row, last-column entry of the forecast variance table.
    """
    # Fit on the argument that was passed in; the body previously referenced
    # an undefined name ``train`` and ignored ``stock_prices``.
    model = arch_model(stock_prices, mean='Zero', vol='GARCH', p=p, q=q)
    model_fit = model.fit()
    yhat = model_fit.forecast(horizon=horizon)
    return yhat.variance.values[-1, -1]  # final predicted variance value


def get_stock_prices(stock_ticker):
    """Placeholder: ignores *stock_ticker* and always returns 0."""
    # TODO: get closing price of stock for every trading day for last 5 years (or longest available frame)
    no_prices_yet = 0
    return no_prices_yet



def get_webpage_listing(stock_ticker):
    """Placeholder: ignores *stock_ticker* and always returns 0."""
    # TODO: get list of webpages/article titles corresponding to specific stock ticker
    # may also include correlation constant
    no_listing_yet = 0
    return no_listing_yet


def combined_data(stock_ticker):
    """Collect the three per-ticker values consumed by the neural network.

    Args:
        stock_ticker: Ticker symbol to gather data for.

    Returns:
        Tuple ``(garch_volatility, sentiment_volatility, beta)`` built from
        ``garch_model``, ``sentiment`` and ``get_beta`` respectively.
    """
    garch_volatility = garch_model(get_stock_prices(stock_ticker))
    # sentiment() returns an (overall_sentiment, uncertainty) tuple, so the
    # score is unpacked by position; indexing the tuple with 'uncertainty'
    # raised TypeError.
    _overall_sentiment, sentiment_volatility = sentiment(
        get_webpage_listing(stock_ticker)
    )
    beta = get_beta(stock_ticker)
    return garch_volatility, sentiment_volatility, beta


def get_stock_tickers(n):
    """Placeholder: ignores *n* and always returns 0."""
    # TODO: return list of n most popular stock tickers as strings
    no_tickers_yet = 0
    return no_tickers_yet

def train_neural_network(n):
    """Train a small dense network that predicts beta from two volatilities.

    Args:
        n: Number of tickers requested from ``get_stock_tickers``.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    tickers = get_stock_tickers(n)

    # combined_data yields (garch_volatility, sentiment_volatility, beta) per
    # ticker. A float ndarray is needed for 2-D slicing (a plain list cannot
    # be indexed with [:, ...]); reshape pins the three-column layout.
    data = np.asarray(
        [combined_data(ticker) for ticker in tickers], dtype=float
    ).reshape(-1, 3)

    # Two feature columns match the Input(shape=(2,)) layer below, and beta
    # sits in column 2. The earlier slices took one feature and indexed a
    # non-existent column 3.
    X = data[:, 0:2]
    Y = data[:, 2]

    model = Sequential(
        [
            tf.keras.Input(shape=(2,)),
            Dense(units=25, activation='relu'),
            Dense(units=10, activation='relu'),
            Dense(units=1)
        ]
    )

    model.compile(
        loss='mean_squared_error',
        optimizer=tf.keras.optimizers.Adam(0.01),
    )

    model.fit(
        X, Y,
        epochs=20
    )

    return model

In [71]:
# Train the module-level model that beta_prediction() below uses.
# NOTE(review): 1000 is forwarded to get_stock_tickers as the ticker count;
# while that helper is still a stub returning 0, this call fails with the
# "'int' object is not iterable" TypeError shown in the cell output.
model = train_neural_network(1000)


def beta_prediction(stock_ticker):
    """Predict a ticker's beta with the module-level trained ``model``.

    Args:
        stock_ticker: Ticker symbol to predict for.

    Returns:
        Whatever ``model.predict`` returns for a single-row input batch.
    """
    garch_volatility, sentiment_volatility, _beta = combined_data(stock_ticker)
    # The network's input layer is declared with shape=(2,), so predict needs
    # a (1, 2) batch; the earlier [0:1] slice passed a 1-tuple holding only
    # the first feature.
    stock_info = np.array(
        [[garch_volatility, sentiment_volatility]], dtype=float
    )
    return model.predict(stock_info)

TypeError: 'int' object is not iterable

In [72]:
# testing get_beta: print whatever it returns for MSFT, which is either the
# scraped text or one of the failure-message strings it produces
print(get_beta('MSFT'))

Beta value not found for MSFT on Yahoo Finance.
