In [1]:


# joblib is used further down to load the pickled model from disk.
import joblib 
import warnings
# Suppress every warning category for the remainder of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used in this notebook — confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')


import nltk
# Make sure the VADER lexicon required by SentimentIntensityAnalyzer is
# available locally; NLTK reports "already up-to-date" when it is cached.
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/oscarpinon/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:

# Augment VADER's general-purpose vocabulary with finance-specific terms so
# that trading jargon contributes to the sentiment score.
sia = SentimentIntensityAnalyzer()


positive_words='buy bull long support undervalued underpriced cheap upward rising trend moon rocket hold breakout call beat support buying holding high profit'
negative_words='sell bear bubble bearish short overvalued overbought overpriced expensive downward falling sold sell low put miss resistance squeeze cover seller '

# TODO: improve — every term currently receives the same hand-picked magnitude.
# NOTE(review): VADER documents its lexicon valences as ranging from -4 to +4,
# so 4 pins every finance term at the extreme of the scale — confirm intended.
financial_lingo_value = 4


# str.split() with no argument splits on any run of whitespace and drops
# leading/trailing blanks. The previous split(" ") turned the trailing space in
# negative_words into an empty-string token that was added to the lexicon.
# Repeated words ('support', 'sell') simply collapse into a single dict key.
dictOfpos = { i : financial_lingo_value for i in positive_words.split() }
dictOfneg = { i : -financial_lingo_value for i in negative_words.split() }
# Merge order matters: a word present in both lists would end up negative.
Financial_Lexicon = {**dictOfpos, **dictOfneg}
sia.lexicon.update(Financial_Lexicon)

In [3]:
# Restore the previously trained estimator from its pickle on disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model artifacts from a trusted source.
loaded_model = joblib.load('stock_price_predictor.pkl')

# One hand-entered observation; the pairs are listed in the order in which the
# values are handed to the model.
labelled_sample = (
    ('volume', 4464048.0),
    ('volatility', 0.0132018636),
    ('tweet count', 180.0),
    ('comment_num', 29.0),
    ('retweet_num', 79.0),
    ('likes', 368.0),
    ('avg_sentiment_score', 0.223290556),
)
# The model is given a 2-D structure: a list holding a single feature row.
sample_data = [[value for _label, value in labelled_sample]]

print(sample_data)
prediction = loaded_model.predict(sample_data)
print(f'Predicted Close Value: {prediction}')

[[4464048.0, 0.0132018636, 180.0, 29.0, 79.0, 368.0, 0.223290556]]
Predicted Close Value: [-0.00158679]


In [4]:

import yfinance as yf
import pandas as pd
import numpy as np
import datetime


def collect_stock_data(ticker_symbol, start_date, end_date):
    """Fetch daily price history and return the values of its last row.

    Args:
        ticker_symbol: Symbol handed to ``yf.Ticker``.
        start_date: Start of the window passed to ``Ticker.history``.
        end_date: End of the window passed to ``Ticker.history``.

    Returns:
        dict with the keys ``'volume'``, ``'open_value'``, ``'close_value'``,
        ``'high_value'`` and ``'low_value'``, each read from the last row of
        the returned history.

    Raises:
        IndexError: If the history contains no rows for the requested window.
            The exception type matches what ``.iloc[-1]`` raised before; only
            the message is now descriptive.
    """
    stock = yf.Ticker(ticker_symbol)
    hist = stock.history(start=start_date, end=end_date, interval="1d")

    # Fail with an actionable message instead of pandas' generic
    # "single positional indexer is out-of-bounds".
    if len(hist) == 0:
        raise IndexError(
            f"No price history returned for {ticker_symbol!r} "
            f"between {start_date} and {end_date}"
        )

    # Read each column first and then its last element, so every value keeps
    # the dtype of its own column.
    stock_data = {
        'volume': hist['Volume'].iloc[-1],
        'open_value': hist['Open'].iloc[-1],
        'close_value': hist['Close'].iloc[-1],
        'high_value': hist['High'].iloc[-1],
        'low_value': hist['Low'].iloc[-1]
    }

    return stock_data

def prepare_features(stock_data, tweet_data):
    """Assemble a single-row feature matrix from price and tweet inputs.

    Args:
        stock_data: Mapping providing ``'volume'``, ``'open_value'``,
            ``'high_value'`` and ``'low_value'``.
        tweet_data: Object whose ``'tweet_count'``, ``'comment_num'``,
            ``'retweet_num'``, ``'like_num'`` and ``'avg_sentiment_score'``
            columns expose ``.values``; only the first entry of each is used.

    Returns:
        ``np.ndarray`` of shape ``(1, 7)`` ordered as: volume, volatility,
        tweet_count, comment_num, retweet_num, like_num, avg_sentiment_score.
    """
    volume = stock_data['volume']
    open_value = stock_data['open_value']

    # High-low range expressed as a fraction of the open price.
    # The previously computed daily_return (and the close_value it needed) was
    # never used in the returned features, so both were removed.
    volatility = (stock_data['high_value'] - stock_data['low_value']) / open_value

    tweet_columns = ('tweet_count', 'comment_num', 'retweet_num', 'like_num', 'avg_sentiment_score')
    tweet_features = [tweet_data[column].values[0] for column in tweet_columns]

    return np.array([[volume, volatility, *tweet_features]])

def collect_tweets(ticker_symbol, since_date, until_date):
    """Return a one-row DataFrame of tweet statistics for ``ticker_symbol``.

    This is currently a stub: the engagement counts and sentiment score are
    hard-coded constants rather than collected data.

    Args:
        ticker_symbol: Symbol stored in the ``'ticker_symbol'`` column.
        since_date: Start of the requested window; stored in the ``'date'``
            column.
        until_date: End of the requested window. Not used by the stub.

    Returns:
        ``pd.DataFrame`` with one row and the columns ``'date'``,
        ``'ticker_symbol'``, ``'tweet_count'``, ``'comment_num'``,
        ``'retweet_num'``, ``'like_num'`` and ``'avg_sentiment_score'``.
    """
    return pd.DataFrame({
        # Use the caller's argument. The body previously read a name `date`
        # that is not a parameter, so it only worked when a module-level
        # `date` happened to exist and silently ignored the requested window.
        'date': [since_date],
        'ticker_symbol': [ticker_symbol],
        'tweet_count': [180],
        'comment_num': [29],
        'retweet_num': [79],
        'like_num': [368],
        'avg_sentiment_score': [-0.223]
    })

# Function to make predictions
def predict_stock_price(ticker_symbol, date, lookback_days=7):
    """Build features for ``ticker_symbol`` at ``date`` and run the saved model.

    Args:
        ticker_symbol: Symbol forwarded to the tweet and price collectors.
        date: ``datetime`` used as both tweet-window bounds and as the end of
            the price window.
        lookback_days: Number of days before ``date`` at which the price
            window starts. The price collector only uses the last row of the
            window, so a wider window yields the same row whenever the
            previous one-day window had data, while also covering weekends
            and holidays, where a one-day window comes back empty.

    Returns:
        Whatever ``model.predict`` returns for the single feature row.
    """
    tweet_data = collect_tweets(ticker_symbol, date, date)

    window_start = date - datetime.timedelta(days=lookback_days)
    stock_data = collect_stock_data(ticker_symbol, window_start, date)

    features = prepare_features(stock_data, tweet_data)

    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code — only load model artifacts from a trusted source.
    model = joblib.load('stock_price_predictor.pkl')

    prediction = model.predict(features)
    return prediction

In [5]:

# Symbol to score and the moment the prediction is requested for
# (datetime.now() yields a naive timestamp in the machine's local time).
ticker_symbol = 'AMZN'
date = datetime.datetime.now()

# Run the full pipeline, then report the model output next to the request date.
prediction = predict_stock_price(ticker_symbol, date)
report_line = f'Predicted Close Value for {ticker_symbol} on {date.strftime("%Y-%m-%d")}: {prediction}'
print(report_line)

Predicted Close Value for AMZN on 2024-06-05: [0.00347204]
