In [1]:
import numpy as np
import requests
import json
from datetime import datetime, timedelta
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

  from tqdm.autonotebook import tqdm, trange
  return torch._C._cuda_getDeviceCount() > 0
No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


# Get embeddings

In [2]:
def get_models(model_list: list[str]):

    dict_models = {}

    for model_string in model_list:
        model = SentenceTransformer(model_string, device="cpu")
        dict_models[model_string] = model
        
    return dict_models

In [3]:
def get_embeddings(model, text_input, model_string):

    if isinstance(text_input, str):
        text_input = [text_input]

    embeddings = model.encode(list(text_input), normalize_embeddings=True)
        
    return np.array(embeddings)

In [4]:
model_list = ["mixedbread-ai/mxbai-embed-large-v1",
              "WhereIsAI/UAE-Large-V1",
              "avsolatorio/GIST-large-Embedding-v0",
              "BAAI/bge-large-en-v1.5",
              "llmrails/ember-v1",
             ]

# Get news

In [5]:
def get_previous_date(days_back: int = 30):

    now = datetime.utcnow()
    previous_date = now - timedelta(days=days_back)
    formatted_date = previous_date.strftime('%Y-%m-%dT%H:%M:%SZ')
    
    return formatted_date

In [6]:
def request_data(ticker_symbol: str, key: str, date: str|None= None):

    url = f'''https://api.polygon.io/v2/reference/news?ticker={ticker_symbol}&limit=100&sort=published_utc&apiKey={key}'''
    if date:
        url = f'''https://api.polygon.io/v2/reference/news?ticker={ticker_symbol}&published_utc.gte={date}&limit=100&sort=published_utc&apiKey={key}'''

    headers = {
        'Accept': 'application/json'
    }
    
    try:
        # Make the GET request
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad responses (4xx or 5xx)

        data = response.json()

        if not data:
            return None
        
        return data

    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None, None


In [7]:
date = get_previous_date(90)

In [8]:
ticker = "IONQ"

In [9]:
data = request_data(ticker, "qz0eLz8rGOD47auKwSn69ErwuCShUWE9")

In [10]:
titles = [value["title"] for value in data['results']]

In [11]:
dict_models = get_models(model_list)

In [12]:
embeddings_list = []

for model_string in model_list:
    model = dict_models[model_string]
    embeddings = get_embeddings(model, titles, model_string)
    embeddings_list.append(embeddings)

In [13]:
embeddings_list = np.array(embeddings_list)

In [14]:
embeddings_list.shape

(5, 100, 1024)

In [15]:
e_terms_list = []

for model_string in model_list:
    model = dict_models[model_string]
    embeddings = get_embeddings(model, "A significant event (catalyst) happening in the future.", model_string)
    e_terms_list.append(embeddings)

In [16]:
e_terms_list = np.squeeze(np.array(e_terms_list))

In [17]:
similarity = cos_sim(embeddings_list[0], e_terms_list).mean(axis=1).numpy()

In [18]:
np.array(titles)[np.argsort(similarity)[::-1]]

array(["Will Quantum Computing Take Off in 2024? Here's 1 Magnificent Stock to Buy If It Does.",
       '3 Breakthrough Stocks Leading the Next Tech Revolution',
       'IonQ Has The Potential To Become A Long-Term Compounder',
       'Got $3,000? These Stocks Could Double Your Money by 2030',
       'Got $3,000? These 3 Tech Stocks Could Make You Rich in 2024 and Beyond',
       'Will IonQ Be a Trillion-Dollar Stock by 2050?',
       'IonQ Surged Over 290% in 2023, But Is It a Buy?',
       'IonQ Stock Outperformed the S&P 500 in 2023. Could It Do It Again in 2024?',
       '3 Growth Stocks That Could More Than Double Their Revenue by 2025',
       'Microsoft Creates Buzz in Quantum Realm: 3 Stocks to Watch',
       '3 Companies Already Working on the Next Phase of Artificial Intelligence (AI)',
       '3 Unstoppable Technology Stocks to Take You From Next to Nothing to $500,000 and Beyond',
       '3 Hypergrowth Stocks to Buy Heading Into 2024',
       'Here is What to Know Beyond Wh

In [178]:
similarity[np.argsort(similarity)[::-1]]

array([0.53699243, 0.5339109 , 0.50433815, 0.5017838 , 0.49928695,
       0.4962042 , 0.4940552 , 0.49245661, 0.48951522, 0.488038  ,
       0.4856089 , 0.4781688 , 0.4710664 , 0.46704644, 0.4647944 ,
       0.46374884, 0.4628927 , 0.46228418, 0.45839572, 0.45772678,
       0.45106182, 0.44927406, 0.44903046, 0.44808918, 0.44666806,
       0.44517222, 0.44509298, 0.44291025, 0.44289273, 0.44243583,
       0.4381907 , 0.4367988 , 0.43673402, 0.4366733 , 0.43432045,
       0.43394095, 0.43355584, 0.42974663, 0.4294463 , 0.42888302,
       0.42846155, 0.42623854, 0.4253398 , 0.42392564, 0.42389908,
       0.42371598, 0.42345062, 0.41979876, 0.41954994, 0.41256848,
       0.41207615, 0.41179013, 0.40878052, 0.40812746, 0.40784088,
       0.39956793, 0.3994193 , 0.39905488, 0.39642152, 0.390546  ,
       0.39046952, 0.38659412, 0.38634506, 0.38629   , 0.38427725,
       0.38424024, 0.38391083, 0.38216695, 0.38141775, 0.38141775,
       0.3767775 , 0.37564725, 0.37527055, 0.3752303 , 0.37521