![Img](https://app.theheadstarter.com/static/hs-logo-opengraph.png)

# Headstarter Financial Analysis & Automation with LLMs Project

# Install Libraries

In [1]:
! pip install yfinance langchain_pinecone openai python-dotenv langchain-community sentence_transformers

Collecting langchain_pinecone
  Downloading langchain_pinecone-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting aiohttp<3.10,>=3.9.5 (from langchain_pinecone)
  Downloading aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)
Collecting pinecone-client<6.0.0,>=5.0.0 (from langchain_pinecone)
  Downloading pinecone_client-5.0.1-py3-none-any.whl.metadata (19 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.10 (from langchain-community)
  Downloading langchain-0.3.10-py3-none-any.whl.metadata (7.1 kB)
Co

In [2]:
from langchain_pinecone import PineconeVectorStore
from openai import OpenAI
import dotenv
import json
import yfinance as yf
import concurrent.futures
from langchain_community.embeddings import HuggingFaceEmbeddings
from google.colab import userdata
from langchain.schema import Document
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone
import numpy as np
import requests
import os

In [3]:
def get_stock_info(symbol: str) -> dict:
    """
    Retrieves and formats detailed information about a stock from Yahoo Finance.

    Args:
        symbol (str): The stock ticker symbol to look up.

    Returns:
        dict: A flat dictionary with the ticker, name, business summary,
              location (city, state, country), industry, sector, website and a
              set of numeric metrics. Text fields that Yahoo Finance does not
              provide fall back to 'N/A'; numeric fields fall back to 0.
    """
    data = yf.Ticker(symbol)
    stock_info = data.info

    properties = {
        "Ticker": stock_info.get('symbol', 'N/A'),
        'Name': stock_info.get('longName', 'N/A'),
        'Business Summary': stock_info.get('longBusinessSummary', 'N/A'),
        'City': stock_info.get('city', 'N/A'),
        'State': stock_info.get('state', 'N/A'),
        'Country': stock_info.get('country', 'N/A'),
        'Industry': stock_info.get('industry', 'N/A'),
        'Sector': stock_info.get('sector', 'N/A'),
        'Market Cap': stock_info.get('marketCap', 0),
        'Volume': stock_info.get('volume', 0),
        # The source key is '52WeekChange'; the previous misspelling never
        # matched, so this field always fell back to 0.
        '52_Week_Change': stock_info.get('52WeekChange', 0),
        'Revenue_Growth': stock_info.get('revenueGrowth', 0),
        # NOTE(review): the output key is spelled 'EBIDTA' (not 'EBITDA'); it is
        # kept as-is because the key is written into the stored metadata.
        'EBIDTA_Margins': stock_info.get('ebitdaMargins', 0),
        'Gross_Margins': stock_info.get('grossMargins', 0),
        'Earnings_Growth': stock_info.get('earningsGrowth', 0),
        'Shares_Short': stock_info.get('sharesShort', 0),
        'Beta': stock_info.get('beta', 0),
        'website': stock_info.get('website', 'N/A'),
    }

    return properties

In [46]:
# Print every raw field Yahoo Finance returns for Tesla, one "key: value" per line
data = yf.Ticker("TSLA")
stock_info = data.info

for key in stock_info:
    value = stock_info[key]
    print(f"{key}: {value}")

dict_keys(['address1', 'city', 'state', 'zip', 'country', 'phone', 'website', 'industry', 'industryKey', 'industryDisp', 'sector', 'sectorKey', 'sectorDisp', 'longBusinessSummary', 'fullTimeEmployees', 'companyOfficers', 'auditRisk', 'boardRisk', 'compensationRisk', 'shareHolderRightsRisk', 'overallRisk', 'governanceEpochDate', 'compensationAsOfEpochDate', 'maxAge', 'priceHint', 'previousClose', 'open', 'dayLow', 'dayHigh', 'regularMarketPreviousClose', 'regularMarketOpen', 'regularMarketDayLow', 'regularMarketDayHigh', 'beta', 'trailingPE', 'forwardPE', 'volume', 'regularMarketVolume', 'averageVolume', 'averageVolume10days', 'averageDailyVolume10Day', 'bid', 'ask', 'bidSize', 'askSize', 'marketCap', 'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'priceToSalesTrailing12Months', 'fiftyDayAverage', 'twoHundredDayAverage', 'currency', 'enterpriseValue', 'profitMargins', 'floatShares', 'sharesOutstanding', 'sharesShort', 'sharesShortPriorMonth', 'sharesShortPreviousMonthDate', 'dateShortInterest',

In [None]:
# Already-constructed SentenceTransformer models, keyed by model name, so that
# repeated calls reuse one instance instead of rebuilding the model every time.
_EMBEDDING_MODELS = {}


def get_huggingface_embeddings(text, model_name="sentence-transformers/all-mpnet-base-v2"):
    """
    Generates embeddings for the given text using a specified Hugging Face model.

    The model for each distinct ``model_name`` is constructed on first use and
    reused by later calls.

    Args:
        text (str): The input text to generate embeddings for.
        model_name (str): The name of the Hugging Face model to use.
                          Defaults to "sentence-transformers/all-mpnet-base-v2".

    Returns:
        np.ndarray: The generated embeddings as a NumPy array.
    """
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        _EMBEDDING_MODELS[model_name] = model
    return model.encode(text)


def cosine_similarity_between_sentences(sentence1, sentence2):
    """
    Computes and prints the cosine similarity of two sentences' embeddings.

    Args:
        sentence1 (str): The first sentence for similarity comparison.
        sentence2 (str): The second sentence for similarity comparison.

    Returns:
        The similarity score taken from the single entry of the 1x1
        similarity matrix.

    Notes:
        The score is also printed to the console, formatted to four decimals.
    """
    # Embed each sentence (first, then second) and shape it as a one-row matrix,
    # which is the layout handed to cosine_similarity below.
    row_vectors = [
        np.array(get_huggingface_embeddings(sentence)).reshape(1, -1)
        for sentence in (sentence1, sentence2)
    ]

    # One row against one row gives a 1x1 matrix; pull out its only entry.
    pairwise = cosine_similarity(row_vectors[0], row_vectors[1])
    similarity_score = pairwise[0][0]

    print(f"Cosine similarity between the two sentences: {similarity_score:.4f}")
    return similarity_score


# Example usage: compare two short sentences that share most of their wording
sentence1 = "I like walking to the park"
sentence2 = "I like running to the office"

# The call prints the score and the returned value is kept in `similarity`
similarity = cosine_similarity_between_sentences(sentence1, sentence2)

Cosine similarity between the two sentences: 0.6133


In [None]:
# Fetch the formatted Yahoo Finance fields for Apple and show the resulting dict
aapl_info = get_stock_info('AAPL')
print(aapl_info)

{'Ticker': 'AAPL', 'Name': 'Apple Inc.', 'Business Summary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts, as well as advertising services include third-party licensing arrangements and its own advertising platforms. In addition, the company offers various subscription-based services, such as Apple Arcade, a game subscription service; Apple Fitness+, a personalized fitness service; Apple Music, which offers users a curated listening experien

In [None]:
# Compare Apple's business summary with a natural-language search request
aapl_description = aapl_info['Business Summary']

# NOTE(review): "headquarted" in the query below is a typo for "headquartered";
# the string is left untouched because changing it would change the embedding
# and therefore the recorded similarity score.
company_description = "I want to find companies that make smartphones and are headquarted in California"

similarity = cosine_similarity_between_sentences(aapl_description, company_description)

Cosine similarity between the two sentences: 0.3635


# Get all the Stocks in the Stock Market

First, we need to get the symbols (also known as tickers) of all the stocks in the stock market.


In [4]:
def get_company_tickers():
    """
    Downloads and parses the Stock ticker symbols from the GitHub-hosted SEC company tickers JSON file.

    On success the parsed data is also written to 'company_tickers.json' in the
    current working directory.

    Returns:
        dict: A dictionary containing company tickers and related information,
              or None if the request fails or returns a non-200 status code.

    Notes:
        The data is sourced from the official SEC website via a GitHub repository:
        https://raw.githubusercontent.com/team-headstart/Financial-Analysis-and-Automation-with-LLMs/main/company_tickers.json
    """
    # URL to fetch the raw JSON file from GitHub
    url = "https://raw.githubusercontent.com/team-headstart/Financial-Analysis-and-Automation-with-LLMs/main/company_tickers.json"

    try:
        # The timeout keeps a stalled connection from blocking the notebook forever
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Report connection-level failures the same way as a bad status code
        print(f"Failed to download file. Error: {exc}")
        return None

    # Anything other than 200 is treated as a failed download
    if response.status_code != 200:
        print(f"Failed to download file. Status code: {response.status_code}")
        return None

    # Parse the JSON content directly
    company_tickers = json.loads(response.content.decode('utf-8'))

    # Save the content to a local file for future use
    with open("company_tickers.json", "w", encoding="utf-8") as file:
        json.dump(company_tickers, file, indent=4)

    print("File downloaded successfully and saved as 'company_tickers.json'")
    return company_tickers

# Download the ticker data once for the rest of the notebook (None if the download failed)
company_tickers = get_company_tickers()

File downloaded successfully and saved as 'company_tickers.json'


In [None]:
# Number of entries in the downloaded ticker data
len(company_tickers)

9998

# Inserting Stocks into Pinecone

**1. Create an account on [Pinecone.io](https://app.pinecone.io/)**

**2. Create a new index called "stocks" and set the dimensions to 768. Leave the rest of the settings as they are.**

![Screenshot 2024-12-02 at 4 48 03 PM](https://github.com/user-attachments/assets/13b484da-cd00-4337-a4db-4779080eb42c)


**3. Create an API Key for Pinecone**

![Screenshot 2024-11-24 at 10 44 37 PM](https://github.com/user-attachments/assets/e7feacc6-2bd1-472a-82e5-659f65624a88)


**4. Store your Pinecone API Key within Google Colab's secrets section, and then enable access to it (see the blue checkmark)**


![Screenshot 2024-11-24 at 10 45 25 PM](https://github.com/user-attachments/assets/eaf73083-0b5f-4d17-9e0c-eab84f91b0bc)




In [5]:
# Pinecone index to use and the namespace inside it that holds the stock descriptions
index_name = "stocks"
namespace = "stock-descriptions2"

# Read the API key from the Colab secrets and also publish it as an environment variable
pinecone_api_key = userdata.get("PINECONE_API_KEY")
os.environ['PINECONE_API_KEY'] = pinecone_api_key

# Embedding model shared by the LangChain vector store
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vectorstore = PineconeVectorStore(embedding=hf_embeddings, index_name=index_name)

# Direct Pinecone client and index handle, used for fetch/list/query calls
pc = Pinecone(api_key=pinecone_api_key)
pinecone_index = pc.Index(index_name)

  hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# Sequential Processing

Processing all the stocks one at a time like this will take a very long time!

[![](https://mermaid.ink/img/pako:eNqNkl1rgzAUhv9KyMXYwF74cSVjYA0rhY6WKRQWe5FqWqU1cUm8GKX_ffmwa65Gc6Hn5LxvzmM8F1jzhsIUHgUZWlCiigG9MlwoXp_AqpPqdS_esmyzCsBivV7o10fxXgagLFbZDsxmb2COi44dzxRsBK-plDoBZSsoaXbuNPeU4941qWBBv0fKVEfOvud5yejh0NWdLr1U0LnMmts2eYhDMLsZgEHa3TV5aEXbEG-zZZnasiXfLMGnaScVeNKRHDiT1DNunRGF5pMFtUaAiCKe5h4hp84jHHks9mJ8mMjBRBOMrT9G45wommis84bjYThZHuPYwzA_xqeIHUU8UZjyYxDOiOIJwhj_uRKnzhOc3FHsdHgoiUNJJhRTfgzFGVEyoRijj0JZAwPYU9GTrtFjfDHbFVQt7WkFUx02RJzMMF21joyKFz-shqkSIw2g4OOxhemBnKXOxqEhiqKO6DHt_3YHwr44v-XXX7It6B4?type=png)](https://mermaid.live/edit#pako:eNqNkl1rgzAUhv9KyMXYwF74cSVjYA0rhY6WKRQWe5FqWqU1cUm8GKX_ffmwa65Gc6Hn5LxvzmM8F1jzhsIUHgUZWlCiigG9MlwoXp_AqpPqdS_esmyzCsBivV7o10fxXgagLFbZDsxmb2COi44dzxRsBK-plDoBZSsoaXbuNPeU4941qWBBv0fKVEfOvud5yejh0NWdLr1U0LnMmts2eYhDMLsZgEHa3TV5aEXbEG-zZZnasiXfLMGnaScVeNKRHDiT1DNunRGF5pMFtUaAiCKe5h4hp84jHHks9mJ8mMjBRBOMrT9G45wommis84bjYThZHuPYwzA_xqeIHUU8UZjyYxDOiOIJwhj_uRKnzhOc3FHsdHgoiUNJJhRTfgzFGVEyoRijj0JZAwPYU9GTrtFjfDHbFVQt7WkFUx02RJzMMF21joyKFz-shqkSIw2g4OOxhemBnKXOxqEhiqKO6DHt_3YHwr44v-XXX7It6B4)

In [None]:
# Upload the stocks one at a time: each iteration fetches the stock's data,
# then embeds and stores its business summary before moving to the next one.
for idx, stock in company_tickers.items():
    stock_ticker = stock['ticker']
    stock_data = get_stock_info(stock_ticker)
    stock_description = stock_data['Business Summary']

    print(f"Processing stock {idx} / {len(company_tickers)} :", stock_ticker)

    # Pass the ticker through `ids` (a list with one entry per document) so the
    # vector is stored under the ticker symbol, matching the parallel version.
    vectorstore_from_documents = PineconeVectorStore.from_documents(
        documents=[Document(page_content=stock_description, metadata=stock_data)],
        ids=[stock_ticker],
        embedding=hf_embeddings,
        index_name=index_name,
        namespace=namespace
    )

Processing stock 0 / 9998 : NVDA
Processing stock 1 / 9998 : AAPL
Processing stock 2 / 9998 : MSFT


KeyboardInterrupt: 

# Parallelizing

[![](https://mermaid.ink/img/pako:eNqFk0uLgzAQgP_KkMOe7MHXRZaC1baXvsDCwqqHrGarVJMSE9hS-983NnWxULceRmf4Pp2MyQVlLCfIQweOTwXsw4SCuvw4Eiw7wqpsxPsXn_r-bmXAcrtdqts6WuwN2Ecr3wB__blJYTKZwixe45JCwKjgrKoI7zwXdphjlVXwwfiR8CbVH9BxdjPbqKxlJTAlTDbVuYXAjDUNZveSHWcZaRromkj_F61etIbire8Xpt2b9tDslvpCdHrRGYrddF6Ibi-6D4vsBjqcUWDe_NCMl0TAG6gfwwmEWOA7FlgasEYBWwP2KOBowBkFXA24Y4COoW61DRklLcwvQUHUHooEFrK53hHrAbkX7WdF51nRfVLUcX4fs8y6ObawMON-phvyI2CGRVakyEA14TUuc7XnL52ZIFGQmiTIU4855scEJfSqOCwFi840Q57gkhiIM3kokPeNq0Zl8pRjQcISq4NT_1VPmH4y1ufXXwdkCM8?type=png)](https://mermaid.live/edit#pako:eNqFk0uLgzAQgP_KkMOe7MHXRZaC1baXvsDCwqqHrGarVJMSE9hS-983NnWxULceRmf4Pp2MyQVlLCfIQweOTwXsw4SCuvw4Eiw7wqpsxPsXn_r-bmXAcrtdqts6WuwN2Ecr3wB__blJYTKZwixe45JCwKjgrKoI7zwXdphjlVXwwfiR8CbVH9BxdjPbqKxlJTAlTDbVuYXAjDUNZveSHWcZaRromkj_F61etIbire8Xpt2b9tDslvpCdHrRGYrddF6Ibi-6D4vsBjqcUWDe_NCMl0TAG6gfwwmEWOA7FlgasEYBWwP2KOBowBkFXA24Y4COoW61DRklLcwvQUHUHooEFrK53hHrAbkX7WdF51nRfVLUcX4fs8y6ObawMON-phvyI2CGRVakyEA14TUuc7XnL52ZIFGQmiTIU4855scEJfSqOCwFi840Q57gkhiIM3kokPeNq0Zl8pRjQcISq4NT_1VPmH4y1ufXXwdkCM8)

In [6]:
def _read_ticker_log(path, label):
    """Return the non-blank, stripped lines of ``path``; an empty list if the file is missing."""
    try:
        with open(path, 'r') as f:
            tickers = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"No existing {label} tickers file found")
        return []
    print(f"Loaded {len(tickers)} {label} tickers")
    return tickers


# Tracking lists, restored from the log files left by earlier runs (if any)
successful_tickers = _read_ticker_log('successful_tickers.txt', 'successful')
unsuccessful_tickers = _read_ticker_log('unsuccessful_tickers.txt', 'unsuccessful')

Loaded 3899 successful tickers
Loaded 117 unsuccessful tickers


# Perform RAG

In [None]:
def document_exists(index, doc_id, namespace=None):
    """
    Reports whether a vector with the given ID is present in the index.

    Args:
        index: Object whose ``fetch(ids=..., namespace=...)`` result supports
               ``.get('vectors', {})``.
        doc_id: ID to look up.
        namespace: Namespace passed through to ``fetch``. Defaults to None.

    Returns:
        bool: True if ``doc_id`` is among the fetched vectors, otherwise False.
    """
    fetched = index.fetch(ids=[doc_id], namespace=namespace)
    stored_vectors = fetched.get('vectors', {})
    return doc_id in stored_vectors

def _record_ticker(log_path, tracked, stock_ticker):
    """Append ``stock_ticker`` as a line to ``log_path`` and to the in-memory list ``tracked``."""
    with open(log_path, 'a') as f:
        f.write(f"{stock_ticker}\n")
    tracked.append(stock_ticker)


def process_stock(stock_ticker: str) -> str:
    """
    Stores one stock's business summary in Pinecone unless it is already there.

    Args:
        stock_ticker (str): Ticker symbol to process.

    Returns:
        str: A status message. It starts with "ERROR" when fetching or storing
             the stock raised an exception; the ticker is then recorded as
             unsuccessful instead of the exception propagating.
    """
    # Already recorded as done in this or an earlier run
    if stock_ticker in successful_tickers:
        return f"Already processed {stock_ticker}"

    # Present in the index but missing from the log: record it and move on
    if document_exists(pinecone_index, stock_ticker, namespace):
        _record_ticker('successful_tickers.txt', successful_tickers, stock_ticker)
        return f"Document already exists for {stock_ticker}"

    try:
        stock_data = get_stock_info(stock_ticker)
        summary = stock_data['Business Summary']

        # Embed the summary and store it under the ticker symbol as its ID
        PineconeVectorStore.from_documents(
            documents=[Document(page_content=summary, metadata=stock_data)],
            ids=[stock_ticker],
            embedding=hf_embeddings,
            index_name=index_name,
            namespace=namespace
        )

        _record_ticker('successful_tickers.txt', successful_tickers, stock_ticker)
        return f"Processed {stock_ticker} successfully"

    except Exception as e:
        _record_ticker('unsuccessful_tickers.txt', unsuccessful_tickers, stock_ticker)
        return f"ERROR processing {stock_ticker}: {e}"

def parallel_process_stocks(tickers: list, max_workers: int = 10) -> None:
    """
    Processes the given tickers concurrently and stops at the first failure.

    Every ticker is submitted to a thread pool running ``process_stock``, and
    each result message is printed as soon as its task completes.

    Args:
        tickers (list): Ticker symbols to process.
        max_workers (int): Maximum number of worker threads. Defaults to 10.

    Raises:
        SystemExit: With status 1 as soon as a task returns a message starting
            with "ERROR" or raises an exception.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_ticker = {
            executor.submit(process_stock, ticker): ticker
            for ticker in tickers
        }

        for future in concurrent.futures.as_completed(future_to_ticker):
            ticker = future_to_ticker[future]
            try:
                result = future.result()
                print(result)

                # Stop on error
                if result.startswith("ERROR"):
                    print(f"Stopping program due to error in {ticker}")
                    # cancel_futures=True discards the tasks still waiting in
                    # the queue; without it, leaving the `with` block would wait
                    # for every remaining ticker to be processed anyway.
                    executor.shutdown(wait=False, cancel_futures=True)
                    raise SystemExit(1)

            except Exception as exc:
                # SystemExit is not an Exception subclass, so the exit raised
                # above passes through this handler untouched.
                print(f'{ticker} generated an exception: {exc}')
                print("Stopping program due to exception")
                executor.shutdown(wait=False, cancel_futures=True)
                raise SystemExit(1)

# Collect the ticker symbol of every entry, in the order the entries appear
tickers_to_process = [entry['ticker'] for entry in company_tickers.values()]

# Upload them using five worker threads
parallel_process_stocks(tickers_to_process, max_workers=5)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Already processed AYI
Already processed ESRT
Already processed GL
Already processed MAG
Already processed ACVA
Already processed EOLS
Already processed VIAV
Already processed BMA
Already processed TIPT
Already processed WERN
Already processed FNB
Already processed SNEX
Already processed VET
Already processed JGSMY
Already processed WMS
Already processed PEN
Already processed SEZL
Already processed THQ
Already processed PHK
Already processed KLIC
Already processed RGEN
Already processed SAIC
Already processed HTGC
Already processed MODG
Already processed HAYW
Already processed ETD
Already processed EVTC
Already processed JILL
Already processed CRRFY
Already processed SKX
Already processed CAKE
Already processed HUBG
Already processed TCBK
Already processed FROG
Already processed HLIO
Already processed PATH
Already processed IQ
Already processed HIX
Already processed NOAH
Already processed CPRX
Already processed IPG
Already

ERROR:yfinance:500 Server Error: Internal Server Error for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/THCPW?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=THCPW&crumb=xQhtLoVVoIs


Processed UNOV successfully
Processed IRAAW successfully
Processed THCPU successfully
Processed ARQQW successfully
Processed THCPW successfully
Processed MSCF successfully
Processed MHNC successfully
Processed GDL-PC successfully
Processed DSAQW successfully
Processed SCCC successfully
Processed SCCD successfully
Processed CTLPP successfully
Processed SACH-PA successfully
Processed GFAIW successfully
Processed SCCF successfully
Processed SCCG successfully
Processed SCCE successfully
Processed SACC successfully
Processed BLEUR successfully
Processed BLEUU successfully
Processed BLEUW successfully
Processed TEN-PE successfully
Processed OPTXW successfully
Processed OUST-WT successfully
Processed DSAQU successfully
Processed TEN-PF successfully
Processed OUST-WTA successfully
Processed NMPWP successfully
Processed DCFWQ successfully
Processed PLMJW successfully
Processed PLMJU successfully
Processed NEWEN successfully
Processed NMKCP successfully
Processed NMKBP successfully
Processed RBM

ERROR:yfinance:404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ADZCF?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=ADZCF&crumb=xQhtLoVVoIs


Processed SBXD-UN successfully
Processed ADZCF successfully
Processed DGP successfully
Processed OLOXF successfully
Processed BW-PA successfully
Processed DGZ successfully
Processed DZZ successfully
Processed CTSWF successfully
Processed CAPNU successfully
Processed CTSUF successfully
Processed FRSPF successfully
Processed CAPNR successfully
Processed SAT successfully
Processed SAY successfully
Processed SAZ successfully
Processed CFR-PB successfully
Processed MKFGW successfully
Processed SAJ successfully
Processed GRND-WT successfully
Processed ADSEW successfully
Processed RELIW successfully
Processed GMTH successfully
Processed CNDAW successfully
Processed WBS-PG successfully
Processed CNDAU successfully
Processed NCPLW successfully
Processed KHOB successfully
Processed FFHPF successfully
Processed FAXRF successfully
Processed CODI-PC successfully
Processed BLUAW successfully
Processed FXFHF successfully
Processed FRFZF successfully
Processed FITBO successfully
Processed FITBP succes

ERROR:yfinance:500 Server Error: Internal Server Error for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/DMYY-WT?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=DMYY-WT&crumb=xQhtLoVVoIs


Processed DMYY-UN successfully
Processed CHEB-WT successfully
Processed DMYY-WT successfully
Processed CHEB-UN successfully
Processed ATEK-UN successfully
Processed ATEK-WT successfully
Processed LUNRW successfully
Processed IONQ-WT successfully
Processed XFOWW successfully
Processed NMHIW successfully
Processed TBMCR successfully
Processed AGXRW successfully
Processed CRTDW successfully
Processed MDNC successfully
Processed BURUW successfully
Processed AP-WT successfully
Processed GDEVW successfully
Processed WTFCP successfully
Processed GLTK successfully
Processed VAL-WT successfully
Processed HSPOR successfully
Processed CICB successfully
Processed HSPOW successfully
Processed HSPOU successfully
Processed HUDAR successfully
Processed HUDAU successfully
Processed ABLLL successfully
Processed ABLLW successfully
Processed RCFA-WT successfully
Processed RCFA-UN successfully
Processed VLN-WT successfully
Processed VCICU successfully
Processed VCICW successfully
Processed LNZAW successful

ERROR:yfinance:500 Server Error: Internal Server Error for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/LUCYW?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=LUCYW&crumb=xQhtLoVVoIs


Processed LUCYW successfully
Processed BIPH successfully
Processed BIPJ successfully
Processed BRIPF successfully
Processed BIPI successfully
Processed MNQFF successfully
Processed STRRP successfully
Processed MNLCF successfully
Processed BIP-PB successfully
Processed BIP-PA successfully
Processed MNUFF successfully
Processed HYZNW successfully
Processed CLDT-PA successfully
Processed JOCM successfully
Processed ATHS successfully
Processed ATH-PB successfully
Processed ATH-PC successfully
Processed SAIHW successfully
Processed ATH-PD successfully
Processed ATH-PE successfully
Processed SIMAU successfully
Processed BKKT-WT successfully
Processed SIMAW successfully
Processed ICUCW successfully
Processed ALSAR successfully
Processed ALSAU successfully
Processed ALSAW successfully
Processed GRABW successfully
Processed ATLCL successfully
Processed ATLCZ successfully
Processed HSDTW successfully
Processed CRGOW successfully
Processed ATLCP successfully
Processed AGM-PE successfully
Processe

ERROR:yfinance:500 Server Error: Internal Server Error for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/PFTAW?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=PFTAW&crumb=xQhtLoVVoIs


Processed COF-PJ successfully
Processed PFTAU successfully
Processed PFTAW successfully
Processed LIFWZ successfully
Processed LIFWW successfully
Processed INPAP successfully
Processed COF-PK successfully
Processed COF-PL successfully
Processed COF-PN successfully
Processed DHAIW successfully
Processed CRESW successfully
Processed PSPX successfully
Processed EPDU successfully
Processed SBEV-WT successfully
Processed PROCW successfully
Processed MYPSW successfully
Processed LMMY successfully
Processed BFRIW successfully
Processed AMBI-WT successfully
Processed TFC-PI successfully
Processed BMTX-WT successfully
Processed TFC-PO successfully
Processed TFC-PR successfully
Processed BWVTF successfully
Processed ATMP successfully
Processed EVVAQ successfully
Processed COWTF successfully
Processed DTSTW successfully
Processed DJP successfully
Processed VXZ successfully
Processed VXX successfully
Processed PGMFF successfully
Processed GRN successfully
Processed JJCTF successfully
Processed JJE

In [8]:
# Flatten the groups of IDs yielded by the index listing into one list
uploaded_stocks = [
    vector_id
    for id_batch in pinecone_index.list(namespace=namespace)
    for vector_id in id_batch
]

print(f"Uploaded stocks: {len(uploaded_stocks)}")

Uploaded stocks: 9998


In [9]:
# Check that every ticker in tickers_to_process is in uploaded_stocks.
# A set-based subset test avoids scanning the whole uploaded list once per ticker.
set(tickers_to_process).issubset(uploaded_stocks)

True

In [None]:
# Free-text question used for the retrieval step and for the final prompt
query = "What is the best stock to buy?"

In [None]:
# Embed the question with the helper's default sentence-transformers model
raw_query_embedding = get_huggingface_embeddings(query)

In [None]:
# Ask the index for the 10 closest vectors in the namespace, with their metadata
top_matches = pinecone_index.query(vector=raw_query_embedding.tolist(), top_k=10, include_metadata=True, namespace=namespace)

In [None]:
# Display the raw query response
top_matches

{'matches': [{'id': 'MKTW',
              'metadata': {'Business Summary': 'MarketWise, Inc. operates a '
                                               'content and technology '
                                               'multi-brand platform for '
                                               'self-directed investors in the '
                                               'United States and '
                                               'Internationally. Its platform '
                                               'includes subscription '
                                               'businesses that provides '
                                               'financial research, software, '
                                               'education, and tools to '
                                               'navigate the financial '
                                               'markets. The company offers '
                                               'various investmen

In [None]:
# Pull the stored text out of each match's metadata
contexts = [
    match['metadata']['text']
    for match in top_matches['matches']
]

In [None]:
# Join at most ten retrieved texts with a divider, wrap them in CONTEXT tags,
# and append the user's question after them
context_block = "\n\n-------\n\n".join(contexts[:10])
augmented_query = f"<CONTEXT>\n{context_block}\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n{query}"

In [None]:
# Show the full prompt (retrieved context followed by the question)
print(augmented_query)

<CONTEXT>
Honeywell International Inc. engages in the aerospace technologies, building automation, energy and sustainable solutions, and industrial automation businesses in the United States, Europe, and internationally. The company's Aerospace segment offers auxiliary power units, propulsion engines, integrated avionics, environmental control and electric power systems, engine controls, flight safety, communications, navigation hardware, data and software applications, radar and surveillance systems, aircraft lighting, advanced systems and instruments, satellite and space components, and aircraft wheels and brakes; spare parts; repair, overhaul, and maintenance services; and thermal systems, as well as wireless connectivity services. Its Honeywell Building Technologies segment provides software applications for building control and optimization; sensors, switches, control systems, and instruments for energy management; access control; video surveillance; fire products; and installatio

# Setting up Groq for RAG

1. Get your Groq API Key [here](https://console.groq.com/keys). Through Groq, you get free access to various LLMs.

2. Paste your Groq API Key into your Google Colab secrets, and make sure to enable permissions for it

![Screenshot 2024-11-25 at 12 00 16 AM](https://github.com/user-attachments/assets/e5525d29-bca6-4dbd-892b-cc770a6b281d)

In [None]:
# OpenAI client pointed at Groq's OpenAI-compatible endpoint, using the key
# stored in the Colab secrets
client = OpenAI(
  base_url="https://api.groq.com/openai/v1",
  api_key=userdata.get("GROQ_API_KEY")
)

In [None]:
# Instruction given to the model ahead of the user's message
system_prompt = (
    "You are an expert at providing answers about stocks. "
    "Please answer my question provided.\n"
)

# Conversation: the system instruction, then the context-augmented question
chat_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": augmented_query},
]

llm_response = client.chat.completions.create(
    model="llama-3.1-70b-versatile",
    messages=chat_messages,
)

# Text of the first returned choice
response = llm_response.choices[0].message.content

In [None]:
# Show the model's answer
print(response)

Based on the context provided, some companies that manufacture consumer hardware include:

1. Advanced Micro Devices, Inc. (AMD): They manufacture x86 microprocessors and graphics processing units (GPUs) for consumer PCs and laptops, as well as embedded processors for various applications.
2. NVIDIA Corporation: They manufacture graphics processing units (GPUs) for gaming PCs, laptops, and mobile devices, as well as specialized hardware for professional workflows and data center applications.
3. Applied Materials, Inc.: They provide manufacturing equipment and services to the semiconductor industry, which is used to produce consumer hardware devices such as smartphones, laptops, and desktop PCs.
4. Texas Instruments Incorporated: They manufacture a wide range of semiconductor products, including microcontrollers, data converters, and power management devices, which are used in various consumer hardware devices such as smartphones, smart home devices, and gaming consoles.

Additionally,