## Main idea

Combine 
- fundamental,
- technical (including weekly RSI), and
- financial statements analysis

to find stocks that are attractive to buy for the long term (as buy-and-hold positions).

The goal is to hold these for the medium to long term without worrying too much about allocation, and to sell them (or a portion of them) once the stocks look less attractive.

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from groq import Groq
import re
from datetime import datetime, timedelta

In [None]:
from dotenv import load_dotenv
import os

# Read the variables defined in the local 'groq_api.env' file
load_dotenv(dotenv_path='groq_api.env')

In [None]:
# Build the Groq API client from the key found in the GROQ_API_KEY
# environment variable (None is passed through if the variable is unset)
groq_api_key = os.environ.get('GROQ_API_KEY')
groq_client = Groq(api_key=groq_api_key)

## Data collection

### TODO
Extend the function below to fetch all S&P 500 stocks (or at least the top 100), plus any stocks that we currently own.

In [None]:
def get_top_sp500_stocks(n=20):
    """Return the ``n`` largest tickers, by market cap, from a fixed watch list.

    The market cap of each ticker is read from ``yf.Ticker(ticker).info``,
    one lookup per ticker in the list.

    Args:
        n: Maximum number of tickers to return (default 20).

    Returns:
        A list of ticker symbols ordered from largest to smallest market cap.
        Tickers without a reported market cap are treated as 0 and sort last.
    """
    # Hand-maintained candidate universe (for demonstration).
    # NOTE(review): the list is longer than a "top 10" and appears to include
    # names that are not S&P 500 constituents - confirm this is intended.
    # Yahoo Finance writes share classes with a hyphen, so Berkshire class B
    # is 'BRK-B'; the dotted form 'BRK.B' does not resolve to any data.
    tickers = ['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'GOOG', 'META', 'TSLA', 'BRK-B', 'JPM', 'V', 'DASH', 'NFLX', 'DIS', 'SBUX', 'BABA', 'NVDA', 'BIDU', 'XOM', 'PEG', 'CEG', 'BWXT', 'NEM', 'GFI', 'HMY', 'CVX', 'AVGO', 'HD', 'PG', 'WMT', 'JNJ', 'ABBV']

    # Get market cap for each stock
    market_caps = {}
    for ticker in tickers:
        info = yf.Ticker(ticker).info
        # 'marketCap' may be missing *or* present with a None value; map both
        # to 0 so the sort below never compares None against an int.
        market_caps[ticker] = info.get('marketCap') or 0

    # Sort by market cap (descending; ties keep list order) and keep the top n
    ranked = sorted(market_caps, key=market_caps.get, reverse=True)
    return ranked[:n]

In [None]:
# Select the 20 largest tickers and save the selection as a one-column CSV
top_stocks = get_top_sp500_stocks(n=20)
pd.DataFrame({'Ticker': top_stocks}).to_csv('top_20_stocks.csv', index=False)

## Retrieve financial data

Retrieve historical price data for the past five years, along with the income statements, for each stock.

In [None]:
def get_financial_data(ticker, start_date, end_date):
    """Collect price history and the income statement for one ticker.

    Args:
        ticker: Symbol passed to ``yf.Ticker``.
        start_date: Start of the price-history window, passed to ``history``.
        end_date: End of the price-history window, passed to ``history``.

    Returns:
        A dict with two entries: ``"price_data"`` (the result of
        ``history(start=..., end=...)``) and ``"income_statement"``
        (the ticker's ``financials`` attribute).
    """
    handle = yf.Ticker(ticker)

    # The date window only applies to the price history; the income
    # statement is taken as-is from the ticker object.
    return {
        "price_data": handle.history(start=start_date, end=end_date),
        "income_statement": handle.financials,
    }

In [None]:
# Price-history window: from 5 * 365 days ago up to now (naive datetimes).
# Note that 5 * 365 days is slightly short of five calendar years whenever
# the span contains a leap day.
start_date = datetime.now() - timedelta(days=5*365)
end_date = datetime.now()

# Fetch price history and income statement for every selected ticker,
# keyed by ticker symbol.
all_data = {}
for ticker in top_stocks:
    all_data[ticker] = get_financial_data(ticker, start_date, end_date)

In [None]:
# all_data

In [None]:
def format_income_statement_for_llm(income_statement_column):
    """Render one income-statement column as plain ``label: value`` lines.

    Args:
        income_statement_column: Any object with an ``items()`` method that
            yields ``(label, value)`` pairs, e.g. a pandas Series or a dict.

    Returns:
        A string with one ``"label: value"`` line per item. ``int`` and
        ``float`` values are formatted with thousands separators and two
        decimals (NaN is rendered as ``nan``); every other value is
        rendered with ``str()``. Leading and trailing whitespace of the
        whole text is stripped, so an empty input gives ``""``.
    """
    lines = []
    for label, value in income_statement_column.items():
        if isinstance(value, (int, float)):
            rendered = f"{value:,.2f}"
        else:
            rendered = str(value)
        lines.append(f"{label}: {rendered}")
    # Join once instead of growing a string with += inside the loop.
    return "\n".join(lines).strip()

In [None]:
# Example usage: print the first income-statement column for every ticker
for ticker, data in all_data.items():
    # A ticker may come back with an empty income statement; indexing
    # columns[0] on an empty frame raises IndexError and would abort the
    # loop for all remaining tickers, so report it and move on.
    if data['income_statement'].empty:
        print(f"No income statement available for {ticker}\n")
        continue
    # NOTE(review): the first column is presumably the most recent
    # reporting period - confirm against the data source.
    current_year = data['income_statement'].columns[0]
    formatted_statement = format_income_statement_for_llm(data['income_statement'][current_year])
    print(f"Formatted Income Statement for {ticker}:\n{formatted_statement}\n")