In [None]:
# ===============================
# 1️⃣ Install Dependencies
# ===============================
!pip install openai pandas matplotlib alpha_vantage transformers accelerate bitsandbytes

# ===============================
# 2️⃣ A - Imports
# ===============================
import re
import time

import matplotlib.pyplot as plt
import pandas as pd
import torch
from alpha_vantage.timeseries import TimeSeries
from google.colab import userdata
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# ===============================
# 2️⃣ B - List Creation
# ===============================

# Fortune 100 companies with stock symbols, as [company name, ticker] pairs.
# Rows marked "Private" have no listed stock; their symbol is only a
# placeholder and must not be sent to a market-data API.
data = [
    ["Walmart", "WMT"],
    ["Amazon", "AMZN"],
    ["UnitedHealth", "UNH"],
    ["Apple", "AAPL"],
    ["CVS Health", "CVS"],
    ["Berkshire Hathaway", "BRK.B"],
    ["Alphabet", "GOOGL"],
    ["Exxon Mobil", "XOM"],
    ["McKesson", "MCK"],
    ["Cencora", "COR"],  # was "CNC", which is Centene's ticker (listed below)
    ["JPMorgan Chase", "JPM"],
    ["Costco", "COST"],
    ["Cigna", "CI"],
    ["Microsoft", "MSFT"],
    ["Cardinal Health", "CAH"],
    ["Chevron", "CVX"],
    ["Bank of America", "BAC"],
    ["General Motors", "GM"],
    ["Ford Motor", "F"],
    ["Elevance Health", "ELV"],  # was "EVHC", which is not Elevance's ticker
    ["Citi", "C"],
    ["Meta", "META"],
    ["Centene", "CNC"],
    ["Home Depot", "HD"],
    ["Fannie Mae", "FNMA"],
    ["Walgreens", "WBA"],
    ["Kroger", "KR"],
    ["Phillips 66", "PSX"],
    ["Marathon Petroleum", "MPC"],
    ["Verizon", "VZ"],
    ["Nvidia", "NVDA"],
    ["Goldman Sachs", "GS"],
    ["Wells Fargo", "WFC"],
    ["Valero Energy", "VLO"],
    ["Comcast", "CMCSA"],
    ["State Farm", "STF"],  # Private company, no public ticker (placeholder)
    ["AT&T", "T"],
    ["Freddie Mac", "FMCC"],
    ["Humana", "HUM"],
    ["Morgan Stanley", "MS"],
    ["Target", "TGT"],
    ["StoneX", "SNEX"],
    ["Tesla", "TSLA"],
    ["Dell Technologies", "DELL"],
    ["PepsiCo", "PEP"],
    ["Walt Disney", "DIS"],
    ["UPS", "UPS"],
    ["Johnson & Johnson", "JNJ"],
    ["FedEx", "FDX"],
    ["Archer Daniels Midland", "ADM"],
    ["Procter & Gamble", "PG"],
    ["Lowe's", "LOW"],
    ["Energy Transfer", "ET"],
    ["RTX", "RTX"],
    ["Albertsons", "ACI"],
    ["Sysco", "SYY"],
    ["Progressive", "PGR"],
    ["American Express", "AXP"],
    ["Lockheed Martin", "LMT"],
    ["MetLife", "MET"],
    ["HCA Healthcare", "HCA"],
    ["Prudential Financial", "PRU"],
    ["Boeing", "BA"],
    ["Caterpillar", "CAT"],
    ["Merck", "MRK"],
    ["Allstate", "ALL"],
    ["Pfizer", "PFE"],
    ["IBM", "IBM"],
    ["New York Life Insurance", "NYLI"],  # Private (placeholder symbol)
    ["Delta Airlines", "DAL"],
    ["Publix Super Markets", "PUSH"],  # Private (placeholder symbol)
    ["Nationwide", "NWG"],  # Private (placeholder symbol)
    ["TD Synnex", "SNX"],
    ["United Airlines", "UAL"],
    ["ConocoPhillips", "COP"],
    ["TJX", "TJX"],
    ["AbbVie", "ABBV"],
    ["Enterprise Products Partners", "EPD"],
    ["Charter Communications", "CHTR"],
    ["Performance Food", "PFGC"],
    ["American Airlines", "AAL"],
    ["Capital One", "COF"],
    ["Cisco Systems", "CSCO"],
    ["HP", "HPQ"],
    ["Tyson Foods", "TSN"],
    ["Intel", "INTC"],
    ["Oracle", "ORCL"],
    ["Broadcom", "AVGO"],
    ["Deere", "DE"],
    ["Nike", "NKE"],
    ["Liberty Mutual Insurance", "LMIC"],  # Private (placeholder symbol)
    ["Plains GP", "PAGP"],
    ["USAA", "USAA"],  # Private (placeholder symbol)
    ["Bristol-Myers Squibb", "BMY"],
    ["Ingram Micro", "INGM"],  # was "IM", which is not Ingram Micro's ticker
    ["General Dynamics", "GD"],
    ["Coca-Cola", "KO"],
    ["TIAA", "TIAA"],  # Private (placeholder symbol)
    ["Travelers", "TRV"],
    ["Eli Lilly", "LLY"]
]

# Convert to DataFrame and save as CSV.
# Only the first 25 rows are exported, so later steps make at most 25 API
# requests (presumably to fit the free-tier daily quota — TODO confirm).
fortune100_df = pd.DataFrame(data[:25], columns=["Company", "Symbol"])

# Each exported symbol must identify exactly one company; a duplicate would be
# fetched twice and would make symbol -> company lookups ambiguous.
assert fortune100_df["Symbol"].is_unique, "duplicate ticker in exported rows"

fortune100_df.to_csv("fortune100.csv", index=False)

print("fortune100.csv created successfully!")
# A bare expression in the middle of a cell is not displayed, so print the preview.
print(fortune100_df.head(10))

# ===============================
# 3️⃣ Set Your Alpha Vantage API Key
# ===============================
# The key is looked up under the name 'API_KEY' via google.colab.userdata,
# so it never has to be written into the notebook itself.
ts = TimeSeries(key=userdata.get('API_KEY'), output_format='pandas')

# ===============================
# 4️⃣ Load Fortune 100 tickers
# ===============================
# Reads "fortune100.csv" (columns: Company,Symbol) from the working directory.
fortune100 = pd.read_csv("fortune100.csv")

# ===============================
# 5️⃣ Fetch Daily Stock Data (Memory Efficient)
# ===============================
REQUEST_PAUSE_SECONDS = 15  # Respect free-tier rate limit

# symbol -> DataFrame of daily prices returned by the API
all_data = {}

# Skip missing and repeated symbols so no request is wasted.
for symbol in fortune100['Symbol'].dropna().drop_duplicates():
    try:
        # Use a dedicated name here: reusing `data` would overwrite the
        # company list defined earlier in the notebook.
        daily_prices, _meta = ts.get_daily(symbol=symbol, outputsize='compact')
        all_data[symbol] = daily_prices
        print(f"Fetched {symbol}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next symbol.
        print(f"Error fetching {symbol}: {e}")
    finally:
        # Pause after failures too; otherwise an error is followed immediately
        # by the next request, which defeats the rate limiting.
        time.sleep(REQUEST_PAUSE_SECONDS)

# ===============================
# 6️⃣ Summarize Stock Data
# ===============================
def _summarize_closes(df):
    """Summarize the five most recent closing prices in ``df``.

    Args:
        df: DataFrame with a date-like index and a '4. close' column.

    Returns:
        A dict with keys 'latest_close', '5d_avg' and 'trend', or None when
        the frame holds no closing prices.
    """
    # Sort newest-first explicitly instead of relying on the order in which
    # the API happened to return the rows.
    closes = df['4. close'].dropna().sort_index(ascending=False).head(5)
    if closes.empty:
        return None

    latest = float(closes.iloc[0])
    oldest = float(closes.iloc[-1])
    if latest > oldest:
        trend = "up"
    elif latest < oldest:
        trend = "down"
    else:
        trend = "flat"  # an unchanged price is not a decline

    return {
        "latest_close": round(latest, 2),
        "5d_avg": round(float(closes.mean()), 2),
        "trend": trend
    }


# symbol -> summary dict; symbols without usable price data are left out
summaries = {}
for symbol, df in all_data.items():
    summary = _summarize_closes(df)
    if summary is not None:
        summaries[symbol] = summary

# ===============================
# 7️⃣ Plot Last 30 Days for a Company
# ===============================
def plot_stock(symbol):
    """Plot the 30 most recent daily closing prices for ``symbol``.

    The prices are looked up in the module-level ``all_data`` dict. When the
    symbol is not present, a message is printed and nothing is plotted.

    Args:
        symbol: Key into ``all_data`` (a ticker string).

    Returns:
        None.
    """
    if symbol not in all_data:
        print(f"No data for {symbol}")
        return

    # Pick the 30 newest rows by date rather than by incoming row order,
    # then put them back in chronological order for the x-axis.
    closes = (
        all_data[symbol]['4. close']
        .sort_index(ascending=False)
        .head(30)
        .sort_index()
    )

    fig, ax = plt.subplots()
    closes.plot(kind='line', ax=ax, title=f"{symbol} Closing Prices - Last 30 Days")
    ax.set_xlabel("Date")
    ax.set_ylabel("Closing price")
    plt.show()

# ===============================
# 8️⃣ Load Falcon 7B Instruct Model (4-bit quantized)
# ===============================
model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Quantization settings go in a BitsAndBytesConfig; passing load_in_4bit
# straight to from_pretrained is deprecated.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # 4-bit quantization for T4 GPU
    bnb_4bit_compute_dtype=torch.float16,  # compute in the same dtype as the rest of the model
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quantization_config,
    torch_dtype=torch.float16
)

# ===============================
# 9️⃣ Function to Query LLM
# ===============================
def _symbols_in_query(user_query):
    """Return the symbols from ``summaries`` that ``user_query`` mentions.

    A symbol counts as mentioned when its ticker or one of its company names
    (taken from the module-level ``fortune100`` frame) appears in the query as
    a standalone token, ignoring case.
    """
    def _mentions(term):
        # Require non-alphanumeric boundaries so that one-letter tickers such
        # as "C" or "F" do not match every word containing that letter.
        pattern = rf"(?<![A-Za-z0-9]){re.escape(term)}(?![A-Za-z0-9])"
        return re.search(pattern, user_query, flags=re.IGNORECASE) is not None

    names_by_symbol = {}
    for company, symbol in zip(fortune100["Company"], fortune100["Symbol"]):
        names_by_symbol.setdefault(str(symbol), []).append(str(company))

    return [
        symbol
        for symbol in summaries
        if _mentions(symbol)
        or any(_mentions(name) for name in names_by_symbol.get(symbol, []))
    ]


def query_stock_llm(user_query):
    """Answer a question about the summarized stocks using the loaded LLM.

    Relies on the module-level ``summaries``, ``fortune100``, ``tokenizer``
    and ``model`` objects.

    Args:
        user_query: Free-text question that names companies or tickers.

    Returns:
        The text generated by the model (without the prompt), or a fixed
        message when the query mentions no known company.
    """
    # Detect Fortune 100 companies mentioned in the query (by ticker or name)
    symbols_in_query = _symbols_in_query(user_query)
    if not symbols_in_query:
        return "No known Fortune 100 company mentioned in your query."

    # Build context from summaries
    context_text = ""
    for s in symbols_in_query:
        summary = summaries[s]
        context_text += f"{s}: latest close ${summary['latest_close']}, 5-day avg ${summary['5d_avg']}, trend {summary['trend']}\n"

    prompt = f"""
You are a financial assistant. Answer the user's question based ONLY on the following stock summaries:

{context_text}

Question: {user_query}
Answer:
"""
    # Follow the device the model was actually placed on instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        pad_token_id=tokenizer.eos_token_id,  # explicit padding id for generation
    )

    # generate() returns prompt tokens followed by the continuation; decode
    # only the continuation so the answer does not echo the whole prompt.
    prompt_length = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()


In [None]:
PLOT_PREFIX = "plot for "
EXIT_COMMANDS = {"exit", "quit", "q"}
MAX_QUERIES = 10  # hard cap so the cell always terminates

print("💡 Stock LLM ready! Type your query about Fortune 100 companies.")
print("👉 Example: 'Compare Apple and Amazon stock performance over the last 5 days.'")
print("👉 Example: 'plot for AAPL' to see a chart.")
print("Type 'exit' to stop.\n")

for _ in range(MAX_QUERIES):
    user_query = input("Enter your query: ")

    # Ignore surrounding whitespace so " exit " still ends the session.
    if user_query.strip().lower() in EXIT_COMMANDS:
        print("👋 Exiting Stock LLM. Goodbye!")
        break

    if user_query.lower().startswith(PLOT_PREFIX):
        # Slice the prefix off by length: the check above ignores case, so a
        # case-sensitive split would raise IndexError on "Plot for AAPL".
        symbol = user_query[len(PLOT_PREFIX):].strip().upper()
        plot_stock(symbol)
        continue

    response = query_stock_llm(user_query)
    print("\n📊 Response:\n", response, "\n")
