# Initial Setup and Configuration

This notebook sets up the Databricks environment and loads initial data.

In [None]:
# Databricks notebook source
# MAGIC %md
# MAGIC ## Step 1: Install Required Libraries
# MAGIC 
# MAGIC Install all required Python packages.

%pip install yfinance>=0.2.0
%pip install alpha-vantage>=2.3.0
%pip install fredapi>=0.5.0
%pip install openai>=1.0.0
%pip install anthropic>=0.18.0
%pip install pydantic>=2.0.0
%pip install python-dotenv>=1.0.0

In [None]:
# MAGIC %md
# MAGIC ## Step 2: Configure API Keys
# MAGIC 
# MAGIC Set up API keys from Databricks Secrets or environment variables.

import os

# Databricks secret scope that holds every API key used by this project.
SECRET_SCOPE = "stocks_ai_secrets"

# (display label, key name inside the secret scope, environment variable to populate)
API_KEY_SPECS = [
    ("OpenAI", "openai_api_key", "OPENAI_API_KEY"),
    ("Alpha Vantage", "alpha_vantage_api_key", "ALPHA_VANTAGE_API_KEY"),
    ("FRED", "fred_api_key", "FRED_API_KEY"),
]


def load_api_key(label, secret_key, env_var, scope=SECRET_SCOPE):
    """Make one API key available in ``os.environ``.

    Option 1 (recommended): read the key from Databricks Secrets and export it.
    Option 2: if the secret lookup fails, accept a value that is already set
    in the environment (e.g. configured on the cluster).

    Args:
        label: Human-readable provider name used in the status messages.
        secret_key: Key name inside the secret scope.
        env_var: Environment variable that receives the key.
        scope: Secret scope to read from.

    Returns:
        True if ``env_var`` holds a non-empty value afterwards, else False.
    """
    try:
        # Written straight into os.environ so the secret value is not kept
        # around in a notebook-level variable.
        os.environ[env_var] = dbutils.secrets.get(scope=scope, key=secret_key)
    except Exception as e:
        # Deliberately broad: also covers `dbutils` not being defined when the
        # notebook runs outside Databricks. Setup is best-effort per key.
        if os.environ.get(env_var):
            print(f"✓ {label} API key found in environment ({env_var})")
            return True
        print(f"⚠ {label} key not found in secrets: {e}")
        return False
    print(f"✓ {label} API key loaded from secrets")
    return True


for _label, _secret_key, _env_var in API_KEY_SPECS:
    load_api_key(_label, _secret_key, _env_var)

In [None]:
# MAGIC %md
# MAGIC ## Step 3: Set Up Python Path
# MAGIC 
# MAGIC Update path to point to your repository.

import os
import sys

# Update this path to your actual repository location
# Option 1: If using Databricks Repos
repo_path = '/Workspace/Repos/your-username/stocks-ai/stocks'

# Option 2: If uploaded to workspace
# repo_path = '/Workspace/Users/your-email@company.com/stocks'

if not os.path.isdir(repo_path):
    # Do not add a missing directory: the untouched placeholder path would
    # otherwise be reported as configured and only fail later at import time.
    print(f"⚠ Repository path does not exist: {repo_path} (update repo_path above)")
elif repo_path not in sys.path:
    sys.path.append(repo_path)
    print(f"✓ Added {repo_path} to Python path")
else:
    # Re-running the cell must not append the same entry twice.
    print(f"✓ Path already configured: {repo_path}")

In [None]:
# MAGIC %md
# MAGIC ## Step 4: Test Basic Data Loading
# MAGIC 
# MAGIC Test Yahoo Finance before full implementation.

import yfinance as yf
from datetime import date

# Smoke-test the Yahoo Finance client against a single symbol.
print("Testing Yahoo Finance...")
ticker = yf.Ticker("AAPL")
info = ticker.info

# Report the basic profile fields, falling back to 'N/A' when one is missing.
for label, field in (("Company", "longName"), ("Sector", "sector"), ("Industry", "industry")):
    print(f"✓ {label}: {info.get(field, 'N/A')}")

# Fetch the last 5 days of price history and show the most recent close.
hist = ticker.history(period="5d")
if hist.empty:
    print("⚠ No price data available")
else:
    latest_close = hist['Close'].iloc[-1]
    print(f"✓ Latest price: ${latest_close:.2f}")
    print("✓ Data loaded successfully!")

In [None]:
# MAGIC %md
# MAGIC ## Step 5: Load Fortune 100 Companies
# MAGIC 
# MAGIC Load initial company master data.

from pyspark.sql import SparkSession
from pyspark.sql.types import (
    IntegerType,
    LongType,
    StringType,
    StructField,
    StructType,
    TimestampType,
)
from datetime import datetime

spark = SparkSession.builder.getOrCreate()

# Fortune 100 companies (top 20 as example)
fortune100_symbols = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "NVDA",
    "META", "TSLA", "BRK.B", "V", "JNJ",
    "WMT", "PG", "MA", "UNH", "HD",
    "DIS", "BAC", "ADBE", "NFLX", "CRM"
    # Add more as needed
]

# Explicit schema: `fortune_rank` is None on every row, so Spark cannot infer
# its type from the data and createDataFrame would raise without this.
COMPANY_SCHEMA = StructType([
    StructField("symbol", StringType(), nullable=False),
    StructField("company_name", StringType()),
    StructField("sector", StringType()),
    StructField("industry", StringType()),
    StructField("market_cap", LongType()),
    StructField("fortune_rank", IntegerType()),
    StructField("added_date", TimestampType()),
    StructField("updated_date", TimestampType()),
])

# One timestamp for the whole batch so added_date == updated_date on first load
# and every row of the same run carries the same value.
loaded_at = datetime.now()

# Load companies
companies = []
for symbol in fortune100_symbols:
    try:
        # Yahoo Finance writes share classes with a hyphen (BRK-B, not BRK.B).
        # Only the lookup symbol is translated; the stored symbol is unchanged.
        ticker = yf.Ticker(symbol.replace(".", "-"))
        info = ticker.info

        market_cap = info.get("marketCap")
        companies.append({
            "symbol": symbol,
            "company_name": info.get("longName", symbol),
            "sector": info.get("sector", "Unknown"),
            "industry": info.get("industry", "Unknown"),
            # LongType rejects floats, so normalise whatever numeric type comes back.
            "market_cap": int(market_cap) if market_cap is not None else None,
            "fortune_rank": None,
            "added_date": loaded_at,
            "updated_date": loaded_at
        })
        print(f"✓ Loaded {symbol}: {info.get('longName', symbol)}")
    except Exception as e:
        # Best-effort load: one failing symbol must not abort the batch.
        print(f"✗ Failed to load {symbol}: {e}")

if companies:
    df = spark.createDataFrame(companies, schema=COMPANY_SCHEMA)
    # NOTE: mode("overwrite") replaces the entire table on every run.
    df.write.format("delta").mode("overwrite").saveAsTable("stocks_ai.fortune100.companies")
    print(f"\n✓ Saved {len(companies)} companies to Delta table")
    df.show(truncate=False)
else:
    print("✗ No companies loaded")

In [None]:
# MAGIC %md
# MAGIC ## Step 6: Verify Setup
# MAGIC 
# MAGIC Check that everything is configured correctly.

# MAGIC %sql
# MAGIC -- Verify companies table
# MAGIC SELECT COUNT(*) as company_count FROM stocks_ai.fortune100.companies;

In [None]:
print("\n" + "="*80)
print("SETUP VERIFICATION")
print("="*80)

# Query the Delta table first so the real outcome feeds the checklist below,
# rather than a hard-coded True that would let a broken table pass verification.
try:
    count = spark.sql("SELECT COUNT(*) as cnt FROM stocks_ai.fortune100.companies").collect()[0]['cnt']
    table_error = None
except Exception as e:
    count = None
    table_error = e

checks = {
    "Python path configured": repo_path in sys.path,
    # bool(...) so an empty-string key does not count as available.
    "OpenAI key available": bool(os.getenv('OPENAI_API_KEY')),
    "Yahoo Finance works": len(companies) > 0,
    "Delta table accessible": table_error is None
}

for check, status in checks.items():
    status_icon = "✓" if status else "✗"
    print(f"{status_icon} {check}")

# Verify Delta table
if table_error is None:
    print(f"✓ Companies in table: {count}")
else:
    print(f"✗ Delta table error: {table_error}")

if all(checks.values()):
    print("\n✅ Phase 1 Setup Complete!")
    print("You can now proceed to Phase 2: Core Infrastructure")
else:
    print("\n⚠️  Some checks failed. Please review the setup steps above.")