# Initial Setup and Configuration

This notebook sets up the Databricks environment and loads initial data.

In [None]:
# Databricks notebook source
# MAGIC %md
# MAGIC ## Step 1: Install Required Libraries
# MAGIC 
# MAGIC Install all required Python packages.

%pip install yfinance>=0.2.0
%pip install alpha-vantage>=2.3.0
%pip install fredapi>=0.5.0
%pip install openai>=1.0.0
%pip install anthropic>=0.18.0
%pip install pydantic>=2.0.0
%pip install python-dotenv>=1.0.0

In [None]:
# MAGIC %md
# MAGIC ## Step 2: Configure API Keys
# MAGIC 
# MAGIC Set up API keys from Databricks Secrets or environment variables.

import os

# Option 1: Use Databricks Secrets (Recommended)
SECRET_SCOPE = "stocks_ai_secrets"


def load_api_key(env_var, secret_key, label, get_secret=None):
    """Copy one API key from Databricks Secrets into ``os.environ``.

    The lookup is best-effort: any failure is reported with a printed warning
    instead of being raised, so the remaining keys are still attempted.

    Args:
        env_var: Name of the environment variable to populate.
        secret_key: Key to read from the ``SECRET_SCOPE`` secret scope.
        label: Provider name used in the printed status messages.
        get_secret: Optional callable accepting ``scope`` and ``key`` keyword
            arguments and returning the secret. Defaults to
            ``dbutils.secrets.get``.

    Returns:
        The secret value when it was loaded, otherwise ``None``.
    """
    try:
        if get_secret is None:
            # Resolved lazily so a missing `dbutils` is handled like any other
            # lookup failure.
            get_secret = dbutils.secrets.get
        value = get_secret(scope=SECRET_SCOPE, key=secret_key)
        os.environ[env_var] = value
    except Exception as e:
        if os.environ.get(env_var):
            # Option 2: the key was already provided through the environment.
            print(f"✓ {label} API key found in environment (secrets lookup failed: {e})")
        else:
            print(f"⚠ {label} key not found in secrets: {e}")
        return None
    print(f"✓ {label} API key loaded from secrets")
    return value


openai_key = load_api_key('OPENAI_API_KEY', 'openai_api_key', 'OpenAI')
alpha_vantage_key = load_api_key('ALPHA_VANTAGE_API_KEY', 'alpha_vantage_api_key', 'Alpha Vantage')
fred_key = load_api_key('FRED_API_KEY', 'fred_api_key', 'FRED')

# Option 2: Use environment variables (if set on cluster)
# Keys should already be in os.environ if set on cluster

In [None]:
# MAGIC %md
# MAGIC ## Step 3: Set Up Python Path
# MAGIC 
# MAGIC Automatically detect repository path or set manually.

import sys
import os

# Try to auto-detect repository path
DEFAULT_REPO_NAME = 'stocks-ai-system'


def candidate_repo_paths(notebook_path, default_repo=DEFAULT_REPO_NAME):
    """Return repository roots to probe for a notebook stored under /Repos/.

    Args:
        notebook_path: Notebook path as reported by the notebook context.
        default_repo: Repository folder name to try as a fallback.

    Returns:
        A list of ``/Workspace/Repos/<owner>/<repo>`` paths, most specific
        first. Empty when ``notebook_path`` contains no ``/Repos/`` segment.
    """
    if '/Repos/' not in notebook_path:
        return []
    components = [c for c in notebook_path.split('/Repos/', 1)[1].split('/') if c]
    if not components:
        return []
    owner = components[0]
    candidates = []
    # Only treat the second component as the repo folder when something
    # follows it; otherwise it is the notebook itself, not a directory.
    if len(components) >= 3:
        candidates.append(f'/Workspace/Repos/{owner}/{components[1]}')
    fallback = f'/Workspace/Repos/{owner}/{default_repo}'
    if fallback not in candidates:
        candidates.append(fallback)
    return candidates


repo_path = None

# Option 1: Check if running in Databricks Repos
try:
    # Get current notebook path
    notebook_path = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()
    repo_path = next(
        (path for path in candidate_repo_paths(notebook_path) if os.path.exists(path)),
        None,
    )
except Exception as e:
    # Best-effort: fall through to the other options, but say why this one
    # was skipped instead of hiding the error.
    print(f"ℹ Notebook-path auto-detection unavailable: {e}")

# Option 2: Check common workspace locations
if not repo_path:
    possible_paths = [
        '/Workspace/Repos/' + DEFAULT_REPO_NAME,
        '/Workspace/Users/' + os.getenv('USER', 'user') + '/' + DEFAULT_REPO_NAME,
    ]
    for path in possible_paths:
        if os.path.exists(path):
            repo_path = path
            break

# Option 3: Manual override (uncomment and set if auto-detection fails)
# repo_path = '/Workspace/Repos/your-username/stocks-ai-system'

if repo_path and repo_path not in sys.path:
    sys.path.insert(0, repo_path)
    print(f"✓ Added {repo_path} to Python path")
elif repo_path:
    print(f"✓ Path already configured: {repo_path}")
else:
    print("⚠ Could not auto-detect repository path. Please set repo_path manually.")
    print("Current working directory:", os.getcwd())
    print("Python path:", sys.path[:3])

In [None]:
# MAGIC %md
# MAGIC ## Step 4: Test Basic Data Loading
# MAGIC 
# MAGIC Test Yahoo Finance before full implementation.

import yfinance as yf
from datetime import date

# Smoke-test Yahoo Finance with a single well-known ticker
print("Testing Yahoo Finance...")
ticker = yf.Ticker("AAPL")
info = ticker.info
for label, field in (("Company", "longName"), ("Sector", "sector"), ("Industry", "industry")):
    print(f"✓ {label}: {info.get(field, 'N/A')}")

# Fetch the last five days of prices and report the most recent close
hist = ticker.history(period="5d")
if hist.empty:
    print("⚠ No price data available")
else:
    latest_close = hist['Close'].iloc[-1]
    print(f"✓ Latest price: ${latest_close:.2f}")
    print("✓ Data loaded successfully!")

In [None]:
# MAGIC %md
# MAGIC ## Step 5: Load Fortune 100 Companies
# MAGIC 
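# MAGIC Load initial company master data. Requires the `stocks_ai.fortune100` schema created by `setup/init.sql`, since this step writes the `stocks_ai.fortune100.companies` Delta table.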

from pyspark.sql import SparkSession
from datetime import datetime

from pyspark.sql.types import (
    IntegerType,
    LongType,
    StringType,
    StructField,
    StructType,
    TimestampType,
)

spark = SparkSession.builder.getOrCreate()

COMPANIES_TABLE = "stocks_ai.fortune100.companies"

# Fortune 100 companies (top 20 as example)
fortune100_symbols = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "NVDA",
    "META", "TSLA", "BRK.B", "V", "JNJ",
    "WMT", "PG", "MA", "UNH", "HD",
    "DIS", "BAC", "ADBE", "NFLX", "CRM"
    # Add more as needed
]

# Explicit schema: `fortune_rank` is None for every row, so Spark cannot infer
# a type for it from the data alone.
# NOTE(review): confirm these types match the table definition in setup/init.sql.
companies_schema = StructType([
    StructField("symbol", StringType(), False),
    StructField("company_name", StringType(), True),
    StructField("sector", StringType(), True),
    StructField("industry", StringType(), True),
    StructField("market_cap", LongType(), True),
    StructField("fortune_rank", IntegerType(), True),
    StructField("added_date", TimestampType(), True),
    StructField("updated_date", TimestampType(), True),
])


def to_yahoo_symbol(symbol):
    """Translate a share-class symbol such as ``BRK.B`` to Yahoo's ``BRK-B`` form."""
    return symbol.replace(".", "-")


# Load companies; a failure for one symbol is reported and does not stop the rest
companies = []
for symbol in fortune100_symbols:
    try:
        ticker = yf.Ticker(to_yahoo_symbol(symbol))
        info = ticker.info or {}
        market_cap = info.get("marketCap")
        # One timestamp per row so added_date and updated_date agree exactly.
        loaded_at = datetime.now()

        companies.append({
            "symbol": symbol,
            "company_name": info.get("longName", symbol),
            "sector": info.get("sector", "Unknown"),
            "industry": info.get("industry", "Unknown"),
            "market_cap": int(market_cap) if market_cap is not None else None,
            "fortune_rank": None,
            "added_date": loaded_at,
            "updated_date": loaded_at
        })
        print(f"✓ Loaded {symbol}: {info.get('longName', symbol)}")
    except Exception as e:
        print(f"✗ Failed to load {symbol}: {e}")

if companies:
    df = spark.createDataFrame(companies, schema=companies_schema)
    df.write.format("delta").mode("overwrite").saveAsTable(COMPANIES_TABLE)
    print(f"\n✓ Saved {len(companies)} companies to Delta table")
    df.show(truncate=False)
else:
    print("✗ No companies loaded")

In [None]:
# MAGIC %md
# MAGIC ## Step 6: Verify SQL Setup
# MAGIC 
# MAGIC First, verify that the Unity Catalog schemas were created by running `setup/init.sql` in the SQL Editor.
# MAGIC Then check that the companies table exists.

# NOTE(review): all three %sql magics below share one cell in this export; each
# presumably needs its own cell to execute — confirm that cell separators were
# not lost during conversion.
# MAGIC %sql
# MAGIC -- Verify catalog exists
# MAGIC SHOW CATALOGS LIKE 'stocks_ai';

# MAGIC %sql
# MAGIC -- Verify schemas exist
# MAGIC SHOW SCHEMAS IN stocks_ai;

# MAGIC %sql
# MAGIC -- Verify companies table exists and check count
# MAGIC -- (the table is written by the Step 5 cell via saveAsTable)
# MAGIC SELECT COUNT(*) as company_count FROM stocks_ai.fortune100.companies;

In [None]:
# MAGIC %md
# MAGIC ## Step 7: Set Up MLflow Experiment
# MAGIC 
# MAGIC Create the MLflow experiment for tracking predictions and models, then run the final setup verification checks.

import mlflow

# Create or set MLflow experiment
experiment_path = "/Shared/stocks_ai/experiments"
mlflow_experiment_ok = False
try:
    mlflow.set_experiment(experiment_path)
    mlflow_experiment_ok = True
    print(f"✓ MLflow experiment set: {experiment_path}")
except Exception as e:
    print(f"⚠ Could not set MLflow experiment: {e}")
    print("You may need to create it manually or check permissions")

# Values produced by earlier cells; looked up defensively so this cell still
# reports a failed check (rather than raising) when an earlier cell did not run.
loaded_companies = globals().get('companies') or []
detected_repo_path = globals().get('repo_path')

# Test logging
mlflow_logging_ok = False
try:
    with mlflow.start_run(run_name="phase1_setup_test"):
        mlflow.log_param("phase", "phase1_setup")
        mlflow.log_param("test", "setup_verification")
        mlflow.log_metric("companies_loaded", len(loaded_companies))
    mlflow_logging_ok = True
    print("✓ MLflow logging test successful")
except Exception as e:
    print(f"⚠ MLflow logging test failed: {e}")

print("\n" + "="*80)
print("SETUP VERIFICATION")
print("="*80)

checks = {
    "Python path configured": bool(detected_repo_path) and detected_repo_path in sys.path,
    "OpenAI key available": os.getenv('OPENAI_API_KEY') is not None,
    "Yahoo Finance works": len(loaded_companies) > 0,
    "Delta table accessible": False,
    # Reflects the actual outcome of the two MLflow steps above instead of
    # unconditionally reporting success.
    "MLflow configured": mlflow_experiment_ok and mlflow_logging_ok
}

# Verify Delta table
try:
    count = spark.sql("SELECT COUNT(*) as cnt FROM stocks_ai.fortune100.companies").collect()[0]['cnt']
    checks["Delta table accessible"] = True
    checks["Companies loaded"] = count > 0
    print(f"✓ Companies in table: {count}")
except Exception as e:
    print(f"✗ Delta table error: {e}")
    print("  Make sure you ran setup/init.sql in SQL Editor first!")

for check, status in checks.items():
    status_icon = "✓" if status else "✗"
    print(f"{status_icon} {check}")

# Final status: only these three checks gate completion of Phase 1
critical_checks = (
    "Python path configured",
    "Yahoo Finance works",
    "Delta table accessible",
)
all_critical = all(checks.get(name, False) for name in critical_checks)

if all_critical:
    print("\n✅ Phase 1 Setup Complete!")
    print("You can now proceed to Phase 2: Core Infrastructure")
else:
    print("\n⚠️  Some critical checks failed. Please review:")
    print("  1. Make sure you ran setup/init.sql in SQL Editor")
    print("  2. Verify Python path is set correctly")
    print("  3. Check that companies were loaded successfully")