# In [0]:
# Test Alpha Vantage API connection
import requests
import json

# Your API key
# Prefer the ALPHA_VANTAGE_API_KEY environment variable so the real key does
# not have to live in the notebook; the inline value is only a fallback.
# NOTE(review): a literal key committed to source control should be rotated.
import os

API_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY", "5DUCAA0WEXYCTWHG")  # Replace with your actual key

# Get Apple stock data
url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=AAPL&apikey={API_KEY}&outputsize=compact"

response = None
data = {}
failure = None  # human-readable reason when the request or the parsing fails

try:
    # requests has no default timeout; without one a stalled connection
    # would block this cell forever.
    response = requests.get(url, timeout=30)
except requests.exceptions.RequestException as exc:
    failure = f"Request failed: {exc}"
else:
    # Print the status before parsing so it is visible even when the body
    # turns out not to be JSON.
    print("API Response Status:", response.status_code)
    try:
        data = response.json()
    except ValueError:
        # Every JSON decode error raised by response.json() is a ValueError.
        data = {}
        failure = "Response body is not valid JSON"
    else:
        if not isinstance(data, dict):
            data = {}
            failure = "Unexpected response format (expected a JSON object)"

# Print results
series = data.get("Time Series (Daily)")

if series:
    print("API connection successful!")
    print(f"Retrieved data for {len(series)} days")

    # Show first date's data (first key in the order the API returned them)
    first_date = next(iter(series))
    print(f"\nSample data for {first_date}:")
    print(json.dumps(series[first_date], indent=2))
elif failure is not None:
    print("API error:", failure)
else:
    # The API reports problems under different top-level keys. "Information"
    # is checked as well because rate-limit notices appear to arrive under
    # that key -- NOTE(review): confirm against a live throttled response.
    message = next(
        (data[key] for key in ("Note", "Information", "Error Message") if key in data),
        "Unknown error",
    )
    print("API error:", message)
# ```

# **Set `API_KEY` in the code above to your actual Alpha Vantage API key**

# **Click:** Run button (▶️) or press **Shift+Enter**

# **You should see:**
# ```
# API Response Status: 200
# API connection successful!
# Retrieved data for 100 days

# Sample data for 2025-10-17:
# {
#   "1. open": "178.50",
#   "2. high": "180.20",
#   "3. low": "177.80",
#   "4. close": "179.45",
#   "5. volume": "52847392"
# }
# ```

# **If you see this: YOU'RE READY TO BUILD!** 🎉

# ---

# # 📊 YOUR DATABRICKS PROJECT (Updated Plan)

# ## **What You'll Build This Weekend:**

# ### **Same Portfolio Project, Different Platform:**

# **Project:** "Financial Market Analytics Platform on Databricks"

# **Architecture:**
# ```
# Alpha Vantage API
#        ↓
#   (Python HTTP request)
#        ↓
# Bronze Layer (Raw JSON - Delta Lake)
#        ↓
#   (PySpark transformations)
#        ↓
# Silver Layer (Cleansed - Delta Lake)
#        ↓
#   (Aggregations)
#        ↓
# Gold Layer (Analytics - Delta Lake)
#        ↓
#   (Optional: Export or visualize)
# ```

# **Technologies Shown:**
# - ✅ Databricks (notebooks, clusters)
# - ✅ Delta Lake (Bronze/Silver/Gold)
# - ✅ PySpark (transformations)
# - ✅ Python (API integration)
# - ✅ Medallion Architecture (industry standard)
# - ✅ Data Quality (validation, deduplication)

# ---

# ## **Weekend Project Plan (Adjusted for Databricks):**

# ### **Saturday (4 hours):**

# **Hour 1: Bronze Layer**
# - Create notebook: `02_Ingest_to_Bronze`
# - Pull data from Alpha Vantage API
# - Write to Delta Lake (Bronze)
# - Test with 5 stocks (AAPL, MSFT, GOOGL, AMZN, TSLA)

# **Hour 2: Silver Layer**
# - Create notebook: `03_Bronze_to_Silver`
# - Read from Bronze
# - Cleanse data (deduplicate, validate, type conversions)
# - Add derived columns (daily_change_pct, etc.)
# - Write to Delta Lake (Silver)

# **Hour 3: Gold Layer**
# - Create notebook: `04_Silver_to_Gold`
# - Create aggregations:
#   - Daily stock summary
#   - Top gainers/losers
#   - Volatility analysis
# - Write to Delta Lake (Gold)

# **Hour 4: Polish**
# - OPTIMIZE Delta tables (Z-ORDER)
# - Add comments to code
# - Test end-to-end
# - Take screenshots

# ---

# ### **Sunday (2-3 hours):**

# **Hour 1: Documentation**
# - Export notebooks from Databricks (File → Export → .ipynb)
# - Create GitHub repo
# - Write README.md with:
#   - Architecture diagram
#   - Setup instructions
#   - Screenshots
#   - Skills demonstrated

# **Hour 2: GitHub Publishing**
# - Upload notebooks to GitHub
# - Add architecture diagram (draw.io or PowerPoint)
# - Add sample outputs (screenshots of tables)
# - Make repo public

# **Hour 3: LinkedIn Post**
# - Write post about your project
# - Include GitHub link
# - Mention: Databricks, Delta Lake, PySpark, Medallion Architecture
# - Tag: #Databricks #DataEngineering #DeltaLake

# ---

# # 💪 WHY THIS IS ACTUALLY BETTER THAN FABRIC

# ## **Advantages of Databricks for Your Portfolio:**

# ### **1. You're More Familiar**
# - ✅ You used Databricks at IBM (50+ pipelines!)
# - ✅ Less learning curve = faster build
# - ✅ Can showcase advanced features (optimization, performance tuning)

# ### **2. More Relevant to Job Applications**
# - ✅ Agilus/Metergy: "Databricks Administration" required
# - ✅ Federal Gov: "Databricks" mentioned
# - ✅ Most Azure data engineering roles mention Databricks
# - ✅ **Your project directly matches job requirements!**

# ### **3. Better for Interviews**
# - ✅ "Tell me about a recent Databricks project" → You have one!
# - ✅ Can discuss: cluster optimization, Delta Lake, PySpark
# - ✅ Shows continuous learning (built project AFTER IBM role)

# ### **4. Demonstrates Key Skills**
# - ✅ Delta Lake (table format)
# - ✅ PySpark (data transformations)
# - ✅ Medallion Architecture (Bronze/Silver/Gold)
# - ✅ Performance optimization (OPTIMIZE, Z-ORDER)
# - ✅ **These are the SAME skills needed for Fabric!**

# ### **5. Portfolio Diversity**
# - ✅ IBM experience = large-scale Databricks
# - ✅ This project = end-to-end architecture
# - ✅ Shows you can build from scratch (not just maintain)

# ---

# # 🎯 YOUR UPDATED RESUME BULLET (Add This!)

# ## **When You Complete the Project, Add:**
# ```
# Personal Projects Section:

# Financial Market Analytics Platform (Databricks)                           2025
# github.com/joseveliz/databricks-market-analytics

# - Built end-to-end data analytics platform on Databricks using medallion architecture
#   (Bronze/Silver/Gold layers) with Delta Lake for ACID transactions
  
# - Ingested daily stock market data via REST APIs, processing 5 stocks with 100+ days
#   of historical data through automated PySpark transformations
  
# - Implemented data quality validation, deduplication, and type conversions in Silver layer,
#   reducing data errors by 100% through automated checks
  
# - Created analytics-ready Gold layer with pre-aggregated metrics (daily summaries, top
#   gainers/losers, volatility analysis) optimized with OPTIMIZE and Z-ORDER commands
  
# - Demonstrated end-to-end data engineering skills: API integration, Delta Lake, PySpark,
#   medallion architecture, performance optimization

# Technologies: Databricks, Delta Lake, PySpark, Python, REST APIs, Medallion Architecture