# Solar Analytics - Data Exploration

This notebook explores the data collected from NREL, OpenWeather, and Tomorrow.io APIs.

In [None]:
import os
from urllib.parse import quote

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load environment variables
load_dotenv()

# Read the connection settings up front so that a missing variable fails here
# with a clear message instead of being formatted into the URL as the text "None".
db_settings = {name: os.getenv(name) for name in ('DB_USER', 'DB_PASSWORD', 'DB_HOST')}
missing_settings = [name for name, value in db_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(
        "Missing database environment variable(s): " + ", ".join(missing_settings)
    )

# Create database connection
# The user name and password are percent-encoded because characters such as
# '@', ':' or '/' would otherwise be parsed as URL delimiters. DB_HOST is
# inserted unchanged so a "host:port" value keeps working.
db_url = (
    f"postgresql://{quote(db_settings['DB_USER'], safe='')}"
    f":{quote(db_settings['DB_PASSWORD'], safe='')}"
    f"@{db_settings['DB_HOST']}/solar_analytics"
)
engine = create_engine(db_url)

# create_engine() does not open a connection by itself, so check one out
# (and release it immediately) before reporting success.
with engine.connect():
    pass

print("✅ Connected to database")

## 1. Load Data from Database

In [None]:
def _read_and_report(sql, label):
    """Run ``sql`` through ``pd.read_sql`` on ``engine`` and print the row count.

    Args:
        sql: SQL text passed to ``pd.read_sql``.
        label: Source name used as the prefix of the printed summary line.

    Returns:
        The DataFrame returned by ``pd.read_sql``.
    """
    frame = pd.read_sql(sql, engine)
    print(f"{label} data: {len(frame)} records")
    return frame


# NREL solar data: only rows that have a timestamp, in ascending timestamp order
nrel_query = (
    "\n"
    "SELECT * FROM api_ingest.nrel_pvdaq \n"
    "WHERE timestamp IS NOT NULL\n"
    "ORDER BY timestamp\n"
)
df_nrel = _read_and_report(nrel_query, "NREL")

# Weather data, in ascending timestamp order
weather_query = "SELECT * FROM api_ingest.weather_test ORDER BY timestamp"
df_weather = _read_and_report(weather_query, "Weather")

# Tomorrow.io forecasts, ordered by the time each forecast is valid for
tomorrow_query = "SELECT * FROM api_ingest.tomorrow_weather ORDER BY valid_time"
df_tomorrow = _read_and_report(tomorrow_query, "Tomorrow.io")

## 2. NREL Solar Resource Analysis

In [None]:
# Split the NREL frame by site_id into the monthly rows and the hourly
# PVWatts simulation rows; copies are taken so columns can be added safely.
df_monthly = df_nrel.loc[df_nrel['site_id'] == 'NREL_MONTHLY'].copy()
df_hourly = df_nrel.loc[df_nrel['site_id'] == 'PVWATTS_SIM'].copy()

# Draw GHI and DNI side by side, one bar per monthly row
if not df_monthly.empty:
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Month names become the tick labels of both panels
    df_monthly['month'] = pd.to_datetime(df_monthly['timestamp']).dt.month_name()
    bar_positions = range(len(df_monthly))

    # (column, extra bar styling, y-axis label, title) for each panel, left to right
    panels = (
        ('ghi', {}, 'GHI (kWh/m²/day)',
         'Monthly Average Global Horizontal Irradiance'),
        ('dni', {'color': 'orange'}, 'DNI (kWh/m²/day)',
         'Monthly Average Direct Normal Irradiance'),
    )
    for ax, (column, bar_style, y_label, title) in zip(axes, panels):
        ax.bar(bar_positions, df_monthly[column], **bar_style)
        ax.set_xticks(bar_positions)
        ax.set_xticklabels(df_monthly['month'], rotation=45)
        ax.set_ylabel(y_label)
        ax.set_title(title)

    plt.tight_layout()
    plt.show()

## 3. PV System Output Analysis

In [None]:
# Analyze hourly PV output
if len(df_hourly) > 0:
    df_hourly['hour'] = pd.to_datetime(df_hourly['timestamp']).dt.hour

    # Mean AC power for each hour of the day. Plotting the raw rows against
    # hour-of-day as a single line makes the line jump back from 23 to 0 at every
    # day boundary when the data covers more than one day; the per-hour mean gives
    # one point per hour (and is the same curve when there is one row per hour).
    hourly_profile = df_hourly.groupby('hour')['ac_power'].mean()

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

    # AC power output by hour
    ax1.plot(hourly_profile.index, hourly_profile.values, 'b-', linewidth=2)
    ax1.set_xlabel('Hour of Day')
    ax1.set_ylabel('AC Power (W)')
    ax1.set_title('Simulated PV System Output (4kW System)')
    ax1.grid(True, alpha=0.3)

    # POA irradiance vs AC power (every row, not the hourly mean)
    ax2.scatter(df_hourly['poa_irradiance'], df_hourly['ac_power'], alpha=0.6)
    ax2.set_xlabel('Plane of Array Irradiance (W/m²)')
    ax2.set_ylabel('AC Power (W)')
    ax2.set_title('Power Output vs Irradiance')
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Calculate capacity factor: mean AC power over all rows divided by the
    # system rating.
    system_capacity = 4000  # 4kW system; assumes ac_power is in watts, as the axis labels state
    avg_output = df_hourly['ac_power'].mean()
    capacity_factor = avg_output / system_capacity
    print(f"\nAverage capacity factor: {capacity_factor:.1%}")

## 4. Weather Forecast Analysis

In [None]:
# Analyze Tomorrow.io solar forecasts
if len(df_tomorrow) > 0:
    # Parse both time columns once and reuse the parsed values everywhere below.
    # Using the parsed series on the x-axis keeps the plots on a real time axis
    # even if the columns arrive from the database as text.
    valid_times = pd.to_datetime(df_tomorrow['valid_time'])
    issued_times = pd.to_datetime(df_tomorrow['forecast_time'])

    df_tomorrow['hour'] = valid_times.dt.hour
    # Hours between forecast_time and valid_time
    df_tomorrow['forecast_age'] = (valid_times - issued_times).dt.total_seconds() / 3600

    # Plot solar GHI forecast
    plt.figure(figsize=(12, 6))
    plt.plot(valid_times, df_tomorrow['solar_ghi'], 'g-', linewidth=2)
    plt.xlabel('Time')
    plt.ylabel('Solar GHI (W/m²)')
    plt.title('48-Hour Solar Irradiance Forecast')
    plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Temperature and cloud cover, stacked on a shared time axis
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)

    ax1.plot(valid_times, df_tomorrow['temperature'], 'r-', linewidth=2)
    ax1.set_ylabel('Temperature (°C)')
    ax1.set_title('Temperature Forecast')
    ax1.grid(True, alpha=0.3)

    # Shade the area between zero and the cloud-cover value
    ax2.fill_between(valid_times, 0, df_tomorrow['cloud_cover'], alpha=0.5)
    ax2.set_ylabel('Cloud Cover (%)')
    ax2.set_xlabel('Time')
    ax2.set_title('Cloud Cover Forecast')
    ax2.grid(True, alpha=0.3)

    # Applies to the current axes, i.e. the bottom panel created last
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()