# PV Production Analysis

This notebook analyzes photovoltaic (PV) production patterns and their correlations with weather conditions.

## Analysis Goals
- Identify daily and seasonal PV production patterns
- Correlate PV output with weather parameters (irradiance, cloud cover, temperature)
- Analyze system efficiency and performance degradation
- Forecast production based on weather predictions

## 1. Setup and Imports

In [None]:
import os
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
import seaborn as sns
from influxdb_client import InfluxDBClient
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Plot appearance: seaborn-flavoured matplotlib style with the 'husl' colour palette
sns.set_palette('husl') if False else plt.style.use('seaborn-v0_8')
sns.set_palette('husl')

# pandas display limits: show every column, cap printed rows at 100
for _option, _limit in (('display.max_columns', None), ('display.max_rows', 100)):
    pd.set_option(_option, _limit)

## 2. Database Connection

In [None]:
# InfluxDB connection parameters.
# Values are read from environment variables so that real credentials do not
# have to be written into (and committed with) the notebook. The fallbacks are
# the previous hard-coded values, so an unconfigured environment behaves as before.
INFLUX_URL = os.environ.get("INFLUX_URL", "http://localhost:8086")
INFLUX_TOKEN = os.environ.get("INFLUX_TOKEN", "your-token-here")
INFLUX_ORG = os.environ.get("INFLUX_ORG", "loxone")
INFLUX_BUCKET = os.environ.get("INFLUX_BUCKET", "loxone")

# Initialize InfluxDB client; `query_api` is what the loader functions use
client = InfluxDBClient(url=INFLUX_URL, token=INFLUX_TOKEN, org=INFLUX_ORG)
query_api = client.query_api()

## 3. Data Loading Functions

In [None]:
def _as_single_frame(result):
    """Collapse the output of ``query_api.query_data_frame`` into one DataFrame.

    The InfluxDB client returns a list of DataFrames instead of a single one
    when the result tables do not share a schema. The rest of the notebook
    indexes the result like a single DataFrame, so a list is concatenated
    row-wise (an empty list becomes an empty DataFrame). A DataFrame is
    returned unchanged.
    """
    if isinstance(result, list):
        if not result:
            return pd.DataFrame()
        return pd.concat(result, ignore_index=True, sort=False)
    return result


def load_pv_data(start_date, end_date):
    """Load PV production data from InfluxDB.

    Queries bucket ``INFLUX_BUCKET`` for the "power" measurement, keeps fields
    whose name matches ``pv_production`` or ``solar_power``, and averages them
    over 5-minute windows (empty windows are not emitted).

    Args:
        start_date: Start of the range; inserted verbatim into Flux ``range()``.
            The notebook passes ISO-8601 timestamp strings.
        end_date: End of the range, same format as ``start_date``.

    Returns:
        A single pandas DataFrame with the query result.
    """
    query = f'''
    from(bucket: "{INFLUX_BUCKET}")
        |> range(start: {start_date}, stop: {end_date})
        |> filter(fn: (r) => r["_measurement"] == "power")
        |> filter(fn: (r) => r["_field"] =~ /pv_production|solar_power/)
        |> aggregateWindow(every: 5m, fn: mean, createEmpty: false)
        |> yield(name: "pv_data")
    '''
    # Execute query and normalise the client's DataFrame-or-list return value
    return _as_single_frame(query_api.query_data_frame(query))

def load_weather_data(start_date, end_date):
    """Load weather data from InfluxDB.

    Queries the fixed bucket "weather_forecast" for the "weather" measurement,
    keeps fields whose name matches ``solar_radiation``, ``cloud_cover`` or
    ``temperature``, and averages them over 5-minute windows (empty windows
    are not emitted).

    Args:
        start_date: Start of the range; inserted verbatim into Flux ``range()``.
            The notebook passes ISO-8601 timestamp strings.
        end_date: End of the range, same format as ``start_date``.

    Returns:
        A single pandas DataFrame with the query result.
    """
    query = f'''
    from(bucket: "weather_forecast")
        |> range(start: {start_date}, stop: {end_date})
        |> filter(fn: (r) => r["_measurement"] == "weather")
        |> filter(fn: (r) => r["_field"] =~ /solar_radiation|cloud_cover|temperature/)
        |> aggregateWindow(every: 5m, fn: mean, createEmpty: false)
        |> yield(name: "weather_data")
    '''
    # Execute query and normalise the client's DataFrame-or-list return value
    return _as_single_frame(query_api.query_data_frame(query))

## 4. Load and Prepare Data

In [None]:
# Analysis window: the 30 days ending at the current UTC time
end_date = datetime.now(pytz.UTC)
start_date = end_date - timedelta(days=30)

print(f"Loading data from {start_date} to {end_date}")

# Both loaders receive the same window, serialised as ISO-8601 strings
query_window = (start_date.isoformat(), end_date.isoformat())
pv_data = load_pv_data(*query_window)
weather_data = load_weather_data(*query_window)

# Quick size check of what came back
print(f"PV data shape: {pv_data.shape}")
print(f"Weather data shape: {weather_data.shape}")

## 5. Daily Production Pattern Analysis

In [None]:
# Daily production profile: mean output for each hour of the day.
# Parse the timestamps once and derive both calendar columns from them.
# NOTE(review): hours follow whatever timezone `_time` carries (the query window
# is built in UTC) - confirm whether local time is wanted for this plot.
pv_timestamps = pd.to_datetime(pv_data['_time'])
pv_data['hour'] = pv_timestamps.dt.hour
pv_data['date'] = pv_timestamps.dt.date  # reused by the daily aggregation further down

# Mean of all samples falling into each hour
hourly_avg = pv_data.groupby('hour')['_value'].mean()

# Line plus shaded area under the curve
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(hourly_avg.index, hourly_avg.values, 'b-', linewidth=2)
ax.fill_between(hourly_avg.index, hourly_avg.values, alpha=0.3)
ax.set_xlabel('Hour of Day')
ax.set_ylabel('Average PV Production (W)')
ax.set_title('Average Daily PV Production Pattern')
ax.grid(True, alpha=0.3)
ax.set_xlim(0, 23)
plt.show()

## 6. Weather Correlation Analysis

In [None]:
# Merge PV and weather data.
# The Flux queries return one row per timestamp and field (field name in
# `_field`, reading in `_value`). Merging those long-format frames directly
# only produces `_value_x` / `_value_y`, so each frame is first pivoted to one
# column per field and then aligned on the nearest timestamp.
# NOTE(review): assumes both frames carry `_time`, `_field` and `_value`
# columns - confirm against the actual query output.
def _pivot_fields(frame):
    """Return `frame` with one column per `_field` value and a sorted `_time` column."""
    wide = frame.assign(_time=pd.to_datetime(frame['_time'])).pivot_table(
        index='_time', columns='_field', values='_value', aggfunc='mean'
    )
    wide.columns.name = None
    return wide.sort_index().reset_index()


merged_data = pd.merge_asof(
    _pivot_fields(pv_data),
    _pivot_fields(weather_data),
    on='_time',
    direction='nearest',
    tolerance=pd.Timedelta('5min')
)

# Correlation matrix over whichever of the expected columns are present;
# the field names come from the database, so not all of them may exist.
correlation_cols = ['pv_production', 'solar_radiation', 'cloud_cover', 'temperature']
available_cols = [col for col in correlation_cols if col in merged_data.columns]

if len(available_cols) < 2:
    # Nothing meaningful to correlate - say so instead of drawing an empty figure
    print(
        f"Skipping correlation heatmap: need at least two of {correlation_cols}, "
        f"found columns {list(merged_data.columns)}"
    )
else:
    plt.figure(figsize=(10, 8))
    sns.heatmap(merged_data[available_cols].corr(), annot=True, cmap='coolwarm', center=0)
    plt.title('Correlation between PV Production and Weather Parameters')
    plt.tight_layout()
    plt.show()

## 7. Production Efficiency Analysis

In [None]:
# Daily peak power and daily energy yield.
# Relies on the `date` column added in the daily-pattern cell.

# Each sample is a 5-minute mean (see `aggregateWindow(every: 5m, ...)` in the loaders),
# so one sample in W contributes value * 5/60 Wh.
SAMPLE_INTERVAL_HOURS = 5 / 60
# The plots and the summary label `_value` as W, so W*h must be divided by 1000 to get kWh.
# NOTE(review): if the database actually stores kW, drop this factor and fix the "(W)" labels.
WH_PER_KWH = 1000

daily_peak = pv_data.groupby('date')['_value'].max()
daily_total = pv_data.groupby('date')['_value'].sum() * SAMPLE_INTERVAL_HOURS / WH_PER_KWH  # kWh

# Plot daily production trends
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

# Daily peak production
ax1.plot(daily_peak.index, daily_peak.values, 'g-', label='Daily Peak')
ax1.set_ylabel('Peak Production (W)')
ax1.set_title('Daily Peak PV Production')
ax1.grid(True, alpha=0.3)
ax1.legend()

# Daily total production
ax2.bar(daily_total.index, daily_total.values, color='orange', alpha=0.7)
ax2.set_ylabel('Total Production (kWh)')
ax2.set_xlabel('Date')
ax2.set_title('Daily Total PV Production')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Production Forecasting Model

In [None]:
# Simple linear regression model: PV production as a function of weather parameters.
# The model only runs when `merged_data` has one column per parameter; otherwise
# the cell reports what is missing instead of drawing an empty figure.
FEATURE_COLS = ['solar_radiation', 'temperature', 'cloud_cover']
TARGET_COL = 'pv_production'

model_cols = FEATURE_COLS + [TARGET_COL]
missing_cols = [col for col in model_cols if col not in merged_data.columns]

if missing_cols:
    model_data = pd.DataFrame(columns=model_cols)
else:
    # LinearRegression cannot handle NaN, so keep only complete rows
    model_data = merged_data[model_cols].dropna()

if model_data.empty:
    print(
        "Skipping forecasting model: no usable rows "
        f"(missing columns: {missing_cols or 'none'})"
    )
else:
    X = model_data[FEATURE_COLS].values
    y = model_data[TARGET_COL].values

    model = LinearRegression()
    model.fit(X, y)
    # Predictions on the training data: this is an in-sample fit, not a forecast test
    predictions = model.predict(X)
    print(f"In-sample R^2: {r2_score(y, predictions):.3f}")

    # Plot actual vs predicted; the dashed diagonal marks a perfect prediction
    plt.figure(figsize=(10, 8))
    plt.scatter(y, predictions, alpha=0.5)
    plt.plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
    plt.xlabel('Actual Production (W)')
    plt.ylabel('Predicted Production (W)')
    plt.title('PV Production: Actual vs Predicted')
    plt.grid(True, alpha=0.3)
    plt.show()

## 9. Seasonal Analysis

In [None]:
# Analyze seasonal variations: distribution of PV output per month.
# Parse the timestamps once and derive month number and month name from them.
pv_timestamps = pd.to_datetime(pv_data['_time'])
pv_data['month'] = pv_timestamps.dt.month
pv_data['month_name'] = pv_timestamps.dt.strftime('%B')

# Month names sorted by month number, so the x-axis is not in order of first appearance
month_order = (
    pv_data[['month', 'month_name']]
    .dropna()
    .drop_duplicates()
    .sort_values('month')['month_name']
    .tolist()
)

# Monthly production boxplot
plt.figure(figsize=(14, 8))
sns.boxplot(data=pv_data, x='month_name', y='_value', order=month_order)
plt.xlabel('Month')
plt.ylabel('PV Production (W)')
plt.title('Monthly PV Production Distribution')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 10. Key Findings and Recommendations

### Summary Statistics

In [None]:
# Summary of the daily energy totals and the hourly profile computed above.
# The report is assembled line by line and emitted with a single print.
summary_lines = [
    "PV Production Summary:",
    f"Average daily production: {daily_total.mean():.2f} kWh",
    f"Peak daily production: {daily_total.max():.2f} kWh",
    f"Minimum daily production: {daily_total.min():.2f} kWh",
    f"Standard deviation: {daily_total.std():.2f} kWh",
    # Leading newline separates the energy figures from the power figures
    f"\nPeak power recorded: {pv_data['_value'].max():.0f} W",
    # Hour whose average production is highest
    f"Average peak hour: {hourly_avg.idxmax()}:00",
]
print("\n".join(summary_lines))

### Insights

The items below are a checklist to fill in with concrete findings after running the notebook:

1. **Daily Pattern**: Document peak production hours and typical daily curve
2. **Weather Impact**: Quantify the correlation between weather parameters and production
3. **Seasonal Variation**: Identify months with highest/lowest production
4. **System Efficiency**: Track any degradation or performance issues
5. **Forecasting Accuracy**: Evaluate the prediction model performance

### Recommendations

- Optimize battery charging schedules based on production patterns
- Plan maintenance during low production periods
- Adjust consumption patterns to match production peaks
- Consider weather forecasts for energy management decisions