# Phase 3

This phase is to create a dataset that shows the correlation of the adoption rate of solar panels and the energy price trend by canton on a yearly basis from 2021 to 2024.
I will be using the multiple linear regression model to interpret the relationship between the adoption rate of solar panels and the energy price trend by canton on a yearly basis from 2021 to 2024.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import plotly.express as px
import plotly.graph_objects as go

In [3]:
# Load the datasets
energy_prices_df = pd.read_csv('../data/processed/canton_energy_prices_2021_2024.csv')
solar_adoption_df = pd.read_csv('../data/processed/canton_solar_adoption_2021_2024.csv')

# Merge datasets
combined_df = pd.merge(
    energy_prices_df, 
    solar_adoption_df, 
    on=['Canton', 'Year']
)

# Calculate adoption rate (year-over-year growth)
combined_df['adoption_rate'] = combined_df.groupby('Canton')['solar_power_installed_kwp'].pct_change() * 100

# Calculate price changes
combined_df['price_change'] = combined_df.groupby('Canton')['total_price'].pct_change() * 100
combined_df['price_before_surcharge_change'] = combined_df.groupby('Canton')['price_before_surcharge'].pct_change() * 100

# Drop rows with NaN values (first year for each canton)
combined_df = combined_df.dropna()

# Save the combined dataset
combined_df.to_csv('../data/processed/canton_combined_2021_2024.csv', index=False)

In [5]:
# Calculate correlation matrix
correlation_matrix = combined_df[[
    # 'total_price',
    # 'price_before_surcharge',
    # 'renewable_surcharge',
    # 'solar_power_installed_kwp',
    'price_change',
    'adoption_rate'
]].corr()

# Create correlation heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
plt.title('Correlation Matrix of Key Variables')
plt.tight_layout()
plt.savefig('../visualisations/correlation_heatmap.png')
plt.close()

In [7]:
# Create additional features
combined_df['surcharge_impact'] = (combined_df['total_price'] - 
                                 combined_df['price_before_surcharge']) / combined_df['price_before_surcharge']

# Normalize installations by previous year's base
combined_df['relative_growth'] = combined_df['adoption_rate'] / combined_df.groupby('Canton')['solar_power_installed_kwp'].shift(1)

# Create categorical features
combined_df['price_category'] = pd.qcut(combined_df['total_price'], q=3, labels=['Low', 'Medium', 'High'])

This sets up our foundation for analysis. Would you like me to proceed with the modeling steps next? We'll use these prepared features to:
- Test H1 (higher prices → more adoption)
- Test H2 (lower renewable tariffs → higher installation)
- Analyze canton-specific patterns

In [11]:
# Create correlation analysis function
def analyze_correlations(df):
    # Select relevant features
    features = [
        'total_price', 'price_before_surcharge', 'renewable_surcharge',
        'solar_power_installed_kwp', 'adoption_rate', 'price_change'
    ]
    
    # Calculate correlation matrix
    corr_matrix = df[features].corr()
    
    # Create heatmap
    plt.figure(figsize=(12, 8))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0)
    plt.title('Correlation Matrix: Energy Prices vs Solar Adoption')
    plt.tight_layout()
    plt.savefig('../visualisations/correlation_matrix.png')
    plt.close()
    
    return corr_matrix

# Create scatter plot function
def plot_price_adoption_relationship(df):
    fig = px.scatter(df, 
                    x='total_price', 
                    y='adoption_rate',
                    color='Canton',
                    size='solar_power_installed_kwp',
                    title='Energy Price vs Solar Adoption Rate by Canton')
    
    fig.update_layout(
        xaxis_title="Total Energy Price (CHF/kWh)",
        yaxis_title="Solar Adoption Rate (%)",
        width=1200,
        height=800
    )
    
    fig.write_html('../visualisations/price_adoption_scatter.html')

In [8]:
def build_regression_model(df):
    # Prepare features and target
    X = df[['price_change']]
    y = df['adoption_rate']
    
    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Initialize model
    model = LinearRegression()
    
    # Simple train-test split instead of LOOCV
    model.fit(X_scaled, y)
    y_pred = model.predict(X_scaled)
    
    # Calculate R-squared
    r2 = r2_score(y, y_pred)
    
    # Get feature importance
    feature_importance = pd.DataFrame({
        'Feature': ['Price Change'],
        'Coefficient': model.coef_
    })
    
    return model, r2, feature_importance, scaler

# Analyze canton patterns
def analyze_canton_patterns(df, model, scaler):
    canton_results = []
    
    for canton in df['Canton'].unique():
        canton_data = df[df['Canton'] == canton]
        
        # Calculate mean values and trends
        result = {
            'Canton': canton,
            # 'Mean_Price': canton_data['total_price'].mean(),
            'Price_Trend': canton_data['price_change'].mean(),
            'Adoption_Rate': canton_data['adoption_rate'].mean(),
            # 'Total_Installation': canton_data['solar_power_installed_kwp'].max()
        }
        canton_results.append(result)
    
    return pd.DataFrame(canton_results)


model, r2_score, feature_importance, scaler = build_regression_model(combined_df)
canton_patterns = analyze_canton_patterns(combined_df, model, scaler)


Unnamed: 0,Canton,Price_Trend,Adoption_Rate
0,Aargau,23.689863,30.155857
1,Appenzell Ausserrhoden,19.611867,32.99529
2,Appenzell Innerrhoden,19.718383,23.863373
3,Basel Landschaft,17.952067,22.257712
4,Basel Stadt,7.474562,15.909558
5,Bern,7.865421,22.636295
6,Fribourg,17.403996,21.541572
7,Geneva,14.201396,20.901161
8,Glarus,17.62454,41.772646
9,Grisons,14.462853,20.949355


In [9]:
def visualize_results(df, canton_patterns):
    # Create canton comparison plot
    fig = go.Figure()
    
    fig.add_trace(go.Bar(
        name='Actual Adoption',
        x=canton_patterns['Canton'],
        y=canton_patterns['Actual_Mean_Adoption']
    ))
    
    fig.add_trace(go.Bar(
        name='Predicted Adoption',
        x=canton_patterns['Canton'],
        y=canton_patterns['Predicted_Mean_Adoption']
    ))
    
    fig.update_layout(
        title='Actual vs Predicted Solar Adoption by Canton',
        barmode='group',
        width=1200,
        height=800
    )
    
    fig.write_html('../visualisations/adoption_comparison.html')

    fig, ax = plt.subplots(figsize=(15, 10))

    x = range(len(canton_patterns['Canton']))
    width = 0.35

    ax.bar([i - width/2 for i in x], canton_patterns['Actual_Mean_Adoption'], width, label='Actual Adoption')
    ax.bar([i + width/2 for i in x], canton_patterns['Predicted_Mean_Adoption'], width, label='Predicted Adoption')

    ax.set_xlabel('Canton')
    ax.set_ylabel('Adoption')
    ax.set_title('Actual vs Predicted Solar Adoption by Canton')
    ax.set_xticks(x)
    ax.set_xticklabels(canton_patterns['Canton'], rotation=45, ha='right')
    ax.legend()

    plt.tight_layout()
    plt.savefig('../visualisations/adoption_comparison_matplotlib.png')
    plt.close()

In [12]:
# Execute analysis pipeline
corr_matrix = analyze_correlations(combined_df)
plot_price_adoption_relationship(combined_df)

# Build and evaluate model
model, r2_score, feature_importance, scaler = build_regression_model(combined_df)
canton_patterns = analyze_canton_patterns(combined_df, model, scaler)

# Print summary statistics
print("\nModel Performance:")
print(f"R² Score: {r2_score:.3f}")
print("\nFeature Importance:")
print(feature_importance)

# Save results
canton_patterns.to_csv('../data/processed/canton_analysis_results.csv', index=False)

TypeError: 'float' object is not callable

### This implementation follows the structure outlined in:

## Research Questions

1. How do energy tariffs correlate with renewable energy adoption across Swiss cantons?
2. What impact do canton-specific tariff incentives have on solar panel installations?

## Hypotheses

- H1: Higher energy prices correlate with increased solar panel adoption
- H2: Cantons with lower renewable energy tariffs show higher installation rates

### And addressees the requirements of Phase 3:
- For phase 3:
    - produce a dataset that shows the correlation of the adoption rate of solar panels and the energy price tariff trend by canton on a yearly basis from march 2021 to august 2024
    - produce a dataset that shows the correlation of the adoption rate of solar panels and the energy price trend before and after renewable surcharge(incentivized) was implemented by canton on a monthly basis from march 2021 to august 2024
    - Implement logistic regression to interpret the relationship between the adoption rate of solar panels and the energy price trend by canton on a monthly basis from march 2021 to august 2024
    - Implement logistic regression to interpret the relationship between the adoption rate of solar panels and the energy price trend before and after renewable surcharge(incentivized) was implemented by canton on a monthly basis from march 2021 to august 2024
    - visualize the above as the result of phase 3, to show the adoption rate of each canton.