## Data Integration
- Combine the weather data with your carbon intensity dataset to analyze how weather conditions influence carbon intensity

In [2]:
import pandas as pd
import sqlite3

# Load weather data from the view
def load_weather_data():
    conn = sqlite3.connect('data/synop/synop.db')
    weather_data = pd.read_sql_query("SELECT * FROM WeatherCombined", conn)
    conn.close()
    return weather_data

# Load carbon intensity data
def load_carbon_data(file_path):
    carbon_data = pd.read_csv(file_path)
    return carbon_data

# Combine the datasets
def combine_data(weather_data, carbon_data):
    combined_data = pd.merge(carbon_data, weather_data, how='left', left_on=['Datetime (UTC)'], right_on=['Date'])
    return combined_data

# Example usage
weather_data = load_weather_data()
carbon_data = load_carbon_data('data/electricity/PL_2022_daily.csv')
combined_data = combine_data(weather_data, carbon_data)

## Data Preparation

In [3]:
# Clean and prepare data
combined_data.fillna(method='ffill', inplace=True)  # Example: forward fill for missing values

  combined_data.fillna(method='ffill', inplace=True)  # Example: forward fill for missing values


## Model Selection

In [4]:
from statsmodels.tsa.arima.model import ARIMA

# Example: ARIMA model
model = ARIMA(combined_data['Carbon Intensity gCO₂eq/kWh (direct)'], order=(1,1,1))
model_fit = model.fit()

## Model Training and Evaluation

In [5]:
# Predict and evaluate
predictions = model_fit.forecast(steps=30)  # Forecast next 30 days

## Forecasting

In [6]:
print(predictions)


365    628.685320
366    689.272457
367    730.663763
368    758.941055
369    778.259248
370    791.456856
371    800.473063
372    806.632665
373    810.840718
374    813.715534
375    815.679521
376    817.021258
377    817.937892
378    818.564109
379    818.991922
380    819.284190
381    819.483859
382    819.620267
383    819.713457
384    819.777122
385    819.820615
386    819.850329
387    819.870628
388    819.884496
389    819.893970
390    819.900443
391    819.904864
392    819.907885
393    819.909949
394    819.911359
Name: predicted_mean, dtype: float64


# Forecast based on weather info
- forecast carbon intensity for december 2022

## 1. Data Integration and Preparation

In [8]:
import pandas as pd
import sqlite3

# Establishing a connection to the database
conn = sqlite3.connect('data/synop/synop.db')

# Loading combined data (assuming carbon intensity data has been integrated into the WeatherCombined view)
query = '''
SELECT * FROM WeatherCombined WHERE Date BETWEEN '2022-01-01' AND '2022-11-30'
'''
data = pd.read_sql_query(query, conn)

# Close the connection
conn.close()

# Assume 'data' DataFrame has all the necessary columns, including carbon intensity
# Example columns: 'Date', 'Max_Daily_Temperature', 'Avg_Daily_Wind_Speed', 'Carbon_Intensity_direct'

## 2. Feature Engineering


In [9]:
# Adding temporal features
data['Month'] = pd.to_datetime(data['Date']).dt.month
data['Day'] = pd.to_datetime(data['Date']).dt.day

# Optionally add rolling averages or other statistical transforms
data['Temp_Rolling_Mean'] = data['Max_Daily_Temperature'].rolling(window=7).mean()


## 3. Model Selection


In [17]:
data.head(15)

Unnamed: 0,Station_ID,Date,Max_Daily_Temperature,Min_Daily_Temperature,Avg_Daily_Temperature,Total_Daily_Precipitation,Sunshine_Hours,Actinometry,Avg_Daily_Overall_Cloudiness,Wind_Duration_Over_10m_s,Avg_Daily_Wind_Speed,Month,Day,Temp_Rolling_Mean
0,349220690,2022-01-01,8.9,4.1,6.9,11.9,0.0,0.0,0.0,0.0,2.8,1,1,
1,349220690,2022-01-02,8.7,3.5,6.1,0.0,2.8,0.0,0.0,0.0,2.9,1,2,
2,349220690,2022-01-03,8.1,4.7,6.5,5.8,1.1,0.0,0.0,0.0,4.6,1,3,
3,349220690,2022-01-04,7.5,3.8,6.2,0.0,0.0,0.0,0.0,0.0,5.3,1,4,
4,349220690,2022-01-05,8.7,3.4,5.4,9.9,0.0,0.0,0.0,0.0,3.1,1,5,
5,349220690,2022-01-06,4.5,-2.5,-0.1,1.3,0.3,0.0,0.0,0.0,3.6,1,6,
6,349220690,2022-01-07,0.7,-5.9,-4.1,0.0,7.5,0.0,0.0,0.0,2.1,1,7,6.728571
7,349220690,2022-01-08,1.9,-9.7,-3.6,0.8,4.4,0.0,0.0,0.0,1.8,1,8,5.728571
8,349220690,2022-01-09,1.6,-4.7,-2.9,0.0,7.1,0.0,0.0,0.0,1.3,1,9,4.714286
9,349220690,2022-01-10,-3.4,-9.7,-6.4,0.1,0.0,0.0,0.0,0.0,2.8,1,10,3.071429


In [16]:

%pip install xgboost
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Prepare data for modeling
X = data[['Max_Daily_Temperature', 'Avg_Daily_Wind_Speed', 'Month', 'Day', 'Temp_Rolling_Mean']]
y = data['Carbon Intensity gCO₂eq/kWh (direct)']


# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = XGBRegressor(n_estimators=100)
model.fit(X_train, y_train)

# Prediction and evaluation
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print("MSE: ", mse)


Note: you may need to restart the kernel to use updated packages.


KeyError: 'Carbon Intensity gCO₂eq/kWh (direct)'

## 4. Forecasting Future Carbon Intensity


In [None]:
# Assume you have weather forecast data in 'future_weather_data'
future_X = future_weather_data[['Max_Daily_Temperature', 'Avg_Daily_Wind_Speed', 'Month', 'Day', 'Temp_Rolling_Mean']]
future_predictions = model.predict(future_X)
print(future_predictions)
