#### Library

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

#### Capacity

In [16]:
# Accumulator for the per-fuel-type columns; it is filled by process_fuel_type().
out = pd.DataFrame()

# Raw capacity export (JSON) to convert.
input_file = '/home/kevin/Downloads/BESS/data/energy_generation_20240601_20240630.json'
df = pd.read_json(input_file)

# Expand the records held in the 'return' column into one row per record;
# process_fuel_type() reads the resulting 'fuel_type' and 'Hours' columns.
flattened_data = pd.json_normalize(data=df['return'])

# Function to process data for a given fuel type
def process_fuel_type(fuel_type):
    """Add the hourly AC column for one fuel type to the notebook-level ``out`` frame.

    Reads the notebook-level ``flattened_data`` frame (columns ``fuel_type`` and
    ``Hours``) and writes into the notebook-level ``out`` frame:

    * ``begin_datetime_mpt`` -- added only when ``out`` does not have it yet;
    * ``AC_<fuel_type>`` -- the ``outage_grouping.AC`` value of every hourly record.

    Only the first row whose ``fuel_type`` matches is used.

    Parameters
    ----------
    fuel_type : str
        Value compared against ``flattened_data['fuel_type']``.

    Raises
    ------
    KeyError
        If no row of ``flattened_data`` has the requested fuel type, or the
        hourly records lack one of the expected fields.
    """
    matching_hours = flattened_data.loc[flattened_data['fuel_type'] == fuel_type, 'Hours']
    if matching_hours.empty:
        raise KeyError(f"no rows with fuel_type == {fuel_type!r} in flattened_data")

    # Take the first match by position. Looking up the column labelled 0 after
    # transposing a json_normalize() result only works when the Series index is
    # discarded, which is not guaranteed across pandas versions.
    hourly = pd.json_normalize(list(matching_hours.iloc[0]))

    if 'begin_datetime_mpt' not in out.columns:
        out['begin_datetime_mpt'] = hourly['begin_datetime_mpt']
    out[f'AC_{fuel_type}'] = hourly['outage_grouping.AC']

# Add one AC_<fuel> column to `out` for each renewable fuel type.
for fuel in ('WIND', 'SOLAR', 'HYDRO'):
    process_fuel_type(fuel)

# Persist the combined table as CSV (the DataFrame index is written as well).
out.to_csv('/home/kevin/Downloads/BESS/data/data_trial/AC.csv')

#### Generation

In [23]:
# Hourly CSD generation exports: one file for 2024-01..06, then one per month for 07..12.
generation_csv_paths = [
    '/home/kevin/Downloads/BESS/data/data_trial/CSD Generation (Hourly)/CSD Generation (Hourly) - 2024-01 to 2024-06.csv',
    '/home/kevin/Downloads/BESS/data/data_trial/CSD Generation (Hourly)/CSD Generation (Hourly) - 2024-07.csv',
    '/home/kevin/Downloads/BESS/data/data_trial/CSD Generation (Hourly)/CSD Generation (Hourly) - 2024-08.csv',
    '/home/kevin/Downloads/BESS/data/data_trial/CSD Generation (Hourly)/CSD Generation (Hourly) - 2024-09.csv',
    '/home/kevin/Downloads/BESS/data/data_trial/CSD Generation (Hourly)/CSD Generation (Hourly) - 2024-10.csv',
    '/home/kevin/Downloads/BESS/data/data_trial/CSD Generation (Hourly)/CSD Generation (Hourly) - 2024-11.csv',
    '/home/kevin/Downloads/BESS/data/data_trial/CSD Generation (Hourly)/CSD Generation (Hourly) - 2024-12.csv',
]

# Read the files in listed order; d1..d7 keep the names the following cells use.
d1, d2, d3, d4, d5, d6, d7 = map(pd.read_csv, generation_csv_paths)

In [24]:
# Sanity check: (rows, columns) of every loaded file, one per line, in load order.
print(*(frame.shape for frame in (d1, d2, d3, d4, d5, d6, d7)), sep='\n')

(916350, 12)
(156720, 12)
(157726, 12)
(153960, 12)
(161280, 12)
(157800, 12)
(164472, 12)


In [27]:
# Stack the seven generation frames row-wise (original row labels are kept),
# then pull out the rows for each fuel type of interest.
df = pd.concat([d1, d2, d3, d4, d5, d6, d7])
wind_generation_2024 = df[df['Fuel Type'].eq('WIND')]
solar_generation_2024 = df[df['Fuel Type'].eq('SOLAR')]

In [28]:
# Total wind 'Volume' per 'Date (MPT)' value, ordered by that column, then saved as CSV.
wind_generation_2024 = (
    wind_generation_2024
    .groupby('Date (MPT)')['Volume']
    .sum()
    .reset_index()
    .sort_values(by='Date (MPT)')
)
wind_generation_2024.to_csv('/home/kevin/Downloads/BESS/data/raw/wind_generation_2024.csv')

In [29]:
# Total solar 'Volume' per 'Date (MPT)' value, ordered by that column, then saved as CSV.
solar_generation_2024 = (
    solar_generation_2024
    .groupby('Date (MPT)')['Volume']
    .sum()
    .reset_index()
    .sort_values(by='Date (MPT)')
)
solar_generation_2024.to_csv('/home/kevin/Downloads/BESS/data/raw/solar_generation_2024.csv')

#### AIL

In [16]:
# Load the AIL (Alberta internal load) JSON export
input_file = '/home/kevin/Downloads/BESS/data/data_June2024_Dec2024/AIL_01062024_31122024.json'
df = pd.read_json(input_file)

# Output table: one row per record, three fields copied from each record
out = pd.DataFrame()

# Normalise the 'return' column and transpose it so that every record becomes a row
flattened_data = pd.json_normalize(df['return']).T

# Select the single column of record dicts by position. Its label is 0 only when
# json_normalize() discards the Series index, which is not guaranteed across
# pandas versions, so a label lookup (flattened_data[0]) can raise KeyError.
ail_records = flattened_data.iloc[:, 0]

out['begin_datetime_mpt'] = ail_records.apply(lambda x: x['begin_datetime_mpt'])
out['alberta_internal_load'] = ail_records.apply(lambda x: x['alberta_internal_load'])
out['forecast_alberta_internal_load'] = ail_records.apply(lambda x: x['forecast_alberta_internal_load'])

# Save next to the source file (the DataFrame index is written as well)
out.to_csv('/home/kevin/Downloads/BESS/data/data_June2024_Dec2024/AIL_01062024_31122024.csv')

#### Price

In [21]:
# Load the pool price JSON export
input_file = '/home/kevin/Downloads/BESS/data/data_June2024_Dec2024/price_01062024_31122024.json'
df = pd.read_json(input_file)

# Output table: one row per record, four fields copied from each record
out = pd.DataFrame()

# Normalise the 'return' column and transpose it so that every record becomes a row
flattened_data = pd.json_normalize(df['return']).T

# Select the single column of record dicts by position. Its label is 0 only when
# json_normalize() discards the Series index, which is not guaranteed across
# pandas versions, so a label lookup (flattened_data[0]) can raise KeyError.
price_records = flattened_data.iloc[:, 0]

out['begin_datetime_mpt'] = price_records.apply(lambda x: x['begin_datetime_mpt'])
out['pool_price'] = price_records.apply(lambda x: x['pool_price'])
out['forecast_pool_price'] = price_records.apply(lambda x: x['forecast_pool_price'])
out['rolling_30day_avg'] = price_records.apply(lambda x: x['rolling_30day_avg'])

# Save next to the source file (the DataFrame index is written as well)
out.to_csv('/home/kevin/Downloads/BESS/data/data_June2024_Dec2024/price_01062024_31122024.csv')

#### Weather: wind speed per city

import requests
import pandas as pd
from datetime import datetime, timezone, timedelta

def get_hourly_windspeed(climate_id, start_date, end_date, timeout=60):
    """Download hourly wind-speed observations for one climate station.

    Pages through the ``climate-hourly`` collection of api.weather.gc.ca,
    ``limit`` records at a time, until the service returns an empty page.

    The download is best-effort: if a request or the parsing of a page fails,
    the error is printed, paging stops, and whatever was collected so far is
    returned.

    Parameters
    ----------
    climate_id : str
        Value sent as the ``CLIMATE_IDENTIFIER`` query parameter.
    start_date, end_date : str
        Bounds of the requested interval; sent as ``"{start_date}/{end_date}"``
        in the ``datetime`` query parameter.
    timeout : float, optional
        Seconds passed to ``requests.get`` so that a stalled connection cannot
        block the notebook forever. Defaults to 60.

    Returns
    -------
    pandas.DataFrame
        Columns ``Timestamp_mst`` (the record's ``LOCAL_DATE`` parsed with
        ``pd.to_datetime``) and ``WIND_SPEED`` (``None`` when the record has no
        such property), sorted by ``Timestamp_mst``. The frame is empty, but
        still has both columns, when nothing could be downloaded.
    """
    url = "https://api.weather.gc.ca/collections/climate-hourly/items"
    columns = ['Timestamp_mst', 'WIND_SPEED']
    limit = 10000
    offset = 0
    rows = []

    while True:
        params = {
            "CLIMATE_IDENTIFIER": climate_id,
            "datetime": f"{start_date}/{end_date}",
            "limit": limit,
            "offset": offset,
            "f": "json"
        }

        try:
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            features = response.json()['features']

            # An empty page means everything has been read.
            if not features:
                break

            for feature in features:
                properties = feature['properties']
                rows.append({
                    'Timestamp_mst': pd.to_datetime(properties['LOCAL_DATE']),
                    'WIND_SPEED': properties.get('WIND_SPEED'),
                })

            offset += limit

        except requests.exceptions.HTTPError as err:
            print(f"HTTP error occurred: {err}")
            break
        except Exception as err:
            print(f"Other error occurred: {err}")
            break

    # Fixing the columns keeps sort_values() from raising KeyError when no rows
    # were collected (a frame built from an empty list has no columns at all).
    wind_df = pd.DataFrame(rows, columns=columns)
    return wind_df.sort_values(by='Timestamp_mst')
  
# Example run. Climate identifiers noted for reference:
#   CALGARY ID : 3031092
#   EDMONTON ID: 3012205 or 3012206
#   FORT MC ID: 3062696
climate_id = "3012206"  # one of the two Edmonton identifiers listed above

# NOTE(review): the bounds carry a 'Z' (UTC) suffix while get_hourly_windspeed()
# keeps each record's LOCAL_DATE -- confirm the first/last hours are the intended ones.
start_date, end_date = "2024-06-01T00:00:00Z", "2024-12-31T23:00:00Z"

df = get_hourly_windspeed(climate_id=climate_id, start_date=start_date, end_date=end_date)

# Save the download as CSV (the DataFrame index is written as well).
df.to_csv('/home/kevin/Downloads/BESS/data/data_June2024_Dec2024/windspeed_edmonton_01062024_31122024.csv')
