In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join('..')))

from dotenv import load_dotenv

import pandas as pd
import requests
from pandas.tseries.holiday import USFederalHolidayCalendar
from src.data_processing import get_weather, get_respondent_data, merge_datasets

In [None]:
# Load credentials from the project-level .env file into the process
# environment, then read the two API keys used by the fetch cells.
load_dotenv("../.env")
api_key_weather = os.getenv("API_KEY_WEATHER")  # Weather API key from visualcrossing.com
api_key_energy_consumption = os.getenv("API_KEY_ENERGY_CONSUMPTION")  # Energy consumption API key from US EIA

# os.getenv returns None for unset variables. Surface that here instead of
# letting it show up later as an opaque API failure. This is a notice, not
# an error: the keys are only needed when a cached CSV is missing and the
# data has to be fetched. Only variable names are printed, never values.
missing_keys = [
    name
    for name, value in (
        ("API_KEY_WEATHER", api_key_weather),
        ("API_KEY_ENERGY_CONSUMPTION", api_key_energy_consumption),
    )
    if not value
]
if missing_keys:
    print(
        "Warning: missing environment variable(s): "
        + ", ".join(missing_keys)
        + ". Fetching fresh data will fail until they are set (e.g. in ../.env)."
    )


In [3]:
# --- Run configuration -------------------------------------------------
# `location` is handed to get_weather and `respondent` to
# get_respondent_data in the fetch cells below.
location, respondent = "Texas", "ERCO"

# Date bounds passed to both fetch helpers and to merge_datasets.
start_date, end_date = "2023-07-01", "2023-12-31"

# CSV files used as on-disk caches (inputs) and as the final output.
weather_file = "../data/temperature_data_ercot_2023.csv"
energy_consumption_file = "../data/ercot_energy_2023.csv"
merged_data_file = "../data/merged_data_ercot_2023.csv"

In [9]:
# Get energy consumption data: reuse the cached CSV when it exists, otherwise
# fetch it with get_respondent_data and cache it for later runs.
# NOTE(review): the cache is keyed on the file path only. If respondent,
# start_date or end_date change, delete the CSV to force a fresh fetch.
if os.path.exists(energy_consumption_file):
    print("Energy consumption data file already exists. Loading from file...")
    energy_consumption_df = pd.read_csv(energy_consumption_file)
else:
    print("Energy consumption data file not found. Fetching data...")
    energy_consumption_df = get_respondent_data(respondent, api_key_energy_consumption, start_date, end_date)
    # to_csv does not create missing directories, so make sure the target
    # directory exists before writing (e.g. on a fresh checkout).
    os.makedirs(os.path.dirname(energy_consumption_file) or ".", exist_ok=True)
    energy_consumption_df.to_csv(energy_consumption_file, index=False)
    # Report the path actually written rather than a hard-coded one.
    print(f"Energy consumption data fetched and saved to '{energy_consumption_file}' file.")

# Parse 'period' on both paths (after the CSV is written, so the cached file
# is unchanged) so downstream cells see the same dtype whether the data came
# from the cache or from a fresh fetch.
energy_consumption_df['period'] = pd.to_datetime(energy_consumption_df['period'])


Energy consumption data file already exists. Loading from file...


In [11]:
# Get weather data: reuse the cached CSV when it exists, otherwise fetch it
# with get_weather and cache it for later runs.
# NOTE(review): the cache is keyed on the file path only. If location,
# start_date or end_date change, delete the CSV to force a fresh fetch.
if os.path.exists(weather_file):
    print(f"Loading existing weather data from {weather_file}")
    weather_data = pd.read_csv(weather_file)
else:
    print("Fetching weather data (API call)...")
    weather_data = get_weather(api_key_weather, location, start_date, end_date)
    # to_csv does not create missing directories, so make sure the target
    # directory exists before writing (e.g. on a fresh checkout).
    os.makedirs(os.path.dirname(weather_file) or ".", exist_ok=True)
    weather_data.to_csv(weather_file, index=False)
    # Report the path actually written rather than a hard-coded one.
    print(f"Temperature data fetched and saved to '{weather_file}'")

# Parse "period" on both paths (after the CSV is written, so the cached file
# is unchanged) so downstream cells see the same dtype whether the data came
# from the cache or from a fresh fetch.
weather_data["period"] = pd.to_datetime(weather_data["period"])

Loading existing weather data from ../data/temperature_data_ercot_2023.csv


In [12]:
# Hand both frames plus the configured date bounds to the project helper
# merge_datasets, then persist its result to merged_data_file.
merged_data = merge_datasets(
    energy_consumption_df,
    weather_data,
    start_date,
    end_date,
)
# The index and the header row are both written; these are to_csv's defaults.
merged_data.to_csv(merged_data_file)
print("Final dataset saved to 'data/merged_data_ercot_2023.csv' file")

Final dataset saved to 'data/merged_data_ercot_2023.csv' file
