In [3]:
import pandas as pd
import os
from prophet import Prophet
import numpy as np
import pickle
import boto3
import io
from io import BytesIO

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def load_data():
    """Load every regional CSV stored under the raw-data prefix of the S3 bucket.

    Lists the objects in bucket 'ecc-eia-data' under the 'raw_data_daily/'
    prefix and reads each object whose file name ends in '.csv' with
    read_from_s3.

    Returns:
    dict: Maps the region name (the CSV file name without its extension) to
    the DataFrame read from that object. Empty when no CSV objects are found.
    """
    # Read from an S3 bucket
    bucket_name = 'ecc-eia-data'
    key = 'raw_data_daily/'

    # A single list_objects_v2 call returns at most one page of keys, so walk
    # every page instead of trusting the first response to be complete.
    paginator = s3_client.get_paginator('list_objects_v2')

    region_data = {}
    for page in paginator.paginate(Bucket=bucket_name, Prefix=key):
        # 'Contents' is missing from the response when nothing matches the prefix.
        for obj in page.get('Contents', []):
            object_key = obj['Key']
            filename = os.path.basename(object_key)
            if not filename.endswith('.csv'):
                continue
            # Extract the region name from the file name
            region = os.path.splitext(filename)[0]
            # Read with the key exactly as listed rather than rebuilding it from
            # the region name, so nested prefixes and extra dots still resolve.
            region_data[region] = read_from_s3(bucket_name, object_key)

    return region_data

def preprocess_data(data, region):
    """Build the demand and net-generation training frames for one region.

    Args:
    data (dict): Maps region name to a DataFrame containing the columns
        'Date', 'Demand' and 'Net generation'.
    region (str): Key of the region to extract from `data`.

    Returns:
    tuple: (demand_data, generation_data). Each is a new DataFrame with the
    columns 'ds' (copied from 'Date') and 'y' (the measured value), with rows
    containing missing values dropped. The frame stored in `data` is left
    unmodified.
    """
    # Filter data for the specified region
    region_data = data[region]

    def _series_frame(value_column):
        # Column selection with a list returns a new frame, so renaming and
        # dropping rows here never touches the caller's DataFrame.
        selected = region_data[['Date', value_column]]
        renamed = selected.rename(columns={'Date': 'ds', value_column: 'y'})
        # Drop rows with missing values
        return renamed.dropna()

    # Prepare separate datasets for demand and net generation
    demand_data = _series_frame('Demand')
    generation_data = _series_frame('Net generation')

    return demand_data, generation_data


In [5]:
# Read function
def read_from_s3(bucket_name, key):
    """Download one S3 object and parse its contents as CSV.

    Args:
    bucket_name (str): Bucket that holds the object.
    key (str): Full object key within the bucket.

    Returns:
    pd.DataFrame: The result of pd.read_csv on the object's bytes.
    """
    s3_object = s3_client.get_object(Bucket=bucket_name, Key=key)
    raw_bytes = s3_object['Body'].read()
    # Wrap the downloaded bytes in an in-memory buffer so pandas can treat them like a file
    return pd.read_csv(io.BytesIO(raw_bytes))

def write_to_s3(df, bucket_name, key, filename):
    """Serialize a DataFrame to CSV and upload it as an S3 object.

    Args:
    df (pd.DataFrame): Frame to upload; written without its index.
    bucket_name (str): Destination bucket.
    key (str): Prefix joined directly to `filename` (no separator is added)
        to form the object key.
    filename (str): Final part of the object key.

    Returns:
    None
    """
    # Render the frame as CSV text, then encode it to UTF-8 bytes for the upload
    csv_bytes = df.to_csv(index=False).encode('utf-8')
    s3_client.put_object(Body=csv_bytes, Bucket=bucket_name, Key=key + filename)

In [42]:
def train_prophet_model(region, data_type, data):
    """
    Fit a default Prophet model on one series and upload it, pickled, to S3.

    Args:
    region (str): The region code; used in the object key and the confirmation message.
    data_type (str): Label of the series; used in the object key and the
        confirmation message. The training loop in this notebook passes
        'demand' or 'generation'.
    data (pd.DataFrame): The preprocessed data handed to Prophet.fit, with columns 'ds' and 'y'.

    Returns:
    None
    """
    # Destination of the serialized model
    destination_bucket = 'models-prophet'
    destination_key = f'temp/{region}_{data_type}_model.pkl'

    # Fit a Prophet model with its default settings
    forecaster = Prophet()
    forecaster.fit(data)

    # Pickle the fitted model and upload the bytes
    s3_client.put_object(
        Body=pickle.dumps(forecaster),
        Bucket=destination_bucket,
        Key=destination_key,
    )

    # Print the confirmation message
    print(f"Prophet model for {region} {data_type} has been stored in S3 bucket.")



In [6]:
# Read the access keys from the environment instead of embedding them in the notebook.
# NOTE(review): the key pair that was previously hardcoded in this cell has been
# exposed and should be rotated.
access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

# Create an S3 client; when both values are None, boto3 falls back to its
# default credential chain (shared credentials file, instance role, ...).
s3_client = boto3.client('s3', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key)

In [7]:
# Pull every regional frame from S3, then keep the region names in load order
region_data = load_data()
regions = list(region_data)

In [40]:
# Preprocess data for each region and store it in a dictionary
preprocessed_data = {}
for region in regions:
    # preprocess_data only looks up data[region], so the mapping is passed as-is;
    # a per-iteration dict.copy() is shallow and would not protect the frames anyway.
    demand_data, generation_data = preprocess_data(region_data, region)
    preprocessed_data[region] = {'demand': demand_data, 'generation': generation_data}

In [43]:
# Series to model for every region; these are the keys stored in preprocessed_data
data_types = ['demand', 'generation']

# Fit and upload one Prophet model per (region, series) pair
for region in regions:
    frames_for_region = preprocessed_data[region]
    for data_type in data_types:
        region_preprocessed_data = frames_for_region[data_type]
        train_prophet_model(region, data_type, region_preprocessed_data)

16:34:24 - cmdstanpy - INFO - Chain [1] start processing
16:34:24 - cmdstanpy - INFO - Chain [1] done processing
16:34:25 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for CAL demand has been stored in S3 bucket.


16:34:25 - cmdstanpy - INFO - Chain [1] done processing
16:34:26 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for CAL generation has been stored in S3 bucket.


16:34:26 - cmdstanpy - INFO - Chain [1] done processing
16:34:26 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for CAR demand has been stored in S3 bucket.


16:34:26 - cmdstanpy - INFO - Chain [1] done processing
16:34:27 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for CAR generation has been stored in S3 bucket.


16:34:27 - cmdstanpy - INFO - Chain [1] done processing
16:34:27 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for CENT demand has been stored in S3 bucket.


16:34:28 - cmdstanpy - INFO - Chain [1] done processing
16:34:28 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for CENT generation has been stored in S3 bucket.


16:34:28 - cmdstanpy - INFO - Chain [1] done processing
16:34:29 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for FLA demand has been stored in S3 bucket.


16:34:29 - cmdstanpy - INFO - Chain [1] done processing
16:34:29 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for FLA generation has been stored in S3 bucket.


16:34:30 - cmdstanpy - INFO - Chain [1] done processing
16:34:30 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for MIDA demand has been stored in S3 bucket.


16:34:30 - cmdstanpy - INFO - Chain [1] done processing
16:34:30 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for MIDA generation has been stored in S3 bucket.


16:34:31 - cmdstanpy - INFO - Chain [1] done processing
16:34:31 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for MIDW demand has been stored in S3 bucket.


16:34:31 - cmdstanpy - INFO - Chain [1] done processing
16:34:32 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for MIDW generation has been stored in S3 bucket.


16:34:32 - cmdstanpy - INFO - Chain [1] done processing
16:34:32 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for NE demand has been stored in S3 bucket.


16:34:32 - cmdstanpy - INFO - Chain [1] done processing
16:34:33 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for NE generation has been stored in S3 bucket.


16:34:33 - cmdstanpy - INFO - Chain [1] done processing
16:34:33 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for NY demand has been stored in S3 bucket.


16:34:34 - cmdstanpy - INFO - Chain [1] done processing
16:34:34 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for NY generation has been stored in S3 bucket.


16:34:34 - cmdstanpy - INFO - Chain [1] done processing
16:34:34 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for SE demand has been stored in S3 bucket.


16:34:35 - cmdstanpy - INFO - Chain [1] done processing
16:34:35 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for SE generation has been stored in S3 bucket.


16:34:35 - cmdstanpy - INFO - Chain [1] done processing
16:34:36 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for SW demand has been stored in S3 bucket.


16:34:36 - cmdstanpy - INFO - Chain [1] done processing
16:34:37 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for SW generation has been stored in S3 bucket.


16:34:37 - cmdstanpy - INFO - Chain [1] done processing
16:34:37 - cmdstanpy - INFO - Chain [1] start processing


Prophet model for TEX demand has been stored in S3 bucket.


16:34:37 - cmdstanpy - INFO - Chain [1] done processing


Prophet model for TEX generation has been stored in S3 bucket.
