# 前提
* SageMaker notebook の実行ロールに AdministratorAccess ポリシーが付与されていること

# 概要
* 電力の需要予測
* clientごと、時間 ( hour ) ごとにどれくらいの電力需要があるのかを予測する

# おすすめ
* Forecast の API を呼び出すたびに、GUI で結果を確認しながら進める



In [None]:
# small csv ダウンロード
!if [ -e ./electricityusagedata_small.csv ]; then rm ./electricityusagedata_small.csv; fi
!if [ -e ./electricityusagedata_small.zip ]; then rm ./electricityusagedata_small.zip; fi
!if [ -e ./electricityusagedata.csv ]; then rm ./electricityusagedata.csv; fi

!wget https://samejima-handson.s3-ap-northeast-1.amazonaws.com/electricityusagedata_small.zip
!unzip electricityusagedata_small.zip
!mv electricityusagedata.csv electricityusagedata_small.csv

In [None]:
# normal csv ダウンロード
!if [ -e ./electricityusagedata.csv ]; then rm ./electricityusagedata.csv; fi
!if [ -e ./electricityusagedata.zip ]; then rm ./electricityusagedata.zip; fi

!wget https://samejima-handson.s3-ap-northeast-1.amazonaws.com/electricityusagedata.zip
!unzip electricityusagedata.zip

In [None]:
!pip install tzlocal

In [None]:
# Standard library
import datetime
from time import sleep

# Third-party
import boto3
import pandas as pd
import sagemaker
import tzlocal
# display/HTML are taken from the public IPython.display module; importing
# them from IPython.core.display is deprecated.
from IPython.display import HTML, display

# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# S3 location used for Forecast in this notebook: the SageMaker session's
# default bucket with the "/forecast" prefix.
s3_location = 's3://{}/forecast'.format(sagemaker.Session().default_bucket())
print(s3_location)

In [None]:
# Upload the small CSV under s3_location and keep the value returned by
# upload(); it is later passed as the import job's S3 path.
s3_csv_location = sagemaker.s3.S3Uploader.upload(
    './electricityusagedata_small.csv',
    s3_location,
)
print(s3_csv_location)

In [None]:
# IAM role used by Forecast in this notebook (passed as RoleArn to the dataset import job).
# The value is a placeholder: replace it with a real role ARN before running the later cells.
ROLE_ARN = 'arn:aws:iam::<値をいれてください>'

In [None]:
# boto3 client for the 'forecast' service; the create/describe/delete calls in
# the following cells all go through this client.
forecast = boto3.client('forecast')

# データセットの作成

In [None]:
# Create the TARGET_TIME_SERIES dataset (CUSTOM domain, hourly data frequency).
# The schema declares three attributes, in order: timestamp, target_value, item_id.
DATASET_NAME = 'target_time_series_electricity_usage_data_small'
response = forecast.create_dataset(
    DatasetName=DATASET_NAME,
    Domain='CUSTOM',
    DatasetType='TARGET_TIME_SERIES',
    DataFrequency='H',
    Schema={
        'Attributes': [
            {'AttributeName': column, 'AttributeType': kind}
            for column, kind in (
                ('timestamp', 'timestamp'),
                ('target_value', 'float'),
                ('item_id', 'string'),
            )
        ],
    },
)
# Keep the ARN: later cells reference the dataset through it.
DATASET_ARN = response['DatasetArn']
print(response)

In [None]:
# Fetch the dataset's description; as the last expression of the cell, the
# response is shown as the cell output.
forecast.describe_dataset(DatasetArn=DATASET_ARN)

# データセットグループの作成

In [None]:
# Create a CUSTOM-domain dataset group that contains the dataset referenced by DATASET_ARN.
DATASET_GROUP_NAME = 'electricity_usage_data_small'
response = forecast.create_dataset_group(
    DatasetArns=[DATASET_ARN],
    Domain='CUSTOM',
    DatasetGroupName=DATASET_GROUP_NAME,
)
# Keep the ARN: the predictor and the clean-up cells reference the group through it.
DATASET_GROUP_ARN = response['DatasetGroupArn']
print(response)

In [None]:
# Fetch the dataset group's description; as the last expression of the cell,
# the response is shown as the cell output.
forecast.describe_dataset_group(DatasetGroupArn=DATASET_GROUP_ARN)

# Import job の作成

In [None]:
%%time
# Import job の作成

DATASET_IMPORT_JOB_NAME = 'electricity_usage_data_import_job'
response = forecast.create_dataset_import_job(
    DatasetImportJobName=DATASET_IMPORT_JOB_NAME,
    DatasetArn=DATASET_ARN,
    DataSource={
        'S3Config': {
            'Path': s3_csv_location,
            'RoleArn': ROLE_ARN
        }
    }
)
DATASET_IMPORT_JOB_ARN = response['DatasetImportJobArn']
print(response)
while True:
    response = forecast.describe_dataset_import_job(DatasetImportJobArn=DATASET_IMPORT_JOB_ARN)
    if response['Status']=='ACTIVE':
        print('!')
        print('import job done.')
        print(response)
        break
    else:
        sleep(1)
        print('.',end='')
start_time = response['CreationTime']
end_time = response['LastModificationTime']
total_time = end_time - start_time
print(total_time)

# Predictor の作成

In [None]:
# Short pause before the request (presumably to let the previous step settle — TODO confirm).
sleep(5)

# Create the predictor (AutoML on, HPO off, 20-step horizon at hourly
# frequency) and wait until it has finished.
PREDICTOR_NAME = 'electricity_usage_data_predictor'
response = forecast.create_predictor(
    PredictorName=PREDICTOR_NAME,
    ForecastHorizon=20,
    PerformAutoML=True,
    PerformHPO=False,
    InputDataConfig={
        'DatasetGroupArn': DATASET_GROUP_ARN,
    },
    FeaturizationConfig={
        "ForecastFrequency": "H"
    },
)
PREDICTOR_ARN = response['PredictorArn']
print(response)

# Poll until the predictor becomes ACTIVE. A failed or stopped predictor never
# reaches ACTIVE, so raise instead of polling forever.
while True:
    response = forecast.describe_predictor(PredictorArn=PREDICTOR_ARN)
    status = response['Status']
    if status == 'ACTIVE':
        print('!')
        print('make predictor done.')
        print(response)
        break
    if status.endswith(('FAILED', 'STOPPED')):
        print('!')
        raise RuntimeError(
            'predictor creation ended with status {}: {}'.format(status, response.get('Message'))
        )
    print('.', end='')
    sleep(1)

# Elapsed time according to the timestamps reported by the service.
start_time = response['CreationTime']
end_time = response['LastModificationTime']
total_time = end_time - start_time
print(total_time)

# 予測の作成

In [None]:
# Short pause before the request (presumably to let the previous step settle — TODO confirm).
sleep(5)

# Create the forecast from the trained predictor and wait until it has finished.
FORECAST_NAME = 'electricity_usage_data_forecast'
response = forecast.create_forecast(
    ForecastName=FORECAST_NAME,
    PredictorArn=PREDICTOR_ARN
)
FORECAST_ARN = response['ForecastArn']
print(response)

# Poll until the forecast becomes ACTIVE. A failed or stopped forecast never
# reaches ACTIVE, so raise instead of polling forever.
while True:
    response = forecast.describe_forecast(ForecastArn=FORECAST_ARN)
    status = response['Status']
    if status == 'ACTIVE':
        print('!')
        print('make forecast done')
        break
    if status.endswith(('FAILED', 'STOPPED')):
        print('!')
        raise RuntimeError(
            'forecast creation ended with status {}: {}'.format(status, response.get('Message'))
        )
    print('.', end='')
    sleep(1)

# Elapsed time according to the timestamps reported by the service.
start_time = response['CreationTime']
end_time = response['LastModificationTime']
total_time = end_time - start_time
print(total_time)

In [None]:
# boto3 client for the 'forecastquery' service, used below to query the created forecast.
forecast_query = boto3.client('forecastquery')

In [None]:
# Query the forecast for item_id "client_1" between 2015-01-01T02:00:00 and
# 2015-01-01T03:00:00; the response is shown as the cell output.
forecast_query.query_forecast(
    Filters={'item_id': 'client_1'},
    EndDate='2015-01-01T03:00:00',
    StartDate='2015-01-01T02:00:00',
    ForecastArn=FORECAST_ARN,
)

In [None]:
# Export the forecast to S3 under "<s3_location>/predict_result/" and wait
# until the export job has finished.
FORECAST_EXPORT_JOB_NAME = 'small_data_export_job'
response = forecast.create_forecast_export_job(
    ForecastExportJobName=FORECAST_EXPORT_JOB_NAME,
    ForecastArn=FORECAST_ARN,
    Destination={
        'S3Config': {
            'Path': s3_location + '/predict_result/',
            # Use the role configured for this notebook rather than a
            # hard-coded ARN that belongs to one specific AWS account.
            'RoleArn': ROLE_ARN
        }
    }
)
FORECAST_EXPORT_JOB_ARN = response['ForecastExportJobArn']
print(response)

# Poll until the job becomes ACTIVE. A failed or stopped job never reaches
# ACTIVE, so raise instead of polling forever.
while True:
    response = forecast.describe_forecast_export_job(ForecastExportJobArn=FORECAST_EXPORT_JOB_ARN)
    status = response['Status']
    if status == 'ACTIVE':
        print('')
        print('export job done')
        print(response)
        break
    if status.endswith(('FAILED', 'STOPPED')):
        print('')
        raise RuntimeError(
            'forecast export job ended with status {}: {}'.format(status, response.get('Message'))
        )
    print('.', end='')
    sleep(1)

# Elapsed time according to the timestamps reported by the service.
start_time = response['CreationTime']
end_time = response['LastModificationTime']
total_time = end_time - start_time
print(total_time)

# お片付け

In [None]:
# Delete the forecast export job and wait until it is gone. Deletion is
# asynchronous, and the following clean-up step (delete_forecast) may be
# rejected while the export job still exists, so wait here like the other
# clean-up cells do.
print('Started to delete forecast export job')
forecast.delete_forecast_export_job(ForecastExportJobArn=FORECAST_EXPORT_JOB_ARN)
while True:
    try:
        response = forecast.describe_forecast_export_job(ForecastExportJobArn=FORECAST_EXPORT_JOB_ARN)
    except forecast.exceptions.ResourceNotFoundException:
        # "Not found" is the signal that the deletion has completed.
        print('!')
        print('Finished to delete forecast export job')
        break
    if response['Status'] == 'DELETE_FAILED':
        print('!')
        raise RuntimeError('forecast export job deletion failed: {}'.format(response.get('Message')))
    print('.', end='')
    sleep(1)

In [None]:
# Delete the forecast and wait until describe_forecast reports that it no longer exists.
print('Started to delete Forecast')
forecast.delete_forecast(ForecastArn=FORECAST_ARN)
while True:
    try:
        response = forecast.describe_forecast(ForecastArn=FORECAST_ARN)
    except forecast.exceptions.ResourceNotFoundException:
        # Only "not found" means the deletion completed. Any other error
        # (credentials, throttling, keyboard interrupt) must surface instead
        # of being reported as a finished deletion.
        print('!')
        print('Finished to delete Forecast')
        break
    if response['Status'] == 'DELETE_FAILED':
        # A failed deletion never disappears; stop instead of looping forever.
        print('!')
        raise RuntimeError('Forecast deletion failed: {}'.format(response.get('Message')))
    print('.',end='.')
    sleep(1)

In [None]:
# Delete the predictor and wait until describe_predictor reports that it no longer exists.
print('Started to delete Predictor')
forecast.delete_predictor(PredictorArn=PREDICTOR_ARN)
while True:
    print('.',end='.')
    try:
        response = forecast.describe_predictor(PredictorArn=PREDICTOR_ARN)
    except forecast.exceptions.ResourceNotFoundException:
        # Only "not found" means the deletion completed. Any other error
        # (credentials, throttling, keyboard interrupt) must surface instead
        # of being reported as a finished deletion.
        print('!')
        print('Finished to delete Predictor')
        break
    if response['Status'] == 'DELETE_FAILED':
        # A failed deletion never disappears; stop instead of looping forever.
        print('!')
        raise RuntimeError('Predictor deletion failed: {}'.format(response.get('Message')))
    sleep(1)

In [None]:
%%time
# Dataset(TARGET_TIME_SERIES) を削除
print('Started to delete TARTGET_TIME_SERIES dataset import job')
forecast.delete_dataset_import_job(DatasetImportJobArn=DATASET_IMPORT_JOB_ARN)
while True:
    try:
        print('.',end='.')
        forecast.describe_dataset_import_job(DatasetImportJobArn=DATASET_IMPORT_JOB_ARN)
        sleep(1)
    except:
        print('!')
        print('Finished to delete TARTGET_TIME_SERIES dataset import job')
        break

In [None]:
%%time
# Dataset(TARGET_TIME_SERIES) を削除
print('Started to delete TARTGET_TIME_SERIES dataset')
forecast.delete_dataset(DatasetArn=DATASET_ARN)
while True:
    try:
        print('.',end='.')
        forecast.describe_dataset(DatasetArn=DATASET_ARN)
        sleep(1)
    except:
        print('!')
        print('Finished to delete TARTGET_TIME_SERIES dataset')
        break

In [None]:
%%time
# Dataset(TARGET_TIME_SERIES) を削除
print('Started to delete dataset group')
forecast.delete_dataset_group(DatasetGroupArn=DATASET_GROUP_ARN)
while True:
    try:
        print('.',end='.')
        forecast.describe_dataset_group(DatasetGroupArn=DATASET_GROUP_ARN)
        sleep(1)
    except:
        print('!')
        print('Finished to delete dataset group')
        break