### 建立 S3

In [2]:
import logging
import boto3
from botocore.exceptions import ClientError

def create_bucket(bucket_name, region=None):

    # Create bucket
    try:
        if region is None:
            s3_client = boto3.client('s3')
            s3_client.create_bucket(Bucket=bucket_name)
        else:
            s3_client = boto3.client('s3', region_name=region)
            location = {'LocationConstraint': region}
            s3_client.create_bucket(Bucket=bucket_name,
                                    CreateBucketConfiguration=location)
    except ClientError as e:
        logging.error(e)
        return False
    return True

create_bucket('my-bucket-01')

ERROR:root:An error occurred (IllegalLocationConstraintException) when calling the CreateBucket operation: The unspecified location constraint is incompatible for the region specific endpoint this request was sent to.


False

### 查詢現有儲存桶的列表

In [7]:
s3 = boto3.client('s3')
response = s3.list_buckets()

# Output the bucket names
print('Existing buckets:')
for bucket in response['Buckets']:
    print(f'  {bucket["Name"]}')

Existing buckets:
  my-bucket-623801
  my-bucket-623802
  mybucket-0721


### forecast 專案

In [8]:
import boto3
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import json
import time

# 初始化 Amazon S3 客戶端
# 指定區域
region = 'us-east-1'
s3 = boto3.client('s3', region_name=region)

# 建立 S3 Bucket（如果尚未存在）
# 務必確保名稱唯一
bucket_name = 'my-bucket-623803'


def create_bucket(bucket_name, region):
    try:
        if region == 'us-east-1':
            s3.create_bucket(Bucket=bucket_name)
        else:
            s3.create_bucket(
                Bucket=bucket_name,
                CreateBucketConfiguration={'LocationConstraint': region}
            )
        print(f"S3 Bucket '{bucket_name}' 建立成功")
    except s3.exceptions.BucketAlreadyOwnedByYou:
        print(f"S3 Bucket '{bucket_name}' 已經存在")
    except s3.exceptions.ClientError as e:
        if e.response['Error']['Code'] == 'BucketAlreadyExists':
            print(f"S3 Bucket '{bucket_name}' 已經存在於其他帳戶")
        else:
            raise e

create_bucket(bucket_name, region)

S3 Bucket 'my-bucket-623803' 建立成功


### 模擬數據

In [9]:
# 模擬數據生成函數
def generate_time_series_data(start_date, end_date, freq='D'):
    # 生成日期範圍
    dates = pd.date_range(start_date, end_date, freq=freq)
    # 生成隨機數據
    data = np.random.randint(1, 100, len(dates))
    # 使用相同的 item_id
    item_ids = ['item_1'] * len(dates)
    return pd.DataFrame({'timestamp': dates, 'value': data, 'item_id': item_ids})

# 開始日期
start_date = '2024-01-01'
# 結束日期，縮短時間範圍以減少數據量
end_date = '2024-01-10'
time_series_data = generate_time_series_data(start_date, end_date)

# 將數據保存到本地文件
time_series_data.to_csv('time_series_data.csv', index=False)

### 將數據上傳到 S3

In [10]:
s3_key = 'time_series_data.csv'
s3.upload_file('time_series_data.csv', bucket_name, s3_key)

### 初始化 Forecast 客戶端

In [11]:
forecast = boto3.client('forecast')

# 建立數據集
dataset_name = 'my_dataset'
dataset_group_name = 'my_dataset_group'
schema = {
    "Attributes": [
        {"AttributeName": "timestamp", "AttributeType": "timestamp"},
        {"AttributeName": "value", "AttributeType": "integer"},
        {"AttributeName": "item_id", "AttributeType": "string"}
    ]
}

create_dataset_response = forecast.create_dataset(
    DatasetName=dataset_name,
    Domain='CUSTOM',
    DatasetType='TARGET_TIME_SERIES',
    DataFrequency='D',
    Schema=schema
)
dataset_arn = create_dataset_response['DatasetArn']

# 建立數據集群組
create_dataset_group_response = forecast.create_dataset_group(
    DatasetGroupName=dataset_group_name,
    Domain='CUSTOM',
    DatasetArns=[dataset_arn]
)
dataset_group_arn = create_dataset_group_response['DatasetGroupArn']

# 將數據導入到數據集中
# S3 文件路徑
s3_data_path = f"s3://{bucket_name}/{s3_key}"
# 使用你的 IAM 角色 ARN
role_arn = "arn:aws:iam::your-account-id:role/ForecastRole"

create_dataset_import_job_response = forecast.create_dataset_import_job(
    DatasetImportJobName='my_dataset_import_job',
    DatasetArn=dataset_arn,
    DataSource={
        "S3Config": {
            "Path": s3_data_path,
            "RoleArn": role_arn
        }
    },
    TimestampFormat='yyyy-MM-dd'
)
dataset_import_job_arn = create_dataset_import_job_response['DatasetImportJobArn']

# 等待數據導入完成
while True:
    status = forecast.describe_dataset_import_job(DatasetImportJobArn=dataset_import_job_arn)['Status']
    if status in ('ACTIVE', 'CREATE_FAILED'):
        break
    print(f"數據導入工作狀態: {status}")
    time.sleep(30)

if status == 'CREATE_FAILED':
    raise Exception("數據導入工作建立失敗")

# 建立預測器
predictor_name = 'my_predictor'
algorithm_arn = 'arn:aws:forecast:::algorithm/Prophet'

create_predictor_response = forecast.create_predictor(
    PredictorName=predictor_name,
    # 設置較小的預測範圍
    ForecastHorizon=3,
    PerformAutoML=False,
    PerformHPO=False,
    AlgorithmArn=algorithm_arn,
    InputDataConfig={
        'DatasetGroupArn': dataset_group_arn
    },
    FeaturizationConfig={
        'ForecastFrequency': 'D',
        'Featurizations': [
            {
                'AttributeName': 'value',
                'FeaturizationPipeline': [
                    {
                        'FeaturizationMethodName': 'filling',
                        'FeaturizationMethodParameters': {
                            'aggregation': 'sum',
                            'backfill': 'nan',
                            'frontfill': 'none'
                        }
                    }
                ]
            }
        ]
    }
)
predictor_arn = create_predictor_response['PredictorArn']

# 等待預測器訓練完成
while True:
    status = forecast.describe_predictor(PredictorArn=predictor_arn)['Status']
    if status in ('ACTIVE', 'CREATE_FAILED'):
        break
    print(f"預測器訓練狀態: {status}")
    time.sleep(30)

if status == 'CREATE_FAILED':
    raise Exception("預測器建立失敗")

# 建立預測
forecast_name = 'my_forecast'

create_forecast_response = forecast.create_forecast(
    ForecastName=forecast_name,
    PredictorArn=predictor_arn
)
forecast_arn = create_forecast_response['ForecastArn']

# 等待預測生成完成
while True:
    status = forecast.describe_forecast(ForecastArn=forecast_arn)['Status']
    if status in ('ACTIVE', 'CREATE_FAILED'):
        break
    print(f"預測狀態: {status}")
    time.sleep(30)

if status == 'CREATE_FAILED':
    raise Exception("預測建立失敗")

# 查詢預測結果
forecastquery = boto3.client('forecastquery')
forecast_response = forecastquery.query_forecast(
    ForecastArn=forecast_arn,
    Filters={"item_id": "item_1"}
)

print(forecast_response)

ClientError: An error occurred (AccessDeniedException) when calling the CreateDataset operation: User: arn:aws:iam::891377311393:user/s3user is not authorized to perform: forecast:CreateDataset on resource: arn:aws:forecast:us-east-1:891377311393:dataset/my_dataset because no identity-based policy allows the forecast:CreateDataset action