In [None]:
import sys
import os
import json
import time

import pandas as pd
import boto3

# importing forecast notebook utility from notebooks/common directory
sys.path.insert( 0, os.path.abspath("../../common") )
import util

In [None]:
# Create the text widgets for the S3 bucket name and the AWS region via the shared notebook util helper.
text_widget_bucket = util.create_text_widget(
    "bucket_name",
    "input your S3 bucket name",
)
text_widget_region = util.create_text_widget(
    "region",
    "input region name.",
    default_value="eu-west-1",
)

In [None]:
# S3 bucket used for the demo data, and the AWS region used for the Forecast session.
bucket_name, region = "myawsforecastcovid19demo", "eu-west-1"

# Fail fast if either setting was left blank.
assert bucket_name, "bucket_name not set."
assert region, "region not set."

In [None]:
### The last part of the setup process is to validate that your account can communicate with Amazon Forecast; the cell below does just that.

In [None]:
# One boto3 session pinned to the configured region; both Forecast clients are created from it.
session = boto3.Session(region_name=region)

# Control-plane client (datasets, import jobs, ...) and the query client.
forecast = session.client("forecast")
forecastquery = session.client("forecastquery")

In [None]:
### To begin, use Pandas to read the CSV and to show a sample of the data.

In [None]:
# Column labels assigned to the CSV, in file order.
csv_columns = ['timestamp', 'item', 'confirmed', 'recovered', 'value']

# Load every column as a plain object (no numeric or date parsing).
# NOTE(review): `names=` without `header=0` keeps the file's first row as data —
# confirm the CSV has no header row.
df = pd.read_csv(
    "../../../data/countries-aggregated.csv",
    dtype=object,
    names=csv_columns,
)

# Preview the first three rows.
df.head(3)

In [None]:
## The Timestamp
## A Value
## An Item
## These are the 3 key required pieces of information to generate a forecast with Amazon Forecast.
## More can be added but these 3 must always remain present

In [None]:
# Training window: 2020-01-31 through 2020-04-24, inclusive on both ends.
# The timestamp column was loaded as strings; "yyyy-MM-dd" text sorts
# chronologically, so a plain string range comparison is sufficient.
# NOTE(review): the name `jan_to_oct` is kept because later cells reference it,
# even though this window ends in April — confirm the intended end date.
jan_to_oct = df[df['timestamp'].between('2020-01-31', '2020-04-24')]

# Validation window: 2020-10-31 through 2020-12-01, inclusive on both ends.
# Reuses the frame loaded earlier instead of reading the same CSV a second time.
remaining_df = df[df['timestamp'].between('2020-10-31', '2020-12-01')]

In [None]:
## Now export them to CSV files and place them into your `data` folder.

In [None]:
# Make sure the local output folder exists; `to_csv` does not create missing directories.
os.makedirs("data", exist_ok=True)

# Write both splits without a header row or index column, so each file contains only the data columns.
jan_to_oct.to_csv("data/covid-19-countries-aggregated-train.csv", header=False, index=False)
remaining_df.to_csv("data/covid-19-countries-aggregated-validation.csv", header=False, index=False)

In [None]:
## At this time the data is ready to be sent to S3 where Forecast will use it later. The following cells will upload the data to S3.

In [None]:
# The S3 object key mirrors the local relative path of the training file.
key = "data/covid-19-countries-aggregated-train.csv"

# Upload the training CSV to the configured bucket under `key`.
s3_resource = boto3.Session().resource('s3')
s3_resource.Bucket(bucket_name).Object(key).upload_file("data/covid-19-countries-aggregated-train.csv")

In [None]:
## Creating the Dataset Group and Dataset

In [None]:
# The timestamps are date-only ("yyyy-MM-dd"), so the dataset frequency must be
# daily. Amazon Forecast accepts the date-only format for the Y, M, W and D
# frequencies; hourly ("H") data requires "yyyy-MM-dd HH:mm:ss".
DATASET_FREQUENCY = "D"
TIMESTAMP_FORMAT = "yyyy-MM-dd"

In [None]:
# All resource names are derived from the project prefix.
project = 'covid_19_forecastdemo'
datasetName = f"{project}_ds"
datasetGroupName = f"{project}_dsg"

# Full S3 URI of the uploaded training file.
s3DataPath = f"s3://{bucket_name}/{key}"

In [None]:
# Persist `project` with the IPython %store magic so it can be restored later with `%store -r`.
%store project

In [None]:
### Create the Dataset Group

In [None]:
# Create a CUSTOM-domain dataset group and keep its ARN for the following cells.
create_dataset_group_response = forecast.create_dataset_group(
    DatasetGroupName=datasetGroupName,
    Domain="CUSTOM",
)
datasetGroupArn = create_dataset_group_response['DatasetGroupArn']

In [None]:
# Fetch and display the details of the dataset group identified by datasetGroupArn.
forecast.describe_dataset_group(DatasetGroupArn=datasetGroupArn)

In [None]:
### Create the Schema

In [None]:
# Specify the schema of your dataset here. Make sure the order of columns matches the raw data files.
# Each (name, type) pair below becomes one attribute entry, in exactly this order.
_schema_columns = [
    ("timestamp", "timestamp"),
    ("item_id", "string"),
    ("Confirmed", "string"),
    ("Recovered", "string"),
    ("target_value", "float"),
]

schema = {
    "Attributes": [
        {"AttributeName": attr_name, "AttributeType": attr_type}
        for attr_name, attr_type in _schema_columns
    ]
}

In [None]:
### Create the Dataset

In [None]:
# Register the target time series dataset using the configured name, frequency and schema.
dataset_request = dict(
    Domain="CUSTOM",
    DatasetType='TARGET_TIME_SERIES',
    DatasetName=datasetName,
    DataFrequency=DATASET_FREQUENCY,
    Schema=schema,
)
response = forecast.create_dataset(**dataset_request)

In [None]:
# Keep the dataset's ARN from the create_dataset response, then display the dataset's details.
datasetArn = response['DatasetArn']
forecast.describe_dataset(DatasetArn=datasetArn)

In [None]:
### Add Dataset to Dataset Group

In [None]:
# Associate the dataset with the dataset group by passing its ARN in the DatasetArns list.
forecast.update_dataset_group(DatasetGroupArn=datasetGroupArn, DatasetArns=[datasetArn])

In [None]:
### Create IAM Role for Forecast

In [None]:
# IAM client used to create the role and attach its managed policies.
iam = boto3.client("iam")

role_name = "ForecastRoleDemo"

# Trust policy: allows the Amazon Forecast service principal to assume this role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "forecast.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }
    ],
}

# Create the role; if it already exists, look up the existing role's ARN instead.
try:
    create_role_response = iam.create_role(
        RoleName=role_name,
        AssumeRolePolicyDocument=json.dumps(assume_role_policy_document),
    )
except iam.exceptions.EntityAlreadyExistsException:
    print("The role " + role_name + " exists, ignore to create it")
    role_arn = boto3.resource('iam').Role(role_name).arn
else:
    role_arn = create_role_response["Role"]["Arn"]

# Attaching AmazonForecastFullAccess to access all actions for Amazon Forecast
policy_arn = "arn:aws:iam::aws:policy/AmazonForecastFullAccess"
iam.attach_role_policy(RoleName=role_name, PolicyArn=policy_arn)

# Now add S3 support
# NOTE(review): AmazonS3FullAccess is a broad managed policy; consider one scoped to the demo bucket.
iam.attach_role_policy(
    RoleName=role_name,
    PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
)

# wait for a minute to allow IAM role policy attachment to propagate
time.sleep(60)

print(role_arn)

In [None]:
### Create Data Import Job

In [None]:
datasetImportJobName = 'EP_DSIMPORT_JOB_TARGET'

# Point the import job at the uploaded CSV in S3, read through the IAM role.
import_data_source = {
    "S3Config": {
        "Path": s3DataPath,
        "RoleArn": role_arn,
    }
}

ds_import_job_response = forecast.create_dataset_import_job(
    DatasetImportJobName=datasetImportJobName,
    DatasetArn=datasetArn,
    DataSource=import_data_source,
    TimestampFormat=TIMESTAMP_FORMAT,
)

In [None]:
# Extract the import job's ARN from the create response and print it; later cells poll and store it.
ds_import_job_arn=ds_import_job_response['DatasetImportJobArn']
print(ds_import_job_arn)

In [None]:
### Check the status of the dataset import job. When the status changes from **CREATE_IN_PROGRESS** to **ACTIVE**, we can continue to the next steps. Depending on the data size, this process typically takes 5 to 10 minutes.

In [None]:
status_indicator = util.StatusIndicator()

# Statuses that end the wait.
# NOTE(review): a CREATE_FAILED result only stops the loop; it does not raise.
terminal_statuses = ('ACTIVE', 'CREATE_FAILED')

# Poll the import job every 10 seconds, reporting each observed status.
while True:
    job_description = forecast.describe_dataset_import_job(DatasetImportJobArn=ds_import_job_arn)
    status = job_description['Status']
    status_indicator.update(status)
    if status in terminal_statuses:
        break
    time.sleep(10)

status_indicator.end()

In [None]:
# Display the full description of the import job, which includes its 'Status' field.
forecast.describe_dataset_import_job(DatasetImportJobArn=ds_import_job_arn)

In [None]:
# Persist the identifiers and settings from this notebook with the IPython %store magic,
# so they can be restored later with `%store -r`.
%store datasetGroupArn
%store datasetArn
%store role_name
%store key
%store bucket_name
%store region
%store ds_import_job_arn