# Download data
## Install Kaggle

In [1]:
!pip install kaggle

Collecting kaggle
  Downloading kaggle-1.5.8.tar.gz (59 kB)
[K     |████████████████████████████████| 59 kB 5.4 MB/s eta 0:00:011
[?25hCollecting urllib3<1.25,>=1.21.1
  Downloading urllib3-1.24.3-py2.py3-none-any.whl (118 kB)
[K     |████████████████████████████████| 118 kB 11.6 MB/s eta 0:00:01
Collecting python-slugify
  Downloading python-slugify-4.0.1.tar.gz (11 kB)
Collecting slugify
  Downloading slugify-0.0.1.tar.gz (1.2 kB)
Collecting text-unidecode>=1.3
  Downloading text_unidecode-1.3-py2.py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 8.9 MB/s  eta 0:00:01
[?25hBuilding wheels for collected packages: kaggle, python-slugify, slugify
  Building wheel for kaggle (setup.py) ... [?25ldone
[?25h  Created wheel for kaggle: filename=kaggle-1.5.8-py3-none-any.whl size=73274 sha256=6415a396e7b60a7bd14977350de6228cef55068fd5df63227ee670c81bc5a08a
  Stored in directory: /home/ec2-user/.cache/pip/wheels/cf/aa/f0/ed1179bbcd729b29d0dfda59826fb3b55f0a4a0c3f7

### Kaggle Settings

Before running the kaggle download, kaggle configurations should saved locally in the notebook using a terminal prompt. 
```
sh-4.2$ history
sh-4.2$ mkdir ~/.kaggle
sh-4.2$ vi kaggle.json //add the kaggle credentials
sh-4.2$ mv kaggle.json ~/.kaggle/kaggle.json
sh-4.2$ chmod 600 ~/.kaggle/kaggle.json
sh-4.2$ kaggle
```

In [5]:
!kaggle datasets download --force  janiobachmann/bank-marketing-dataset

Downloading bank-marketing-dataset.zip to /home/ec2-user/SageMaker/amazon-personalize
  0%|                                                | 0.00/142k [00:00<?, ?B/s]
100%|████████████████████████████████████████| 142k/142k [00:00<00:00, 20.9MB/s]


In [6]:
!unzip bank-marketing-dataset.zip

Archive:  bank-marketing-dataset.zip
  inflating: bank.csv                


# Prepare Customer Data


## Imports


In [1]:
# Imports
import boto3
import json
import numpy as np
import pandas as pd
import time

In [52]:
df=pd.read_csv("bank.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        11162 non-null  int64 
 1   job        11162 non-null  object
 2   marital    11162 non-null  object
 3   education  11162 non-null  object
 4   default    11162 non-null  object
 5   balance    11162 non-null  int64 
 6   housing    11162 non-null  object
 7   loan       11162 non-null  object
 8   contact    11162 non-null  object
 9   day        11162 non-null  int64 
 10  month      11162 non-null  object
 11  duration   11162 non-null  int64 
 12  campaign   11162 non-null  int64 
 13  pdays      11162 non-null  int64 
 14  previous   11162 non-null  int64 
 15  poutcome   11162 non-null  object
 16  deposit    11162 non-null  object
dtypes: int64(7), object(10)
memory usage: 1.4+ MB


### Add a unique customer ID to the data

In [72]:
df['USER_ID'] = np.arange(len(df))+30908
# cols = df.columns.tolist()
# cols = cols[-1:] + cols[:-1]

## max 5 user attributes, get rid of some columns not very useful 
df_custmer = df[['USER_ID','deposit','age', 'job','marital','education']]
df_custmer.to_csv('customer11k.csv')
df_custmer.head()

Unnamed: 0,USER_ID,deposit,age,job,marital,education
0,30908,yes,59,admin.,married,secondary
1,30909,yes,56,admin.,married,secondary
2,30910,yes,41,technician,married,secondary
3,30911,yes,55,services,married,secondary
4,30912,yes,54,admin.,married,tertiary


## Upload to S3

In [2]:
bucket ='personalize-custdata'      # replace with the name of your S3 bucket
filename ='customer11k.csv'
boto3.Session().resource('s3').Bucket(bucket).Object(filename).upload_file(filename)

## Create personalize runtime

In [6]:
# Configure the SDK to Personalize:
personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

## Create schema for customer data

In [4]:


customer_schema = {
    "type": "record",
    "name": "Users",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {
            "name": "USER_ID",
            "type": "string"
        },
        {
            "name": "deposit",
            "type": "string"
        },
        {
            "name": "age",
            "type": "long"
        },
        {
            "name": "job",
            "type": "string"
        },
        {
            "name": "marital",
            "type": "string"
        },
        {
            "name": "education",
            "type": "string"
        }
    ],
    "version": "1.0"
}

create_schema_response = personalize.create_schema(
    name = "personalize-customer-schema",
    schema = json.dumps(customer_schema)
)

customer_schema_arn = create_schema_response['schemaArn']
print(json.dumps(create_schema_response, indent=2))


{
  "schemaArn": "arn:aws:personalize:ap-southeast-1:248025046818:schema/personalize-customer-schema",
  "ResponseMetadata": {
    "RequestId": "35ea07ab-d850-4bb7-92ac-bdb717d3dc65",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 08 Sep 2020 14:58:45 GMT",
      "x-amzn-requestid": "35ea07ab-d850-4bb7-92ac-bdb717d3dc65",
      "content-length": "98",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


# Create Interactions Data 

In [5]:
import json
import csv
from datetime import datetime
from datetime import timedelta
# from datetime import time
from calendar import monthrange
from random import seed
from random import randint
import numpy as np
import pandas as pd
#import platform
# print(platform.python_version())


###################
## Declare constants 
###################
# customer ID start
starting_cust_id=30908

# customer ID end
ending_cust_id=30908+11000

# how many evemts we need?
max_interactions_sequence_count=8000

# starting date of the dataset
event_start_date = datetime(2019, 3, 3)
# datetime.datetime.utcfromtimestamp(0)
epoch_event_start_date = int(event_start_date.strftime('%s'))
# print(epoch_event_start_date)

event_time_range = 31536000 #seconds in a year. 

max_time_between_events = 1059200  
min_time_between_events = 3900 # 3 days. 


# set of possible interactions 
# Lets say there are 10 different types of events. 
# this array defines a few set of interaction sequence 
# we will use these fixed set of interaction sequence to generate synthetic data
interaction_sequence_length = 5 # number of rows 
number_different_interaction_sequences=6 # number of columns 

newarray = np.empty(shape=(5,0))

temparray = np.arange(5).reshape(5,1)

i=0

while (i < number_different_interaction_sequences):
	temparray = temparray+10
	newarray =	np.append(newarray, temparray, axis=1)
	i+=1

interaction_array = newarray.astype(int)
print (interaction_array)




#data column headers 
# USER_ID (string), ITEM_ID (string), TIMESTAMP (long), IMPRESSION


with open('interactions.csv', 'w', newline='') as file:
    writer = csv.writer(file, delimiter=',')
    writer.writerow(["USER_ID", "ITEM_ID", "TIMESTAMP"])

    ## Loop for interaction sequences 
    interactions_sequence_count=0 
    while ( interactions_sequence_count <= max_interactions_sequence_count ):

        #pic a customer id randomly 
        customer_id = randint(starting_cust_id, ending_cust_id)

        #pic a event sequence randomly 
        sequence_id =  randint(0, number_different_interaction_sequences-1)

        # select a event sequence length 
        max_sequence_length = randint(0, interaction_sequence_length-1)

        sequence_length = 0
        time_lapse_between_event = epoch_event_start_date+event_time_range
        next_event_time =randint(epoch_event_start_date , time_lapse_between_event)

        while (sequence_length <= max_sequence_length):
            writer.writerow([customer_id, interaction_array[sequence_length][sequence_id], next_event_time])
            sequence_length+=1
            next_event_time += randint(min_time_between_events, max_time_between_events)

        interactions_sequence_count+=1

file.close()


interactions_df=pd.read_csv("interactions.csv")
print(interactions_df.describe())
interactions_df.head(20)

[[10 20 30 40 50 60]
 [11 21 31 41 51 61]
 [12 22 32 42 52 62]
 [13 23 33 43 53 63]
 [14 24 34 44 54 64]]
            USER_ID       ITEM_ID     TIMESTAMP
count  24185.000000  24185.000000  2.418500e+04
mean   36397.258631     36.381187  1.567957e+09
std     3164.139522     17.206077  9.152871e+06
min    30908.000000     10.000000  1.551573e+09
25%    33674.000000     21.000000  1.559993e+09
50%    36400.000000     40.000000  1.567843e+09
75%    39099.000000     51.000000  1.575746e+09
max    41907.000000     64.000000  1.586202e+09


Unnamed: 0,USER_ID,ITEM_ID,TIMESTAMP
0,39601,20,1580755329
1,39601,21,1581085766
2,39601,22,1581301669
3,39601,23,1582210997
4,39601,24,1582664093
5,31159,10,1551915049
6,31159,11,1552964831
7,31159,12,1553302712
8,31159,13,1554069133
9,32015,50,1575561532


## Upload the interactions data to S3

In [6]:
interaction_array

array([[10, 20, 30, 40, 50, 60],
       [11, 21, 31, 41, 51, 61],
       [12, 22, 32, 42, 52, 62],
       [13, 23, 33, 43, 53, 63],
       [14, 24, 34, 44, 54, 64]])

In [7]:
bucket ='personalize-custdata'      # replace with the name of your S3 bucket
filename ='interactions.csv'
boto3.Session().resource('s3').Bucket(bucket).Object(filename).upload_file(filename)

## Create a schema definition in Amazon Personalize

In [8]:
interactions_schema =  {
  "type": "record",
  "name": "Interactions",
  "namespace": "com.amazonaws.personalize.schema",
  "fields": [
      {
          "name": "USER_ID",
          "type": "string"
      },
      {
          "name": "ITEM_ID",
          "type": "string"
      },
      {
          "name": "TIMESTAMP",
          "type": "long"
      }
  ],
  "version": "1.0"
}

create_schema_response = personalize.create_schema(
    name = "personalize-intertactions-schema",
    schema = json.dumps(interactions_schema)
)

interactions_schema_arn = create_schema_response['schemaArn']

print(json.dumps(create_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:ap-southeast-1:248025046818:schema/personalize-intertactions-schema",
  "ResponseMetadata": {
    "RequestId": "79f078c8-ed9b-4ef2-8dd5-f7b081fc3313",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 08 Sep 2020 15:01:10 GMT",
      "x-amzn-requestid": "79f078c8-ed9b-4ef2-8dd5-f7b081fc3313",
      "content-length": "103",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


## Createa a dataset group

In [15]:
create_dataset_group_response = personalize.create_dataset_group(
    name = "personalize-demo-dataset"
)

dataset_group_arn = create_dataset_group_response['datasetGroupArn']
print(json.dumps(create_dataset_group_response, indent=2))

ResourceAlreadyExistsException: An error occurred (ResourceAlreadyExistsException) when calling the CreateDatasetGroup operation: Another resource with Arn arn:aws:personalize:ap-southeast-1:248025046818:dataset-group/personalize-demo-dataset already exists.

In [14]:

max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    print("DatasetGroup: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

DatasetGroup: ACTIVE


### TODO: DELTE THIS BLOCK LATER

In [16]:
#personalize.list_schemas()
interactions_schema_arn = 'arn:aws:personalize:ap-southeast-1:248025046818:schema/personalize-intertactions-schema'
customer_schema_arn = 'arn:aws:personalize:ap-southeast-1:248025046818:schema/personalize-customer-schema'
interactions_dataset_arn = 'arn:aws:personalize:ap-southeast-1:248025046818:dataset/personalize-demo-dataset/INTERACTIONS'
customer_dataset_arn = 'arn:aws:personalize:ap-southeast-1:248025046818:dataset/personalize-demo-dataset/USERS'
dataset_group_arn = 'arn:aws:personalize:ap-southeast-1:248025046818:dataset-group/personalize-demo-dataset'
# solution_arn ='arn:aws:personalize:ap-southeast-1:248025046818:solution/demo-hrnn-solution'
# campaign_arn = 'arn:aws:personalize:ap-southeast-1:248025046818:campaign/demo-hrnn-campaign'

## Create datasets 

In [15]:
dataset_type = "INTERACTIONS"
create_dataset_response = personalize.create_dataset(
    datasetType = dataset_type,
    datasetGroupArn = dataset_group_arn,
    schemaArn = interactions_schema_arn,
    name = "demo-dataset"
)

interactions_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))


{
  "datasetArn": "arn:aws:personalize:ap-southeast-1:248025046818:dataset/personalize-demo-dataset/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "761f17b2-a2b5-42dc-a2ef-a42c994d0f3a",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 08 Sep 2020 15:06:00 GMT",
      "x-amzn-requestid": "761f17b2-a2b5-42dc-a2ef-a42c994d0f3a",
      "content-length": "110",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [16]:
dataset_type = "USERS"
create_dataset_response = personalize.create_dataset(
    datasetType = dataset_type,
    datasetGroupArn = dataset_group_arn,
    schemaArn = customer_schema_arn,
    name = "demo-user-dataset"
)

customer_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))


{
  "datasetArn": "arn:aws:personalize:ap-southeast-1:248025046818:dataset/personalize-demo-dataset/USERS",
  "ResponseMetadata": {
    "RequestId": "98fcf06e-5fc5-4b03-a330-a6d18f0231c0",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 08 Sep 2020 15:06:21 GMT",
      "x-amzn-requestid": "98fcf06e-5fc5-4b03-a330-a6d18f0231c0",
      "content-length": "103",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


## Add S3 bucket policy for accessing from personalize

In [3]:
s3 = boto3.client("s3")
bucket ='personalize-custdata' 

policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [
        {
            "Sid": "PersonalizeS3BucketAccessPolicy",
            "Effect": "Allow",
            "Principal": {
                "Service": "personalize.amazonaws.com"
            },
            "Action": [
                "s3:GetObject",
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3:::{}".format(bucket),
                "arn:aws:s3:::{}/*".format(bucket)
            ]
        }
    ]
}

s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy));

## Create Data Import Jobs

In [7]:
role_arn = 'arn:aws:iam::248025046818:role/AmazonPersonalizeRole'
create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName = "interactions-dataset-import-job1",
    datasetArn = interactions_dataset_arn,
    dataSource = {
        "dataLocation": "s3://{}/{}".format(bucket, 'interactions.csv')
        
    }, roleArn = role_arn
)

interactions_dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:ap-southeast-1:248025046818:dataset-import-job/interactions-dataset-import-job1",
  "ResponseMetadata": {
    "RequestId": "db7cd908-a61a-41f1-abbf-b1fd8f584831",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 08 Sep 2020 15:20:30 GMT",
      "x-amzn-requestid": "db7cd908-a61a-41f1-abbf-b1fd8f584831",
      "content-length": "125",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [8]:
role_arn = 'arn:aws:iam::248025046818:role/AmazonPersonalizeRole'
create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName = "user-dataset-import-job1",
    datasetArn = customer_dataset_arn,
    dataSource = {
        "dataLocation": "s3://{}/{}".format(bucket, 'customer11k.csv')
        
    }, roleArn = role_arn
)

users_dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:ap-southeast-1:248025046818:dataset-import-job/user-dataset-import-job1",
  "ResponseMetadata": {
    "RequestId": "919c0623-842a-41c8-bcd0-14cc46063ab3",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 08 Sep 2020 15:21:02 GMT",
      "x-amzn-requestid": "919c0623-842a-41c8-bcd0-14cc46063ab3",
      "content-length": "117",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [9]:
personalize.list_dataset_import_jobs()
#interactions_dataset_import_job_arn = 'arn:aws:personalize:ap-southeast-1:248025046818:dataset-import-job/interactions-dataset-import-job'
#users_dataset_import_job_arn ='arn:aws:personalize:ap-southeast-1:248025046818:dataset-import-job/user-dataset-import-job'

{'datasetImportJobs': [{'datasetImportJobArn': 'arn:aws:personalize:ap-southeast-1:248025046818:dataset-import-job/interactions-dataset-import-job1',
   'jobName': 'interactions-dataset-import-job1',
   'status': 'CREATE IN_PROGRESS',
   'creationDateTime': datetime.datetime(2020, 9, 8, 15, 20, 31, 315000, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2020, 9, 8, 15, 20, 53, 521000, tzinfo=tzlocal())},
  {'datasetImportJobArn': 'arn:aws:personalize:ap-southeast-1:248025046818:dataset-import-job/user-dataset-import-job1',
   'jobName': 'user-dataset-import-job1',
   'status': 'CREATE PENDING',
   'creationDateTime': datetime.datetime(2020, 9, 8, 15, 21, 3, 8000, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2020, 9, 8, 15, 21, 3, 8000, tzinfo=tzlocal())}],
 'ResponseMetadata': {'RequestId': '7fce6c62-cf29-489d-af86-a37a3d59c65b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Tue, 08 Sep 2020 15:21:1

In [10]:
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = users_dataset_import_job_arn
    )
    
    dataset_import_job = describe_dataset_import_job_response["datasetImportJob"]
    if "latestDatasetImportJobRun" not in dataset_import_job:
        status = dataset_import_job["status"]
        print("DatasetImportJob: {}".format(status))
    else:
        status = dataset_import_job["latestDatasetImportJobRun"]["status"]
        print("LatestDatasetImportJobRun: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

DatasetImportJob: CREATE PENDING
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE


In [11]:
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = interactions_dataset_import_job_arn
    )
    
    dataset_import_job = describe_dataset_import_job_response["datasetImportJob"]
#     print(dataset_import_job)
    if "latestDatasetImportJobRun" not in dataset_import_job:
        status = dataset_import_job["status"]
        print("DatasetImportJob: {}".format(status))
    else:
        status = dataset_import_job["latestDatasetImportJobRun"]["status"]
        print("LatestDatasetImportJobRun: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

DatasetImportJob: ACTIVE


In [12]:
recipe_list = personalize.list_recipes()
for recipe in recipe_list['recipes']:
    print(recipe['recipeArn'])

arn:aws:personalize:::recipe/aws-hrnn
arn:aws:personalize:::recipe/aws-hrnn-coldstart
arn:aws:personalize:::recipe/aws-hrnn-metadata
arn:aws:personalize:::recipe/aws-personalized-ranking
arn:aws:personalize:::recipe/aws-popularity-count
arn:aws:personalize:::recipe/aws-sims
arn:aws:personalize:::recipe/aws-user-personalization


In [13]:
recipe_arn = "arn:aws:personalize:::recipe/aws-hrnn-metadata"

In [17]:
create_solution_response = personalize.create_solution(
    name = 'demo-hrnn-solution',
    datasetGroupArn = dataset_group_arn,
    recipeArn = recipe_arn
)

solution_arn = create_solution_response['solutionArn']
print(json.dumps(create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:ap-southeast-1:248025046818:solution/demo-hrnn-solution",
  "ResponseMetadata": {
    "RequestId": "385ba433-e411-4618-9beb-8aaf0a120333",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 08 Sep 2020 15:38:43 GMT",
      "x-amzn-requestid": "385ba433-e411-4618-9beb-8aaf0a120333",
      "content-length": "93",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [18]:
create_solution_version_response = personalize.create_solution_version(
    solutionArn = solution_arn
)

solution_version_arn = create_solution_version_response['solutionVersionArn']
print(json.dumps(create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:ap-southeast-1:248025046818:solution/demo-hrnn-solution/55866e6c",
  "ResponseMetadata": {
    "RequestId": "258a4a9f-146a-4af0-a3e9-cdfc5dd99b89",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 08 Sep 2020 15:39:06 GMT",
      "x-amzn-requestid": "258a4a9f-146a-4af0-a3e9-cdfc5dd99b89",
      "content-length": "109",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [None]:
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print("SolutionVersion: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS


In [141]:
create_campaign_response = personalize.create_campaign(
    name = "demo-hrnn-campaign",
    solutionVersionArn = solution_version_arn,
    minProvisionedTPS = 2,  
)

campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-southeast-1:248025046818:campaign/demo-hrnn-campaign",
  "ResponseMetadata": {
    "RequestId": "d4f0678c-ac1a-4a64-a97c-cfd5bc5a6269",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 08 Sep 2020 02:06:58 GMT",
      "x-amzn-requestid": "d4f0678c-ac1a-4a64-a97c-cfd5bc5a6269",
      "content-length": "93",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [142]:
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    print("Campaign: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

Campaign: ACTIVE


## Get Recommendations or Intent Prediction using API call

In [147]:
response = personalize_runtime.get_recommendations(
    campaignArn = campaign_arn,
    userId = '31950' )

print("Recommended items")
for item in response['itemList']:
    print (item['itemId'])

Recommended items
5
1
6
8
4
3
7
0
9
2


## TODO: Add an interaction and then retry the recommendation

In [150]:
response = personalizeRt.get_recommendations(
    campaignArn = campaign_arn,
    userId = '31953' )

print("Recommended items")
for item in response['itemList']:
    print (item['itemId'])

Recommended items
4
8
3
9
6
5
2
7
0
1


# TODO: Clean-up

## TODO: Clean-up Personalize Resources

In [158]:
# Delete the campaign 
personalize.delete_campaign(campaignArn=campaign_arn)
time.sleep(60)

ResourceInUseException: An error occurred (ResourceInUseException) when calling the DeleteCampaign operation: campaign is in PENDING or IN_PROGRESS status: arn:aws:personalize:ap-southeast-1:248025046818:campaign/demo-hrnn-campaign

In [161]:
# Delete the solution
personalize.delete_solution(solutionArn=solution_arn)
time.sleep(60)

In [163]:
# Delete the datasets
personalize.delete_dataset(datasetArn=customer_dataset_arn)
time.sleep(60)
personalize.delete_dataset(datasetArn=interactions_dataset_arn)
time.sleep(60)

In [12]:
#Delete dataset group
personalize.delete_dataset_group(datasetGroupArn=dataset_group_arn)

{'ResponseMetadata': {'RequestId': 'b302ba28-2f6a-4729-a470-04aacf58a847',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Tue, 08 Sep 2020 15:03:46 GMT',
   'x-amzn-requestid': 'b302ba28-2f6a-4729-a470-04aacf58a847',
   'content-length': '0',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

In [164]:
# Delete the schema
personalize.delete_schema(schemaArn=customer_schema_arn)
time.sleep(60)
personalize.delete_schema(schemaArn=interactions_schema_arn)
time.sleep(60)

## TODO: Clean-up Data