In [13]:
import os
import boto3
import json
from decimal import Decimal
import pandas as pd
import warnings
from tqdm import tqdm
# Suppress every Python warning for the remainder of the session.
# NOTE(review): this is a blanket filter and also hides deprecation warnings
# from boto3/pandas - consider narrowing it to a specific category.
warnings.filterwarnings('ignore')
# S3 resource handle; no credentials or region are passed explicitly here.
s3 = boto3.resource('s3')
# Bucket holding the result files; the download cell iterates over its objects.
myBucket = s3.Bucket('crypto-conven-training')

In [2]:
# Download every object stored directly under the bucket's 'results/' prefix
# into a local 'results/' directory. The upload cell reads its CSVs from
# 'results/', so the files must land there rather than in the working directory.
RESULTS_DIR = 'results'
os.makedirs(RESULTS_DIR, exist_ok=True)

# Listing by prefix avoids paging through unrelated keys in the bucket.
for s3_object in myBucket.objects.filter(Prefix=RESULTS_DIR + '/'):
    path, filename = os.path.split(s3_object.key)
    # Keep only direct children of 'results/'. An empty filename is the
    # "folder" placeholder key ('results/'), which cannot be downloaded to a file.
    if path != RESULTS_DIR or not filename:
        continue
    myBucket.download_file(s3_object.key, os.path.join(RESULTS_DIR, filename))

In [18]:
# Create the DynamoDB table that will hold the prediction results, or reuse it
# if it already exists so that this cell can be re-run safely.
TABLE_NAME = 'return_prediction'
dynamodb = boto3.resource('dynamodb')
try:
    table = dynamodb.create_table(
        TableName=TABLE_NAME,
        # Single string partition key 'Task'; no sort key.
        KeySchema=[
            {
                'AttributeName': 'Task',
                'KeyType': 'HASH'
            }
        ],
        AttributeDefinitions=[
            {
                'AttributeName': 'Task',
                'AttributeType': 'S'
            }
        ],
        # NOTE(review): 1 write unit throttles bulk uploads heavily; raise it
        # (or switch billing mode) if upload time matters.
        ProvisionedThroughput={
            'ReadCapacityUnits': 1,
            'WriteCapacityUnits': 1
        }
    )
except dynamodb.meta.client.exceptions.ResourceInUseException:
    # The table already exists (e.g. this cell was run before): attach to it
    # instead of failing.
    table = dynamodb.Table(TABLE_NAME)

# Wait until AWS confirms that table exists before moving on
table.meta.client.get_waiter('table_exists').wait(TableName=TABLE_NAME)

# Refresh the cached table description so the attributes below are current.
table.reload()

# Get data about the table. item_count is 0 for a freshly created table; for a
# reused table it is DynamoDB's periodically refreshed approximate count.
print(table.item_count)
print(table.creation_date_time)

0
2022-05-30 16:43:45.077000+00:00


In [19]:
# Send every result CSV under 'results/' to the DynamoDB table.
# File names are expected to look like '<prefix>_<currency>_<model>.csv';
# the currency and model are taken from the 2nd and 3rd '_'-separated parts.
# Only '.csv' entries are processed, which also skips '.ipynb_checkpoints'.
files = [name for name in os.listdir('results/') if name.endswith('.csv')]

# batch_writer groups puts into BatchWriteItem requests and resends unprocessed
# items automatically, instead of one network round-trip per row.
# overwrite_by_pkeys keeps the original last-write-wins behaviour when several
# rows map to the same 'Task' key inside one batch (a batch may not contain
# duplicate keys).
with table.batch_writer(overwrite_by_pkeys=['Task']) as batch:
    for file in tqdm(files):
        result = pd.read_csv('results/' + file)
        name_parts = file.split('_')
        currency, model = name_parts[1], name_parts[2].replace('.csv', '')
        for _, row in result.iterrows():
            type_ = row['type']
            window_index = row['window_index']
            mae = row['MAE']
            # NOTE(review): the key does not include 'Type'; if a file holds
            # several rows per window_index, later rows replace earlier ones -
            # confirm this is intended.
            task = currency + '_' + model + '_' + str(window_index)
            # DynamoDB rejects Python floats, so round-trip through JSON to
            # turn every float into a Decimal.
            item = json.loads(json.dumps({'Task': task,
                                          'Model': model, 'Currency': currency,
                                          'Type': type_, 'Time Window': window_index,
                                          'MAE': mae}),
                              parse_float=Decimal)
            batch.put_item(Item=item)

100%|██████████| 36/36 [57:28<00:00, 95.79s/it] 
