In [20]:
import os
import sys
import boto3
import argparse
from tqdm import tqdm
from botocore.exceptions import ClientError
import logging
from concurrent.futures import ThreadPoolExecutor

In [15]:
# Local directory that gets walked for files, and the S3 bucket they go to.
ROOT_DIR = "."
ROOT_S3_DIR = "22962256-cloudstorage"

# Region constraint for the bucket. Not referenced by the visible cells;
# presumably meant for a create_bucket call -- TODO confirm.
bucket_config = dict(LocationConstraint="ap-southeast-2")

# S3 client shared by the upload and listing cells.
s3_client = boto3.client("s3")

In [54]:
def upload_file(filepath, key=None):
    """Upload one local file to the ROOT_S3_DIR bucket with a progress bar.

    Args:
        filepath: Path of the local file to upload.
        key: Object key to store the file under. Defaults to the file's base
            name; with that default, files that share a name in different
            directories overwrite each other, so pass distinct keys to keep
            them apart.

    Returns:
        True if the upload completed. False if the local file could not be
        read or the transfer failed; the error is logged in that case.
    """
    filename = os.path.basename(filepath)
    object_key = filename if key is None else key
    try:
        with tqdm(
            total=os.path.getsize(filepath),
            unit="B",
            unit_scale=True,
            desc=filename,
            dynamic_ncols=True,
        ) as progress:
            s3_client.upload_file(
                filepath,
                ROOT_S3_DIR,
                object_key,
                # The callback receives the number of bytes sent since the
                # previous call, which is the increment tqdm.update expects.
                Callback=progress.update,
            )
    except (ClientError, boto3.exceptions.S3UploadFailedError, OSError) as e:
        # The managed transfer re-raises ClientError as S3UploadFailedError,
        # which is not a ClientError subclass, so it must be caught by name.
        # OSError covers a local file that is missing or unreadable.
        logging.error(e)
        return False
    return True

In [55]:
# Gather the path of every file found below ROOT_DIR. Files that sit directly
# in ROOT_DIR itself are skipped; only files inside sub-directories are kept.
files_to_upload = [
    f"{current_dir}/{entry}"
    for current_dir, _subdirs, entries in os.walk(ROOT_DIR, topdown=True)
    if current_dir != ROOT_DIR
    for entry in entries
]

In [56]:
# Upload all files concurrently. Each future is kept next to its path so that
# a failed upload (a False result or an exception raised in the worker) is
# reported instead of being silently dropped.
with ThreadPoolExecutor() as executor:
    upload_futures = {
        executor.submit(upload_file, path): path for path in files_to_upload
    }

# Leaving the with-block waits for all submitted work, so every future is done.
failed_uploads = []
for future, path in upload_futures.items():
    error = future.exception()
    if error is not None:
        logging.error("Upload of %s raised %r", path, error)
        failed_uploads.append(path)
    elif not future.result():
        failed_uploads.append(path)

if failed_uploads:
    logging.error(
        "%d of %d uploads failed: %s",
        len(failed_uploads),
        len(upload_futures),
        failed_uploads,
    )



rootfile.txt:   0%|          | 0.00/45.0 [00:00<?, ?B/s]




[A[A[A
[A

[A[A



[A[A[A[A




[A[A[A[A[A





rootfile copy 2.txt: 100%|██████████| 45.0/45.0 [00:05<00:00, 7.77B/s]
subfile copy 3.txt: 100%|██████████| 45.0/45.0 [00:00<00:00, 338B/s]
subfile copy 2.txt: 100%|██████████| 45.0/45.0 [00:00<00:00, 311B/s]
rootfile.txt: 100%|██████████| 45.0/45.0 [00:06<00:00, 7.07B/s]

internalfile copy.txt: 100%|██████████| 45.0/45.0 [00:06<00:00, 7.16B/s]





[A[A[A[A[A


[A[A[A





[A[A[A[A[A[A



subfile copy.txt: 100%|██████████| 45.0/45.0 [00:06<00:00, 6.97B/s]
subfile.txt: 100%|██████████| 45.0/45.0 [00:06<00:00, 6.99B/s]


internalfile copy 2.txt: 100%|██████████| 45.0/45.0 [00:06<00:00, 6.80B/s]
internalfile.txt: 100%|██████████| 45.0/45.0 [00:06<00:00, 6.66B/s]
rootfile copy.txt: 100%|██████████| 45.0/45.0 [00:06<00:00, 6.75B/s]


In [11]:
import boto3

# DynamoDB requests are sent to an endpoint on localhost port 8000
# (presumably a DynamoDB Local instance -- confirm before running).
DYNAMODB_ENDPOINT = "http://localhost:8000"

s3_client = boto3.client("s3")
dynamodb = boto3.resource("dynamodb", endpoint_url=DYNAMODB_ENDPOINT)


In [27]:
# Create the CloudFiles table: "path" is the partition (HASH) key and
# "userId" is the sort (RANGE) key, both stored as strings.
table = dynamodb.create_table(
    TableName="CloudFiles",
    KeySchema=[
        {"AttributeName": "path", "KeyType": "HASH"},
        {"AttributeName": "userId", "KeyType": "RANGE"},
    ],
    AttributeDefinitions=[
        {"AttributeName": "path", "AttributeType": "S"},
        {"AttributeName": "userId", "AttributeType": "S"},
    ],
    ProvisionedThroughput={"ReadCapacityUnits": 5, "WriteCapacityUnits": 5},
)
# create_table returns while the table may still be in the CREATING state;
# block until it exists so that the writes in the next cell cannot race it.
table.wait_until_exists()
table

dynamodb.Table(name='CloudFiles')

In [28]:
# Collect every object in the bucket. A single list_objects call returns at
# most 1000 keys, so the listing is paginated, and a page for an empty bucket
# carries no "Contents" key, hence the .get() with an empty default.
files = [
    s3_object
    for page in s3_client.get_paginator("list_objects").paginate(Bucket=ROOT_S3_DIR)
    for s3_object in page.get("Contents", [])
]

# Write one item per object, combining its listing metadata with its ACL.
for s3_object in files:
    acl = s3_client.get_object_acl(Bucket=ROOT_S3_DIR, Key=s3_object["Key"])
    permissions = acl["Grants"]
    table.put_item(
        Item={
            "userId": s3_object["Owner"]["ID"],
            "fileName": os.path.basename(s3_object["Key"]),
            "path": s3_object["Key"],
            "lastUpdated": str(s3_object["LastModified"]),
            "owner": s3_object["Owner"]["DisplayName"],
            # Stored as the Python repr of the grants list, not as JSON.
            "permissions": str(permissions),
        }
    )

In [29]:
# Read the table back so the written items can be inspected in the output.
scan_response = table.scan()
scan_response

{'Items': [{'owner': 'zhi.zhang',
   'path': 'rootdir/rootfile.txt',
   'lastUpdated': '2023-08-24 08:12:44+00:00',
   'fileName': 'rootfile.txt',
   'userId': '2a5fac7aada1ad2caa48c9ab08cc4e2428d4eb596108daa3b59f1204ae96482e',
   'permissions': "[{'Grantee': {'DisplayName': 'zhi.zhang', 'ID': '2a5fac7aada1ad2caa48c9ab08cc4e2428d4eb596108daa3b59f1204ae96482e', 'Type': 'CanonicalUser'}, 'Permission': 'FULL_CONTROL'}]"},
  {'owner': 'zhi.zhang',
   'path': 'subdir/subfile.txt',
   'lastUpdated': '2023-08-24 08:12:45+00:00',
   'fileName': 'subfile.txt',
   'userId': '2a5fac7aada1ad2caa48c9ab08cc4e2428d4eb596108daa3b59f1204ae96482e',
   'permissions': "[{'Grantee': {'DisplayName': 'zhi.zhang', 'ID': '2a5fac7aada1ad2caa48c9ab08cc4e2428d4eb596108daa3b59f1204ae96482e', 'Type': 'CanonicalUser'}, 'Permission': 'FULL_CONTROL'}]"},
  {'owner': 'zhi.zhang',
   'path': 'rootdir/internaldir/internalfile copy.txt',
   'lastUpdated': '2023-08-24 08:12:44+00:00',
   'fileName': 'internalfile copy.txt'

In [26]:
# Drop the table. delete() returns while the table may still be in the
# DELETING state, so wait until it is gone; otherwise re-running the
# create_table cell straight afterwards can fail because the name is in use.
delete_response = table.delete()
table.wait_until_not_exists()
delete_response

{'TableDescription': {'AttributeDefinitions': [{'AttributeName': 'userId',
    'AttributeType': 'S'}],
  'TableName': 'CloudFiles',
  'KeySchema': [{'AttributeName': 'userId', 'KeyType': 'HASH'}],
  'TableStatus': 'ACTIVE',
  'CreationDateTime': datetime.datetime(2023, 8, 26, 12, 26, 26, 129000, tzinfo=tzlocal()),
  'ProvisionedThroughput': {'LastIncreaseDateTime': datetime.datetime(1970, 1, 1, 8, 0, tzinfo=tzlocal()),
   'LastDecreaseDateTime': datetime.datetime(1970, 1, 1, 8, 0, tzinfo=tzlocal()),
   'NumberOfDecreasesToday': 0,
   'ReadCapacityUnits': 5,
   'WriteCapacityUnits': 5},
  'TableSizeBytes': 344,
  'ItemCount': 1,
  'TableArn': 'arn:aws:dynamodb:ddblocal:000000000000:table/CloudFiles'},
 'ResponseMetadata': {'RequestId': 'f1bd04e4-56ae-4946-a772-0d40a3ecf0ba',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Sat, 26 Aug 2023 05:11:34 GMT',
   'x-amzn-requestid': 'f1bd04e4-56ae-4946-a772-0d40a3ecf0ba',
   'content-type': 'application/x-amz-json-1.0',
   'x-amz-crc32': '