In [3]:
# S3 bucket creation: Server-side encryption with Amazon S3 managed keys (SSE-S3)

In [1]:
# Shell out to the AWS CLI to list the S3 buckets visible to the configured credentials.
!aws s3 ls

2023-09-25 18:37:22 greatexpectationstestbucket


In [65]:
# %pip (rather than !pip) installs into the environment of the running kernel,
# not into whichever `pip` happens to be first on the shell PATH.
%pip install boto3



In [None]:
# Multipart Upload:
# Chunked upload:

In [4]:
ls

[0m[01;34maws[0m/  aws_checksum.ipynb  [01;31mawscliv2.zip[0m  data.csv  src_file.csv


In [9]:
import pandas as pd
# Keep only the first 120,000 rows of src_file.csv and write them, without the
# index column, to data.csv - the file that is uploaded further down.
df = pd.read_csv("src_file.csv",nrows=120000)
df.to_csv("data.csv",index=False)

In [25]:
from boto3.s3.transfer import TransferConfig
import boto3
import os
import threading
import sys 

s3_resource = boto3.resource('s3')

# Size of each uploaded part. The previous value (1024 * 5 = 5 KiB) is below
# S3's 5 MiB minimum part size; the 3-part ETag recorded for the ~13.8 MB
# data.csv upload shows that 5 MiB parts were what actually got used, i.e. the
# transfer layer appears to have raised the value silently. State the effective
# size explicitly so local ETag computations can rely on it.
MULTIPART_CHUNK_SIZE = 5 * 1024 * 1024

config = TransferConfig(
    # Deliberately left at 5 KiB: with this threshold practically every file
    # takes the multipart path, so its ETag always has the "<md5>-<parts>" form.
    # NOTE(review): raise to MULTIPART_CHUNK_SIZE if small files should be
    # uploaded with a single PUT instead.
    multipart_threshold=1024 * 5,
    max_concurrency=10,
    multipart_chunksize=MULTIPART_CHUNK_SIZE,
    use_threads=True,
)


class ProgressPercentage:
    """Progress callback that writes a running byte count and percentage to stdout.

    An instance is handed to an upload as its ``Callback``; each call receives
    the number of bytes transferred since the previous call.
    """

    def __init__(self, filename):
        """Remember ``filename`` and read its on-disk size once, up front.

        Raises:
            OSError: if ``filename`` does not exist or cannot be stat'ed.
        """
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # Serialises counter updates and writes; the upload is configured with
        # use_threads=True, so calls may arrive from several threads.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        """Add ``bytes_amount`` to the running total and redraw the progress line."""
        with self._lock:
            self._seen_so_far += bytes_amount
            # A zero-byte file has nothing left to send: report 100% instead of
            # dividing by zero.
            if self._size:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                percentage = 100.0
            # The leading "\r" rewrites the same console line on every call.
            sys.stdout.write(
                "\r%s  %s / %s  (%.2f%%)" % (
                    self._filename, self._seen_so_far, self._size,
                    percentage))
            sys.stdout.flush()

In [26]:
bucket_name = 'xxxx'


def multipart_upload_boto3():
    """Upload the local ``data.csv`` to ``bucket_name`` under the key ``data.csv``.

    Relies on the notebook-level ``s3_resource`` and ``config`` objects and
    reports progress on stdout through ``ProgressPercentage``.
    """
    file_path = 'data.csv'
    key = 'data.csv'

    s3_resource.Object(bucket_name, key).upload_file(
        file_path,
        # The payload is a CSV file. It used to be labelled 'text/pdf', which
        # is not a registered media type and misdescribes the object.
        ExtraArgs={'ContentType': 'text/csv'},
        Config=config,
        Callback=ProgressPercentage(file_path),
    )


multipart_upload_boto3()

data.csv  13824034 / 13824034.0  (100.00%)

In [1]:
# Retrieving the ETag of the uploaded object

In [14]:
import boto3
import hashlib

In [3]:
# Low-level S3 client; used below to read the uploaded object's metadata.
s3_cli = boto3.client('s3')

In [4]:
# HEAD request for the uploaded object: returns its metadata (the ETag is read
# from this response below) without downloading the body.
s3_resp = s3_cli.head_object(Bucket='xxxx', Key='data.csv')

In [12]:
# The ETag arrives wrapped in double quotes; strip them to get the bare value.
s3_resp['ETag'].strip('"')

'1f40a2acd597ff10764fd31a5bdb71d6-3'

In [19]:
def calculate_s3_etag(file_path, chunk_size=6 * 1024 * 1024):
    """Compute a double-quoted, S3-style ETag for a local file.

    The file is read in pieces of ``chunk_size`` bytes and every piece is
    MD5-hashed. The result depends on how many pieces were read:

    * none (empty file) -> MD5 of empty input
    * exactly one       -> MD5 of that piece
    * two or more       -> MD5 over the concatenated binary piece digests,
      followed by ``-<number of pieces>``

    Args:
        file_path: path of the file to hash.
        chunk_size: piece size in bytes (default 6 MiB).

    Returns:
        The hex value wrapped in double quotes.
    """
    part_hashes = []
    with open(file_path, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(chunk_size), b''):
            part_hashes.append(hashlib.md5(block))

    if not part_hashes:
        return '"{}"'.format(hashlib.md5().hexdigest())

    part_count = len(part_hashes)
    if part_count == 1:
        (only_part,) = part_hashes
        return '"{}"'.format(only_part.hexdigest())

    # Hash of hashes: feed each piece's raw digest into one outer MD5.
    combined = hashlib.md5()
    for part in part_hashes:
        combined.update(part.digest())
    return '"{}-{}"'.format(combined.hexdigest(), part_count)
# NOTE(review): the piece size has to equal the part size used for the upload.
# The result of this 6 MiB run does not match the ETag S3 returned for
# data.csv, whereas the 5 MiB computation in etag_checksum() below does.
calculate_s3_etag('data.csv',chunk_size=6*1024*1024)

'"97c15a899046bab63e56b7191b90fea2-3"'

In [28]:
def md5_checksum(filename):
    """Return the hex MD5 digest of the file at ``filename``.

    The file is streamed in 1 MiB blocks, so it is never held in memory whole.
    """
    block_size = 1024 * 1024
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        while True:
            block = stream.read(block_size)
            if block == b'':
                break
            digest.update(block)
    return digest.hexdigest()


def etag_checksum(filename, chunk_size=5 * 1024 * 1024):
    """Compute, print and return a multipart-style ETag for a local file.

    The file is read in pieces of ``chunk_size`` bytes (default 5 MiB). The
    binary MD5 digests of the pieces are concatenated and hashed again, and
    the piece count is appended: ``<md5 of digests>-<number of pieces>``.
    Unlike ``calculate_s3_etag`` the suffix is always present and the value
    is not wrapped in quotes.

    Returns:
        The same string that is printed to stdout.
    """
    part_digests = []
    with open(filename, 'rb') as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b'':
                break
            part_digests.append(hashlib.md5(block).digest())

    combined = hashlib.md5(b"".join(part_digests))
    etag = '{}-{}'.format(combined.hexdigest(), len(part_digests))
    print(etag)
    return etag


# Recompute the ETag locally with the default 5 MiB piece size; the bare
# `checksum` on the last line makes the notebook display the returned value.
checksum = etag_checksum('data.csv')
checksum

1f40a2acd597ff10764fd31a5bdb71d6-3


'1f40a2acd597ff10764fd31a5bdb71d6-3'

In [48]:
# python credit/debit card check - Luhn’s algorithm
import math


In [64]:
def is_valid_card(card_num_str):
    """Check a card number against the Luhn checksum.

    Dashes and spaces are ignored. Counting from the rightmost digit (the
    check digit), every second digit is doubled; a doubled value above 9 has
    9 subtracted (equivalent to adding its two digits). The number is valid
    when the total of all digits is a multiple of 10.

    Args:
        card_num_str: the card number as text, e.g. "4100-2463-1118-7264".

    Returns:
        True if the number passes the Luhn check; False otherwise, including
        for empty input or input containing anything other than digits,
        dashes and spaces.
    """
    digits_only = card_num_str.replace("-", "").replace(" ", "")
    if not digits_only or any(ch not in "0123456789" for ch in digits_only):
        return False

    total = 0
    # Walk from the right so the doubling pattern is anchored on the check
    # digit; anchoring on the left is only correct for even-length numbers.
    for position, char in enumerate(reversed(digits_only)):
        digit = int(char)
        if position % 2 == 1:
            digit *= 2
            if digit > 9:
                digit -= 9
        total += digit
    return total % 10 == 0

# The card numbers below are synthetically generated test values.
for card in ["4100-2463-1118-7264","4281-9400-6897-1166","4078-0400-0914-6113"]:
    # One report line per number, worded according to the validation result.
    print(f"{card} is Valid" if is_valid_card(card) else f"{card} is Invalid")
        

4100-2463-1118-7264 is Valid
4281-9400-6897-1166 is Valid
4078-0400-0914-6113 is Invalid


In [47]:
# Scratch check: math.floor rounds down, so 1.9 becomes 1.
math.floor(1.9)

1