In [None]:
# Create a stream with 3 shards.
aws kinesis create-stream --stream-name my-kinesis-stream --shard-count 3
# create-stream returns while the stream is still CREATING; wait until it is
# ACTIVE, otherwise the update-shard-count call below is rejected.
aws kinesis wait stream-exists --stream-name my-kinesis-stream
# Inspect the account's streams and this stream's shard layout.
aws kinesis list-streams
aws kinesis describe-stream --stream-name my-kinesis-stream
# Reshard from 3 to 5 shards.
aws kinesis update-shard-count --stream-name my-kinesis-stream --target-shard-count 5 --scaling-type UNIFORM_SCALING

In [None]:
# Alarm when the stream receives more than 10000 records within one 300-second period.
#
#   --alarm-name           Name of the alarm
#   --metric-name          Name of the metric
#   --namespace            Namespace of the metric
#   --statistic            Statistic to apply to the metric
#   --period               Period of the metric, in seconds
#   --threshold            Threshold for the alarm
#   --comparison-operator  Comparison operator to use
#   --dimensions           Dimensions to apply to the metric
#   --evaluation-periods   Number of periods to evaluate the metric
#   --alarm-actions        SNS topic to send notifications to
#
# Comments cannot follow a line-continuation backslash: "\ #" escapes the space
# instead of the newline and ends the command early, so they are listed here.
aws cloudwatch put-metric-alarm \
    --alarm-name KinesisHighRecordCount \
    --metric-name IncomingRecords \
    --namespace AWS/Kinesis \
    --statistic Sum \
    --period 300 \
    --threshold 10000 \
    --comparison-operator GreaterThanThreshold \
    --dimensions Name=StreamName,Value=my-kinesis-stream \
    --evaluation-periods 1 \
    --alarm-actions arn:aws:sns:region:account-id:my-sns-topic

In [None]:
# Fetch the 5-minute Sum of IncomingRecords for my-kinesis-stream
# between 2024-08-01 and 2024-08-02 (UTC).
aws cloudwatch get-metric-data \
    --start-time 2024-08-01T00:00:00Z \
    --end-time 2024-08-02T00:00:00Z \
    --metric-data-queries '[{"Id":"m1","MetricStat":{"Metric":{"Namespace":"AWS/Kinesis","MetricName":"IncomingRecords","Dimensions":[{"Name":"StreamName","Value":"my-kinesis-stream"}]},"Period":300,"Stat":"Sum"}}]'

In [None]:
# Create a delivery stream that writes to my-bucket using firehose-role.
aws firehose create-delivery-stream \
    --delivery-stream-name my-firehose-stream \
    --s3-destination-configuration "RoleARN=arn:aws:iam::123456789012:role/firehose-role,BucketARN=arn:aws:s3:::my-bucket"

# List all delivery streams, then show the details of the new one.
aws firehose list-delivery-streams
aws firehose describe-delivery-stream --delivery-stream-name my-firehose-stream

In [None]:
# Configuring the buffer interval and buffer size in Kinesis Data Firehose:

In [None]:
# Set the buffer interval to 300 seconds and the buffer size to 5 MB.
#
# update-destination needs both the current version id and the destination id;
# read them from the VersionId and Destinations[].DestinationId fields of
#   aws firehose describe-delivery-stream --delivery-stream-name my-firehose-stream
# and substitute them below.
#
# Firehose flushes a buffer to the destination when EITHER limit is reached,
# whichever comes first. Larger values give fewer, larger S3 objects (fewer PUT
# requests) at the cost of higher delivery latency. Roughly:
#   PUT requests per second ~= max(incoming MB per second / SizeInMBs,
#                                  1 / IntervalInSeconds)
aws firehose update-destination \
    --delivery-stream-name my-firehose-stream \
    --current-delivery-stream-version-id 1 \
    --destination-id destinationId-000000000001 \
    --s3-destination-update '{"BufferingHints": {"IntervalInSeconds": 300, "SizeInMBs": 5}}'

In [None]:
# Deploy function.zip as my-lambda-function; the Fernet key reaches the code
# through the FERNET_KEY environment variable.
# NOTE(review): python3.8 is an old Lambda runtime - confirm it is still
# accepted for new functions before running this.
aws lambda create-function \
    --function-name my-lambda-function \
    --runtime python3.8 \
    --handler lambda_function.lambda_handler \
    --role arn:aws:iam::123456789012:role/lambda-role \
    --zip-file fileb://function.zip \
    --environment 'Variables={FERNET_KEY=your-fernet-key}'

In [None]:
import base64
import json
import os

import boto3
from cryptography.fernet import Fernet

# Build the Fernet cipher once per execution environment from the FERNET_KEY
# environment variable (requires `import os` at the top of the cell).
key = os.environ['FERNET_KEY'].encode('utf-8')
cipher_suite = Fernet(key)

s3_client = boto3.client('s3')
# Destination bucket; set the BUCKET_NAME environment variable to override the
# placeholder default.
bucket_name = os.environ.get('BUCKET_NAME', 'your-s3-bucket')

def validate_data(data, required_fields=('id', 'timestamp')):
    """Check that a decoded record is a mapping containing the required fields.

    Args:
        data: The decoded JSON payload of one record.
        required_fields: Keys that must be present in ``data``. Defaults to
            ``('id', 'timestamp')``.

    Returns:
        ``data`` unchanged when it is valid.

    Raises:
        ValueError: If ``data`` is not a dict or lacks a required field.
    """
    # A JSON payload may decode to a list, string or number; a membership test
    # on those would either pass by accident or raise TypeError.
    if not isinstance(data, dict):
        raise ValueError("Invalid data format")

    if any(field not in data for field in required_fields):
        raise ValueError("Invalid data format")

    return data

def lambda_handler(event, context):
    """Decrypt, validate and store every Kinesis record of the batch in S3.

    Each record's ``kinesis.data`` is base64-decoded, decrypted with the
    module-level ``cipher_suite``, parsed as JSON, validated, and written to
    ``bucket_name`` under ``year/month/day/hour/id.json``.

    Args:
        event: Lambda event containing a ``Records`` list of Kinesis records.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded success message.

    Raises:
        ValueError: If a record fails validation or lacks one of the fields
            needed to build its S3 key.
    """
    # Fields read when building the S3 key; validate_data does not check them.
    partition_fields = ('year', 'month', 'day', 'hour')

    for record in event['Records']:
        payload = base64.b64decode(record['kinesis']['data'])
        decrypted_data = cipher_suite.decrypt(payload).decode('utf-8')
        data = json.loads(decrypted_data)

        # Validate data
        validate_data(data)

        # Fail with a descriptive error instead of a bare KeyError below.
        missing = [field for field in partition_fields if field not in data]
        if missing:
            raise ValueError(
                f"Invalid data format: missing partition field(s) {missing}"
            )

        # Process data and save to S3
        s3_key = f"{data['year']}/{data['month']}/{data['day']}/{data['hour']}/{data['id']}.json"
        s3_client.put_object(
            Bucket=bucket_name,
            Key=s3_key,
            Body=json.dumps(data)
        )

    return {
        'statusCode': 200,
        'body': json.dumps('Data processed successfully.')
    }


In [None]:
# Grant the Kinesis service principal permission to invoke the function for
# this stream.
# NOTE(review): Kinesis event source mappings are polled by Lambda using the
# function's execution role - confirm this resource-based permission is needed.
aws lambda add-permission \
    --function-name my-lambda-function \
    --statement-id some-unique-id \
    --action lambda:InvokeFunction \
    --principal kinesis.amazonaws.com \
    --source-arn arn:aws:kinesis:region:account-id:stream/my-kinesis-stream

In [None]:
# Feed the stream into the function in batches of up to 100 records, starting
# from the oldest record still retained (TRIM_HORIZON).
aws lambda create-event-source-mapping \
    --event-source-arn arn:aws:kinesis:region:account-id:stream/my-kinesis-stream \
    --function-name my-lambda-function \
    --batch-size 100 \
    --starting-position TRIM_HORIZON

In [None]:
# Turn on server-side encryption for the stream with the KMS key alias/my-kms-key.
aws kinesis start-stream-encryption \
    --stream-name my-kinesis-stream \
    --key-id alias/my-kms-key \
    --encryption-type KMS

In [None]:
# Enable S3 server-side encryption:

# Make AES256 server-side encryption the default for objects written to the bucket.
aws s3api put-bucket-encryption \
    --server-side-encryption-configuration '{"Rules":[{"ApplyServerSideEncryptionByDefault":{"SSEAlgorithm":"AES256"}}]}' \
    --bucket your-s3-bucket

In [None]:
# Attach the SQS queue my-dlq as the function's dead-letter queue.
# NOTE(review): a function-level DLQ may not cover records delivered through
# the Kinesis event source mapping - confirm whether an on-failure destination
# on the mapping is needed instead.
aws lambda update-function-configuration \
    --dead-letter-config "TargetArn=arn:aws:sqs:region:account-id:my-dlq" \
    --function-name my-lambda-function

In [None]:
# Create a crawler that catalogs s3://your-s3-bucket/ into my-database, prefixing
# table names with my_prefix_. Schema changes update the catalog in place and
# tables whose data disappeared are deleted from it.
#
# Keys inside the JSON arguments are case-sensitive API member names
# (S3Targets / Path); lower-cased keys fail parameter validation.
aws glue create-crawler \
    --name my-glue-crawler \
    --role arn:aws:iam::123456789012:role/glue-role \
    --database-name my-database \
    --targets '{"S3Targets": [{"Path": "s3://your-s3-bucket/"}]}' \
    --table-prefix my_prefix_ \
    --schema-change-policy '{"UpdateBehavior": "UPDATE_IN_DATABASE", "DeleteBehavior": "DELETE_FROM_DATABASE"}'

In [None]:
# Kick off a crawl.
aws glue start-crawler \
    --name my-glue-crawler
# Show the crawler's definition and current state.
aws glue get-crawler \
    --name my-glue-crawler

In [None]:
-- Expose the Glue Data Catalog database 'my-database' to Redshift as the
-- external schema my_extern_schema, creating the catalog database if needed.
CREATE EXTERNAL SCHEMA my_extern_schema
FROM DATA CATALOG
DATABASE 'my-database'
IAM_ROLE 'arn:aws:iam::123456789012:role/redshift-role'
CREATE EXTERNAL DATABASE IF NOT EXISTS;

In [None]:
-- Query the crawled table through the external schema.
SELECT
    *
FROM
    my_extern_schema.my_prefix_my_table
WHERE
    column_name = 'some_value';