In [5]:
# Below is an example of how to write a Python script to send data to your Kinesis stream.

In [None]:
from cryptography.fernet import Fernet # fernet is a symmetric encryption algorithm that uses the same key for both encryption and decryption
import boto3
import json
import time
import os

# Generate a key ONCE, then keep it in a secrets store (never in the notebook itself):
# key = Fernet.generate_key()
# print(f"Save this key: {key.decode()}")

# Read the stored key from the FERNET_KEY environment variable so the secret is not
# committed with the notebook. The placeholder fallback is intentionally invalid:
# Fernet() rejects it, which makes a missing key fail loudly at start-up.
key = os.environ.get('FERNET_KEY', 'your-encryption-key-here').encode('utf-8')
cipher_suite = Fernet(key)  # Symmetric cipher: the same key encrypts and decrypts

# Kinesis client used by send_to_kinesis(); replace the region placeholder.
kinesis_client = boto3.client('kinesis', region_name='your-region')

# Name of the Kinesis data stream that receives the records.
stream_name = 'your-stream-name'

# Folder that process_logs() scans for .log files.
folder_path = 'path/to/your/folder'

def get_data_from_file(file_path):
    """Read and parse the JSON document stored in a .log file.

    Args:
        file_path: Path of the file to read. The whole file must be a single
            JSON document.

    Returns:
        The parsed Python object (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; pinning the encoding avoids depending on
    # the platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        # json.load parses directly from the open file object.
        return json.load(file)

def encrypt_data(data):
    """Serialize ``data`` to JSON and encrypt it with the module-level Fernet cipher.

    Args:
        data: Any JSON-serializable Python object.

    Returns:
        The encrypted token produced by ``cipher_suite.encrypt``.
    """
    # Fernet operates on bytes, so the JSON text is UTF-8 encoded first.
    plaintext = json.dumps(data).encode('utf-8')
    return cipher_suite.encrypt(plaintext)

def send_to_kinesis(data, partition_key='partitionKey'):
    """Encrypt ``data`` and put it on the Kinesis stream as a single record.

    Args:
        data: JSON-serializable object; it is encrypted via ``encrypt_data``
            before being sent.
        partition_key: Partition key passed to ``put_record``. Kinesis uses it
            to choose the shard, so a constant key routes every record to the
            same shard. Defaults to the previously hard-coded value.

    Returns:
        None. The encrypted payload and the service response are printed.
    """
    encrypted_data = encrypt_data(data)  # Never send the plaintext
    response = kinesis_client.put_record(
        StreamName=stream_name,
        Data=encrypted_data,
        PartitionKey=partition_key
    )
    print(f"Sent encrypted data: {encrypted_data} to Kinesis with response: {response}")

def process_logs():
    """Send the contents of every .log file in ``folder_path`` to Kinesis.

    Files are handled one at a time; a failure on one file is reported and
    does not stop the remaining files from being processed.
    """
    for entry in os.listdir(folder_path):
        # Skip anything that is not a .log file.
        if not entry.endswith('.log'):
            continue

        log_path = os.path.join(folder_path, entry)
        try:
            send_to_kinesis(get_data_from_file(log_path))
        except Exception as e:
            # Best effort: report the problem and move on to the next file.
            print(f"Error processing file {entry}: {e}")

# Simulate a continuous stream of data by re-scanning the folder on a fixed interval.
# NOTE: every pass calls process_logs(), which sends every .log file in the folder again.
POLL_INTERVAL_SECONDS = 60  # Check the folder every minute, adjust as needed

try:
    while True:
        process_logs()
        time.sleep(POLL_INTERVAL_SECONDS)
except KeyboardInterrupt:
    # The loop never ends on its own; interrupting the kernel (or Ctrl+C)
    # stops it cleanly instead of leaving a traceback in the output.
    print("Stopped polling for log files.")


In [None]:
# The Kinesis Agent configuration is typically stored in /etc/aws-kinesis/agent.json. 
# You need to edit this file to specify which log files to monitor and where to send the data.

In [None]:
{
    "cloudwatch.emitMetrics": true,
    "firehose.endpoint": "firehose.us-west-2.amazonaws.com",
    "kinesis.endpoint": "kinesis.us-west-2.amazonaws.com",
    "assumeRoleARN": "arn:aws:iam::your-account-id:role/your-kinesis-role",
    "flows": [
        {
            "filePattern": "/path/to/your/folder/*.log",
            "kinesisStream": "your-stream-name"
        }
    ]
}



# sudo service aws-kinesis-agent start ---> To start the Kinesis Agent


In [None]:
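# Configure a Lambda function to process the data from the Kinesis stream. The example handler below only logs each record; add an S3 write (e.g. boto3 put_object) inside the loop to store the data in an S3 bucket.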

In [None]:
import json

def lambda_handler(event, context):
    """Process a batch of Kinesis records delivered to Lambda.

    Args:
        event: Input that triggered the function. Each item of
            ``event['Records']`` carries its payload base64-encoded under
            ``record['kinesis']['data']``.
        context: Invocation, function and execution-environment information
            (not used here).

    Returns:
        dict with ``statusCode`` 200 and a JSON-encoded ``body`` message.

    Raises:
        KeyError: If the event does not have the expected Kinesis structure.
        ValueError: If a payload is not valid base64 or not valid JSON.
    """
    # Local import keeps this handler self-contained when it is copied into Lambda.
    import base64

    # Iterate through each record in the batch
    for record in event['Records']:
        # Kinesis delivers the data base64-encoded; decode it before parsing.
        raw_bytes = base64.b64decode(record['kinesis']['data'])

        # NOTE(review): if the producer encrypted the payload (e.g. with Fernet),
        # decrypt raw_bytes with the same key here before parsing it as JSON.
        payload = json.loads(raw_bytes)

        # Process the payload (the actual processing logic will depend on your use case)
        print(f"Processing record: {payload}")

        # Example processing (e.g., extracting a field)
        # field = payload.get('field_name', 'default_value')
        # Perform additional processing as needed

    return {
        'statusCode': 200,
        # json.dumps turns the message into a JSON string for the response body
        'body': json.dumps('Processed records successfully.')
    }

In [None]:
# we can also map the Lambda function to the Kinesis stream so that it automatically processes the data as it arrives.

# Each trailing backslash continues the command on the next line; without them the
# shell would run every line as a separate command.
# --batch-size: maximum number of records passed to the function per invocation.
# --starting-position TRIM_HORIZON: start reading from the oldest record in the stream.
aws lambda create-event-source-mapping \
    --function-name MyLambdaFunction \
    --batch-size 100 \
    --event-source-arn arn:aws:kinesis:your-region:your-account-id:stream/your-stream-name \
    --starting-position TRIM_HORIZON