# Executing AWS CloudWatch Queries

In [2]:
%pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [3]:
import boto3
import subprocess
import json
import os
import time
import pandas as pd

## Step 1: Create session and client

In [35]:
def set_aws_credentials(profile, region_name='us-east-1'):
    result = subprocess.run(f"aws-vault exec {profile} --json", shell=True, capture_output=True)
    credentials = json.loads(result.stdout)

    # Create a session with the retrieved credentials
    session = boto3.session.Session(
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretAccessKey'],
        aws_session_token=credentials['SessionToken'],
        region_name=region_name        
    )

    return session

# Use the function with your profile to get a session
aws_session = set_aws_credentials('acl-production', 'ca-central-1')

# Create clients using the session
sts_client = aws_session.client('sts')
logs_client = aws_session.client('logs')

# Example usage of the clients
account_id = sts_client.get_caller_identity()["Account"]
print("Current AWS Account ID:", account_id)

Current AWS Account ID: 707785685172


In [12]:
def current_timestamp():
    return int(time.time() * 1000)

def hours_ago(hours):
    return int(time.time() - 60 * 60 * hours)

def days_ago(days):
    return int(time.time() - 60 * 60 * 24 * days)

In [22]:
# Define your query
def execute_query(log_group, query, start_time, end_time):
    response = logs_client.start_query(
        logGroupName=log_group,
        startTime=start_time,
        endTime=end_time,
        queryString=query,
    )
    return response['queryId']

def wait_for_query(query_id):
    done = False
    while not done:
        stats = logs_client.get_query_results(queryId=query_id)
        status = stats['status']
        if status == 'Complete':
            print("Query completed.")
            done = True
        elif status == 'Running':
            print("Query still running...")
        elif status == 'Scheduled':
            print("Query scheduled...")
        else:
            print(f"Query status: {status}")
        time.sleep(1)

def get_query_results(query_id):
    response = logs_client.get_query_results(queryId=query_id)
    return response['results'], int(response['statistics']['recordsScanned']), int(response['statistics']['recordsMatched'])


def cloudwatch_query(log_group, query, start_time=current_timestamp(), end_time=hours_ago(1)):
    query_id = execute_query(log_group, query, start_time, end_time)
    wait_for_query(query_id)
    response, records_scanned, records_matched = get_query_results(query_id)
    print (f'records_scanned: {records_scanned}')
    print (f'records_matched: {records_matched}')

    # Flatten the data
    data = []
    for entry in response:
        row = {item['field']: item['value'] for item in entry}
        data.append(row)
    return data

In [23]:
def parse_messages(data):
    # Parse the @message field in each entry
    for entry in data:
        message_json = entry["@message"]
        
        # Parse the JSON string in @message field
        try:
            message_data = json.loads(message_json)
        except:
            continue
        
        entry["@message"] = message_data


## Collect the request ids

In [37]:
def collect_request_ids(data):
    request_ids = []

    # Iterate over the entries and extract the request_id and x_request_id
    for entry in data:
        message = entry["@message"]
        x_request_id = message.get("x_request_id", None)
        x_nginx_id = message.get("x_nginx_id", None)

        if x_request_id or x_nginx_id:
            if x_request_id:
                request_ids.append(x_request_id)
            if x_nginx_id != x_request_id:
                request_ids.append(x_nginx_id)

    return request_ids


## Collect slow queries and corresponding request ids

In [25]:
log_group = 'projects-main'
#log_group =  'api_proxy-main'

query = "fields @timestamp, @message \
    | filter request_time > 5 \
    | sort @timestamp desc \
    | limit 100"

data = cloudwatch_query(log_group, query, start_time=days_ago(3))
parse_messages(data)
print (json.dumps(data, indent=4))

Query still running...
Query still running...
Query still running...
Query still running...
Query completed.
records_scanned: 4566476
records_matched: 837
[
    {
        "@timestamp": "2023-11-24 12:59:48.898",
        "@message": {
            "time_local": "24/Nov/2023:12:59:48 +0000",
            "client": "10.184.254.250",
            "ACL_Cloudfront": "",
            "method": "GET",
            "scheme": "http",
            "host": "fortunasilverminesinc.projects-ca.highbond.com",
            "request": "GET /task_tracker/request_items?page%5Bnumber%5D=1&filter%5Bstatus%5D=false&filter%5Bresponsible_name%5D=Yvette%20Dona%20Fortuna%20Anaky&filter%5Brequestor_id%5D=5366&sort%5Bcolumn%5D=request_items.due_date&sort%5Border%5D=asc HTTP/1.1",
            "request_length": "4368",
            "status": "200",
            "bytes_sent": "11757",
            "body_bytes_sent": "10276",
            "referrer": "https://fortunasilverminesinc.projects-ca.highbond.com/task_tracker/request_it

In [38]:
request_ids = collect_request_ids(data)

print (json.dumps(request_ids, indent=4))

TypeError: list indices must be integers or slices, not str

In [36]:
log_group = 'projects-main'
#log_group =  'api_proxy-main'

data = []

for request_id in request_ids:
    query = f"fields @timestamp, @message \
        | filter @message like /{request_id}/ \
        | sort @timestamp desc \
        | limit 100"

    data.append(cloudwatch_query(log_group, query, start_time=days_ago(3)))

parse_messages(data)
print (json.dumps(data, indent=4))

Query still running...
Query still running...
Query still running...
Query still running...
Query still running...
Query completed.
records_scanned: 4546153
records_matched: 0
Query still running...
Query still running...
Query still running...
Query still running...
Query still running...
Query completed.
records_scanned: 4545895
records_matched: 0
Query still running...
Query still running...
Query still running...
Query still running...
Query completed.


KeyboardInterrupt: 