# Examine the ELB logs from AWS

In [None]:
import pandas as pd
import glob
import re
import boto3
import io
from boto3 import session
from dateutil import parser

## Extract the data from S3

In [None]:
# Load every ELB access-log object found under the month folder in S3 into a
# single DataFrame called `frame`, indexed by the request timestamp.

# get data file names
bucket_name = input('S3 bucket containing the ELB logs: ')
log_location_prefix = input('s3 key prefix locating the logs: ')
# only the year and month of this date are used to build the monthly folder below
log_date = parser.parse(input('date for logs: '))
aws_region = 'eu-west-1'

# set up some aws api stuff
account = boto3.client('sts').get_caller_identity().get('Account')
session = boto3.session.Session(region_name=aws_region)
s3client = session.client('s3', config=boto3.session.Config())

#to load daily:
#s3_folder = f'{log_location_prefix}/AWSLogs/{account}/elasticloadbalancing/{aws_region}/{log_date:%Y}/{log_date:%m}/{log_date:%d}/'
#to load monthly:
s3_folder = f'{log_location_prefix}/AWSLogs/{account}/elasticloadbalancing/{aws_region}/{log_date:%Y}/{log_date:%m}/'

#TODO: might have to adapt these if looking at ALB or NLBs, these are for CLB
header = ['time', 'elb', 'client', 'backend', 'request_processing_time', 'backend_processing_time', 'response_processing_time', 'elb_status_code', 'backend_status_code', 'received_bytes', 'sent_bytes', 'request', 'user_agent', 'ssl_cipher', 'ssl_protocol']
# the status codes use the nullable 'Int64' dtype because '-' is read as NA below
dtypes = {'time':'str', 'elb':'str', 'client':'str', 'backend':'str', 'request_processing_time':'float', 'backend_processing_time':'float', 'response_processing_time':'float', 'elb_status_code':'Int64', 'backend_status_code':'Int64', 'received_bytes':'int', 'sent_bytes':'int', 'request':'str', 'user_agent':'str', 'ssl_cipher':'str', 'ssl_protocol':'str'}
parse_dates = ['time']
li = []

# An IPv4-looking address delimited by underscores in the object key; it is
# stored per row as the load balancer node. Raw string so the backslashes
# reach the regex engine unchanged; compiled once, outside the loop.
node_ip_pattern = re.compile(r'(?:_)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?:_)')

print(f'loading all *.log objects in {bucket_name}/{s3_folder}')

paginator = s3client.get_paginator('list_objects_v2')
result = paginator.paginate(Bucket=bucket_name, Prefix=s3_folder)
for page in result:
    # a page without a "Contents" entry has no objects to load
    for key in page.get("Contents", []):
        keyString = key["Key"]
        response = s3client.get_object(Bucket=bucket_name, Key=keyString)
        try:
            df = pd.read_csv(io.BytesIO(response['Body'].read()), sep=' ', index_col=None, names=header, dtype=dtypes, parse_dates=parse_dates, encoding='utf8', na_values=['-'])
            # .group(1) raises AttributeError if the key holds no node address
            df['loadbalancer_node'] = node_ip_pattern.search(keyString).group(1)
        except Exception:
            # say which object broke the load, then let the error propagate
            print(f'failed loading {keyString}')
            raise
        li.append(df)

# pd.concat refuses an empty list with a generic message; name the folder instead
if not li:
    raise ValueError(f'no log objects found in {bucket_name}/{s3_folder}')

frame = pd.concat(li, axis=0, ignore_index=True)

print(f'total lines loaded {len(frame.index)}')
frame = frame.set_index('time')

frame.info()
frame.sample(5)

In [None]:
# Extract the HTTP URL and the HTTP Verb from the request string
#
# For a request such as 'GET http://host:80/some/path HTTP/1.1' the columns
# added below are:
#   verb     -> 'GET'
#   url      -> 'http://host:80/some/path'
#   protocol -> 'HTTP/1.1'
#   route    -> 'some/path'  (whatever follows '<scheme>://<host>:<port>/')

# some useful examples:
# using: movie_title = 'jaws 2 (1986)'
#df['titles'] = df['movie_title'].str.extract('([a-zA-Z ]+)', expand=False).str.strip()
#df['titles1'] = df['movie_title'].str.split('(', 1).str[0].str.strip()
#df['titles2'] = df['movie_title'].str.replace(r'\([^)]*\)', '').str.strip()

frame['verb'] = frame['request'].str.extract(r'([A-Z]*)\s', expand=False).str.strip()
frame['url'] = frame['request'].str.extract(r'[A-Z]*\s(.*)\s', expand=False).str.strip()
frame['protocol'] = frame['request'].str.extract(r'\s.*\s(.*)$', expand=False).str.strip()
# Raw string: '\:' and '\/' are not valid Python string escapes, so the
# non-raw form triggers an invalid-escape warning. The pattern is unchanged.
frame['route'] = frame['url'].str.extract(r'.*\:\/\/.*\:[0-9]*\/(.*)', expand=False).str.strip()

print('completed extracting other data')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default styling, overriding only the default figure size
figure_defaults = {'figure.figsize': (11, 4)}
sns.set(rc=figure_defaults)

## Examine the number of requests and any error codes

In [None]:
# Count requests per time bucket (and per ELB name), plot them together with
# the 503 responses counted the same way, then tabulate requests per status code.
bucket_frequency = '5S'

request_counts = frame.resample(bucket_frequency)['elb'].value_counts(sort=False)
all_requests = request_counts.reset_index(name='request_count').set_index('time')

# same bucketing, restricted to rows whose ELB status code is 503
rows_503 = frame[frame['elb_status_code'].eq(503)]
failure_counts = rows_503.resample(bucket_frequency)['elb'].value_counts(sort=False)
failures_by_bucket = failure_counts.reset_index(name='failures').set_index('time')

fig, ax = plt.subplots()
ax.plot(all_requests['request_count'], linestyle='', marker='.', linewidth=0.5, label='Requests')
ax.plot(failures_by_bucket['failures'], linestyle='', marker='.', linewidth=0.5, label='503s')

ax.legend()

# number of requests per ELB status code; the ELB name column is dropped
status_counts = frame.groupby('elb_status_code')['elb'].value_counts(sort=False)
all_statuses = (
    status_counts.reset_index(name='status_count')
    .set_index('elb_status_code')
    .drop(columns=['elb'])
)
all_statuses.head(10)


## Look for failed POST requests

In [None]:
# POST requests only: number of requests per route, ELB status code and ELB name
post_rows = frame[frame['verb'].eq('POST')]
posts = (
    post_rows.groupby(['route', 'elb_status_code'])['elb']
    .value_counts(sort=False)
    .reset_index(name='number_of_requests')
    .set_index(['route', 'elb_status_code'])
)

posts.head(10)

In [None]:
# Report the time span covered by the 503 responses.
# `frame` is built by concatenating one log object after another, so its rows
# are not guaranteed to be in chronological order. Take the min/max of the
# time index instead of the first/last row by position.
errors_503 = frame[frame.elb_status_code.eq(503)]
if errors_503.empty:
    print('no 503 responses found')
else:
    print(f'first occurrence at {errors_503.index.min()}, last at {errors_503.index.max()}')


## Look at the routes that failed

In [None]:
# Number of 503 responses per load balancer node, route and ELB name.
# No `.head()` on the assignment: it capped `routes` at five rows, so the
# `head(10)` preview below could never show more than five.
routes = (
    frame[frame.elb_status_code.eq(503)]
    .groupby(['loadbalancer_node', 'route'])
    .elb.value_counts(sort=False)
    .reset_index(name='number_of_requests')
    .set_index(['route'])
)

routes.head(10)

## Average backend response time for a specific route

In [None]:
import numpy as np

# Ask which route to inspect; the answer is compared for exact equality with
# the `route` column.
specific_route = input('what routes are you interested in: ')

# Backend processing time statistics for that route, per load balancer node
# and ELB status code.
# The aggregations are named by string: recent pandas versions deprecate
# passing numpy callables such as np.mean / np.std to agg, while the string
# names keep pandas' own groupby implementations and the same results.
(
    frame[frame.route.eq(specific_route)]
    .groupby(['route', 'loadbalancer_node', 'elb_status_code'])
    .agg(
        mean_backend_response=('backend_processing_time', 'mean'),
        min_backend_response=('backend_processing_time', 'min'),
        max_backend_response=('backend_processing_time', 'max'),
        std_dev_backend_response=('backend_processing_time', 'std'),
    )
    .head()
)

In [None]:
# Backend processing time statistics for the chosen route, restricted to
# requests whose ELB status code is 200.
# The aggregations are named by string: recent pandas versions deprecate
# passing numpy callables such as np.mean / np.std to agg, while the string
# names keep pandas' own groupby implementations and the same results.
(
    frame[frame.route.eq(specific_route) & frame.elb_status_code.eq(200)]
    .groupby(['route', 'elb_status_code'])
    .agg(
        mean_backend_response=('backend_processing_time', 'mean'),
        min_backend_response=('backend_processing_time', 'min'),
        max_backend_response=('backend_processing_time', 'max'),
        std_dev_backend_response=('backend_processing_time', 'std'),
    )
    .head()
)

In [None]:
# Distribution of backend processing time for the chosen route, counted in
# unit-wide bins with edges 0, 1, ..., 10
bins = list(range(11))
route_rows = frame[frame['route'].eq(specific_route)]
route_rows['backend_processing_time'].value_counts(bins=bins, sort=False)

In [None]:
# Kernel density plot of backend processing time for the chosen route,
# using only requests whose ELB status code is 200
is_chosen_route = frame['route'].eq(specific_route)
is_ok = frame['elb_status_code'].eq(200)
ok_route_rows = frame[is_chosen_route & is_ok]
ok_route_rows.groupby(['route'])['backend_processing_time'].plot(kind='kde')

## Examine any failures and when they occur

Set the start and end timestamps of the failure window in the cell below, using the first and last 503 times reported above.

In [None]:
# Zoom in on the failure window: per-second request counts against per-second
# 503 counts, then the total number of 503s inside the window.
failure_start = '2022-02-17 18:42:00+00:00'
failure_end = '2022-02-17 19:43:00+00:00'

# the index is sorted first so the label slice selects a contiguous time range
failure_period = frame.sort_index().loc[failure_start:failure_end]

per_second_requests = failure_period.resample('1S')['elb'].value_counts(sort=False)
requests = per_second_requests.reset_index(name='request_count').set_index('time')

window_503 = failure_period[failure_period['elb_status_code'].eq(503)]
per_second_503 = window_503.resample('1S')['elb'].value_counts(sort=False)
status_503 = per_second_503.reset_index(name='failure_count').set_index('time')

fig, ax = plt.subplots()

ax.plot(requests['request_count'], linestyle='', marker='.', linewidth=0.5, label='Requests')

ax.plot(status_503['failure_count'], linestyle='', marker='.', label='503 errors')
ax.legend()

total_failures = status_503['failure_count'].sum()
print(f'total failures: {total_failures}')
