# Autoscale Events

I download the *Current GWs and SWGs-data* and *Scale Events* exports from the Grafana AutoScaling Dashboard.

*Scale Events* is the only file used in this script.

In [None]:
import warnings
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import re
from datetime import timedelta

In [None]:
# Silence every warning for the rest of the session (no category is given,
# so the "ignore" filter applies to all of them)
warnings.filterwarnings("ignore")
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# weekday number (0 = Monday ... 6 = Sunday) -> weekday name
day_of_the_week = dict(enumerate(
    ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
))
# numeric region code -> region name
pa_regions = {
    200: "us-west-2",
    201: "us-west-201",
    210: "ap-south-1",
    212: "eu-west-2",
    213: "eu-central-1",
    214: "us-east-1",
    407: "us-south",
}

In [None]:
def get_vmid(astring):
    """
    Return the first run of consecutive digits found in ASTRING, as a string.

    Returns None when ASTRING contains no digit, or when ASTRING is not
    text at all (for example a missing value read from the CSV).
    """
    try:
        found = re.search(r'\d+', astring)
    except TypeError:
        # not a string / bytes-like object
        return None
    return found.group(0) if found else None

In [None]:
def get_reason(astring):
    '''
    Return the first word that follows "metrics" (and whitespace) in ASTRING.

    Returns None when ASTRING has no such word, or when ASTRING is not
    text at all (for example a missing value read from the CSV).
    '''
    try:
        found = re.search(r'metrics\s+(\w+)', astring)
    except TypeError:
        # not a string / bytes-like object
        return None
    return found.group(1) if found else None

In [None]:
def select_created_deleted(astring):
    """
    Classify the event described by ASTRING.

    Returns 2 if ASTRING contains 'created', 1 if it contains 'deleted'
    ('created' wins when both are present) and 0 otherwise.
    A value that does not support the containment test (for example a
    missing comment, which pandas passes in as a float NaN) is classified
    as 0 instead of raising, matching how get_vmid / get_reason treat
    non-text input.
    """
    try:
        if 'created' in astring:
            return 2
        if 'deleted' in astring:
            return 1
    except TypeError:
        # ASTRING is not a string or container
        return 0
    return 0

In [None]:
# Load the Scale Events export from Grafana (the only input of this script);
# create_time is parsed into timestamps while reading
sca_ev = pd.read_csv(
    "Scale Events (Last 1000)-data-2024-03-05 21_57_29.csv",
    parse_dates=['create_time'],
    usecols=[
        'create_time',
        'scale_type',
        'region',
        'trigger_reason',
        'comments',
        'workflow_id',
    ],
)

In [None]:
# tag every event from its comment text: 2 = created, 1 = deleted, 0 = other
sca_ev['action'] = sca_ev['comments'].map(select_created_deleted)

In [None]:
# keep only the columns that the rest of the notebook works with
sca_ev = sca_ev.loc[:, ['create_time', 'region', 'comments', 'action', 'trigger_reason']]

In [None]:
# order the events chronologically
sca_ev = sca_ev.sort_values("create_time")

In [None]:
# display the events table
sca_ev

In [None]:
# The frame keeps its existing index (no time-series index is set); the
# timestamp is instead split into a calendar-date and a clock-time column
sca_ev['date'] = sca_ev["create_time"].dt.date
sca_ev['time'] = sca_ev["create_time"].dt.time

In [None]:
# Drop the events that are neither created nor deleted (action == 0).
# The explicit copy makes the filtered frame independent of the frame it was
# sliced from, so the column assignments below never write into a slice
# (the SettingWithCopyWarning case).
sca_ev = sca_ev.loc[sca_ev["action"] != 0].copy()
# vmid: first run of digits in the comment; reason: word after "metrics" in the trigger text
sca_ev['vmid'] = sca_ev['comments'].apply(get_vmid)
sca_ev['reason'] = sca_ev['trigger_reason'].apply(get_reason)
sca_ev

In [None]:
# vmid are created every time a GW is spun off and sorting by that number helps id events
# (vmid holds digit strings, so the order is lexicographic; that is enough to
# make events of the same vmid adjacent).
# create_time is the tie-breaker: sorting on vmid alone leaves the relative
# order of the events that share a vmid unspecified, whereas this keeps them
# in chronological order.
sca_ev = sca_ev.sort_values(by=['vmid', 'create_time']).reset_index(drop=True)

In [None]:
# create a new data frame with the duration, vmid, reason and date for each completed autoscale
#
# sca_ev is sorted by vmid, so two consecutive rows with the same vmid are
# treated as the two events (created / deleted) of one instance. Columns are
# addressed by name rather than by position so that adding or reordering
# columns upstream cannot silently change which values are read.
autoscale_rows = []
autoscale_index = []

i = 0
while i < sca_ev.shape[0] - 1:
    current = sca_ev.iloc[i]
    following = sca_ev.iloc[i + 1]
    vmid = current["vmid"]
    # A missing vmid identifies no instance and must never form a pair
    # (None == None would otherwise count as a match).
    if pd.notna(vmid) and following["vmid"] == vmid:
        if current["action"] == 2:
            # created event comes first: duration = later event - creation
            duration = following["create_time"] - current["create_time"]
        else:
            # the other event comes first: subtract the following timestamp
            duration = current["create_time"] - following["create_time"]
        # region, date and reason are taken from the first row of the pair
        autoscale_rows.append(
            (duration, vmid, current["region"], current["date"], current["reason"]))
        autoscale_index.append(i)
        i += 2
    else:
        i += 1

# Build the frame once instead of enlarging it row by row; the row labels are
# the positions of the first event of each pair in sca_ev.
autoscale = pd.DataFrame(
    autoscale_rows,
    index=autoscale_index,
    columns=["duration", "vmid", "region", "date", "reason"])

In [None]:
# Newest events first, then derive the calendar parts of the date and the
# duration expressed in whole hours (floor division of the seconds by 3600)
autoscale = (
    autoscale.sort_values(by="date", ascending=False)
    .reset_index(drop=True)
    .assign(
        year=lambda frame: frame["date"].apply(lambda d: d.year),
        month=lambda frame: frame["date"].apply(lambda d: d.month),
        day=lambda frame: frame["date"].apply(lambda d: d.day),
        weekday=lambda frame: frame["date"].apply(lambda d: d.weekday()),
        autohours=lambda frame: frame["duration"].apply(
            lambda span: span.total_seconds() // 3600),
    )
)

In [None]:
# filtering by events from 2024 (year >= 2024), then translating the region
# codes and the weekday numbers into names.
# replace() is used without inplace=True: it returns a new frame, so nothing
# is mutated on a slice of `autoscale` (the SettingWithCopyWarning case) and
# `autoscale` itself keeps its numeric codes.
autoscale2024 = (
    autoscale.loc[autoscale["year"] >= 2024]
    .replace({'region': pa_regions})
    .replace({'weekday': day_of_the_week})
)
autoscale2024

In [None]:
# number of completed autoscale events per region
autoscale2024.groupby("region").agg(count=("region", "count"))

In [None]:
# list the distinct region values present in the 2024 events
autoscale2024["region"].unique()

In [None]:
# Autoscale hours over time for two regions, one marker colour per region
fig, ax = plt.subplots(figsize=(16, 5))
for region_name, marker in (("eu-west-2", 'bo'), ("ap-south-1", 'go')):
    region_events = autoscale2024[autoscale2024["region"] == region_name]
    ax.plot(region_events["date"], region_events["autohours"], marker, label=region_name)

ax.legend()
ax.grid(ls='--')

# text styling for the title and for the two axis labels
fontdict_title = dict(
    fontsize='x-large',
    fontweight='regular',
    color='salmon',
    verticalalignment='baseline',
    horizontalalignment='center',
)
fontdict_x = dict(
    fontsize='large',
    fontweight='regular',
    color='navy',
    verticalalignment='top',
    horizontalalignment='center',
)
# the y label differs from the x label only in its vertical alignment
fontdict_y = {**fontdict_x, 'verticalalignment': 'bottom'}

ax.set_title("AutoScale Events", fontdict=fontdict_title)
ax.set_xlabel("Date", fontdict=fontdict_x)
ax.set_ylabel("Hours of Active AutoScale", fontdict=fontdict_y)

# write the figure next to the notebook
fig.savefig("autoscaleRegion.png")



In [None]:
# number of completed autoscale events per trigger reason
autoscale2024.groupby("reason").agg(count=("reason", "count"))

In [None]:
# Autoscale hours over time for two trigger reasons, one marker colour per reason
fig, ax = plt.subplots(figsize=(16, 5))
for reason_name, marker in (("expected_dcpu", 'bo'), ("user_count", 'go')):
    reason_events = autoscale2024[autoscale2024["reason"] == reason_name]
    ax.plot(reason_events["date"], reason_events["autohours"], marker, label=reason_name)

ax.legend()
ax.grid(ls='--')

# text styling for the title and for the two axis labels
fontdict_title = dict(
    fontsize='x-large',
    fontweight='regular',
    color='salmon',
    verticalalignment='baseline',
    horizontalalignment='center',
)
fontdict_x = dict(
    fontsize='large',
    fontweight='regular',
    color='navy',
    verticalalignment='top',
    horizontalalignment='center',
)
# the y label differs from the x label only in its vertical alignment
fontdict_y = {**fontdict_x, 'verticalalignment': 'bottom'}

ax.set_title("AutoScale Events", fontdict=fontdict_title)
ax.set_xlabel("Date", fontdict=fontdict_x)
ax.set_ylabel("Hours of Active AutoScale", fontdict=fontdict_y)

# write the figure next to the notebook
fig.savefig("autoscaleReason.png")



In [None]:
# number of completed autoscale events per (region, reason) combination
summary_reason = autoscale2024.groupby(["region", "reason"]).agg(
    count=("reason", "count"))

In [None]:
# display the per-region / per-reason counts
summary_reason

In [None]:
# export the per-region / per-reason counts to an Excel workbook
summary_reason.to_excel("summary_reason.xlsx")

In [None]:
# average of the autohours column over the 2024 events
autoscale2024["autohours"].mean()

In [None]:
# Autoscale hours per day of the week.
fig, ax = plt.subplots(figsize=(16, 5))

# Matplotlib lays string categories out in order of first appearance, which
# here would follow the row order of autoscale2024 instead of the calendar
# and would omit days without events. The weekday names are therefore mapped
# to fixed positions (Monday first) and the ticks are labelled explicitly.
weekday_names = [day_of_the_week[number] for number in sorted(day_of_the_week)]
weekday_position = {name: position for position, name in enumerate(weekday_names)}
x = autoscale2024['weekday'].map(weekday_position)
y = autoscale2024['autohours']
ax.scatter(x, y)
ax.set_xticks(list(range(len(weekday_names))))
ax.set_xticklabels(weekday_names)

ax.grid(ls='--')

# text styling for the title and for the two axis labels
fontdict_title = {'fontsize': 'x-large',
 'fontweight': 'regular',
 'color': 'salmon',
 'verticalalignment': 'baseline',
 'horizontalalignment': 'center'}

fontdict_x = {'fontsize': 'large',
 'fontweight': 'regular',
 'color': 'navy',
 'verticalalignment': 'top',
 'horizontalalignment': 'center'}

fontdict_y = {'fontsize': 'large',
 'fontweight': 'regular',
 'color': 'navy',
 'verticalalignment': 'bottom',
 'horizontalalignment': 'center'}


ax.set_title("AutoScale Events", fontdict=fontdict_title)
ax.set_xlabel("Day Of The Week", fontdict=fontdict_x)
ax.set_ylabel("Hours of Active AutoScale", fontdict=fontdict_y)


# write the figure next to the notebook
fig.savefig("autoscaleWeek.png")



In [None]:
# number of completed autoscale events per weekday
summary_days = autoscale2024.groupby("weekday").agg(count=("weekday", "count"))
summary_days

In [None]:
# export the per-weekday counts to an Excel workbook
summary_days.to_excel("summary_days.xlsx")