## KPI Mock data creation and SQL Logic v5 #10 (10.1, 10.2)

### Create Mock Data

Daily Batch Feed 28
Service Now Vulnerability

Includes the following columns:
- ticket_id
- created_timestamp
- criticality
- closed_timestamp

In [1]:
# Mock data for Batch #28 - Service Now Vulnerability

import pandas as pd
import numpy as np
from datetime import datetime


### Create Batch 28
# Set conditions
np.random.seed(0)
batch28_output_filename = 'servicenow_vulnerability.parquet'
num_samples = 2000

length_min_days = 1
length_max_days = 18
criticality_types_list = ['Critical','High','Medium','Low']

ticket_ids = ['TID' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2024-01-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
timestamps = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples), unit='ns')

df = pd.DataFrame({
    'ticket_id': ticket_ids,
    'created_timestamp': timestamps
})

df['criticality'] = np.random.choice(criticality_types_list, size=len(df))

df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')

random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')
df['closed_timestamp'] = df['created_timestamp'] + random_days
df['closed_timestamp'] = pd.to_datetime(df['closed_timestamp']).dt.round('s')

df['created_timestamp'] = df['created_timestamp'].astype(str)
df['closed_timestamp'] = df['closed_timestamp'].astype(str)


df.to_parquet(batch28_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch28_output_filename}'.")
print(df.dtypes)
print('------------------------')




     ticket_id    created_timestamp criticality     closed_timestamp
0  TID00000001  2024-03-14 18:57:55        High  2024-03-15 18:57:55
1  TID00000002  2024-02-17 21:22:34         Low  2024-02-19 21:22:34
2  TID00000003  2024-03-23 11:27:07        High  2024-04-09 11:27:07
3  TID00000004  2024-01-19 14:52:21      Medium  2024-01-20 14:52:21
4  TID00000005  2024-02-06 14:28:16    Critical  2024-02-07 14:28:16
Data saved to 'servicenow_vulnerability.parquet'.
ticket_id            object
created_timestamp    object
criticality          object
closed_timestamp     object
dtype: object
------------------------


### SQL Logic for KPI 10.1

10.1 Batch 28
for today-30 to today \
where criticality = 'Critical' \
s1. count records \
s2. count records where closed_timestamp - created_timestamp <= 5 days \
s3. s2/s1*100

In [None]:
-- SQL logic used in ETL_KPI_data_KPI10.1 in AWS Glue
-- Note: some functions have been changed to accommodate the move from SQLite to Postgres.
--
-- KPI 10.1: percentage of 'Critical' tickets created in the reporting window
-- that were closed within 5 days of creation.
-- Output: one row (fk_date_id, fk_kpi_id, Value); Value is NULL when the
-- window contains no matching tickets.
-- NOTE(review): assumes created_timestamp / closed_timestamp are timestamp-typed
-- in df (the mock parquet stores them as text) -- confirm the load step casts them.

WITH current_date_id AS (
    -- Date-dimension key for the day the KPI is calculated
    SELECT myDimDate.pk_id
    FROM myDimDate
    WHERE myDimDate.date = CURRENT_DATE
)
,
current_kpi_id AS (
    -- KPI-dimension key for this KPI
    SELECT pk_id
    FROM myDimKPI
    WHERE kpi_reference = '10.1'
)
,
selected_df AS (
    -- Single-level SELECT: an unaliased subquery in FROM is rejected by PostgreSQL < 16.
    SELECT *
    -- Keep the full interval; EXTRACT(DAY ...) would truncate 5 days 23:59 down to 5.
    ,(closed_timestamp - created_timestamp) AS time_to_process
    FROM df
    WHERE criticality = 'Critical'
    -- Half-open range so every record from the last day (yesterday) is included;
    -- BETWEEN ... AND (CURRENT_DATE-1) stops at 00:00:00 of that day.
    -- NOTE(review): the KPI description says "today-30 to today" -- confirm the intended window.
    AND created_timestamp >= (CURRENT_DATE - 31)
    AND created_timestamp < CURRENT_DATE
)

SELECT (SELECT pk_id FROM current_date_id) AS fk_date_id
,(SELECT pk_id FROM current_kpi_id) AS fk_kpi_id
-- Tickets with a NULL closed_timestamp count in the denominator only.
-- NULLIF avoids a division-by-zero error when no tickets fall in the window.
,COUNT(CASE WHEN time_to_process <= INTERVAL '5 days' THEN 1 END) * 100.0 / NULLIF(COUNT(*), 0) AS Value
FROM selected_df

### SQL Logic for KPI 10.2

10.2 Batch 28
for today-30 to today \
where criticality = 'High' \
s1. count records \
s2. count records where closed_timestamp - created_timestamp <= 14 days \
s3. s2/s1*100

In [None]:
-- SQL logic used in ETL_KPI_data_KPI10.2 in AWS Glue
-- Note: some functions have been changed to accommodate the move from SQLite to Postgres.
--
-- KPI 10.2: percentage of 'High' tickets created in the reporting window
-- that were closed within 14 days of creation.
-- Output: one row (fk_date_id, fk_kpi_id, Value); Value is NULL when the
-- window contains no matching tickets.
-- NOTE(review): assumes created_timestamp / closed_timestamp are timestamp-typed
-- in df (the mock parquet stores them as text) -- confirm the load step casts them.

WITH current_date_id AS (
    -- Date-dimension key for the day the KPI is calculated
    SELECT myDimDate.pk_id
    FROM myDimDate
    WHERE myDimDate.date = CURRENT_DATE
)
,
current_kpi_id AS (
    -- KPI-dimension key for this KPI
    SELECT pk_id
    FROM myDimKPI
    WHERE kpi_reference = '10.2'
)
,
selected_df AS (
    -- Single-level SELECT: an unaliased subquery in FROM is rejected by PostgreSQL < 16.
    SELECT *
    -- Keep the full interval; EXTRACT(DAY ...) would truncate 14 days 23:59 down to 14.
    ,(closed_timestamp - created_timestamp) AS time_to_process
    FROM df
    WHERE criticality = 'High'
    -- Half-open range so every record from the last day (yesterday) is included;
    -- BETWEEN ... AND (CURRENT_DATE-1) stops at 00:00:00 of that day.
    -- NOTE(review): the KPI description says "today-30 to today" -- confirm the intended window.
    AND created_timestamp >= (CURRENT_DATE - 31)
    AND created_timestamp < CURRENT_DATE
)

SELECT (SELECT pk_id FROM current_date_id) AS fk_date_id
,(SELECT pk_id FROM current_kpi_id) AS fk_kpi_id
-- Tickets with a NULL closed_timestamp count in the denominator only.
-- COUNT(CASE ...) matches the KPI 10.1 query; NULLIF avoids a division-by-zero
-- error when no tickets fall in the window.
,COUNT(CASE WHEN time_to_process <= INTERVAL '14 days' THEN 1 END) * 100.0 / NULLIF(COUNT(*), 0) AS Value
FROM selected_df