In [7]:
import os

import numpy as np
import pandas as pd
import psycopg2
import psycopg2.extras as extras

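# Reference: Vertex AI sample notebook for BigQuery datasets
# https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/datasets/get_started_bq_datasets.ipynb
# Related notebook on the author's local Jupyter server (not reachable from other machines):
# http://localhost:8888/lab/tree/Yit/SmartExcelReport/Incident_Json_BQ.ipynb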


In [8]:
def get_postgres_conn():
    """Open and return a new psycopg2 connection to the incident database.

    Connection settings are taken from the standard libpq environment
    variables ``PGDATABASE``, ``PGUSER``, ``PGPASSWORD`` and ``PGHOST``.
    When a variable is not set, the value this notebook originally
    hardcoded is used, so existing local runs keep working unchanged.

    Returns:
        A new psycopg2 connection. The caller owns it and is responsible
        for closing it.

    Raises:
        Exception: whatever ``psycopg2.connect`` raises. The error is
            printed and then re-raised unchanged.
    """
    try:
        return psycopg2.connect(
            database=os.environ.get("PGDATABASE", "SMartDB"),
            user=os.environ.get("PGUSER", "postgres"),
            # NOTE(review): the fallback still keeps a password in the
            # notebook; drop it once PGPASSWORD is set everywhere this runs.
            password=os.environ.get("PGPASSWORD", "P@ssw0rd"),
            host=os.environ.get("PGHOST", "localhost"),
        )
    except Exception as error:
        print(error)
        # Bare raise re-raises the active exception with its traceback intact.
        raise
    
def list_data(sql, params, connection):
    """Run a query and return its result set as a pandas DataFrame.

    Args:
        sql: SQL text to execute; may contain driver-style placeholders.
        params: Values for the placeholders, or None when the statement
            takes none.
        connection: Open database connection whose ``cursor()`` result can
            be used as a context manager. It is not closed here; the
            caller keeps ownership.

    Returns:
        A DataFrame with one column per result column (named from
        ``cursor.description``) and one row per fetched row. A query that
        matches no rows returns an empty frame that still has all of its
        columns. A statement that produces no result set returns a
        completely empty DataFrame.
    """
    with connection.cursor() as cursor:
        if params is None:
            cursor.execute(sql)
        else:
            cursor.execute(sql, params)

        # description is None when the statement produced no result set.
        if cursor.description is None:
            return pd.DataFrame()

        columns = [col[0] for col in cursor.description]
        # Passing the names explicitly keeps the columns even with zero rows,
        # so downstream column lookups do not fail on an empty result.
        return pd.DataFrame(cursor.fetchall(), columns=columns)

In [9]:
# Reporting window as calendar dates (YYYY-MM-DD). Both ends are inclusive:
# the query compares against the start of the day after end_date_query, so
# incidents opened at any time on the end date are included.
start_date_query = '2020-01-01'
end_date_query = '2023-12-31'

# One row per incident with its inventory, SLA and timestamp details.
# - Timestamps are rendered as 'YYYY-MM-DD HH24:MI' text in Asia/Bangkok time.
# - The row filters live in an explicit WHERE clause; with inner joins this is
#   equivalent to putting them in the last join's ON condition.
# - The model name is exported under the alias "mode"; the alias is kept
#   because the resulting column name is what later cells and the CSV use.
# - NOTE(review): incident_status_id = 4 is presumably the "closed" status;
#   confirm against app_incident_status.
sql_all = """
select
severity.severity_name as severity_label, service_level.sla_name as sla,
incident.id as id, incident.incident_no as incident_no,
product_type.productype_name as product_type, brand.brand_name as brand, model.model_name as mode,

TO_CHAR(inventory.customer_warranty_start AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as customer_warranty_start,
TO_CHAR(inventory.customer_warranty_end AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as customer_warranty_end,

xtype.incident_type_name as incident_type, status.incident_status_name as status,
service.service_type_name service_type,
CASE WHEN failure_type IS NULL
            THEN 'false'
            ELSE 'true'
END AS is_failure_type,
(select count(*) from app_incident_detail as detail where detail.incident_master_id = incident.id) as count_detail

,TO_CHAR(incident.incident_datetime AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as open_datetime
,TO_CHAR(incident.incident_close_datetime AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as close_datetime

,TO_CHAR(incident.incident_problem_start AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as response_datetime
,TO_CHAR(incident.incident_problem_end AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as resolved_datetime

from app_incident as incident
inner join app_incident_type as xtype on incident.incident_type_id = xtype.id
inner join app_incident_status as status on incident.incident_status_id = status.id
inner join app_incident_severity as severity on incident.incident_severity_id = severity.id
inner join app_service_type as service on incident.service_type_id = service.id

inner join app_inventory as inventory on incident.inventory_id = inventory.id

inner join app_brand as brand on inventory.brand_id = brand.id
inner join app_model as model on inventory.model_id = model.id
inner join app_product_type as product_type on inventory.product_type_id = product_type.id
inner join app_sla as service_level on inventory.customer_sla_id = service_level.id

where incident.incident_datetime >= %(start_date_param)s
and incident.incident_datetime < (cast(%(end_date_param)s as date) + interval '1 day')
and incident.incident_status_id = 4

order by incident.incident_datetime desc
"""

In [10]:
print("Create all issues dataframe")

dict_params = {"start_date_param": start_date_query, "end_date_param": end_date_query}

# Keep a handle on the connection so it can be closed once the rows are
# loaded. psycopg2's ``with connection:`` only ends the transaction and does
# not close the connection, hence the explicit try/finally.
conn = get_postgres_conn()
try:
    df_all = list_data(sql_all, dict_params, conn)
finally:
    conn.close()


Create all issues dataframe


In [11]:
# Columns holding 'YYYY-MM-DD HH:MM' text; values that are missing or do not
# match the format become NaT because of errors='coerce'.
dateTimeCols = [
    'open_datetime', 'response_datetime', 'resolved_datetime', 'close_datetime',
    'customer_warranty_start', 'customer_warranty_end',
]
for col in dateTimeCols:
    df_all[col] = pd.to_datetime(df_all[col], format='%Y-%m-%d %H:%M', errors='coerce')

# [start column, end column] pairs; each pair yields a timedelta column named
# '<start>_to_<end>' (with the '_datetime' suffix stripped) and a float
# '<start>_to_<end>_hour' column holding the same duration in hours.
start_end_list = [
    ['open_datetime', 'close_datetime'], ['open_datetime', 'response_datetime'],
    ['response_datetime', 'resolved_datetime'], ['resolved_datetime', 'close_datetime'],
]
for start_col, end_col in start_end_list:
    diff_str = f"{start_col}_to_{end_col}".replace('_datetime', '')
    print(diff_str)
    df_all[diff_str] = df_all[end_col] - df_all[start_col]
    # Vectorized conversion; missing durations (NaT) come out as NaN.
    df_all[f'{diff_str}_hour'] = df_all[diff_str].dt.total_seconds() / (60 * 60)

open_to_close
open_to_response
response_to_resolved
resolved_to_close


In [12]:
# Keep only rows with no missing value in any column (this also removes
# incidents whose timestamps could not be parsed and became NaT).
df_all = df_all.dropna()

# info() writes its report itself and returns None, so it is not wrapped in
# print(), which would add a stray "None" line to the output.
df_all.info()
df_all.head()
    

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2069 entries, 0 to 2095
Data columns (total 26 columns):
 #   Column                     Non-Null Count  Dtype          
---  ------                     --------------  -----          
 0   severity_label             2069 non-null   object         
 1   sla                        2069 non-null   object         
 2   id                         2069 non-null   int64          
 3   incident_no                2069 non-null   object         
 4   product_type               2069 non-null   object         
 5   brand                      2069 non-null   object         
 6   mode                       2069 non-null   object         
 7   customer_warranty_start    2069 non-null   datetime64[ns] 
 8   customer_warranty_end      2069 non-null   datetime64[ns] 
 9   incident_type              2069 non-null   object         
 10  status                     2069 non-null   object         
 11  service_type               2069 non-null   object       

Unnamed: 0,severity_label,sla,id,incident_no,product_type,brand,mode,customer_warranty_start,customer_warranty_end,incident_type,...,response_datetime,resolved_datetime,open_to_close,open_to_close_hour,open_to_response,open_to_response_hour,response_to_resolved,response_to_resolved_hour,resolved_to_close,resolved_to_close_hour
0,Major,24x7 4Hrs Response Time,2241,SR-ES-23-2241,Server,HPE,3PAR StoreServ 8200,2022-12-21,2025-12-21,Hard Disk Drive Failure,...,2023-02-01 07:00:00,2023-02-01 12:00:00,0 days 05:20:00,5.333333,0 days 00:20:00,0.333333,0 days 05:00:00,5.0,0 days 00:00:00,0.0
1,Major,8x5 4Hrs Response Time,2239,SR-ES-23-2239,Storage,EMC,VNX5300,2023-01-01,2023-12-31,Hard Disk Drive Failure,...,2023-01-31 11:19:00,2023-01-31 22:00:00,1 days 06:27:00,30.45,0 days 00:05:00,0.083333,0 days 10:41:00,10.683333,0 days 19:41:00,19.683333
2,Major,24x7 4Hrs Response Time,2226,SR-ES-23-2226,Server,HPE,Synergy 480 Gen10,2019-03-22,2024-03-22,Memory Failure,...,2023-01-29 11:30:00,2023-01-29 18:00:00,0 days 06:50:00,6.833333,0 days 00:20:00,0.333333,0 days 06:30:00,6.5,0 days 00:00:00,0.0
3,Minor,24x7 4Hrs Resolution Time,2234,SR-ES-23-2234,Server,HPE,ProLiant DL560 Gen10,2021-05-17,2026-05-27,General Incident,...,2023-01-28 20:17:00,2023-01-30 20:48:00,2 days 00:42:00,48.7,0 days 00:11:00,0.183333,2 days 00:31:00,48.516667,0 days 00:00:00,0.0
4,Major,24x7 4Hrs Response Time,2238,SR-ES-23-2238,Storage,NetApp,FAS8040,2022-01-01,2023-01-31,Hard Disk Drive Failure,...,2023-01-28 20:22:00,2023-01-28 20:34:00,0 days 22:39:00,22.65,0 days 01:01:00,1.016667,0 days 00:12:00,0.2,0 days 21:26:00,21.433333


In [12]:
# Write the cleaned incident table to a CSV file in the working directory,
# leaving out the DataFrame index.
df_all.to_csv(path_or_buf="incident_data.csv", index=False)