In [1]:
import psycopg2
import psycopg2.extras as extras
import pandas as pd
import json
from datetime import datetime,timezone

from dotenv import dotenv_values

from google.cloud import bigquery
from google.cloud.exceptions import NotFound
from google.api_core.exceptions import BadRequest
from google.oauth2 import service_account

from dotenv import dotenv_values
# https://cloud.google.com/bigquery/docs/quickstarts/quickstart-client-libraries
# https://codelabs.developers.google.com/codelabs/cloud-bigquery-python#0

# https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
# https://cloud.google.com/python/docs/setup
# https://cloud.google.com/apis/docs/getting-started#enabling_apis

# https://cloud.google.com/bigquery/docs/reference/libraries
# https://cloud.google.com/python/docs/reference/bigquery/latest
# https://gcloud.readthedocs.io/en/latest/bigquery-client.html


In [2]:
# credentials = service_account.Credentials.from_service_account_file(r'C:\Windows\xxxxxx.json')
# print(credentials.project_id)

In [4]:
# Default lower bound for the import window. The last-import lookup cell replaces it
# with the most recent imported_at value when the destination table already has one.
start_date_query='2024-01-01'

# Destination BigQuery table, addressed as <project>.<dataset>.incident
projectId='pongthorn'  # smart-data-ml
dataset_id='SMartDW'
table_id = f"{projectId}.{dataset_id}.incident"

# Settings are read from a local .env file; get_postgres_conn() looks up the
# DATABASES_NAME / DATABASES_USER / DATABASES_PASSWORD / DATABASES_HOST entries.
env_path='.env'
config = dotenv_values(dotenv_path=env_path)



In [5]:
# BigQuery client bound to projectId. No credentials object is passed on the active
# line, so authentication is left to whatever the client library resolves by default.
# Variant that passes an explicit service-account credentials object instead:
# client = bigquery.Client(credentials= credentials,project=projectId)
client = bigquery.Client(project=projectId)

In [9]:
# Fail fast when the destination table is missing: this notebook never creates it,
# the table has to be set up by hand (with partitioning and clustering) beforehand.
# Schema reference: D:\PythonDev\MyQuantFinProject\SMart-AI\data\Incident_PostgresToBQ_Schema.txt
try:
    table = client.get_table(table_id)  # API request
except NotFound:
    raise Exception("Table {} is not found, please manually create table named incident including partition and clustering".format(table_id))
else:
    print("Table {} already exists.".format(table_id))
    print(table.schema)

Table pongthorn.SMartDW.incident already exists.
[SchemaField('id', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('incident_no', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('severity_id', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('severity_name', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('sla', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('product_type', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('brand', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('model', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('incident_type', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('status', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('service_type', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('is_failure_type', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('count_detail', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('open_datetime', 'DAT

In [10]:
# One timezone-aware UTC timestamp, taken once and reused for the whole run.
dt_imported = datetime.now(tz=timezone.utc)
str_imported = format(dt_imported, '%Y-%m-%d %H:%M:%S')
print(f"Imported DateTime: {str_imported}" )

Imported DateTime: 2024-07-12 14:48:05


In [11]:
# Look up when data was last loaded so that only incidents updated since then are fetched.
sql_lastImport = f"SELECT max(imported_at) as last_imported from `{table_id}` where open_datetime>='{start_date_query}' "
job_lastImported = client.query(sql_lastImport)

# Keep the last row that actually carries a timestamp; None means nothing was loaded yet.
previous_imports = [
    row.last_imported for row in job_lastImported if row.last_imported is not None
]
str_lastImported = (
    previous_imports[-1].strftime('%Y-%m-%d %H:%M:%S') if previous_imports else None
)
print(f"Last Imported DateTime: {str_lastImported}" )

# Resume from the previous load when there is one, otherwise keep the configured start date.
start_date_query = str_lastImported if str_lastImported is not None else start_date_query

print(f"Start Import on update_at of last imported date : {start_date_query}" )

Last Imported DateTime: None
Start Import on update_at of last imported date : 2024-01-01


In [12]:
def get_postgres_conn():
    """Open a psycopg2 connection from the DATABASES_* entries of ``config``.

    Returns:
        The connection object returned by ``psycopg2.connect``.

    Raises:
        Exception: any failure while reading the settings or connecting is
            printed and then re-raised unchanged.
    """
    try:
        connect_kwargs = {
            "database": config['DATABASES_NAME'],
            "user": config['DATABASES_USER'],
            "password": config['DATABASES_PASSWORD'],
            "host": config['DATABASES_HOST'],
        }
        return psycopg2.connect(**connect_kwargs)
    except Exception as error:
        print(error)
        raise
def list_data(sql,params,connection):
    """Run ``sql`` on ``connection`` and return the whole result set as a DataFrame.

    Args:
        sql: SQL text to execute.
        params: Bind parameters handed to ``cursor.execute``; when ``None`` the
            statement is executed without parameters.
        connection: Open DB-API connection. It is not closed here; the caller owns it.

    Returns:
        pandas.DataFrame with one column per result column, in result order.
        An empty result set still yields a frame that carries the column names,
        and a statement that produces no result set yields an empty frame.
    """
    with connection.cursor() as cursor:
        if params is None:
            cursor.execute(sql)
        else:
            cursor.execute(sql, params)

        # description is None when the statement did not produce a result set
        if cursor.description is None:
            return pd.DataFrame()

        columns = [col[0] for col in cursor.description]
        rows = cursor.fetchall()

    # Passing the column names explicitly keeps the header even with zero rows,
    # which a frame built from an empty list of dicts would lose.
    return pd.DataFrame(data=rows, columns=columns)

In [13]:
# Extraction query for incidents whose incident_status_id is 4, joined to their lookup
# tables (type, status, severity, service type, inventory, brand, model, product type,
# SLA, project, company). Only rows whose updated_at, taken in UTC, is on or after
# start_date_query are returned, ordered by updated_at.
# The open/close/response/resolved timestamps are rendered as Asia/Bangkok text with
# minute precision; updated_at is rendered with second precision.
# The column aliases are presumably meant to line up with the BigQuery table - check schema.
# NOTE(review): updated_at is rendered without an AT TIME ZONE clause while the filter
# compares it in UTC - confirm both use the intended zone.
# NOTE(review): start_date_query is interpolated into the SQL text by the f-string.
# Filter variants that were tried with a bound parameter instead:
#   and incident.updated_at>=%(start_date_param)s
# Extra column that was considered:
#   ,timezone('UTC', incident.updated_at) as updated_at_utc
sql_incident=f"""

select
incident.id as id, incident.incident_no as incident_no,

severity.id as  severity_id,
severity.severity_name as  severity_name,

service_level.sla_name as sla,

product_type.productype_name as product_type,brand.brand_name as brand,

xtype.incident_type_name as incident_type,
status.incident_status_name as status,
service.service_type_name service_type,

model.model_name as model,CASE WHEN failure_type IS NULL THEN  0 ELSE 1 END AS is_failure_type,


(select count(*) from  app_incident_detail  as detail where  detail.incident_master_id=incident.id ) as count_detail


,TO_CHAR(incident.incident_datetime  AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as open_datetime
,TO_CHAR(incident.incident_close_datetime  AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as close_datetime

,TO_CHAR(incident.incident_problem_start  AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as response_datetime
,TO_CHAR(incident.incident_problem_end  AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as resolved_datetime

,company.company_name as company
,     (SELECT emp.employee_name
        FROM app_employee emp
        WHERE emp.id = incident.incident_owner_id) AS case_owner
,TO_CHAR(incident.updated_at,'YYYY-MM-DD HH24:MI:SS') as updated_at 

from app_incident as incident
inner join app_incident_type as  xtype on incident.incident_type_id = xtype.id
inner join  app_incident_status as status on incident.incident_status_id = status.id
inner join  app_incident_severity as severity on  incident.incident_severity_id = severity.id
inner join  app_service_type as service on incident.service_type_id= service.id

inner join app_inventory as inventory on incident.inventory_id = inventory.id

inner join app_brand as brand on inventory.brand_id = brand.id
inner join app_model as model on inventory.model_id = model.id
inner join app_product_type as product_type on inventory.product_type_id = product_type.id
inner join app_sla as service_level on inventory.customer_sla_id = service_level.id

inner join app_project as project on inventory.project_id = project.id
inner join app_company as company on project.company_id = company.id

where incident.incident_status_id =4
and timezone('UTC', incident.updated_at)>='{start_date_query}'

order by incident.updated_at 

"""

# Bound-parameter form of the active filter (pair it with a params dict in list_data):
# and timezone('UTC', incident.updated_at)>=%(start_date_param)s
print(sql_incident)



select
incident.id as id, incident.incident_no as incident_no,

severity.id as  severity_id,
severity.severity_name as  severity_name,

service_level.sla_name as sla,

product_type.productype_name as product_type,brand.brand_name as brand,

xtype.incident_type_name as incident_type,
status.incident_status_name as status,
service.service_type_name service_type,

model.model_name as model,CASE WHEN failure_type IS NULL THEN  0 ELSE 1 END AS is_failure_type,


(select count(*) from  app_incident_detail  as detail where  detail.incident_master_id=incident.id ) as count_detail


,TO_CHAR(incident.incident_datetime  AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as open_datetime
,TO_CHAR(incident.incident_close_datetime  AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as close_datetime

,TO_CHAR(incident.incident_problem_start  AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI') as response_datetime
,TO_CHAR(incident.incident_problem_end  AT TIME ZONE 'Asia/Bangkok','YYYY-MM-DD HH24:MI'

In [15]:
print("Create all issues dataframe")

# Bound-parameter alternative (needs the %(start_date_param)s form of the filter in sql_incident):
# dict_params={"start_date_param":start_date_query}
# df_all=list_data(sql_incident,dict_params,get_postgres_conn())

# Hold the connection in a name so it is always released, even when the query fails.
pg_conn = get_postgres_conn()
try:
    df_all = list_data(sql_incident, None, pg_conn)
finally:
    pg_conn.close()

if df_all.empty:
    print("no transsaction update")
    # Nothing new to load: stop here. SystemExit halts execution without asking the
    # notebook kernel to shut down (which exit() does) and still ends a plain script
    # with a success status.
    raise SystemExit

print("import data to bigquery")

# info() writes its report itself and returns None, so it must not be wrapped in print()
df_all.info()
df_all.tail()

Create all issues dataframe
import data to bigquery
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1217 entries, 0 to 1216
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   id                 1217 non-null   int64 
 1   incident_no        1217 non-null   object
 2   severity_id        1217 non-null   int64 
 3   severity_name      1217 non-null   object
 4   sla                1217 non-null   object
 5   product_type       1217 non-null   object
 6   brand              1217 non-null   object
 7   incident_type      1217 non-null   object
 8   status             1217 non-null   object
 9   service_type       1217 non-null   object
 10  model              1217 non-null   object
 11  is_failure_type    1217 non-null   int64 
 12  count_detail       1217 non-null   int64 
 13  open_datetime      1217 non-null   object
 14  close_datetime     1217 non-null   object
 15  response_datetime  1217 non-null   ob

Unnamed: 0,id,incident_no,severity_id,severity_name,sla,product_type,brand,incident_type,status,service_type,model,is_failure_type,count_detail,open_datetime,close_datetime,response_datetime,resolved_datetime,company,case_owner,updated_at
1212,5177,SR-ES-24-5177,3,Minor,24x7 4Hrs Response Time,Software,Red Hat,General Incident,Closed,Incident,"OpenShift Container Platform, Premium",0,14,2024-05-13 15:04,2024-07-11 20:22,2024-05-13 15:04,2024-05-13 15:04,AIS (TAM Red Hat),Jitchanok Nakeiam,2024-07-12 09:19:01
1213,5515,SR-ES-24-5515,2,Major,24x7 4Hrs Resolution Time,Firewall,Palo Alto,General Incident,Closed,Incident,PA-5250,0,1,2024-07-11 06:52,2024-07-11 07:30,2024-07-11 06:55,2024-07-11 07:30,New-GFMIS,Titiphun Paisanphong,2024-07-12 16:14:59
1214,5508,SR-ES-24-5508,2,Major,24x7 6Hrs Resolution Time,Software,SAPB1,Software,Closed,Incident,SAPB1 Accounting and Finance Application,0,1,2024-07-10 09:00,2024-07-10 17:00,2024-07-10 09:00,2024-07-10 17:00,SRTET,Thanikaphat Lertdamrongwong,2024-07-12 16:23:46
1215,5507,SR-ES-24-5507,2,Major,24x7 6Hrs Resolution Time,Software,eUnite,Software,Closed,Incident,eUnite Human Resource Application,0,1,2024-07-09 09:00,2024-07-10 16:30,2024-07-09 09:04,2024-07-10 16:30,SRTET,Thanikaphat Lertdamrongwong,2024-07-12 16:52:59
1216,5463,SR-ES-24-5463,2,Major,24x7 6Hrs Resolution Time,Software,eUnite,Software,Closed,Incident,eUnite Human Resource Application,0,1,2024-07-05 09:24,2024-07-08 16:30,2024-07-05 09:30,2024-07-08 11:50,SRTET,Thanikaphat Lertdamrongwong,2024-07-12 17:06:17


In [16]:
# Turn the text timestamps returned by the query into datetime64 columns.
# Values that do not match the expected layout become NaT instead of raising.
dateTimeCols = ['open_datetime', 'response_datetime', 'resolved_datetime', 'close_datetime']
dateTimeCols2 = ['updated_at']

for cols, fmt in (
    (dateTimeCols, '%Y-%m-%d %H:%M'),
    (dateTimeCols2, '%Y-%m-%d %H:%M:%S'),
):
    for col in cols:
        df_all[col] = pd.to_datetime(df_all[col], format=fmt, errors='coerce')

# Stamp every row with the UTC time of this import run
df_all['imported_at'] = dt_imported

In [17]:
# Rows with any null value are removed before loading. Report what is discarded so
# that the loss is visible instead of silent.
rows_with_null = df_all[df_all.isna().any(axis=1)]
df_all.dropna(inplace=True)
if not rows_with_null.empty:
    print(f"Dropped {len(rows_with_null)} rows containing null values, incident id: {rows_with_null['id'].tolist()}")

# info() writes its report itself and returns None, so it must not be wrapped in print()
df_all.info()
df_all.head(10)

<class 'pandas.core.frame.DataFrame'>
Index: 1215 entries, 0 to 1216
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   id                 1215 non-null   int64              
 1   incident_no        1215 non-null   object             
 2   severity_id        1215 non-null   int64              
 3   severity_name      1215 non-null   object             
 4   sla                1215 non-null   object             
 5   product_type       1215 non-null   object             
 6   brand              1215 non-null   object             
 7   incident_type      1215 non-null   object             
 8   status             1215 non-null   object             
 9   service_type       1215 non-null   object             
 10  model              1215 non-null   object             
 11  is_failure_type    1215 non-null   int64              
 12  count_detail       1215 non-null   int64             

Unnamed: 0,id,incident_no,severity_id,severity_name,sla,product_type,brand,incident_type,status,service_type,...,is_failure_type,count_detail,open_datetime,close_datetime,response_datetime,resolved_datetime,company,case_owner,updated_at,imported_at
0,4298,SR-ES-24-4298,2,Major,24x7 4Hrs Resolution Time,Firewall,Palo Alto,General Incident,Closed,Incident,...,0,1,2023-12-30 12:08:00,2023-12-30 13:00:00,2023-12-30 12:09:00,2023-12-30 13:00:00,New-GFMIS,Titiphun Paisanphong,2024-01-02 21:05:50,2024-07-12 14:48:05.894081+00:00
1,4299,SR-ES-24-4299,2,Major,24x7 4Hrs Resolution Time,Firewall,Palo Alto,General Incident,Closed,Incident,...,0,1,2023-12-30 18:20:00,2023-12-30 19:00:00,2023-12-30 18:20:00,2023-12-30 19:00:00,New-GFMIS,Titiphun Paisanphong,2024-01-02 21:09:53,2024-07-12 14:48:05.894081+00:00
2,4302,SR-ES-24-4302,3,Minor,24x7 4Hrs Response Time,Storage,NetApp,Network Adapter Failure,Closed,Incident,...,0,3,2023-12-27 09:55:00,2024-01-03 02:00:00,2023-12-27 10:00:00,2024-01-03 02:00:00,BBL,Jessadang Panjakhan,2024-01-03 13:56:39,2024-07-12 14:48:05.894081+00:00
3,4303,SR-ES-24-4303,4,Cosmetic,24x7 4Hrs Response Time,Software,Red Hat,General Incident,Closed,Request,...,0,1,2024-01-03 09:00:00,2024-01-03 12:00:00,2024-01-03 09:00:00,2024-01-03 09:00:00,AIS (TAM Red Hat),Rudrawee Kachangsri,2024-01-03 16:56:07,2024-07-12 14:48:05.894081+00:00
4,4304,SR-ES-24-4304,2,Major,24x7 4Hrs Resolution Time,Firewall,Palo Alto,General Incident,Closed,Incident,...,0,1,2024-01-03 09:44:00,2024-01-03 10:20:00,2024-01-03 09:44:00,2024-01-03 10:20:00,New-GFMIS,Titiphun Paisanphong,2024-01-04 21:06:17,2024-07-12 14:48:05.894081+00:00
5,4204,SR-ES-23-4204,4,Cosmetic,24x7 4Hrs Response Time,Software,Red Hat,General Incident,Closed,Request,...,0,5,2023-11-30 15:17:00,2024-01-04 23:35:00,2023-11-30 15:17:00,2024-01-04 22:00:00,AIS (TAM Red Hat),Jitchanok Nakeiam,2024-01-05 09:34:37,2024-07-12 14:48:05.894081+00:00
6,4197,SR-ES-23-4197,2,Major,24x7 4Hrs Response Time,Software,Red Hat,General Incident,Closed,Incident,...,0,7,2023-12-12 10:06:00,2024-01-01 15:00:00,2023-12-12 10:06:00,2023-12-13 11:00:00,AIS (TAM Red Hat),Jitchanok Nakeiam,2024-01-05 16:35:45,2024-07-12 14:48:05.894081+00:00
7,4307,SR-ES-24-4307,4,Cosmetic,24x7 4Hrs Response Time,Software,Red Hat,General Incident,Closed,Request,...,0,1,2024-01-05 09:00:00,2024-01-05 11:30:00,2024-01-05 09:00:00,2024-01-05 09:00:00,AIS (TAM Red Hat),Rudrawee Kachangsri,2024-01-05 16:36:22,2024-07-12 14:48:05.894081+00:00
8,4308,SR-ES-24-4308,4,Cosmetic,24x7 4Hrs Response Time,Software,Red Hat,General Incident,Closed,Request,...,0,1,2024-01-02 09:00:00,2024-01-02 11:30:00,2024-01-02 09:00:00,2024-01-02 09:00:00,AIS (TAM Red Hat),Rudrawee Kachangsri,2024-01-05 16:38:51,2024-07-12 14:48:05.894081+00:00
9,4309,SR-ES-24-4309,4,Cosmetic,24x7 4Hrs Response Time,Software,Red Hat,General Incident,Closed,Request,...,0,1,2024-01-04 09:00:00,2024-01-04 12:30:00,2024-01-04 09:00:00,2024-01-04 09:00:00,AIS (TAM Red Hat),Rudrawee Kachangsri,2024-01-05 16:46:27,2024-07-12 14:48:05.894081+00:00


In [18]:
def insertDataFrameToBQ(df_trasns):
    """Append ``df_trasns`` to the BigQuery table ``table_id`` and wait for the load job.

    Args:
        df_trasns: DataFrame to load; it is handed to
            ``client.load_table_from_dataframe`` with WRITE_APPEND disposition.

    Raises:
        BadRequest: when BigQuery rejects the load. The error is printed and then
            re-raised, so a failed load is never mistaken for a successful one.
    """
    try:
        job_config = bigquery.LoadJobConfig(
            write_disposition="WRITE_APPEND",
        )

        job = client.load_table_from_dataframe(
            df_trasns, table_id, job_config=job_config
        )
        job.result()  # Wait for the job to complete.
    except BadRequest as e:
        print("Bigquery Error\n")
        print(e)
        # Propagate: swallowing the error here would let the notebook carry on
        # as if the rows had been stored.
        raise

    print("Total ", len(df_trasns), "Imported closed incident to bigquery successfully")

# Load the prepared frame; any exception raised by the load propagates and stops the cell.
insertDataFrameToBQ(df_all)




Total  1215 Imported closed incident to bigquery successfully


In [19]:
# Calendar date (YYYY-MM-DD) of this import run, used to look the loaded rows up again
importDate = dt_imported.date().isoformat()

def load_data_bq(sql:str):
    """Run ``sql`` through the module-level BigQuery ``client`` and return the result as a DataFrame."""
    return client.query(sql).to_dataframe()
# Spot check: read back up to 10 rows whose imported_at is on or after the date of this run.
sql_query=f""" SELECT * FROM `{table_id}` WHERE imported_at >= '{importDate}' LIMIT 10 """
df=load_data_bq(sql_query)
df.head()

Unnamed: 0,id,incident_no,severity_id,severity_name,sla,product_type,brand,model,incident_type,status,...,is_failure_type,count_detail,open_datetime,close_datetime,response_datetime,resolved_datetime,company,case_owner,updated_at,imported_at
0,4752,SR-ES-24-4752,4,Cosmetic,24x7 4Hrs Response Time,Software,Red Hat,"OpenShift Container Platform, Premium",General Incident,Closed,...,0,0,2023-03-31 09:47:00,2023-03-31 17:52:00,2023-03-31 10:51:00,2023-03-31 16:51:00,AIS (TAM Red Hat),Anuwat Charoensuk,2024-03-18 02:52:44,2024-07-12 14:48:05.894081
1,4753,SR-ES-24-4753,4,Cosmetic,24x7 4Hrs Response Time,Software,Red Hat,"OpenShift Container Platform, Premium",General Incident,Closed,...,0,0,2023-03-27 09:54:00,2023-03-27 16:55:00,2023-03-27 09:55:00,2023-03-27 15:55:00,AIS (TAM Red Hat),Anuwat Charoensuk,2024-03-18 03:00:42,2024-07-12 14:48:05.894081
2,4754,SR-ES-24-4754,4,Cosmetic,24x7 4Hrs Response Time,Software,Red Hat,"OpenShift Container Platform, Premium",General Incident,Closed,...,0,0,2023-03-20 09:04:00,2023-03-20 17:06:00,2023-03-20 09:05:00,2023-03-20 16:05:00,AIS (TAM Red Hat),Wannaporn Saw-Khow,2024-03-18 03:07:55,2024-07-12 14:48:05.894081
3,4750,SR-ES-24-4750,4,Cosmetic,24x7 4Hrs Response Time,Software,Red Hat,"OpenShift Container Platform, Premium",General Incident,Closed,...,0,0,2023-03-17 09:27:00,2023-03-17 18:33:00,2023-03-17 09:28:00,2023-03-17 17:28:00,AIS (TAM Red Hat),Wannaporn Saw-Khow,2024-03-18 02:33:55,2024-07-12 14:48:05.894081
4,3980,SR-ES-23-3980,1,Critical,24x7 4Hrs Response Time,Software,Red Hat,"OpenShift Container Platform, Premium",General Incident,Closed,...,0,1,2023-03-14 16:00:00,2023-06-15 12:00:00,2023-06-14 15:00:00,2023-06-14 15:00:00,AIS (TAM Red Hat),Jitchanok Nakeiam,2024-03-19 11:34:14,2024-07-12 14:48:05.894081
