## KPI Mock data creation for Batch 2 & 3

### Create Mock Data

Daily Batch Feed 2
Cherished Documents Received

Includes the following columns:
- cherish_doc_id
- cherish_doc_type
- customer_id	
- received_timestamp


In [8]:
# Mock data for Batch #2 - Cherished Documents Received

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 2
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch2_output_filename = filepath + 'sps_cherisheddocumentsreceived.parquet'
num_samples = 3000

# length_min_days = 1
# length_max_days = 18
cherish_doc_type_list = ['Income_Certificate','Death_Certificate','Marriage_Certificate','Passport','Birth_Certificate']

customer_id = ['CUST' + f'{i:08}' for i in range(1, num_samples + 1)]
cherish_doc_id = ['CDOC' + f'{i:03}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
received_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'customer_id': customer_id,
    'cherish_doc_id': cherish_doc_id,
    'received_timestamp': received_timestamp
})

df['cherish_doc_type'] = np.random.choice(cherish_doc_type_list, size=len(df))

df['received_timestamp'] = pd.to_datetime(df['received_timestamp']).dt.round('s')
df

# random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')
# df['closed_timestamp'] = df['created_timestamp'] + random_days
# df['closed_timestamp'] = pd.to_datetime(df['closed_timestamp']).dt.round('s')

# df['created_timestamp'] = df['created_timestamp'].astype(str)
# df['closed_timestamp'] = df['closed_timestamp'].astype(str)


df.to_parquet(batch2_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch2_output_filename}'.")
print(df.dtypes)
print('------------------------')




    customer_id cherish_doc_id  received_timestamp      cherish_doc_type
0  CUST00000001        CDOC001 2023-11-13 18:57:55              Passport
1  CUST00000002        CDOC002 2023-09-19 14:52:21     Death_Certificate
2  CUST00000003        CDOC003 2024-02-12 20:30:53     Death_Certificate
3  CUST00000004        CDOC004 2024-01-13 07:09:57  Marriage_Certificate
4  CUST00000005        CDOC005 2024-01-30 11:04:39     Death_Certificate
Data saved to 'C:/Projects/NS&I/new_mockfiles/sps_cherisheddocumentsreceived.parquet'.
customer_id                   object
cherish_doc_id                object
received_timestamp    datetime64[ns]
cherish_doc_type              object
dtype: object
------------------------


In [13]:
df_check = pd.read_parquet(batch2_output_filename)
df_check.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   customer_id         3000 non-null   object        
 1   cherish_doc_id      3000 non-null   object        
 2   received_timestamp  3000 non-null   datetime64[ns]
 3   cherish_doc_type    3000 non-null   object        
dtypes: datetime64[ns](1), object(3)
memory usage: 93.9+ KB


### Create Mock Data

Daily Batch Feed 3
Cherished Documents Despatched

Includes the following columns:
- cherish_doc_id
- cherish_doc_type
- customer_id
- despatched_timestamp

In [40]:
# Mock data for Batch #3 - Cherished Documents Despatched

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 3
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch3_output_filename = filepath + 'sps_cherisheddocumentsdespatched.parquet'
num_samples = 3000

length_min_days = 1
length_max_days = 18
cherish_doc_type_list = ['Income_Certificate','Death_Certificate','Marriage_Certificate','Passport','Birth_Certificate']

customer_id = ['CUST' + f'{i:08}' for i in range(1, num_samples + 1)]
cherish_doc_id = ['CDOC' + f'{i:03}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
despatched_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'customer_id': customer_id,
    'cherish_doc_id': cherish_doc_id,
    'despatched_timestamp': despatched_timestamp 
})

df['cherish_doc_type'] = np.random.choice(cherish_doc_type_list, size=len(df))


df['despatched_timestamp'] = pd.to_datetime(df['despatched_timestamp']).dt.round('s') + pd.Timedelta(hours=np.random.randint(20,50))
df

# random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')
# df['closed_timestamp'] = df['created_timestamp'] + random_days
# df['closed_timestamp'] = pd.to_datetime(df['closed_timestamp']).dt.round('s')

# df['created_timestamp'] = df['created_timestamp'].astype(str)
# df['closed_timestamp'] = df['closed_timestamp'].astype(str)


df.to_parquet(batch3_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch3_output_filename}'.")
print(df.dtypes)
print('------------------------')




    customer_id cherish_doc_id despatched_timestamp      cherish_doc_type
0  CUST00000001        CDOC001  2023-11-14 16:57:55              Passport
1  CUST00000002        CDOC002  2023-09-20 12:52:21     Death_Certificate
2  CUST00000003        CDOC003  2024-02-13 18:30:53     Death_Certificate
3  CUST00000004        CDOC004  2024-01-14 05:09:57  Marriage_Certificate
4  CUST00000005        CDOC005  2024-01-31 09:04:39     Death_Certificate
Data saved to 'C:/Projects/NS&I/new_mockfiles/sps_cherisheddocumentsdespatched.parquet'.
customer_id                     object
cherish_doc_id                  object
despatched_timestamp    datetime64[ns]
cherish_doc_type                object
dtype: object
------------------------


In [72]:
df_check3 = pd.read_parquet(batch3_output_filename)
df_check3['despatched_timestamp'] + pd.Timedelta(hours=np.random.randint(20,50))
#df_check3[df_check3['despatched_timestamp'] + pd.Timedelta(hours=10)]

KeyError: 'despatched_timestamp'

### Create Mock Data

Daily Batch Feed 5 Change Incidents

Includes the following columns:
- incident_id
- date_of_incident
- change_id
- incident_priority
- date_of_change

In [48]:
# Mock data for Batch #5 - Change Incidents

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 5
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch5_output_filename = filepath + 'servicenow_changeincidents.parquet'
num_samples = 3000

length_min_days = 1
length_max_days = 18
incident_priority_list = [1,2]

incident_id = ['INC' + f'{i:08}' for i in range(1, num_samples + 1)]
change_id = ['CHG' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
date_of_incident = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
date_of_change = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns') + pd.Timedelta(hours=np.random.randint(12,24))

df = pd.DataFrame({
    'incident_id': incident_id,
    'change_id': change_id,
    'date_of_incident': date_of_incident,
    'date_of_change': date_of_change
})

df['incident_priority'] = np.random.choice(incident_priority_list, size=len(df))
df['date_of_incident'] = pd.to_datetime(df['date_of_incident']).dt.round('s')
df['date_of_change'] = pd.to_datetime(df['date_of_change']).dt.round('s')

df

df.to_parquet(batch5_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch5_output_filename}'.")
print(df.dtypes)
print('------------------------')




   incident_id    change_id    date_of_incident      date_of_change  \
0  INC00000001  CHG00000001 2023-11-13 18:57:55 2024-02-21 01:13:29   
1  INC00000002  CHG00000002 2023-09-19 14:52:21 2024-04-10 00:15:31   
2  INC00000003  CHG00000003 2024-02-12 20:30:53 2023-12-04 08:18:53   
3  INC00000004  CHG00000004 2024-01-13 07:09:57 2024-02-12 14:29:51   
4  INC00000005  CHG00000005 2024-01-30 11:04:39 2023-11-24 23:31:37   

   incident_priority  
0                  2  
1                  2  
2                  1  
3                  1  
4                  1  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_changeincidents.parquet'.
incident_id                  object
change_id                    object
date_of_incident     datetime64[ns]
date_of_change       datetime64[ns]
incident_priority             int32
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 6 Call Analytics

Includes the following columns:
- calls_disconnected
- created_timestamp
- calls_answered_in_60_seconds
- calls_received

In [51]:
# Mock data for Batch #6 - nicecxone_callsanalytics

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 5
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch6_output_filename = filepath + 'nicecxone_callsanalytics.parquet'
num_samples = 3000

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
calls_disconnected = np.random.randint(30, 50, num_samples,dtype='int64')
calls_answered_in_60_seconds = np.random.randint(350, 400, num_samples,dtype='int64')

df = pd.DataFrame({
    'calls_disconnected': calls_disconnected,
    'calls_answered_in_60_seconds': calls_answered_in_60_seconds,
    'created_timestamp': created_timestamp
})

df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['calls_received'] = df['calls_disconnected'] + df['calls_answered_in_60_seconds']

df

df.to_parquet(batch6_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch5_output_filename}'.")
print(df.dtypes)
print('------------------------')




   calls_disconnected  calls_answered_in_60_seconds   created_timestamp  \
0                  49                           380 2023-11-13 18:57:55   
1                  45                           383 2023-09-19 14:52:21   
2                  47                           376 2024-02-12 20:30:53   
3                  32                           388 2024-01-13 07:09:57   
4                  39                           391 2024-01-30 11:04:39   

   calls_received  
0             429  
1             428  
2             423  
3             420  
4             430  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_changeincidents.parquet'.
calls_disconnected                       int64
calls_answered_in_60_seconds             int64
created_timestamp               datetime64[ns]
calls_received                           int64
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 7 sps_postoutrequests and 24 ppa_postoutbulkrequests

7 Includes the following columns:
- file_id
- post_out_id
- timestamp

24 Includes the following columns:
- post_out_type
- file_received_timestamp
- file_id

In [60]:
# Mock data for Batch #7 - Post Out Requests

# Required fields:
#   file_id
#   post_out_id
#   timestamp


import pandas as pd
import numpy as np
from datetime import datetime


### Create Batch 7
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch7_output_filename = filepath + 'sps_postoutrequests.parquet'
num_samples = 3000

file_ids = ['FID' + f'{i:08}' for i in range(1, num_samples + 1)]
post_out_ids = ['POID' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
timestamps = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'file_id': file_ids,
    'post_out_id': post_out_ids,
    'timestamp': timestamps
})

df['timestamp'] = pd.to_datetime(df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S'))
df.to_parquet(batch7_output_filename, index=False)

print(df.head())
print(f"Data saved to '{batch7_output_filename}'.")
print(df.dtypes)
print('------------------------')



### Create Batch 24
# Set conditions
batch24_output_filename = filepath + 'ppa_postoutbulkrequests.parquet'
post_out_types_list = ['Prize Warrant', 'daily_files', 'Scheduled Statements']

length_min_days = 1
length_max_days = 14


df['post_out_type'] = np.random.choice(post_out_types_list, size=len(df))

random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')
df['file_received_timestamp'] = df['timestamp'] - random_days
df['file_received_timestamp'] = pd.to_datetime(df['file_received_timestamp'])

df = df.drop(columns=['timestamp','post_out_id'])

df.to_parquet(batch24_output_filename, index=False)

print(df.head())
print(f"Data saved to '{batch24_output_filename}'.")
print(df.dtypes)


       file_id   post_out_id           timestamp
0  FID00000001  POID00000001 2023-11-13 18:57:54
1  FID00000002  POID00000002 2023-09-19 14:52:21
2  FID00000003  POID00000003 2024-02-12 20:30:52
3  FID00000004  POID00000004 2024-01-13 07:09:57
4  FID00000005  POID00000005 2024-01-30 11:04:39
Data saved to 'C:/Projects/NS&I/new_mockfiles/sps_postoutrequests.parquet'.
file_id                object
post_out_id            object
timestamp      datetime64[ns]
dtype: object
------------------------
       file_id         post_out_type file_received_timestamp
0  FID00000001           daily_files     2023-11-10 18:57:54
1  FID00000002           daily_files     2023-09-16 14:52:21
2  FID00000003           daily_files     2024-01-31 20:30:52
3  FID00000004  Scheduled Statements     2024-01-04 07:09:57
4  FID00000005  Scheduled Statements     2024-01-29 11:04:39
Data saved to 'C:/Projects/NS&I/new_mockfiles/ppa_postoutbulkrequests.parquet'.
file_id                            object
post_out_type

### Create Mock Data

Daily Batch Feed 8 servicenow_firstcontactresolution

Includes the following columns:
- crm_id
- created_timestamp
- status_timestamp
- status

In [58]:
# Mock data for Batch #8 - servicenow_firstcontactresolution

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 8
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch8_output_filename = filepath + 'servicenow_firstcontactresolution.parquet'
num_samples = 3000

status_list = ['Pending','Passed to case management','Completed']


crm_id = ['CUST' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'crm_id': crm_id,
    'created_timestamp': created_timestamp
})

df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['status_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s') + pd.Timedelta(hours=np.random.randint(20,50))

df['status'] = np.random.choice(status_list, size=len(df))

df

df.to_parquet(batch8_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch8_output_filename}'.")
print(df.dtypes)
print('------------------------')




         crm_id   created_timestamp    status_timestamp  \
0  CUST00000001 2023-11-13 18:57:55 2023-11-15 09:57:55   
1  CUST00000002 2023-09-19 14:52:21 2023-09-21 05:52:21   
2  CUST00000003 2024-02-12 20:30:53 2024-02-14 11:30:53   
3  CUST00000004 2024-01-13 07:09:57 2024-01-14 22:09:57   
4  CUST00000005 2024-01-30 11:04:39 2024-02-01 02:04:39   

                      status  
0  Passed to case management  
1  Passed to case management  
2  Passed to case management  
3                  Completed  
4                  Completed  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_firstcontactresolution.parquet'.
crm_id                       object
created_timestamp    datetime64[ns]
status_timestamp     datetime64[ns]
status                       object
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 9 - sps_chequereceived,10 - sps_chequeprocessed,11 - sps_chequedestroyed 

9 Includes the following columns:
- Cheque_id	
- received timestamp	
- valid status	
- type

10 Includes the following columns:
- cheque_id	
- processed_timestamp

11 Includes the following columns:
- cheque_id	
- destroyed_timestamp		


In [59]:
# Mock data for Batch #9 - sps_chequereceived

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 9
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'

batch9_output_filename = filepath + 'sps_chequereceived.parquet'
num_samples = 3000

valid_status_list = ['Valid','Invalid']
type_list = ['cheque','warrant']


start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
received_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
cheque_id = np.random.randint(10000001, 10003000, num_samples)

df = pd.DataFrame({
    'cheque_id' : cheque_id,
    'received_timestamp': received_timestamp
})

df['valid_status'] = np.random.choice(valid_status_list, size=len(df))
df['type'] = np.random.choice(type_list, size=len(df))
df['received_timestamp'] = pd.to_datetime(df['received_timestamp']).dt.round('s')

df

df.to_parquet(batch9_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch9_output_filename}'.")
print(df.dtypes)
print('------------------------')

### Create Batch 10
batch10_output_filename = filepath + 'sps_chequeprocessed.parquet'

df = pd.DataFrame({
    'cheque_id' : cheque_id,
    'processed_timestamp': received_timestamp
})

df['processed_timestamp'] = pd.to_datetime(df['processed_timestamp']).dt.round('s') + pd.Timedelta(hours=np.random.randint(5,24))

df

df.to_parquet(batch10_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch10_output_filename}'.")
print(df.dtypes)
print('------------------------')


### Create Batch 11
batch11_output_filename = filepath + 'sps_chequedestoyed.parquet'

df = pd.DataFrame({
    'cheque_id' : cheque_id,
    'destroyed_timestamp': received_timestamp
})

df['destroyed_timestamp'] = pd.to_datetime(df['destroyed_timestamp']).dt.round('s') + pd.Timedelta(hours=np.random.randint(48,480))

df

df.to_parquet(batch11_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch11_output_filename}'.")
print(df.dtypes)
print('------------------------')




   cheque_id  received_timestamp valid_status     type
0   10000052 2023-11-13 18:57:55      Invalid  warrant
1   10001914 2023-09-19 14:52:21      Invalid  warrant
2   10002622 2024-02-12 20:30:53      Invalid   cheque
3   10001938 2024-01-13 07:09:57        Valid   cheque
4   10001859 2024-01-30 11:04:39      Invalid  warrant
Data saved to 'C:/Projects/NS&I/new_mockfiles/sps_chequereceived.parquet'.
cheque_id                      int32
received_timestamp    datetime64[ns]
valid_status                  object
type                          object
dtype: object
------------------------
   cheque_id processed_timestamp
0   10000052 2023-11-14 15:57:55
1   10001914 2023-09-20 11:52:21
2   10002622 2024-02-13 17:30:53
3   10001938 2024-01-14 04:09:57
4   10001859 2024-01-31 08:04:39
Data saved to 'C:/Projects/NS&I/new_mockfiles/sps_chequeprocessed.parquet'.
cheque_id                       int32
processed_timestamp    datetime64[ns]
dtype: object
------------------------
   cheque_id destro

### Create Mock Data

Daily Batch Feed 12 - ssl_training,15 - ssl_training_schedule 

12 Includes the following columns:
- employee_id	
- Status	
- training_id	
- training_completion_date

15 Includes the following columns:
- training_id
- training_course
- schedule_date	
- training_due_date


In [62]:
# Mock data for Batch #12 - SSL Training

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 12
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch12_output_filename = filepath + 'ssl_training.parquet'
num_samples = 3000


status_list = ['Not Started','In-Progress','Complete']
training_course_list = ['']

employee_id = ['EMP' + f'{i:08}' for i in range(1, num_samples + 1)]
training_id = ['TRN' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
training_completion_date = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'employee_id': employee_id,
    'training_id': training_id,
    'training_completion_date': training_completion_date
})

df['status'] = np.random.choice(status_list, size=len(df))
df['training_completion_date'] = pd.to_datetime(df['training_completion_date']).dt.round('s')

df

df.to_parquet(batch12_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch12_output_filename}'.")
print(df.dtypes)
print('------------------------')


### Create Batch 15
# Set conditions
batch15_output_filename = filepath + 'ssl_trainingschedule.parquet'


training_course_list = ['Soft Skills','ITSM']

training_id = ['TRN' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
schedule_date = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'training_id': training_id,
    'schedule_date': schedule_date
})

df['training_course'] = np.random.choice(training_course_list, size=len(df))
df['schedule_date'] = pd.to_datetime(df['schedule_date']).dt.round('s')
df['training_due_date'] = pd.to_datetime(df['schedule_date']) + pd.Timedelta(hours=720)


df

df.to_parquet(batch15_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch15_output_filename}'.")
print(df.dtypes)
print('------------------------')



   employee_id  training_id training_completion_date       status
0  EMP00000001  TRN00000001      2023-11-13 18:57:55  In-Progress
1  EMP00000002  TRN00000002      2023-09-19 14:52:21  In-Progress
2  EMP00000003  TRN00000003      2024-02-12 20:30:53  In-Progress
3  EMP00000004  TRN00000004      2024-01-13 07:09:57     Complete
4  EMP00000005  TRN00000005      2024-01-30 11:04:39     Complete
Data saved to 'C:/Projects/NS&I/new_mockfiles/ssl_training.parquet'.
employee_id                         object
training_id                         object
training_completion_date    datetime64[ns]
status                              object
dtype: object
------------------------
   training_id       schedule_date training_course   training_due_date
0  TRN00000001 2023-12-19 18:45:42     Soft Skills 2024-01-18 18:45:42
1  TRN00000002 2024-03-20 20:42:16     Soft Skills 2024-04-19 20:42:16
2  TRN00000003 2024-03-07 08:07:03            ITSM 2024-04-06 08:07:03
3  TRN00000004 2023-11-20 20:32:44     S

### Create Mock Data

Daily Batch Feed 13 - ssl_employee_base,14 - ssl_employee_jl 

13 Includes the following columns:
- employee_id	
- account_join_date

14 Includes the following columns:
- employee_id	
- account_join_date


In [63]:
# Mock data for Batch #12 - SSL Training

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 13
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch13_output_filename = filepath + 'ssl_employee_base.parquet'
num_samples = 3000



employee_id = ['EMP' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
account_join_date = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'employee_id': employee_id,
    'account_join_date': training_completion_date
})


df['account_join_date'] = pd.to_datetime(df['account_join_date']).dt.round('s')

df

df.to_parquet(batch13_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch13_output_filename}'.")
print(df.dtypes)
print('------------------------')


### Create Batch 14
# Set conditions
batch14_output_filename = filepath + 'ssl_employee_jl.parquet'

employee_id = ['EMP' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
account_join_date = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'employee_id': employee_id,
    'account_join_date': training_completion_date
})


df['account_join_date'] = pd.to_datetime(df['account_join_date']).dt.round('s')

df

df.to_parquet(batch14_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch15_output_filename}'.")
print(df.dtypes)
print('------------------------')



   employee_id   account_join_date
0  EMP00000001 2023-11-13 18:57:55
1  EMP00000002 2023-09-19 14:52:21
2  EMP00000003 2024-02-12 20:30:53
3  EMP00000004 2024-01-13 07:09:57
4  EMP00000005 2024-01-30 11:04:39
Data saved to 'C:/Projects/NS&I/new_mockfiles/ssl_employee_base.parquet'.
employee_id                  object
account_join_date    datetime64[ns]
dtype: object
------------------------
   employee_id   account_join_date
0  EMP00000001 2023-11-13 18:57:55
1  EMP00000002 2023-09-19 14:52:21
2  EMP00000003 2024-02-12 20:30:53
3  EMP00000004 2024-01-13 07:09:57
4  EMP00000005 2024-01-30 11:04:39
Data saved to 'C:/Projects/NS&I/new_mockfiles/ssl_trainingschedule.parquet'.
employee_id                  object
account_join_date    datetime64[ns]
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 16 servicenow_bonding

16 Includes the following columns:
- priority
- communication_time
- created_timestamp
- incident_id
- bonded_timestamp

In [65]:
# Mock data for Batch #16 - SSL Training

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 16
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch16_output_filename = filepath + 'servicenow_bonding.parquet'
num_samples = 3000
priority_list = [1,2]



incident_id = ['INC' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
communication_time = np.random.randint(10, 60, num_samples,dtype='int64')

df = pd.DataFrame({
    'incident_id': incident_id,
    'communication_time': communication_time,
    'created_timestamp': created_timestamp
})

df['priority'] = np.random.choice(priority_list, size=len(df))
df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['bonded_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s') + pd.Timedelta(hours=np.random.randint(12,36))

df

df.to_parquet(batch16_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch16_output_filename}'.")
print(df.dtypes)
print('------------------------')

   incident_id  communication_time   created_timestamp  priority  \
0  INC00000001                  53 2023-11-13 18:57:55         2   
1  INC00000002                  30 2023-09-19 14:52:21         2   
2  INC00000003                  39 2024-02-12 20:30:53         1   
3  INC00000004                  54 2024-01-13 07:09:57         2   
4  INC00000005                  12 2024-01-30 11:04:39         1   

     bonded_timestamp  
0 2023-11-14 09:57:55  
1 2023-09-20 05:52:21  
2 2024-02-13 11:30:53  
3 2024-01-13 22:09:57  
4 2024-01-31 02:04:39  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_bonding.parquet'.
incident_id                   object
communication_time             int64
created_timestamp     datetime64[ns]
priority                       int32
bonded_timestamp      datetime64[ns]
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 17 servicenow_applicationrejection

17 Includes the following columns:
- rejection_id
- case_id
- contact_channel
- rejected_timestamp
- rejection_reason
- created_timestamp
- total_active_time

In [125]:
# Mock data for Batch #17

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 17
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch17_output_filename = filepath + 'servicenow_applicationrejection.parquet'
num_samples = 3000

length_min_days=1
length_max_days=10
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')


rejection_id_list = [1,2,3,4]
rejection_reason_list = ['unavailable_product','Not Valid','unacceptable_cheque','Other']
contact_channel_list = ['Email','Phone','Chat']

case_id = ['CAS' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'case_id': case_id,
    'total_active_time': communication_time,
    'created_timestamp': created_timestamp
})

df['rejection_id'] = np.random.choice(rejection_id_list, size=len(df))
df['rejection_reason'] = np.random.choice(rejection_reason_list, size=len(df))
df['contact_channel'] = np.random.choice(contact_channel_list, size=len(df))
df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['bonded_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s') + random_days
df['total_active_time'] = df['bonded_timestamp'] -  df['created_timestamp']
df['total_active_time'] = (df['total_active_time']*24).dt.days.astype('int32')

df

df.to_parquet(batch17_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch17_output_filename}'.")
print(df.dtypes)
print('------------------------')

       case_id  total_active_time   created_timestamp  rejection_id  \
0  CAS00000001                144 2024-04-25 18:10:17             2   
1  CAS00000002                 24 2024-04-25 23:51:48             2   
2  CAS00000003                 96 2024-01-18 03:06:00             1   
3  CAS00000004                 96 2024-01-26 02:41:52             1   
4  CAS00000005                192 2023-12-15 03:40:45             1   

      rejection_reason contact_channel    bonded_timestamp  
0                Other           Phone 2024-05-01 18:10:17  
1  unavailable_product           Phone 2024-04-26 23:51:48  
2  unavailable_product           Email 2024-01-22 03:06:00  
3                Other           Phone 2024-01-30 02:41:52  
4            Not Valid           Phone 2023-12-23 03:40:45  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_applicationrejection.parquet'.
case_id                      object
total_active_time             int32
created_timestamp    datetime64[ns]
rejection_i

### Create Mock Data

Daily Batch Feed 18 servicenow_applicationcancellation

18 Includes the following columns:
- rejection_id
- case_id
- contact_channel
- cancelled_timestamp
- rejection_reason

In [133]:
# Mock data for Batch #18

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 18
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch18_output_filename = filepath + 'servicenow_applicationcancellation.parquet'
num_samples = 3000

length_min_days=1
length_max_days=10
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')


rejection_id_list = [1,2,3,4]
rejection_reason_list = ['Cancelled']
contact_channel_list = ['Email','Phone','Chat']

case_id = ['CAS' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
cancelled_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'case_id': case_id,
    'cancelled_timestamp': created_timestamp
})

df['rejection_id'] = np.random.choice(rejection_id_list, size=len(df))
df['rejection_reason'] = 'Cancelled'
df['contact_channel'] = np.random.choice(contact_channel_list, size=len(df))
df['cancelled_timestamp'] = pd.to_datetime(df['cancelled_timestamp']).dt.round('s')

df

df.to_parquet(batch18_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch18_output_filename}'.")
print(df.dtypes)
print('------------------------')

       case_id cancelled_timestamp  rejection_id rejection_reason  \
0  CAS00000001 2024-04-25 18:10:17             2        Cancelled   
1  CAS00000002 2024-04-25 23:51:48             2        Cancelled   
2  CAS00000003 2024-01-18 03:06:00             1        Cancelled   
3  CAS00000004 2024-01-26 02:41:52             1        Cancelled   
4  CAS00000005 2023-12-15 03:40:45             1        Cancelled   

  contact_channel  
0           Email  
1           Email  
2           Phone  
3           Phone  
4           Email  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_applicationcancellation.parquet'.
case_id                        object
cancelled_timestamp    datetime64[ns]
rejection_id                    int32
rejection_reason               object
contact_channel                object
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 19 ppa_cancellationnotification

19 Includes the following columns:
- case_id
- notification_timestamp

In [134]:
# Mock data for Batch #19

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 18
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch19_output_filename = filepath + 'ppa_cancellationnotification.parquet'
num_samples = 3000

length_min_days=1
length_max_days=3
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')


case_id = ['CAS' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 

#notification_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
notification_timestamp = df['cancelled_timestamp'] # from data feed 18
df = pd.DataFrame({
    'case_id': case_id,
    'notification_timestamp': notification_timestamp
})

df['notification_timestamp'] = pd.to_datetime(df['notification_timestamp']).dt.round('s') + random_days

df

df.to_parquet(batch19_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch19_output_filename}'.")
print(df.dtypes)
print('------------------------')

       case_id notification_timestamp
0  CAS00000001    2024-04-26 18:10:17
1  CAS00000002    2024-04-27 23:51:48
2  CAS00000003    2024-01-20 03:06:00
3  CAS00000004    2024-01-27 02:41:52
4  CAS00000005    2023-12-17 03:40:45
Data saved to 'C:/Projects/NS&I/new_mockfiles/ppa_cancellationnotification.parquet'.
case_id                           object
notification_timestamp    datetime64[ns]
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 20 servicenow_caseclosed

20 Includes the following columns:
- fos_flag
- closed_timestamp
- case_id
- inbound_channel
- time_to_action
- case_type
- created_timestamp
- extension_flag

In [140]:
# Mock data for Batch #20

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 20
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch20_output_filename = filepath + 'servicenow_caseclosed.parquet'
num_samples = 3000

length_min_days=1
length_max_days=10
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')


case_type_list = ['Change_of_Detail','Subject_Access_Request','Sales_Application','Account_Trace',
                         'Account_Query','Complaint','Telephony_Registration','Online_Registration','Bereavement']
fos_flag_list = ['Y','N']
extension_flag_list = ['Y','N']

case_id = ['CAS' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'case_id': case_id,
    'created_timestamp': created_timestamp
})

df['fos_flag'] = np.random.choice(fos_flag_list, size=len(df))
df['extension_flag'] = np.random.choice(extension_flag_list, size=len(df))
df['Inbound_channel'] = 'Post'
df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['closed_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s') + random_days
df['time_to_action'] = df['closed_timestamp'] -  df['created_timestamp']
df['time_to_action'] = df['time_to_action'].dt.days.astype('int32')

df

df.to_parquet(batch20_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch20_output_filename}'.")
print(df.dtypes)
print('------------------------')

       case_id   created_timestamp fos_flag extension_flag Inbound_channel  \
0  CAS00000001 2024-04-25 18:10:17        N              N            Post   
1  CAS00000002 2024-04-25 23:51:48        N              Y            Post   
2  CAS00000003 2024-01-18 03:06:00        Y              Y            Post   
3  CAS00000004 2024-01-26 02:41:52        Y              N            Post   
4  CAS00000005 2023-12-15 03:40:45        Y              N            Post   

     closed_timestamp  time_to_action  
0 2024-05-01 18:10:17               6  
1 2024-04-26 23:51:48               1  
2 2024-01-22 03:06:00               4  
3 2024-01-30 02:41:52               4  
4 2023-12-23 03:40:45               8  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_caseclosed.parquet'.
case_id                      object
created_timestamp    datetime64[ns]
fos_flag                     object
extension_flag               object
Inbound_channel              object
closed_timestamp     datetime64[n

### Create Mock Data

Daily Batch Feed 21 servicenow_responded

21 Includes the following columns:
- requested_further_information_timestamp
- case_id
- time_to_action
- case_type
- created_timestamp

In [141]:
# Mock data for Batch #21

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 21
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch21_output_filename = filepath + 'servicenow_responded.parquet'
num_samples = 3000

length_min_days=1
length_max_days=10
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')


case_type_list = ['Change_of_Detail','Subject_Access_Request','Sales_Application','Account_Trace',
                         'Account_Query','Complaint','Telephony_Registration','Online_Registration','Bereavement']

case_id = ['CAS' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = df['created_timestamp'] + pd.Timedelta(days=1) # from datafeed 20

df = pd.DataFrame({
    'case_id': case_id,
    'created_timestamp': created_timestamp
})

df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['requested_further_information_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s') + random_days
df['time_to_action'] = df['requested_further_information_timestamp'] -  df['created_timestamp']
df['time_to_action'] = df['time_to_action'].dt.days.astype('int32')

df

df.to_parquet(batch21_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch21_output_filename}'.")
print(df.dtypes)
print('------------------------')

       case_id   created_timestamp requested_further_information_timestamp  \
0  CAS00000001 2024-04-26 18:10:17                     2024-05-02 18:10:17   
1  CAS00000002 2024-04-26 23:51:48                     2024-04-27 23:51:48   
2  CAS00000003 2024-01-19 03:06:00                     2024-01-23 03:06:00   
3  CAS00000004 2024-01-27 02:41:52                     2024-01-31 02:41:52   
4  CAS00000005 2023-12-16 03:40:45                     2023-12-24 03:40:45   

   time_to_action  
0               6  
1               1  
2               4  
3               4  
4               8  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_responded.parquet'.
case_id                                            object
created_timestamp                          datetime64[ns]
requested_further_information_timestamp    datetime64[ns]
time_to_action                                      int32
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 22 nicecxone_messageresponses

22 Includes the following columns:
- message_type
- number_of_messages_received
- number_of_messages_responded_to_in_sla
- created_timestamp

In [144]:
# Mock data for Batch #22

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 22
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch22_output_filename = filepath + 'nicecxone_messageresponses.parquet'
num_samples = 3000

message_type_list = ['Email','Twitter','Secure Message','Webchat','Facebook']

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
number_of_messages_received = np.random.randint(70, 99, num_samples,dtype='int64')
number_of_messages_responded_to_in_sla = number_of_messages_received - np.random.randint(5, 15, num_samples,dtype='int64')

df = pd.DataFrame({
    'number_of_messages_received': number_of_messages_received,
    'number_of_messages_responded_to_in_sla': number_of_messages_responded_to_in_sla,
    'created_timestamp': created_timestamp
})

df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')


df

df.to_parquet(batch22_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch22_output_filename}'.")
print(df.dtypes)
print('------------------------')

   number_of_messages_received  number_of_messages_responded_to_in_sla  \
0                           81                                      75   
1                           90                                      78   
2                           82                                      74   
3                           72                                      59   
4                           84                                      79   

    created_timestamp  
0 2023-11-13 18:57:55  
1 2023-09-19 14:52:21  
2 2024-02-12 20:30:53  
3 2024-01-13 07:09:57  
4 2024-01-30 11:04:39  
Data saved to 'C:/Projects/NS&I/new_mockfiles/nicecxone_messageresponses.parquet'.
number_of_messages_received                        int64
number_of_messages_responded_to_in_sla             int64
created_timestamp                         datetime64[ns]
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 23 api_logs

23 Includes the following columns:
- api_id
- message_received_timestamp
- api_description
- message_call_timestamp

In [151]:
# Mock data for Batch #23

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 23
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch23_output_filename = filepath + 'api_logs.parquet'
num_samples = 3000

length_min_ms=1000
length_max_ms=2100
random_ms = pd.to_timedelta(np.random.randint(length_min_ms, length_max_ms, size=len(df)), unit='ms')

api_id = ['API' + f'{i:08}' for i in range(1, num_samples + 1)]
api_description_list = ['Payment Gateway API','Product Catalog API','User Management API','Document_View']

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
message_call_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
message_received_timestamp = message_call_timestamp + random_ms

df = pd.DataFrame({
    'api_id':api_id,
    'message_call_timestamp': message_call_timestamp,
    'message_received_timestamp': message_received_timestamp
})

df['api_description'] = np.random.choice(api_description_list, size=len(df))
df['message_call_timestamp'] = pd.to_datetime(df['message_call_timestamp']).dt.round('ms')
df['message_received_timestamp'] = pd.to_datetime(df['message_received_timestamp']).dt.round('ms')


df

df.to_parquet(batch23_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch23_output_filename}'.")
print(df.dtypes)
print('------------------------')

        api_id  message_call_timestamp message_received_timestamp  \
0  API00000001 2024-03-28 02:24:17.782    2024-03-28 02:24:19.466   
1  API00000002 2023-10-31 23:34:25.546    2023-10-31 23:34:27.105   
2  API00000003 2023-09-22 21:11:45.583    2023-09-22 21:11:47.418   
3  API00000004 2023-10-04 16:04:19.955    2023-10-04 16:04:21.718   
4  API00000005 2023-12-03 05:34:57.772    2023-12-03 05:34:59.805   

       api_description  
0  User Management API  
1  User Management API  
2  User Management API  
3  User Management API  
4        Document_View  
Data saved to 'C:/Projects/NS&I/new_mockfiles/api_logs.parquet'.
api_id                                object
message_call_timestamp        datetime64[ns]
message_received_timestamp    datetime64[ns]
api_description                       object
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 25 servicenow_requestforinformation

25 Includes the following columns:
- blocked_timestamp
- request_type
- case_id
- case_type
- created_timestamp
- blocked_sequence

In [157]:
# Mock data for Batch #25 - Request For Information

import pandas as pd
import numpy as np
from datetime import datetime


### Create Batch 25
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch25_output_filename = filepath + 'servicenow_requestforinformation.parquet'
num_samples = 3000

length_min_days = 1
length_max_days = 6
case_types_list = ['Sales_Application','Postal Payment']
request_types_list = ['Online','Mobile','ID&V']
blocked_sequence_list = [1,2]

case_ids = ['CAS' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'case_id': case_ids,
    'created_timestamp': created_timestamp
})

df['case_type'] = np.random.choice(case_types_list, size=len(df))
df['request_type'] = np.random.choice(request_types_list, size=len(df))
df['blocked_sequence'] = np.random.choice(blocked_sequence_list, size=len(df)).astype('int64')
df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')


random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')
df['blocked_timestamp'] = df['created_timestamp'] + random_days
df['blocked_timestamp'] = pd.to_datetime(df['blocked_timestamp']).dt.round('s')

df

df.to_parquet(batch25_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch25_output_filename}'.")
print(df.dtypes)
print('------------------------')




       case_id   created_timestamp          case_type request_type  \
0  CAS00000001 2023-11-13 18:57:55     Postal Payment         ID&V   
1  CAS00000002 2023-09-19 14:52:21  Sales_Application       Online   
2  CAS00000003 2024-02-12 20:30:53     Postal Payment       Online   
3  CAS00000004 2024-01-13 07:09:57  Sales_Application       Online   
4  CAS00000005 2024-01-30 11:04:39  Sales_Application       Mobile   

   blocked_sequence   blocked_timestamp  
0                 2 2023-11-18 18:57:55  
1                 1 2023-09-23 14:52:21  
2                 1 2024-02-15 20:30:53  
3                 2 2024-01-17 07:09:57  
4                 2 2024-02-02 11:04:39  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_requestforinformation.parquet'.
case_id                      object
created_timestamp    datetime64[ns]
case_type                    object
request_type                 object
blocked_sequence              int64
blocked_timestamp    datetime64[ns]
dtype: object
--------

### Create Mock Data

Daily Batch Feed 26 ssl_audittracking

26 Includes the following columns:
- audit_id
- resolution_due_date
- date_raised

In [158]:
# Mock data for Batch #26

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 26
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch26_output_filename = filepath + 'ssl_audittracking.parquet'
num_samples = 3000

length_min_days=30
length_max_days=31
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')

audit_id = ['AUD' + f'{i:04}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
date_raised = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
resolution_due_date = date_raised + random_days

df = pd.DataFrame({
    'audit_id':audit_id,
    'date_raised': date_raised,
    'resolution_due_date': resolution_due_date
})

df['date_raised'] = pd.to_datetime(df['date_raised']).dt.round('s')
df['resolution_due_date'] = pd.to_datetime(df['resolution_due_date']).dt.round('s')


df

df.to_parquet(batch26_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch26_output_filename}'.")
print(df.dtypes)
print('------------------------')

  audit_id         date_raised resolution_due_date
0  AUD0001 2023-11-13 18:57:55 2023-12-13 18:57:55
1  AUD0002 2023-09-19 14:52:21 2023-10-19 14:52:21
2  AUD0003 2024-02-12 20:30:53 2024-03-13 20:30:53
3  AUD0004 2024-01-13 07:09:57 2024-02-12 07:09:57
4  AUD0005 2024-01-30 11:04:39 2024-02-29 11:04:39
Data saved to 'C:/Projects/NS&I/new_mockfiles/ssl_audittracking.parquet'.
audit_id                       object
date_raised            datetime64[ns]
resolution_due_date    datetime64[ns]
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 27 ssl_availability

27 Includes the following columns:
- call_id
- call_system
- call_response
- created_timestamp

In [160]:
# Mock data for Batch #27

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 27
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch27_output_filename = filepath + 'ssl_availability.parquet'
num_samples = 3000

length_min_days=30
length_max_days=31
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')

call_system_list = ['Poly AI','Alfresco']
call_response_list = ['Neutral','Negative','Positive']

call_id = ['CALL' + f'{i:05}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')

df = pd.DataFrame({
    'call_id':call_id,
    'created_timestamp': created_timestamp
})

df['call_system'] = np.random.choice(call_system_list, size=len(df))
df['call_response'] = np.random.choice(call_response_list, size=len(df))
df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')


df

df.to_parquet(batch27_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch27_output_filename}'.")
print(df.dtypes)
print('------------------------')

     call_id   created_timestamp call_system call_response
0  CALL00001 2023-11-13 18:57:55    Alfresco      Positive
1  CALL00002 2023-09-19 14:52:21     Poly AI       Neutral
2  CALL00003 2024-02-12 20:30:53    Alfresco       Neutral
3  CALL00004 2024-01-13 07:09:57     Poly AI       Neutral
4  CALL00005 2024-01-30 11:04:39     Poly AI      Negative
Data saved to 'C:/Projects/NS&I/new_mockfiles/ssl_availability.parquet'.
call_id                      object
created_timestamp    datetime64[ns]
call_system                  object
call_response                object
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 28 servicenow_vulnerability

28 Includes the following columns:
- ticket_id
- criticality
- closed_timestamp
- created_timestamp

In [161]:
# Mock data for Batch #28

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 28
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch28_output_filename = filepath + 'servicenow_vulnerability.parquet'
num_samples = 3000

length_min_days=1
length_max_days=20
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')

criticality_list = ['Low','Medium','High','Critical']

ticket_id = ['TID' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
closed_timestamp = created_timestamp + random_days


df = pd.DataFrame({
    'ticket_id':call_id,
    'created_timestamp': created_timestamp,
    'closed_timestamp': closed_timestamp,    
})

df['criticality'] = np.random.choice(criticality_list, size=len(df))
df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['closed_timestamp'] = pd.to_datetime(df['closed_timestamp']).dt.round('s')


df

df.to_parquet(batch28_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch28_output_filename}'.")
print(df.dtypes)
print('------------------------')

   ticket_id   created_timestamp    closed_timestamp criticality
0  CALL00001 2023-12-06 17:31:37 2023-12-19 17:31:37    Critical
1  CALL00002 2024-03-19 14:46:33 2024-04-04 14:46:33        High
2  CALL00003 2024-01-31 21:08:44 2024-02-01 21:08:44        High
3  CALL00004 2024-01-20 05:33:28 2024-01-24 05:33:28      Medium
4  CALL00005 2023-09-16 03:45:35 2023-09-20 03:45:35         Low
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_vulnerability.parquet'.
ticket_id                    object
created_timestamp    datetime64[ns]
closed_timestamp     datetime64[ns]
criticality                  object
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 29 servicenow_complaintresponse

29 Includes the following columns:
- case_id
- case_type
- resolved_timestamp
- recourse_received_timestamp

In [166]:
# Mock data for Batch #29

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 29
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch29_output_filename = filepath + 'servicenow_complaintresponse.parquet'
num_samples = 3000

length_min_days=20
length_max_days=31
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')

case_type_list = ['Change_of_Detail','Subject_Access_Request','Sales_Application','Account_Trace',
                         'Account_Query','Complaint','Telephony_Registration','Online_Registration','Bereavement']

case_id = ['CAS' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
resolved_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
recourse_received_timestamp = resolved_timestamp + random_days


df = pd.DataFrame({
    'case_id':case_id,
    'resolved_timestamp': resolved_timestamp,
    'recourse_received_timestamp': recourse_received_timestamp,    
})

df['case_type'] = np.random.choice(case_type_list, size=len(df))
df['resolved_timestamp'] = pd.to_datetime(df['resolved_timestamp']).dt.round('s')
df['recourse_received_timestamp'] = pd.to_datetime(df['recourse_received_timestamp']).dt.round('s')


df

df.to_parquet(batch29_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch29_output_filename}'.")
print(df.dtypes)
print('------------------------')

       case_id  resolved_timestamp recourse_received_timestamp  \
0  CAS00000001 2024-01-03 12:47:49         2024-01-28 12:47:49   
1  CAS00000002 2023-11-21 21:34:09         2023-12-11 21:34:09   
2  CAS00000003 2023-09-23 01:29:35         2023-10-16 01:29:35   
3  CAS00000004 2023-09-24 11:24:13         2023-10-17 11:24:13   
4  CAS00000005 2023-11-06 13:50:35         2023-12-03 13:50:35   

             case_type  
0            Complaint  
1  Online_Registration  
2        Account_Query  
3     Change_of_Detail  
4        Account_Trace  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_complaintresponse.parquet'.
case_id                                object
resolved_timestamp             datetime64[ns]
recourse_received_timestamp    datetime64[ns]
case_type                              object
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 30 servicenow_resolvedissues

30 Includes the following columns:
- incident_id
- time_to_action
- priority
- created_timestamp
- resolved_timestamp

In [235]:
# Mock data for Batch #30

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 30
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch30_output_filename = filepath + 'servicenow_resolvedissues.parquet'
num_samples = 3000
priority_list = [1,2,3,4]

length_min_hrs=124
length_max_hrs=720
random_hrs = pd.to_timedelta(np.random.randint(length_min_hrs, length_max_hrs, size=len(df)), unit='h')

incident_id = ['INC' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
resolved_timestamp = created_timestamp
time_to_action = random_hrs

df = pd.DataFrame({
    'incident_id': incident_id,
    'created_timestamp': created_timestamp,
    'resolved_timestamp': resolved_timestamp
})

df['priority'] = np.random.choice(priority_list, size=len(df))
df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['resolved_timestamp'] = pd.to_datetime(df['resolved_timestamp']).dt.round('s')


df.loc[df['priority'] == 4, 'resolved_timestamp'] = df['created_timestamp'] + random_hrs
df.loc[df['priority'] == 3, 'resolved_timestamp'] = df['created_timestamp'] + pd.to_timedelta(np.random.randint(5, 24, size=len(df)), unit='h')
df.loc[df['priority'] == 2, 'resolved_timestamp'] = df['created_timestamp'] + pd.to_timedelta(np.random.randint(2, 5, size=len(df)), unit='h')
df.loc[df['priority'] == 1, 'resolved_timestamp'] = df['created_timestamp'] + pd.to_timedelta(np.random.randint(1, 3, size=len(df)), unit='h')
df['time_to_action'] =(pd.to_datetime(df['resolved_timestamp']) - pd.to_datetime(df['created_timestamp'])).dt.total_seconds()/3600
df
df.to_parquet(batch30_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch30_output_filename}'.")
print(df.dtypes)
print('------------------------')

   incident_id   created_timestamp  resolved_timestamp  priority  \
0  INC00000001 2024-03-22 10:43:33 2024-03-22 12:43:33         2   
1  INC00000002 2024-01-15 23:02:16 2024-01-16 02:02:16         2   
2  INC00000003 2023-09-10 02:53:30 2023-09-10 10:53:30         3   
3  INC00000004 2023-09-26 14:04:55 2023-09-26 17:04:55         2   
4  INC00000005 2024-02-10 03:41:30 2024-02-26 20:41:30         4   

   time_to_action  
0             2.0  
1             3.0  
2             8.0  
3             3.0  
4           401.0  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_resolvedissues.parquet'.
incident_id                   object
created_timestamp     datetime64[ns]
resolved_timestamp    datetime64[ns]
priority                       int32
time_to_action               float64
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 31 servicenow_deployedfixes

31 Includes the following columns:
- incident_id
- time_to_action
- priority
- created_timestamp
- deployed_timestamp

In [236]:
# Mock data for Batch #31

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 31
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch31_output_filename = filepath + 'servicenow_deployedfixes.parquet'
num_samples = 3000
priority_list = [1,2,3,4]

length_min_days=10
length_max_days=70
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')

incident_id = ['INC' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
deployed_timestamp = created_timestamp + random_days

df = pd.DataFrame({
    'incident_id': incident_id,
    'created_timestamp': created_timestamp,
    'deployed_timestamp': deployed_timestamp
})

df['priority'] = np.random.choice(priority_list, size=len(df))
df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['deployed_timestamp'] = pd.to_datetime(df['deployed_timestamp']).dt.round('s')
df['time_to_action'] =(pd.to_datetime(df['deployed_timestamp']) - pd.to_datetime(df['created_timestamp'])).dt.days
df

df.to_parquet(batch31_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch31_output_filename}'.")
print(df.dtypes)
print('------------------------')

   incident_id   created_timestamp  deployed_timestamp  priority  \
0  INC00000001 2023-09-02 15:12:53 2023-10-26 15:12:53         2   
1  INC00000002 2024-04-23 15:08:50 2024-06-19 15:08:50         2   
2  INC00000003 2023-12-29 09:03:39 2024-03-01 09:03:39         4   
3  INC00000004 2024-02-24 14:20:39 2024-03-05 14:20:39         3   
4  INC00000005 2023-09-22 05:09:15 2023-10-05 05:09:15         2   

   time_to_action  
0              54  
1              57  
2              63  
3              10  
4              13  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_deployedfixes.parquet'.
incident_id                   object
created_timestamp     datetime64[ns]
deployed_timestamp    datetime64[ns]
priority                       int32
time_to_action                 int64
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 32 servicenow_rca

32 Includes the following columns:
- incident_id
- time_to_action
- priority
- created_timestamp
- rca_timestamp

In [240]:
# Mock data for Batch #32

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 32
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch32_output_filename = filepath + 'servicenow_rca.parquet'
num_samples = 3000
priority_list = [1,2,3,4]

length_min_days=4
length_max_days=35
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')

incident_id = ['INC' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
rca_timestamp = created_timestamp 

df = pd.DataFrame({
    'incident_id': incident_id,
    'created_timestamp': created_timestamp,
    'rca_timestamp': rca_timestamp
})

df['priority'] = np.random.choice(priority_list, size=len(df))
df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['rca_timestamp'] = pd.to_datetime(df['rca_timestamp']).dt.round('s')


df.loc[df['priority'] == 4, 'rca_timestamp'] = df['created_timestamp'] + random_hrs
df.loc[df['priority'] == 3, 'rca_timestamp'] = df['created_timestamp'] + pd.to_timedelta(np.random.randint(3, 30, size=len(df)), unit='D')
df.loc[df['priority'] == 2, 'rca_timestamp'] = df['created_timestamp'] + pd.to_timedelta(np.random.randint(1, 2, size=len(df)), unit='D')
df.loc[df['priority'] == 1, 'rca_timestamp'] = df['created_timestamp'] + pd.to_timedelta(np.random.randint(1, 2, size=len(df)), unit='D')
df['time_to_action'] =(pd.to_datetime(df['rca_timestamp']) - pd.to_datetime(df['created_timestamp'])).dt.days
df
df.to_parquet(batch32_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch32_output_filename}'.")
print(df.dtypes)
print('------------------------')

   incident_id   created_timestamp       rca_timestamp  priority  \
0  INC00000001 2024-02-26 22:43:10 2024-03-14 22:43:10         3   
1  INC00000002 2024-03-22 11:18:34 2024-04-05 11:18:34         3   
2  INC00000003 2024-04-14 09:03:14 2024-05-03 09:03:14         3   
3  INC00000004 2023-10-04 13:41:23 2023-10-05 13:41:23         1   
4  INC00000005 2023-12-18 12:13:59 2023-12-19 12:13:59         2   

   time_to_action  
0              17  
1              14  
2              19  
3               1  
4               1  
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_rca.parquet'.
incident_id                  object
created_timestamp    datetime64[ns]
rca_timestamp        datetime64[ns]
priority                      int32
time_to_action                int64
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 33 servicenow_permanentfixavailable

33 Includes the following columns:
- incident_id
- time_to_action
- created_timestamp
- fix_available_timestamp

In [242]:
# Mock data for Batch #33

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 31
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch33_output_filename = filepath + 'servicenow_permanentfixavailable.parquet'
num_samples = 3000

length_min_days=10
length_max_days=70
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')

incident_id = ['INC' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
fix_available_timestamp = created_timestamp + random_days

df = pd.DataFrame({
    'incident_id': incident_id,
    'created_timestamp': created_timestamp,
    'fix_available_timestamp': fix_available_timestamp
})

df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['fix_available_timestamp'] = pd.to_datetime(df['fix_available_timestamp']).dt.round('s')
df['time_to_action'] =(pd.to_datetime(df['fix_available_timestamp']) - pd.to_datetime(df['created_timestamp'])).dt.days
df

df.to_parquet(batch33_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch33_output_filename}'.")
print(df.dtypes)
print('------------------------')

   incident_id   created_timestamp fix_available_timestamp  time_to_action
0  INC00000001 2023-09-02 15:12:53     2023-10-26 15:12:53              54
1  INC00000002 2024-04-23 15:08:50     2024-06-19 15:08:50              57
2  INC00000003 2023-12-29 09:03:39     2024-03-01 09:03:39              63
3  INC00000004 2024-02-24 14:20:39     2024-03-05 14:20:39              10
4  INC00000005 2023-09-22 05:09:15     2023-10-05 05:09:15              13
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_permanentfixavailable.parquet'.
incident_id                        object
created_timestamp          datetime64[ns]
fix_available_timestamp    datetime64[ns]
time_to_action                      int64
dtype: object
------------------------


### Create Mock Data

Daily Batch Feed 34 servicenow_foscomplaint

34 Includes the following columns:
- case_id
- created_timestamp
- fos_timestamp

In [243]:
# Mock data for Batch #34

import pandas as pd
import numpy as np
from datetime import datetime

### Create Batch 31
# Set conditions
np.random.seed(0)
filepath = 'C:/Projects/NS&I/new_mockfiles/'
batch34_output_filename = filepath + 'servicenow_foscomplaint.parquet'
num_samples = 3000

length_min_days=5
length_max_days=20
random_days = pd.to_timedelta(np.random.randint(length_min_days, length_max_days, size=len(df)), unit='D')

case_id = ['CAS' + f'{i:08}' for i in range(1, num_samples + 1)]

start_date = pd.to_datetime('2023-09-01')
end_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d')) 
created_timestamp = pd.to_datetime(np.random.randint(start_date.value, end_date.value, num_samples,dtype='int64'), unit='ns')
fos_timestamp = created_timestamp + random_days

df = pd.DataFrame({
    'case_id': case_id,
    'created_timestamp': created_timestamp,
    'fos_timestamp': fos_timestamp
})

df['created_timestamp'] = pd.to_datetime(df['created_timestamp']).dt.round('s')
df['fos_timestamp'] = pd.to_datetime(df['fos_timestamp']).dt.round('s')
df

df.to_parquet(batch34_output_filename, engine='pyarrow', index=False)

print(df.head())
print(f"Data saved to '{batch34_output_filename}'.")
print(df.dtypes)
print('------------------------')

       case_id   created_timestamp       fos_timestamp
0  CAS00000001 2023-12-12 10:01:47 2023-12-29 10:01:47
1  CAS00000002 2024-02-01 00:02:41 2024-02-11 00:02:41
2  CAS00000003 2023-09-25 02:07:04 2023-09-30 02:07:04
3  CAS00000004 2024-01-29 00:42:27 2024-02-06 00:42:27
4  CAS00000005 2023-09-02 15:12:53 2023-09-18 15:12:53
Data saved to 'C:/Projects/NS&I/new_mockfiles/servicenow_foscomplaint.parquet'.
case_id                      object
created_timestamp    datetime64[ns]
fos_timestamp        datetime64[ns]
dtype: object
------------------------
