# Package Pilot Testing
A full pilot experiment will be conducted on two sets of data to test the package performance and accuracy
- `rawdata` folder which contains the actual data and will not be disclosed
- `data` folder which contains fake data and will be used for documentation purpose

# 1. Real Data Pilot Experiment

## 1.1 Blue Automata With `automata_execution`

In [2]:
from blueautomata.data_compilation import BlueAutomata

test = BlueAutomata(
    folder_path=r"C:\Users\Khor Kean Teng\Downloads\AUP Automata\rawdata",
    checklist = 'rawdata/Department Checklist.xlsx',
    staff_data = 'rawdata/stafflist.xlsx',
    name_key= ['DP', 'INSIDES', 'SAS', 'STATSMART1', 'STATSMARTCUBE', 'CCRIS', 'ESSP',
               'RBCIFAS', 'FEMS', 'TOSS', 'USERSBANKING','ADMIN'],
    name_code= [1, 1, 1, 1, 1, 1, 1, 6, 10, 10, 10, 10],
)
df = test.automata_execution()
df.head()

Unnamed: 0,Department,Dept,User ID,Name,System1,Cube
0,Jabatan Pengurusan Data dan Statistik,JPS,sskila,Roshakila binti Ahmad,NAPIC,
1,Jabatan Pengurusan Data dan Statistik,JPS,sshocc,Dr. Ho Chiung Ching,NAPIC,
2,Jabatan Rangkuman Kewangan,FID,desharul,Shahrul Fitri bin Shafii,NAPIC,
3,Jabatan Pengurusan Data dan Statistik,JPS,sskila,Roshakila binti Ahmad,BSH-BPN,
4,Jabatan Dasar Monetari,JDM,sseilyn,Eilyn Chong Yee Lin,BSH-BPN,


In [3]:
# check the number of rows
len(df)

3964

In [3]:
# export for further usage
#df.to_excel('checklist.xlsx', index=False)

## 1.2 AutomataReport With `automata_report_summary`

In [4]:
from blueautomata.automation_report import AutomataReport

test = AutomataReport(
    folder_path=r"C:\Users\Khor Kean Teng\Downloads\AUP Automata\rawdata",
    checklist = 'rawdata/Department Checklist.xlsx',
    staff_data = 'rawdata/stafflist.xlsx',
    name_key= ['DP', 'INSIDES', 'SCRIBS', 'SAS', 'STATSMART1', 'STATSMARTCUBE', 'CCRIS', 'ESSP',
               'RBCIFAS', 'FEMS', 'TOSS', 'USERSBANKING','ADMIN'],
    name_code= [1, 1, 1, 1, 1, 1, 1, 1, 6, 10, 10, 10, 10],
)

df = test.automata_report_summary()
df.head(n = len(df))

Unnamed: 0,System,Match IDs,Not Match IDs,Total IDs
0,NAPIC,3,1,4
1,BSH-BPN,3,1,4
2,EPF,23,1,24
3,BR1M,5,1,6
4,HIES,11,3,14
5,CCRIS,18,1,19
6,DP,79,4,83
7,ESSP,48,4,52
8,FEMS,84,1,85
9,FEMS_MACRO1,63,0,63


In [2]:
# check the number of matched ids
# this should be the same as the number of rows in the the `automata_execution` output
df['Match IDs'].sum()

3964

In [5]:
# export the report summary for reference
#df.to_excel('report.xlsx', index=False)

## 1.3 Inconsistency With `fix_inconsistency`

## 1.4 SystemCubeChecker With `system_cube_update`

## 1.5 BatchExport With `batch_export`

In [2]:
import pandas as pd
from blueautomata.batch_export import BatchExport

df = pd.read_excel('data/automata_output.xlsx')

temp = BatchExport(destination = r"C:\Users\Khor Kean Teng\Downloads\AUP Automata\data\dept", masterlist = df)
temp.batch_export()

Export Completed


## 1.6 AutomateVBA With `templatetize`

In [5]:
from blueautomata.to_vba import automate_vba
temp = automate_vba(
    filepath=r'C:\\Users\\Khor Kean Teng\\Downloads\\AUP Automata\\vbanew.xlsm',
    macro = 'vbanew.xlsm!Module1.kt_template' 
)
temp.templatetize()

# 2. Fake Data Pilot Experiment

In [3]:
test = BlueAutomata(
    folder_path=r"C:\Users\Khor Kean Teng\Downloads\AUP Automata\data\fakesystem",
    checklist = 'data/checklist.xlsx',
    staff_data = 'data/fake_hr_data.xlsx',
    name_key= ['BSE', 'HKEX', 'KLSE', 'LSE1' 'NASDAQ', 'NYSE', 'SGX', 'SSE', 'TSE'],
    name_code= [1, 1, 1, 1, 1, 1, 1, 1, 1],
)
df = test.automata_execution()
df.head()

Unnamed: 0,Department,Dept,User ID,Name,System1,Cube
0,Google,GOOG,ewbzm6yv67w33mxy9,Christopher Bruce,BSE,
1,Apple,AAPL,wrzzdur66uzwvb0dh,Michael Perez,BSE,
2,Apple,AAPL,aurg7k1rxgmnsylsv,Stephanie Carr,BSE,
3,Apple,AAPL,np81yvtz4zcum4h39,Julia Vargas,BSE,
4,Apple,AAPL,99ttcx5t43r1rrwr1,Michelle Walker,BSE,


In [4]:
df.shape

(2652, 6)