# Call Azure ML for PY Framework web service - Payroll Anomaly Detection

### Imports

In [None]:
import json
import os

import pandas as pd
import requests

from ngamlfpy.hrxmlconfig import MLModelConfig
from ngamlfpy.pipeline import FileFinder
from ngamlfpy.utils import pulled_df_to_json

### Settings

In [None]:
# Scoring endpoint of the deployed prediction web service; the prediction
# request built further down is POSTed to this URL.
# NOTE(review): the URL is plain http, so the request body is sent unencrypted —
# confirm this is acceptable for the data being posted.
web_service_url = "http://b03d85f2-b070-497e-a0f8-03ec2315c0af.westus.azurecontainer.io/score"

### Parameters

In [None]:
# ml_service is passed to FileFinder and to MLModelConfig; model_code and
# model_version are passed to FileFinder as model_name / model_version.
ml_service = 'PAD'
model_code =  'A001' 
model_version = '001' 
# True: build the request from the csv file named below.
# False: load an already prepared json request file instead.
use_csv_input = True
# Handed to FileFinder as base_folder.
base_folder = './data'
input_file_name= 'PAD_A001_001_WDY_ALT_TST_ALTBM_WDT_000_sm_202003_input.csv'  #<== csv input file to use for predictions
num_emps_to_clip = 10  # <== Restrict to this number of employees to use from input file. (-1 = use all emps in file) 
# Only used when use_csv_input is True: also write the generated request
# to a .json file in the finder's output folder.
save_csv_file_as_json = True

### Find input file in relevant data folder

In [None]:
# Build the FileFinder for this service / model. It is configured with
# FileFinder.MLFOLDER_INPUT as the relative input folder and
# FileFinder.MLFOLDER_PREDICT as the relative output folder.
finder = FileFinder(
    ml_service,
    use_model_name_in=True,
    use_model_name_out=False,
    model_name=model_code,
    model_version=model_version,
    base_folder=base_folder,
    relative_input_folder=FileFinder.MLFOLDER_INPUT,
    relative_output_folder=FileFinder.MLFOLDER_PREDICT,
)

full_path_in = finder.get_full_input_file_name(input_file_name)

# Only the csv read is guarded, so the "file read not successful" message is
# printed for read failures only. Previously a bare `except:` also swallowed
# errors from the file-name parsing and the model-config lookup below and
# mislabelled them as read errors.
try:
    print('Reading: ', full_path_in)
    df = pd.read_csv(full_path_in)
except Exception as err:
    # Show the real cause and stop: the following cells all need `df`, so
    # continuing would only fail later with a less helpful NameError.
    print('Error - file read not successful: ', full_path_in, '-', repr(err))
    raise

display(df.head())

# parse_input_file_name is unpacked into nine fields here; the first three are
# not needed. The remainder must consist of exactly four '_'-separated parts
# (client, payroll area, period, other), otherwise the unpacking raises ValueError.
_, _, _, payroll_service, gcc, lcc, group, system, rest = finder.parse_input_file_name(
    input_file_name, include_remainder=True
)
client, abkrs, period, other = rest.split('_')

# Model configuration for this customer / payroll area; used when converting
# the csv data into the json request.
ml_config = MLModelConfig.get_model_config_from_web_service_for_cust(
    ml_service, system=system, gcc=gcc, lcc=lcc, payroll_area=abkrs
)


### Convert input csv file to json ready for posting to prediction web service

In [None]:
# Build `j_predict`, the request payload, either from the csv data frame or
# from an existing json file, then print a short summary of it.
if use_csv_input:
    print('Using csv input')
    j_predict = pulled_df_to_json(
        df,
        ml_config,
        period,
        use_first_data_line_as_selection=True,
        use_value_title_format=True,
        clip_emps=num_emps_to_clip,
    )

    # Same {'data': ...} envelope that is later posted to the web service.
    raw_data = {'data': j_predict}

    if save_csv_file_as_json:
        # splitext strips only the final extension, so file names (or paths)
        # containing additional dots are not truncated at the first dot.
        predict_json_file_name = os.path.splitext(input_file_name)[0] + '.json'
        full_path_json = os.path.join(finder.get_output_folder(), predict_json_file_name)
        print('Writing json predict in file: ', full_path_json)
        with open(full_path_json, 'w') as outfile:
            json.dump(raw_data, outfile, indent=4)
else:
    import pprint
    input_json_file_name = ''  # <-- supply json file name here (including path)
    print ('Using Json predict file: ', input_json_file_name)

    with open(input_json_file_name, encoding='utf-8') as json_data:
        j_predict = json.load(json_data)

    # Files written by the csv branch above are wrapped as {'data': ...}.
    # Unwrap that envelope so such a file can be fed back in directly; a file
    # that already has 'values' at the top level is used unchanged.
    if 'values' not in j_predict and 'data' in j_predict:
        j_predict = j_predict['data']

print('Num emps: ', len(j_predict['values']))
print('')
print('First emp: ', j_predict['values'][0])
print('')
print('Selection: ', j_predict['selection'])
print('')

### Prepare headers and request payload

In [None]:
# The request body is JSON, so declare it in the HTTP headers.
headers = {}
headers["Content-Type"] = "application/json"

# Wrap the payload under the 'data' key, serialise it and encode it as UTF-8
# bytes in one step.
test_samples = json.dumps({'data': j_predict}).encode('utf8')

# Pretty-print the selection part of the payload as a visual check.
selection_text = json.dumps(
    j_predict['selection'],
    sort_keys=False,
    indent=4,
    separators=(',', ': '),
)
print (selection_text)

### Call Prediction Web Service

In [None]:
# Upper bound in seconds for the call. Without a timeout, requests waits
# indefinitely if the service never answers, which hangs the notebook.
# NOTE(review): 120 s is a guess at a generous limit — adjust it to the
# scoring timeout configured for the deployed service.
request_timeout_secs = 120

# POST the serialised payload to the scoring endpoint.
# Raises requests.exceptions.Timeout if the limit above is exceeded.
response = requests.post(
    web_service_url,
    data=test_samples,
    headers=headers,
    timeout=request_timeout_secs,
)

if response.status_code == 200:
    json_predictions = response.json()
    # str() guards the concatenation in case a status field is not a string.
    print('Prediction Web Service call successful - ' + str(len(json_predictions['Predictions'])) + ' emps processed'
          + ' Model used: ' + str(json_predictions['info']['azure_model_name'])
          + ' Model config ws status: ' + str(json_predictions['info']['config_web_service_call_status']))

    print ('First 5 anomaly predictions: ')
    for pred in json_predictions['Predictions'][:5]:
        print('   Anomaly Score: ' + str(pred))
else:
    # json_predictions is NOT (re)assigned on this path; the cells below then
    # either fail or show the result of an earlier successful call.
    print('web service failed. Status: ' + str(response.status_code) + ' message: ' + str(response.text))

In [None]:
# Inspect the 'info' section of the response; the success message of the call
# reads the model name and the config web service status from it.
json_predictions['info']

In [None]:
# Inspect the 'selection' section returned by the web service.
json_predictions['selection']

In [None]:
# Inspect the first five entries of the 'Predictions' list.
json_predictions['Predictions'][:5]

### Write JSON predictions to output file

In [None]:
# Derive the output file name from the input file name: a trailing 'input'
# part is replaced by 'predictions', otherwise 'predictions' is appended.
# The extension is stripped first, so an input name that does not end in
# '_input.csv' no longer yields '<name>.csv_predictions.json'.
input_stem = os.path.splitext(os.path.basename(full_path_in))[0]
file_name_parts = input_stem.split('_')
if file_name_parts[-1] == 'input':
    file_name_parts[-1] = 'predictions'
else:
    file_name_parts.append('predictions')
predict_json_output_file_name = '_'.join(file_name_parts) + '.json'

# Write the complete web service response to the finder's output folder.
full_path_json_out = os.path.join(finder.get_output_folder(), predict_json_output_file_name)
print('writing prediction out: ', full_path_json_out)
with open(full_path_json_out, 'w') as outfile:
    json.dump(json_predictions, outfile, indent=4)

### Analyse predictions

In [None]:
scores = [x['Score'] for x in json_predictions['Predictions']]
scores = scores[:num_emps_to_clip]
scores[:2]

In [None]:
important_features = [x['Important_Features'] for x in json_predictions['Predictions']]
important_features = important_features[:num_emps_to_clip]
important_features[:2]

In [None]:
df_clipped = df[:num_emps_to_clip].copy()

In [None]:
# Attach the anomaly scores as a 'Score' column (the lengths must match).
df_clipped = df_clipped.assign(Score=scores)
df_clipped.head()

In [None]:
# Column names for the important features.
# NOTE(review): assumes every prediction carries three important features — confirm.
feature_columns = ['LF1', 'LF2', 'LF3']
df_features = pd.DataFrame(data=important_features, columns=feature_columns)

# DataFrame.join aligns on the index, so the feature rows are matched to the
# input rows by position in the default integer index.
df_clipped = df_clipped.join(df_features)
df_clipped.head()

### Show input file with anomaly scores and important features appended (sorted by anomaly score)

In [None]:
# Order the rows by anomaly score, lowest score first (ascending is the pandas default).
df_clipped_sorted = df_clipped.sort_values(by='Score', ascending=True)
df_clipped_sorted.head()