# Implementation

## Packages

In [75]:
# data elaboration functions
import pandas as pd
from six.moves import collections_abc
import string
import numpy as np

# datetime functions
import datetime as dt

# file management functions
import os
import sys
import opendatasets as od
import pickle
from pathlib import Path

# plot functions
import matplotlib.pyplot as plt
%matplotlib inline

# data science functions
import matplotlib.pyplot as plt
from kneed import KneeLocator
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler, scale
from sklearn.metrics import mean_absolute_error
import joblib
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import xgboost as xgb

# statistical functions
from scipy.stats.mstats import winsorize

# configuration file
# Put the parent of the working directory on sys.path (once), presumably so the
# project packages imported below can be resolved -- TODO confirm repo layout.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# custom functions
from Code.Profiling.Intermittent.intermittent import Intermittent
from Code.Utils.utils import Utils
from Code.Scoring.kpi import Kpi
from Code.Scoring.forecast import Forecasting
from Code.Scoring.train import Training
from Code.Scoring.train_test import TrainTest
from Code.Scoring.scoring import Scoring
from Code.Regressors.regressors import Regressors
from Code.Plotting.plots import Plots
from Configuration.config import cfg_path

## Setup

In [76]:
# Parent of the current working directory; used below as the base for data/plot paths
root = Path.cwd().parent

## Load Data

In [77]:
# Load the pickled claims dataframe from the configured output folder and preview it.
# NOTE: unpickling can execute arbitrary code -- only load files produced by a trusted step.
final_pickle_path = os.path.join(root, cfg_path.data_dir.output_path, 'insurance_claims_final.pkl')
df_final = pd.read_pickle(final_pickle_path)
df_final.head()

Unnamed: 0,ICD10Description,DateOfAccident,RmaRegionDesc,Product,Sum of PaidDaysValue
0,Amputation_of_finger,1982-10-24,Witbank,IOD Workmans Policy,0.0
1,Amputation_of_finger,1982-10-25,,,0.0
2,Amputation_of_finger,1982-10-26,,,0.0
3,Amputation_of_finger,1982-10-27,,,0.0
4,Amputation_of_finger,1982-10-28,,,0.0


## Parameter setup

In [78]:
# Column holding the series identifier; used below to group and filter the data.
# NOTE(review): `id` shadows the Python builtin; the name is kept because the
# following cells reference it.
id = 'ICD10Description'
# Columns that together identify one row. Entries must match the dataframe column
# names exactly (no stray whitespace), so the id column is referenced through `id`.
list_unique_id = [id, 'DateOfAccident']
# NOTE(review): not used in the cells shown here -- confirm whether it is still needed.
list_temp = ['']
# Column holding the values to be profiled
y = 'Sum of PaidDaysValue'
# Detect the date column(s) of the dataframe (the helper prints what it found;
# presumably it also returns them -- TODO confirm against Utils.find_date).
date_var = Utils.find_date(df_final)

find_date, date_col found: ['DateOfAccident']


## Working dataframe

In [79]:
# Working dataframe: an independent copy of the rows where the y variable is present
df = df_final.loc[df_final[y].notna()].copy()
print('Id available', list(df[id].unique()))

Id available ['Amputation_of_finger', 'Fracture_of_finger', 'Fatal_due_to_Accident_Any_', 'Noise_Induced_Hearing_Loss', 'Open_wound_head', 'Contusion_of_finger_s_thumb', 'Foreign_body_cornea', 'Sprain_and_strain_of_cervical_spine', 'Other_respiratory_conditions', 'Contusion_of_knee', 'Contusion_of_shoulder_upper_arm', 'Sprain_collateral_ligament_of_knee', 'Open_wound_other_parts_of_wrist_hand', 'Superficial_injuries_of_abdomen_lower_back_pelvis', 'Sprain_and_strain_of_ankle', 'Open_wound_other_parts_lower_leg', 'Superficial_injury_of_other_parts_of_head', 'Contusion_of_ankle', 'Open_wound_of_finger_s_thumb', 'Sprain_and_strain_of_lumbar_spine', 'Barotrauma_Otitic', 'Contusion_of_elbow', 'Contusion_other_parts_of_wrist_hand', 'Fracture_of_thumb', 'Open_wound_of_knee', 'Open_wound_of_back_wall_of_thorax', 'Superficial_injury_of_wrist_hand', 'Fracture_of_shaft_of_tibia', 'Contusion_of_lower_back_and_pelvis', 'Contusion_other_parts_of_foot', 'Superficial_injury_of_lip_oral_cavity', 'Open_w

## Compute intermittent indicators

In [80]:
# Winsorizing parameters (both are passed to the indicator function below)
highest, lowest = 0.05, 0.05

# Parameters for identifying intermittent time series (also passed to the indicator function)
perc, quant = 0.10, 0.90

### Choose indicator function

In [81]:
# Name of the indicator function to apply:
# 'compute_indicator_values' or 'enhanced_compute_indicator_values'
func = 'enhanced_compute_indicator_values'

# Compute the indicators once per id from that id's y values.
# NOTE(review): `threshold` is not assigned in any cell shown in this notebook --
# confirm it is defined before this cell runs, otherwise a fresh-kernel run
# stops here with a NameError.
df_scoring = (
    df[[id, y]]
    .groupby(id)
    .apply(
        lambda grp: Intermittent.call_intermittent_function(
            func, np.array(grp[y]), threshold, perc, quant, highest, lowest
        )
    )
    .reset_index(level=id)
)

## Profiling intermittent time series

In [89]:
# Keep only the rows without NaN indicators and report which ids were dropped
df_scoring_no_nan = df_scoring.dropna()
list_nan = list(set(df_scoring[id]) - set(df_scoring_no_nan[id]))
print('List of nan ids', list_nan)

# Classification thresholds for intermittent time series.
# These need to be adjusted to each type of data you are dealing with.
thres_cv2_constant = 0.01
thres_cv2 = 0.2
thres_adi = 1.2
thres_sddi = 6.0
min_time_cons = 25
# Value of the 'type' column taken from the first distinct entry.
# NOTE(review): `type` shadows the Python builtin for the rest of the session;
# the name is kept here in case other cells rely on it.
type = df_scoring_no_nan['type'].unique()[0]

df_profiling = Intermittent.classify_intermittent(
    df_scoring_no_nan,
    type,
    thres_cv2_constant,
    thres_cv2,
    thres_adi,
    thres_sddi,
    min_time_cons,
)

# Dictionary mapping each profile name to the list of ids classified with it
list_of_profiles = [
    'regular',
    'constant_zero',
    'constant',
    'spikes',
    'lumpy',
    'erratic',
    'unforecastable_time',
    'unforecastable_quantity',
]
dict_profiling = {
    profile_name: list(
        df_profiling.loc[df_profiling.profile == profile_name, id].unique())
    for profile_name in list_of_profiles
}
for profile_name, profile_ids in dict_profiling.items():
    print(id, profile_name, profile_ids)


List of nan ids ['Blast_injury_syndrome', 'Effects_of_electric_current', 'Open_wound_forearm_Injury_of_muscle_and_tendons', 'Needle_stick_injuries', 'Heat_cramp', 'Open_wound_forearm_Open_wound_of_finger_s_thumb', 'Open_wound_of_eyelid_periocular_area_Contusion_of_eyeball_orbital_tissues_Open_wound_of_finger_s_thumb', 'Burn_of_trunk_Burn_of_shoulder_upper_limb', 'Contusion_unspecified_parts_forearm_Contusion_of_lower_back_and_pelvis', 'Superficial_injury_of_other_parts_of_head_Open_wound_of_lip_oral_cavity', 'Foreign_body_in_ear_auditory_canal_Barotrauma_Otitic', 'Contusion_of_throat_Contusion_of_elbow', 'Dermatitis', 'Open_wound_of_neck_part_unspecified_Contusion_of_lower_back_and_pelvis', 'Contusion_of_eyeball_orbital_tissues_Crushing_injury_of_face', 'Superficial_injury_of_lower_leg_Open_wound_head_Contusion_of_hip', 'Superficial_injury_of_forearm_unspecified_Contusion_other_parts_of_foot', 'Open_wound_of_wrist_and_hand_Nitrogen_oxides', 'Open_wound_of_lower_back_pelvis_Open_wound_o

### Plotting profiled series

In [90]:
# Write one HTML plot per id, stored in one sub-folder per profile.
for profile in list_of_profiles:
    df_to_plot = df.loc[df[id].isin(dict_profiling[profile])].copy()

    # Compute the ids once per profile instead of on every iteration
    ids_to_plot = list(df_to_plot[id].unique())
    if not ids_to_plot:
        # Nothing to plot for this profile: do not create an empty folder
        continue

    # The output folder is the same for every id of the profile, so it is
    # built and created once rather than once per plot
    folder_name = os.path.join(root, cfg_path.data_dir.plot_path, profile)
    Utils.create_folder_tree(folder_name)

    for count, i in enumerate(ids_to_plot, start=1):
        print('Plotting id:', i, 'as', count, 'of', len(ids_to_plot))
        plot = Plots.sliding_line_plot(df_to_plot, y, id, i, chart_title="")
        plot.write_html(os.path.join(
            folder_name, id + '_' + str(i) + '_profile_' + profile + ".html"))


Plotting id: Sprain_and_strain_of_cervical_spine as 1 of 31
find_date, date_col found: ['DateOfAccident']
sliding_line_plot: plotting Sum of paiddaysvalue ICD10Description Sprain_and_strain_of_cervical_spine
Creation of the directory failed or already present c:\Users\mabellani\OneDrive - Microsoft\Documents\REPOSITORY\dstoolkit-forecasting\Data/Plots\constant_zero
Plotting id: Other_respiratory_conditions as 2 of 31
find_date, date_col found: ['DateOfAccident']
sliding_line_plot: plotting Sum of paiddaysvalue ICD10Description Other_respiratory_conditions
Creation of the directory failed or already present c:\Users\mabellani\OneDrive - Microsoft\Documents\REPOSITORY\dstoolkit-forecasting\Data/Plots\constant_zero
Plotting id: Contusion_of_elbow as 3 of 31
find_date, date_col found: ['DateOfAccident']
sliding_line_plot: plotting Sum of paiddaysvalue ICD10Description Contusion_of_elbow
Creation of the directory failed or already present c:\Users\mabellani\OneDrive - Microsoft\Documents\RE

## Saving

In [61]:
# Write the profiling dictionary to a binary pickle file in the configured
# output folder. The context manager closes the file even if pickle.dump raises.
with open(os.path.join(root, cfg_path.data_dir.output_path, 'dict_profiling.pkl'), "wb") as f:
    pickle.dump(dict_profiling, f)