# Demo: loading the binary classifier models that predict the success of an LP attempt

### Loading the binary_models.yaml file

In [None]:
from os import truncate
import sys
import pandas as pd
from pathlib import Path
import pandas as pd
import joblib
from pathlib import Path
import os
import yaml
import h3

# Put the `lib` folder that sits next to the current working directory's parent
# on the import path, then pull in the shared service-level helpers from it.
path_root = Path(os.path.abspath('')).parent / 'lib'
sys.path.append(str(path_root))
from ds_shared.service_level import unify_service_levels, lp_sla_time_in_minutes

# Folder holding the serialized models and their YAML manifest, resolved
# relative to the parent of the current working directory.
MODEL_FOLDER = Path().resolve().parents[0] / 'output_models'

# Manifest that describes the binary models
yaml_file_path = MODEL_FOLDER / "binary_models.yaml"

# Read the manifest with an explicit encoding so that parsing does not depend
# on the platform's default locale encoding.
with open(yaml_file_path, 'r', encoding='utf-8') as file:
    yaml_data = yaml.safe_load(file)

# H3 resolution passed to `h3.latlng_to_cell` when building the `h3_zone` feature
best_resolution = yaml_data['geospatial_resolution']

# Column selection for each model, keyed by model number (manifest keys
# `set0` .. `set3`); a missing key raises KeyError here rather than later.
predictors = {}
for model_num in range(4):
    predictors[model_num] = yaml_data['feature_sets'][f'set{model_num}']

### Loading all the models

In [None]:
# One serialized model is loaded for every entry in `predictors`.
n_models = len(predictors)
all_models = {}

# Model files are named `model_set<i>.joblib` and stored in MODEL_FOLDER.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
for i in range(n_models):
    fname = f'model_set{i}.joblib'
    model_path = MODEL_FOLDER / fname
    all_models[i] = joblib.load(model_path)
    print(f"Loaded models from: {MODEL_FOLDER / fname}")

# Loading the past month's data to update driver features

In [None]:
import sys
# Both scripts are launched with the interpreter that runs this kernel
# (`sys.executable`), so they use the same Python environment as the notebook.
# NOTE(review): presumably this extracts the last 30 days of attempts into the
# `past_month` folder at the given sample rate — confirm against the script.
print("Extracting data!!")
!{sys.executable} ../data_extraction/sf-extract-attempt-data-evenly-by-sla.py --capture_days 30 --folder_name 'past_month' --sample_rate 10

# NOTE(review): presumably merges the extracted CSVs in `past_month` into the
# single file read by the next cell — confirm against the script.
print("Combining CSVs!!")
!{sys.executable} ../data_extraction/combine_csvs.py --folder_name 'past_month'

In [None]:
# Loading the pricing data
DATA = Path().resolve().parents[0] / 'past_month'
PRICING_SET = DATA / 'attempt_success_prediction_final_mi.csv'
# NOTE(review): dtype={7: str} presumably pins the column at position 7 to
# string to avoid mixed-type inference — confirm which column that is.
df_month = pd.read_csv(PRICING_SET, dtype={7: str})
# NOTE(review): the sort runs on the raw, not yet parsed `dispatchedOn` values;
# confirm they order chronologically in their stored format.
df_month = df_month.sort_values(by='dispatchedOn').reset_index(drop=True)
# keep=False removes every row whose attemptId occurs more than once
# (no representative row is kept).
df_month = df_month.drop_duplicates(subset=['attemptId'], keep=False)

# Binary target: 1 for delivered attempts, 0 otherwise. A vectorized comparison
# replaces the row-wise apply; it is faster and also well-defined on an empty frame.
df_month['attemptSuccess'] = (df_month['attemptStatus'] == 'DELIVERED').astype(int)
df_month['dispatchedOn'] = pd.to_datetime(df_month['dispatchedOn'])

df_month, all_sls = unify_service_levels(df_month)
# Including only the rows with serviceLevels in the desired modes. The explicit
# copy makes the filtered frame independent before a new column is added to it.
df_month = df_month[df_month['serviceLevel'].isin(lp_sla_time_in_minutes.keys())].copy()

# Adding the geospatial feature: one H3 cell per pickup coordinate. Building
# the column from zipped coordinates avoids creating a Series per row and
# still yields an (empty) categorical column when no rows survive the filter.
df_month['h3_zone'] = pd.Series(
    [
        h3.latlng_to_cell(lat, lon, best_resolution)
        for lat, lon in zip(df_month['fromLat'], df_month['fromLon'])
    ],
    index=df_month.index,
    dtype='category',
)

In [None]:
# Per-LP-organization totals over the loaded month: number of attempts (row
# count) and number of successful attempts (sum of the 0/1 target).
past_1mo_stats = df_month.groupby('lpOrganizationId').agg(
    past_1mo_attempts=pd.NamedAgg(column='attemptSuccess', aggfunc='size'),
    past_1mo_success=pd.NamedAgg(column='attemptSuccess', aggfunc='sum'),
)

In [None]:
# Display the column labels of the prepared frame as the cell output
df_month.columns

# Testing on sample raw data

In [None]:
# Sample test data
# The shared library path and helper are registered again in this cell.
path_root = Path(os.path.abspath('')).parent / 'lib'
sys.path.append(str(path_root))
from ds_shared.service_level import unify_service_levels

# A single hand-written attempt, expressed as one raw input row
raw_data = {
    'distanceMi': 50,
    'weightLbsTotal': 0,
    'largestDimIn': 0,
    'secondDimIn': 0,
    'thirdDimIn': 0,
    'largestWeightLbs': 0,
    'serviceLevel': '1 Minute',
    'fromLat': 32,
    'fromLon': -90,
    'lpOrganizationId': '16b6c714-f8d7-4a81-9ac2-a3938f3450c2',
}

X_test = pd.DataFrame([raw_data])
X_test, all_sls = unify_service_levels(X_test)

# Engineered features: the H3 cell of the pickup point plus the organization's
# attempt and success counts from the past month (0 when the organization is
# absent from `past_1mo_stats`).
X_test['h3_zone'] = X_test.apply(
    lambda row: h3.latlng_to_cell(row.fromLat, row.fromLon, best_resolution),
    axis=1,
).astype('category')
X_test['past_1mo_total_attempts'] = X_test.apply(
    lambda row: past_1mo_stats.past_1mo_attempts.get(row.lpOrganizationId, 0),
    axis=1,
)
X_test['past_1mo_successes'] = X_test.apply(
    lambda row: past_1mo_stats.past_1mo_success.get(row.lpOrganizationId, 0),
    axis=1,
)

# Model choice depends on two facts about the input. Only the row labelled 0
# is inspected, so this must be adapted before scoring several rows at once.
known_service_level = bool(
    X_test['serviceLevel'].isin(lp_sla_time_in_minutes.keys()).loc[0]
)
has_extra_dims = ('secondDimIn' in X_test.columns) and ('thirdDimIn' in X_test.columns)

# (service level is known, second/third dimensions are present) -> model number
model_by_case = {
    (True, True): 3,
    (True, False): 1,
    (False, True): 2,
    (False, False): 0,
}
model_num = model_by_case[(known_service_level, has_extra_dims)]

prediction = all_models[model_num].predict(X_test[predictors[model_num]])
print('For teh above set of features using model#{} and its prediction is {}'.format(model_num, prediction))