# Import packages

In [None]:
from tarandm_analytics.export_predictive_model.create_predictive_model import ExportPredictiveModel
from tarandm_analytics.attribute_evaluator.evaluate_attributes import EvaluateAttributes

# Prepare dataset
Use the attribute evaluator to prepare the dataset.

In [None]:
# Client object for the attribute evaluator, pointed at an endpoint on
# localhost port 8086. username/password are blank here.
# NOTE(review): presumably real credentials must be filled in before running —
# confirm whether the endpoint accepts empty ones.
ea = EvaluateAttributes(
    endpoint_url="http://127.0.0.1:8086",
    username="",
    password=""
)

In [None]:
# Ask the client for the evaluation progress. No evaluation has been started in
# this notebook yet at this point; presumably this reports the state of any
# earlier run — confirm against the client's documentation.
ea.check_evaluation_progress()

## List available attribute classes

In [None]:
# Fetch the mapping of attribute class -> attributes from the evaluator and
# print it as an indented listing (one class per header line, one attribute
# per tab-indented line).
attribute_classes = ea.get_attribute_classes()

print('Available attribute classes:\n')
for class_name, class_attributes in attribute_classes.items():
    listing = [f"{class_name}:"]
    listing.extend(f"\t{attribute_name}" for attribute_name in class_attributes)
    print("\n".join(listing))

## List available business cases

In [None]:
# Fetch the business cases known to the evaluator and print, for each one, its
# input class and the ';'-joined list of audiences.
business_cases = ea.get_business_cases()

print('Available business cases:\n')
for case_name, case_meta in business_cases.items():
    # "input_class" is optional (None is printed when absent); "audiences" is required.
    input_class = case_meta.get("input_class")
    audience = '; '.join(case_meta['audiences'])
    print(f"{case_name}:")
    print(f"\tinput_data: {input_class}")
    print(f"\taudience: {audience}")

## Evaluate attributes of selected attribute classes

In [None]:
# decision_ids = [
#     '77498d97-5f34-41dc-bd22-7c0a1b3d435f',
#     'fc24bf34-6aa8-4370-8330-a25f517a1b4c',
# ]

# Fully-qualified names of the attribute classes to evaluate.
selected_attribute_classes = [
    "repository.strategies.attributes.attributes_complex_strategy.AttributesComplexStrategyCBData",
    "repository.strategies.attributes.attributes_complex_strategy.AttributesComplexStrategyInputDataDummy",
]

# Run the evaluation for the given date window and business case.
# The git_user_* values below are placeholders: supply real ones at run time
# and avoid committing a token together with the notebook.
ea.evaluate(
    date_from="2023-01-01",
    date_to="2023-08-07",
    attribute_classes=selected_attribute_classes,
    input_data_class="repository.interface.interface.InputDataDummy",
    business_case="Complex integration test",
    repository="https://gitlab.develop.demo.tarandm.com/tarandm/strategies.git",
    git_user_name="",
    git_user_email="...",
    git_user_token="...",
)

## Check data in DB
Evaluated attributes can be checked directly in the DB - the following cell generates an SQL command to get the data.

In [None]:
# Print the query text produced by the client so it can be run against the DB by hand.
print(ea.generate_query_to_extract_attributes())

## Fetch dataset from DB

In [None]:
# Show the 'id' entry of ea.last_attribute_extractor_id; the same value is
# passed to fetch_data_from_db in the next cell.
ea.last_attribute_extractor_id['id']

In [None]:
# Load the evaluated attributes for the last extractor run from the DB and
# display the result (presumably a pandas DataFrame — confirm).
extractor_id = ea.last_attribute_extractor_id['id']
df = ea.fetch_data_from_db(attribute_extractor_id=extractor_id)
df

In [None]:
# Same progress check as earlier in the notebook, repeated after the evaluation
# has been started and the data fetched.
ea.check_evaluation_progress()

In [None]:
# A notebook cell renders only its last expression, so two bare expressions on
# separate lines would silently drop the first one. Combine them into a single
# tuple so both the last extractor id and the full list of ids are displayed.
ea.last_attribute_extractor_id, ea.attribute_extractor_ids

# Create model

In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import pandas as pd
import xgboost as xgb

## Prepare dataset
For the model export we will create an artificial dataset. The dataset from the attribute evaluator is kept small to achieve a fast response.

In [None]:
# Generate a seeded synthetic binary-classification problem: 50 000 rows and
# 10 features, of which 3 are informative and 1 is redundant; flip_y=0.2 adds
# label noise.
data, target = make_classification(
    n_samples=50000,
    n_features=10,
    n_informative=3,
    n_redundant=1,
    flip_y=0.2,
    random_state=12,
)

In [None]:
# Column names given to the 10 synthetic features.
predictors = [
    'age',
    'avg_monthly_income',
    'max_days_past_due',
    'cnt_rejected',
    'selected_amount',
    'total_debt',
    'debt_to_income',
    'cb_score',
    'social_circle_score',
    'telco_score',
]

# Wrap the raw feature matrix in a DataFrame and append the label as 'target'.
data = pd.DataFrame(data, columns=predictors).assign(target=target)

In [None]:
# Preview the first rows of the assembled DataFrame (predictors + target).
data.head()

## Split data

In [None]:
# Split the dataset 60 % / 20 % / 20 % into train / test / valid, stratified on
# the target so each part keeps the same class balance.
# A fixed seed keeps the split reproducible, in line with the seeded
# make_classification call that produced the data.
split_seed = 12
data_train, data_rest = train_test_split(
    data, test_size=0.4, stratify=data[['target']], random_state=split_seed
)
data_test, data_valid = train_test_split(
    data_rest, test_size=0.5, stratify=data_rest[['target']], random_state=split_seed
)

# Tag every row with the sample it belongs to. assign() returns a new frame,
# which avoids pandas' SettingWithCopyWarning that in-place column assignment
# on the frames returned by train_test_split can trigger.
data_train = data_train.assign(sample='train')
data_test = data_test.assign(sample='test')
data_valid = data_valid.assign(sample='valid')

# Re-assemble one frame; the 'sample' column now identifies the split.
data = pd.concat([data_train, data_test, data_valid])

# Boolean row masks used by the training and scoring cells.
train_mask = data['sample'] == 'train'
test_mask = data['sample'] == 'test'
valid_mask = data['sample'] == 'valid'

## Train model

### XGBoost

In [None]:
# Booster hyper-parameters: binary logistic objective, AUC as the evaluation
# metric, shallow trees and a learning rate of 0.3.
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',

    'max_depth': 3,
    'eta': 0.3
}

# Build each DMatrix once; the training matrix is reused both for fitting and
# for reporting the train metric.
dtrain = xgb.DMatrix(data[train_mask][predictors], data[train_mask]["target"])
dvalid = xgb.DMatrix(data[valid_mask][predictors], data[valid_mask]["target"])
dtest = xgb.DMatrix(data[test_mask][predictors], data[test_mask]["target"])

# Filled by xgb.train with the per-round metric of every evaluation set.
evals_result = {}
booster = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=1000,
    # xgboost monitors the LAST entry of `evals` for early stopping, so the
    # validation set must come last; otherwise the stopping point would be
    # chosen on the test set and the test metric would no longer be unbiased.
    evals=[
        (dtrain, "train"),
        (dtest, "test"),
        (dvalid, "valid"),
    ],
    early_stopping_rounds=10,
    evals_result=evals_result
)

In [None]:
# Score every row of the dataset (all samples, not only train) with the trained
# booster and store the prediction in the 'predicted_pd' column.
data['predicted_pd'] = booster.predict(xgb.DMatrix(data[predictors]))

In [None]:
import shap
import matplotlib.pyplot as plt

# Local aliases for the trained booster and the list of model inputs.
model = booster
attributes = predictors

# Compute SHAP values for the whole dataset with the tree explainer and draw
# the summary plot (top 15 features). show=False leaves the figure open so it
# can still be adjusted afterwards.
explainer = shap.TreeExplainer(model)
feature_frame = data[attributes]
shap_values = explainer.shap_values(feature_frame)
shap.summary_plot(shap_values, feature_frame, max_display=15, show=False)

# ax = plt.gca()
# if ax.get_legend():
#     ax.get_legend().remove()

### Neural Network
Testing ONNX export for a neural network implementation.

In [None]:
# import torch
# from torch import nn, tensor
# import math

In [None]:
# predictors

In [None]:
# N_INPUTS = len(predictors)
# device = "cpu"

# class NeuralNetwork(nn.Module):    
#     def __init__(self):
#         super().__init__()
#         self.nn_architecture = nn.Sequential(
#             nn.Linear(N_INPUTS, 20),
#             nn.ReLU(),
#             nn.Linear(20, 20),
#             nn.ReLU(),
#             nn.Linear(20, 1),
#             nn.Sigmoid()
#         )
    
#     def forward(self, x):
#         return self.nn_architecture(x)

In [None]:
# model = NeuralNetwork().to(device)

# loss_fn = nn.BCELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# def train(data, model, loss_fn, optimizer, batch_size):
#     size = len(data)
    
#     model.train()
#     for i in range(0, math.ceil(size / batch_size)):
#         batch_x = tensor(data.iloc[i*batch_size : (i+1)*batch_size][predictors].values, dtype=torch.float)
#         batch_y = tensor(data.iloc[i*batch_size : (i+1)*batch_size][['target']].values, dtype=torch.float)
        
#         X, y = batch_x.to(device), batch_y.to(device)

#         # Compute prediction error
#         pred = model(X)
#         loss = loss_fn(pred, y)

#         # Backpropagation
#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()

#         if i % 100 == 0:
#             loss, current = loss.item(), (i + 1) * len(X)
#             print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            
# # def test(data, model, loss_fn):
# #     size = len(data)
# #     # num_batches = len(dataloader)
# #     model.eval()
# #     test_loss, correct = 0, 0
# #     with torch.no_grad():
# #         for X, y in dataloader:
# #             X, y = X.to(device), y.to(device)
# #             pred = model(X)
# #             test_loss += loss_fn(pred, y).item()
# #             correct += (pred.argmax(1) == y).type(torch.float).sum().item()
# #     test_loss /= num_batches
# #     correct /= size
# #     print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# epochs = 2
# for t in range(epochs):
#     print(f"Epoch {t+1}\n-------------------------------")
#     train(data, model, loss_fn, optimizer, batch_size=64)
#     # test(test_dataloader, model, loss_fn)
# print("Done!")

In [None]:
# torch.onnx.export(
#     model, tensor(data[predictors].values, dtype=torch.float), "test.onnx", verbose=True, 
#     # input_names=predictors, output_names=['target']
# )

In [None]:
# torch.onnx.dynamo_export(model, tensor(data[predictors].values, dtype=torch.float))

In [None]:
# torch.__version__

# Export model

In [None]:
# Client object for the predictive-model export, pointed at the same localhost
# endpoint (port 8086) as the attribute evaluator client. username/password are
# blank here.
# NOTE(review): presumably real credentials must be filled in before running — confirm.
epm = ExportPredictiveModel(
    endpoint_url="http://127.0.0.1:8086",
    username="",
    password=""
)

In [None]:
# Derive monitoring data from the scored dataset: 'target' is the label column,
# `predictors` are the model inputs and 'predicted_pd' is the model output
# column. The result is handed to prepare_predictive_model_data in the next cell.
monitoring_data = epm.get_monitoring_data(
    data=data,
    label_name="target",
    attributes=predictors,
    model_output_name="predicted_pd"
)

In [None]:
# Human-readable descriptions of predictors; only 'age' is described here.
pred_descr = {
    "age": "Client's age."
}

# Assemble everything needed to build the model package: the trained booster,
# its inputs and hyper-parameters, the monitoring data, the scored dataset and
# the learning curves recorded during training. Returns the request payload and
# the accompanying images, both consumed by build_predictive_model and
# create_merge_request later in the notebook.
# NOTE(review): no 'date_decision' column is created anywhere in the visible
# notebook — confirm that a missing date column is tolerated, or add one.
# NOTE(review): target_class is passed as the string "1" while the generated
# target is numeric — confirm this is the expected form.
request_data, images = epm.prepare_predictive_model_data(
    model_name="tarandm_xgboost",
    model=booster,
    attributes=predictors,
    label_name="target",
    target_class="1",
    hyperparameters=params,
    monitoring_data=monitoring_data,    
    attribute_description=pred_descr,
    data=data,
    column_name_sample="sample",
    column_name_date="date_decision",
    column_name_prediction="predicted_pd",
    evaluate_performance={"target": ["AUC"]},
    learning_curves_data=evals_result
)

In [None]:
# Build the model package from the prepared request data and images; `filename`
# names the resulting zip archive (presumably written locally — confirm).
epm.build_predictive_model(request_data=request_data, images=images, filename="tarandm_xgboost.zip")

# Commit model and create merge request

In [None]:
from tarandm_analytics.export_predictive_model.upload_model_to_gitlab import create_merge_request
import getpass

In [None]:
# user setup
# --- where the model is pushed ---
gitlab_url = "https://gitlab.develop.demo.tarandm.com"
repository_url = "https://gitlab.develop.demo.tarandm.com/tarandm/strategies.git"
environment = "develop"
# Directory name used for the local working copy of the repository.
cloned_repo_dir = "tarandm_strategies_working_copy"

# --- who pushes it ---
# NOTE(review): hard-coded account name — replace with your own GitLab user.
git_user_name = "kkozmik"
# The token is prompted for interactively so it never ends up in the notebook.
git_user_token = getpass.getpass(prompt='Enter your git token: ')

In [None]:
# Commit the exported model and open a merge request, using the settings from
# the user-setup cell together with the request_data/images produced by
# prepare_predictive_model_data and the export client `epm`.
create_merge_request(
    gitlab_url=gitlab_url,
    repository_url=repository_url,
    environment=environment,
    git_user_name=git_user_name,
    git_user_token=git_user_token,
    cloned_repo_dir=cloned_repo_dir,
    request_data=request_data,
    images=images,
    epm=epm,
)