In [None]:
# Import the BigML client class from the bigml Python bindings
from bigml.api import BigML

In [None]:
# Open a BigML connection; resources created through `api` are attached to
# the project given below.
# NOTE(review): no credentials are passed explicitly here; presumably the
# bindings read them from the environment (e.g. BIGML_USERNAME /
# BIGML_API_KEY) — confirm before running on a new machine.
api = BigML(project='project/5d94a32e42129f2e16000232')

In [None]:
# Upload the train and test CSV files as BigML sources.
source_train = api.create_source('storage/source_dataset_train_full.csv')
source_test = api.create_source('storage/source_dataset_test.csv')

# Wait for BOTH sources to finish (previously only the test source was
# awaited) and stop right here if either one failed, rather than letting a
# later cell fail with a less obvious error.
for _label, _source in (("train", source_train), ("test", source_test)):
    if not api.ok(_source):
        raise RuntimeError(f"BigML {_label} source was not created successfully")

In [None]:
# --- TRAIN: build the full dataset from the train source, then split it ---
dataset_train_full = api.create_dataset(source_train, {"name": "Dataset Train Full"})

# Both split datasets are sampled from the full train dataset with the same
# rate and seed; the second one additionally sets "out_of_bag": True.
# NOTE(review): presumably this yields the complementary 20% of the rows —
# confirm against the BigML dataset sampling documentation.
_split_args = {"sample_rate": 0.8, "seed": "my seed"}
dataset_train_train = api.create_dataset(
    dataset_train_full,
    {"name": "Dataset Train Train", **_split_args},
)
dataset_train_test = api.create_dataset(
    dataset_train_full,
    {"name": "Dataset Train Test", **_split_args, "out_of_bag": True},
)

# --- TEST: build the dataset from the test source and wait for it ---
dataset_test = api.create_dataset(source_test, {"name": "Dataset Test"})
api.ok(dataset_test)

In [None]:
# Train two ensembles on the same objective field: one on the
# "Dataset Train Train" split and one on the full train dataset.
_objective = "SeriousDlqin2yrs"
ensemble = api.create_ensemble(
    dataset_train_train,
    {"objective_field": _objective, "name": "Ensemble"},
)
ensemble_full = api.create_ensemble(
    dataset_train_full,
    {"objective_field": _objective, "name": "Ensemble Full"},
)

In [None]:
# Run a batch prediction on the TEST dataset with the ensemble trained on
# the full train dataset.
_test_batch_args = {
    "name": "Batch Prediction",
    "output_fields": ["Id"],
    "probabilities": True,
}
batch_prediction = api.create_batch_prediction(
    ensemble_full,
    dataset_test,
    _test_batch_args,
)
# Wait for the batch prediction and show whether it finished correctly
api.ok(batch_prediction)

In [None]:
# Run a batch prediction on the validation part of the train data
# ("Dataset Train Test") with the ensemble trained on the train split,
# requesting all fields in the output.
_validation_batch_args = {
    "name": "Batch Prediction All Fields",
    "prediction_name": "SeriousDlqin2yrs_Predic",
    "all_fields": True,
    "probabilities": True,
}
batch_prediction_all_fields = api.create_batch_prediction(
    ensemble,
    dataset_train_test,
    _validation_batch_args,
)
# Wait for the batch prediction and show whether it finished correctly
api.ok(batch_prediction_all_fields)

In [None]:
# Evaluate the split-trained ensemble on the "Dataset Train Test" dataset;
# the ROC AUC is read from this evaluation's result.
evaluation = api.create_evaluation(ensemble, dataset_train_test)
api.ok(evaluation)

In [None]:
# Evaluate the ensemble trained on the full train dataset against the
# "Dataset Train Test" dataset; the ROC AUC is read from its result.
# NOTE(review): ensemble_full was created from dataset_train_full, and
# dataset_train_test is itself sampled from dataset_train_full, so the rows
# evaluated here were also used for training. This AUC is therefore likely
# optimistic and not comparable to the one from `evaluation` — confirm intent.
evaluation_full = api.create_evaluation(ensemble_full, dataset_train_test)
api.ok(evaluation_full)

In [None]:
# Display the average ROC AUC stored in each evaluation result.
# Each value gets its own name and its own label so the two printed lines can
# be told apart (both were previously printed as " AUC = ").
auc_split = evaluation['object']['result']['model']['average_area_under_roc_curve']
print(f" AUC (Ensemble) = {auc_split}")

auc_full = evaluation_full['object']['result']['model']['average_area_under_roc_curve']
print(f" AUC (Ensemble Full) = {auc_full}")

# Keep the historical name bound to the same final value as before, in case
# another cell still reads it.
AUC = auc_full

In [None]:
# Download the batch prediction CSVs, then the dataset CSVs, and finally
# export the evaluation as JSON. Order and target paths are unchanged.
for _batch, _path in (
    (batch_prediction, 'storage/batch_prediction.csv'),
    (batch_prediction_all_fields, 'storage/batch_prediction_all_fields.csv'),
):
    api.download_batch_prediction(_batch, filename=_path)

for _dataset, _path in (
    (dataset_train_full, 'storage/dataset_train_full.csv'),
    (dataset_train_train, 'storage/dataset_train_train.csv'),
    (dataset_train_test, 'storage/dataset_train_test.csv'),
    (dataset_test, 'storage/dataset_test.csv'),
):
    api.download_dataset(_dataset, filename=_path)

# Kept as the last bare expression so the cell still displays its result
api.export(evaluation, filename='storage/my_evaluation.json')