[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kili-technology/automl/blob/feature/ml-345-mapper/notebooks/Tuto_Mapper.ipynb)

# Create Mapper from Kili Dataset

## Requirements

In [None]:
import os
from getpass import getpass
import numpy as np
from tqdm import tqdm

In [None]:
# Clone the feature/ml-345-mapper branch of the automl repository; the value of
# GITHUB_TOKEN is embedded in the URL as the credential.
# NOTE(review): GITHUB_TOKEN is not defined in any visible cell — it presumably has to
# exist beforehand as a notebook variable or an environment variable; confirm.
!git clone --branch feature/ml-345-mapper https://$GITHUB_TOKEN@github.com/kili-technology/automl.git

In [None]:
# Switch the notebook's working directory to the cloned repository.
%cd /content/automl
# Install the dependencies listed in requirements.txt, then the repository itself
# in editable mode (-e) from the current directory.
!pip install -r requirements.txt
!pip install -e .

In [None]:
from kili.client import Kili

In [None]:
# Prompt for the key without echoing it and store it straight in the environment.
os.environ["KILI_API_KEY"] = getpass('Kili API Key: ')

# Read the key back from the environment; both names stay available to later cells.
kili_api_key = api_key = os.environ["KILI_API_KEY"]

# Client used by every following cell that talks to Kili.
kili = Kili(api_key=api_key)

## (Optional) Create Project in Kili

In [None]:
# Project interface: one radio-button classification job with two categories,
# VEHICLE and NON_VEHICLE.
json_interface = dict(
    jobRendererWidth=0.2,
    jobs=dict(
        CLASSIFICATION_JOB=dict(
            mlTask="CLASSIFICATION",
            content=dict(
                # Category key -> display name shown for that category.
                categories={
                    category_key: {"name": display_name}
                    for category_key, display_name in (
                        ("VEHICLE", "vehicle"),
                        ("NON_VEHICLE", "non vehicle"),
                    )
                },
                input="radio",
            ),
            required=0,
            isChild=False,
            instruction="Class of vehicle presence",
        ),
    ),
)

In [None]:
# Create an IMAGE project that uses the interface defined above.
created_project = kili.create_project(
    title="Vehicle Classification",
    description="Classify vehicle presence",
    input_type="IMAGE",
    json_interface=json_interface,
)
# Only the project's id is needed by the following cells.
project_id = created_project["id"]

Add assets

In [None]:
# 500 vehicle images; each external id is the image index itself ("0" .. "499").
vehicle_assets = [
    dict(
        externalId=str(image_index),
        content=f"https://storage.googleapis.com/kili-machine-learning-automl/notebooks/vehicle_classification/vehicles/image_{image_index}.png",
        metadata={},
    )
    for image_index in range(500)
]

# 500 non-vehicle images; their external ids continue right after the vehicle ones,
# while the image index in the URL restarts at 0.
non_vehicle_assets = [
    dict(
        externalId=str(external_id),
        content=f"https://storage.googleapis.com/kili-machine-learning-automl/notebooks/vehicle_classification/non-vehicles/image_{image_index}.png",
        metadata={},
    )
    for external_id, image_index in enumerate(range(500), start=len(vehicle_assets))
]

# Vehicles first, then non-vehicles.
assets_to_import = [*vehicle_assets, *non_vehicle_assets]

In [None]:
# Split the asset dicts into three parallel lists, one per field.
external_id_array, content_array, json_metadata_array = (
    [asset.get(field) for asset in assets_to_import]
    for field in ("externalId", "content", "metadata")
)

# Upload every asset to the project in a single call.
kili.append_many_to_dataset(
    project_id=project_id,
    content_array=content_array,
    external_id_array=external_id_array,
    json_metadata_array=json_metadata_array,
)

Add labels

In [None]:
# Fetch the id and external id of up to 1000 assets of the project.
asset_ids = kili.assets(project_id=project_id, fields=["id", "externalId"], first=1000)

for asset_id in tqdm(asset_ids):
    external_id = int(asset_id["externalId"])
    # External ids below 500 were given to the vehicle images, the rest to non-vehicles.
    category_name = "VEHICLE" if external_id < 500 else "NON_VEHICLE"
    kili.append_to_labels(
        label_asset_id=asset_id["id"],
        json_response={
            "CLASSIFICATION_JOB": {
                "categories": [{"name": category_name}]
            }
        },
    )

## (Optional) Train a model with KiliAutoML and Create predictions

In [None]:
# Run the kiliautoml "train" command on the project for 10 epochs at verbosity 4,
# with the current working directory placed on PYTHONPATH.
!PYTHONPATH=$(pwd) kiliautoml train --project-id $project_id  --epochs 10 --verbose 4

In [None]:
from commands.predict import predict_one_job
from kiliautoml.utils.helpers import get_project, get_assets

In [None]:
# Settings forwarded as-is to predict_one_job below.
api_endpoint = "https://cloud.kili-technology.com/api/label/v2/graphql"
from_model = None
batch_size = 10
model_repository = None
model_name = None
model_framework = "pytorch"
from_project = None
clear_dataset_cache = False

input_type, jobs, _ = get_project(kili, project_id)

# Show every job found in the project.
job_items = list(jobs.items())
for listed_job_name, _listed_job in job_items:
    print(listed_job_name)

# Fail with a clear message instead of an unbound-name error further down.
if not job_items:
    raise ValueError(f"Project {project_id} has no job to run predictions on.")

# Predictions are computed for one job only: the last one in the project's jobs.
job_name, job = job_items[-1]

content_input = job.get("content", {}).get("input")
ml_task = job.get("mlTask")
tools = job.get("tools")

# Fetch the project's assets with no status filter and no shuffling.
assets = get_assets(
    kili,
    project_id,
    status_in=None,
    randomize=False,
)

# Run prediction for the selected job; the result is used by later cells
# (its predictions_probability attribute is saved to CSV).
job_predictions = predict_one_job(
    api_key=api_key,
    api_endpoint=api_endpoint,
    project_id=project_id,
    from_model=from_model,
    verbose=4,
    job=job,
    input_type=input_type,
    assets=assets,
    batch_size=batch_size,
    job_name=job_name,
    content_input=content_input,
    model_repository=model_repository,
    model_name=model_name,
    model_framework=model_framework,
    from_project=from_project,
    ml_task=ml_task,
    tools=tools,
    clear_dataset_cache=clear_dataset_cache,
)

In [None]:
# Per-project data directory of CLASSIFICATION_JOB under the kili automl cache.
# NOTE(review): presumably filled by the training/prediction cells above — confirm.
data_path = '/root/.cache/kili/automl/' + project_id + '/CLASSIFICATION_JOB/torchvision/data'
# Copy that directory to /content/assets, the path later passed as --assets-repository.
!cp -R $data_path '/content/assets'

In [None]:
# Write the prediction probabilities to a comma-separated file for the mapper.
np.savetxt(
    fname='/content/predictions.csv',
    X=job_predictions.predictions_probability,
    delimiter=",",
)

In [None]:
# Go back to /content; the mapper command at the end of the notebook uses the
# relative path automl/mapper.py.
%cd /content

## Create Mapper with AutoML

Mapper arguments:

--api-endpoint, --api-key, --project-id , --target-job, --max-assets,

--assets-repository : Required, where to store downloaded assets.

--asset-status-in: if this is None, or if it includes TODO or ONGOING, mapper.py will not use labels in the asset assignment.

--cv-folds: number of cross-validation folds used to compute predictions. Only used when --asset-status-in is not None and includes neither TODO nor ONGOING.

--focus-class: only use assets whose labels or predictions are in focus-class.

In [None]:
# Run the mapper script on the project, giving it the copied assets directory
# and the predictions CSV saved earlier in the notebook.
!python automl/mapper.py --project-id $project_id  --assets-repository /content/assets --predictions-path /content/predictions.csv