In [1]:
"""
Get the Lightly API token and save it here.
For the full docs see https://docs.lightly.ai/docs/install-lightly#api-token
"""

lightly_token = "CHANGE_ME"

import os
os.environ["LIGHTLY_TOKEN"] = lightly_token

!echo "LIGHTLY_TOKEN =" $LIGHTLY_TOKEN

LIGHTLY_TOKEN = CHANGE_ME


In [2]:
"""
Set the path to the dataset.
Here we use the clothing-small dataset and download it. It has about 4k images.
"""

from pathlib import Path
dataset_path = Path("./dataset_clothing").absolute().as_posix()

os.environ["DATASET_PATH"] = dataset_path
!echo "DATASET_PATH =" $DATASET_PATH

!git clone https://github.com/alexeygrigorev/clothing-dataset-small.git $DATASET_PATH

DATASET_PATH = /absolute/path/to/dataset_clothing
Cloning into '/absolute/path/to/dataset_clothing'...
remote: Enumerating objects: 3839, done.[K
remote: Counting objects: 100% (400/400), done.[K
remote: Compressing objects: 100% (400/400), done.[K
remote: Total 3839 (delta 9), reused 385 (delta 0), pack-reused 3439[K
Receiving objects: 100% (3839/3839), 100.58 MiB | 22.72 MiB/s, done.
Resolving deltas: 100% (10/10), done.
Updating files: 100% (3783/3783), done.


In [3]:
"""
Install the Docker Engine.
The commands below use apt-get and sudo, so they assume a Debian/Ubuntu-style Linux host.
For other operating systems see https://docs.docker.com/engine/install/
"""
import subprocess

def is_nvidia_gpu_available():
    """Report whether the `nvidia-smi` command runs successfully on this machine.

    Returns:
        bool: True when `nvidia-smi` exits with status 0. False when it exits
        with a non-zero status or cannot be started at all (for example
        because the executable is not installed).
    """
    try:
        # Only the exit status matters, so the command's output is discarded.
        subprocess.run(
            ["nvidia-smi"],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: the tool ran but reported a failure.
        # OSError (e.g. FileNotFoundError): the tool is missing or not
        # executable, which is the normal situation on a machine without
        # NVIDIA drivers and must not crash the cell.
        return False
    return True

# Pick the install path depending on whether `nvidia-smi` ran successfully.
if is_nvidia_gpu_available():
    # Commands to run if NVIDIA GPU is available
    print("NVIDIA GPU detected. Running Docker install instructions for NVIDIA GPU.")
    # Register the libnvidia-container apt source: store its GPG key as a keyring
    # file, then download the distribution-specific source list, rewrite each
    # `deb https://` entry to reference that keyring via `signed-by`, and write
    # the result to /etc/apt/sources.list.d/.
    !distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
        && curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
        && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | \
                sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
                sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
    # Refresh the package index and install the nvidia-docker2 package.
    !sudo apt-get update
    !sudo apt-get install -y nvidia-docker2
    # Restart the Docker service (presumably so it picks up the newly installed
    # NVIDIA runtime - TODO confirm).
    !sudo systemctl restart docker
    # Smoke test: run `nvidia-smi` inside a CUDA base container with all GPUs exposed.
    !sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base-ubuntu20.04 nvidia-smi
else:
    # Commands to run if no NVIDIA GPU is detected
    print("No NVIDIA GPU detected. Running standard Docker install instructions.")
    # NOTE(review): this branch neither configures a Docker apt repository nor runs
    # `apt-get update`; it assumes apt can already resolve the docker-ce packages - confirm.
    !sudo apt-get install -y docker-ce docker-ce-cli containerd.io
    !sudo systemctl restart docker
    # Smoke test: run the hello-world image.
    !sudo docker run --rm hello-world



No NVIDIA GPU detected. Running standard Docker install instructions.
E: Conflicting values set for option Signed-By regarding source https://nvidia.github.io/libnvidia-container/stable/ubuntu18.04/amd64/ /: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg != 
E: The list of sources could not be read.
E: Conflicting values set for option Signed-By regarding source https://nvidia.github.io/libnvidia-container/stable/ubuntu18.04/amd64/ /: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg != 
E: The list of sources could not be read.

Hello from Docker!
This message shows that your installation appears to be working correctly.

To generate this message, Docker took the following steps:
 1. The Docker client contacted the Docker daemon.
 2. The Docker daemon pulled the "hello-world" image from the Docker Hub.
    (amd64)
 3. The Docker daemon created a new container from that image which runs the
    executable that produces the output you are currently reading.
 4. The Dock

In [4]:
""" Install the Lightly worker and do a quick sanity check. """
# Download the `latest` tag of the Lightly Worker image.
!docker pull lightly/worker:latest
# Start the worker once with `sanity_check=True`. `--shm-size` sets the container's
# shared memory to 1024m and `--rm` removes the container after it exits.
!docker run --shm-size="1024m" --rm -it lightly/worker:latest sanity_check=True

latest: Pulling from lightly/worker
Digest: sha256:40178e6ae8c52e7becb1455a588cd8491e55449a45d0b5455b4ddc158dfa9aa1
Status: Image is up to date for lightly/worker:latest
docker.io/lightly/worker:latest
[2023-12-19 15:25:30] Lightly Worker Solution v2.10.1[0m
[2023-12-19 15:25:30] Congratulations! It looks like the Lightly container is running![0m


In [5]:
""" Install the Lightly Python SDK. """
!pip3 install lightly


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [6]:
""" Register the Lightly Worker. """

from lightly.api import ApiWorkflowClient

client = ApiWorkflowClient(token=lightly_token)

# Create a Lightly Worker. If a worker with this name already exists, the id of the existing
# worker is returned.
worker_id = client.register_compute_worker(name="clothing-worker")

os.environ["LIGHTLY_WORKER_ID"] = worker_id
!echo "LIGHTLY_WORKER_ID =" $LIGHTLY_WORKER_ID

LIGHTLY_WORKER_ID = 65806b455ca68c93b29ad6b3


In [7]:

""" Create a dataset in the Lightly platform and configure the datasource. """

from lightly.api import ApiWorkflowClient
from lightly.openapi_generated.swagger_client import DatasetType
from lightly.openapi_generated.swagger_client import DatasourcePurpose

# Create the Lightly client to connect to the API.
client = ApiWorkflowClient(token=lightly_token)

# Create the dataset on the Lightly Platform. When the cell is re-run the
# dataset already exists, so the existing one is selected instead.
try:
    client.create_dataset(
        dataset_name="clothing-small",
        dataset_type=DatasetType.IMAGES,
    )
except ValueError:
    # `create_dataset` signals an already-used name with ValueError. Only that
    # case is handled here; any other failure (invalid token, network error,
    # keyboard interrupt) propagates instead of being silently swallowed.
    client.set_dataset_id_by_name("clothing-small")

# Configure the datasource.
client.set_local_config(
    relative_path="validation",  # Relative path in the input mount folder (DATASET_PATH)
    purpose=DatasourcePurpose.INPUT,
)
client.set_local_config(
    relative_path="",  # Relative path in the lightly mount folder (DATASET_PATH)
    purpose=DatasourcePurpose.LIGHTLY,
)

In [8]:
""" Schedule a run on the dataset to select 50 samples. """

# Single selection strategy: DIVERSITY applied to EMBEDDINGS input.
diversity_on_embeddings = {
    "input": {"type": "EMBEDDINGS"},
    "strategy": {"type": "DIVERSITY"},
}

# Queue the run with the client created in the previous cell.
# `shutdown_when_job_finished` is enabled, presumably so the worker exits once
# this run is done (verify against the Lightly Worker docs).
scheduled_run_id = client.schedule_compute_worker_run(
    worker_config={"shutdown_when_job_finished": True},
    selection_config={"n_samples": 50, "strategies": [diversity_on_embeddings]},
)

print(f"{scheduled_run_id=}")

scheduled_run_id='6581b5fb8fd8c19dd9f6f543'


In [9]:
"""
Run the Lightly Worker to process the run. It mounts the dataset defined earlier.
Ensure that all 3 environment variables are set correctly.
"""

# Print the three environment variables that the `docker run` commands below rely on.
!echo "LIGHTLY_TOKEN=" $LIGHTLY_TOKEN
!echo "DATASET_PATH=" $DATASET_PATH
!echo "LIGHTLY_WORKER_ID=" $LIGHTLY_WORKER_ID

# See if there is another running Lightly Worker that might pick up the job instead.
!docker ps

# Both branches start the same container. The dataset folder is mounted three times:
# read-only at /input_mount, and writable at /lightly_mount and /home/output_dir.
# The token and worker id are passed into the container as environment variables.
if is_nvidia_gpu_available():
  # `--gpus all` exposes all GPUs to the container.
  !docker run --shm-size="1024m" --gpus all --rm -it \
    -v $DATASET_PATH:/input_mount:ro \
    -v $DATASET_PATH:/lightly_mount \
    -v $DATASET_PATH:/home/output_dir \
    -e LIGHTLY_TOKEN=$LIGHTLY_TOKEN \
    -e LIGHTLY_WORKER_ID=$LIGHTLY_WORKER_ID \
    lightly/worker:latest
else:
  # Same command, but without `--gpus all` flag.
  !docker run --shm-size="1024m" --rm -it \
    -v $DATASET_PATH:/input_mount:ro \
    -v $DATASET_PATH:/lightly_mount \
    -v $DATASET_PATH:/home/output_dir \
    -e LIGHTLY_TOKEN=$LIGHTLY_TOKEN \
    -e LIGHTLY_WORKER_ID=$LIGHTLY_WORKER_ID \
    lightly/worker:latest


LIGHTLY_TOKEN= CHANGE_ME
DATASET_PATH= /absolute/path/to/dataset_clothing
LIGHTLY_WORKER_ID= 65806b455ca68c93b29ad6b3
CONTAINER ID   IMAGE     COMMAND   CREATED   STATUS    PORTS     NAMES
[2023-12-19 15:25:57] Lightly Worker Solution v2.10.1[0m
[2023-12-19 15:25:58] You are using docker build: Mon Dec 11 17:10:10 UTC 2023.[0m
[2023-12-19 15:25:58] Starting worker with id '65806b455ca68c93b29ad6b3'...[0m
[93m[2023-12-19 15:25:58] Worker 2.10.1 can only process jobs scheduled with Lightly Python client 1.4 or higher.[0m
[2023-12-19 15:25:58] Worker with labels '[]' started. Waiting for jobs...[0m
[2023-12-19 15:25:58] Found 1 open jobs.[0m
[2023-12-19 15:25:59] Started job with job_id '6581b5fb8fd8c19dd9f6f543'.[0m
[2023-12-19 15:26:00] Configs:

 token: null
worker:
  worker_id: 65806b455ca68c93b29ad6b3
  force_start: true
dataset_id: null
sanity_check: false
relevant_filenames_file: ''
corruptness_check:
  corruption_threshold: 0.1
checkpoint: ''
checkpoint_run_id: ''
embeddin