# Tutorial: loading and exploring the LabelMe dataset

## Basic imports

In [3]:
import numpy as np
from pathlib import Path

In [13]:
# All paths used in this notebook are derived from the current working directory.
DIR = Path.cwd()
DIRlabelme = DIR.joinpath("..", "datasets", "labelme").resolve()
DIR_module = DIRlabelme.joinpath("labelme.py")
DIR_answers = DIRlabelme.joinpath("answers.json")
# NOTE(review): the original author warns that this is not the LabelMe
# "too hard" list -- double-check the file before relying on it.
DIR_remove = DIRlabelme.joinpath(
    "identification", "waum_0.01_yang", "too_hard_0.01.txt"
)
print(DIRlabelme)

/home/tlefort/Documents/peerannot/peerannot/datasets/labelme


## Install dataset

Run this command only once.

In [3]:
# Uncomment and run once to install the dataset described by DIR_module.
# ! peerannot install $DIR_module

# Majority vote

In [14]:
! peerannot aggregate $DIRlabelme -s MV

Running aggregation mv with options {}
Aggregated labels stored at /home/tlefort/Documents/peerannot/peerannot/datasets/labelme/labels/labels_labelme_mv.npy with shape (1000,)


# Naive Soft labelling

In [5]:
! peerannot aggregate $DIRlabelme -s NaiveSoft

Running aggregation naivesoft with options {}
Aggregated labels stored at /home/tlefort/Documents/peerannot/peerannot/datasets/labelme/labels/labels_labelme_naivesoft.npy with shape (1000, 8)


# Loading datasets

When several classes tie for the most votes, the majority vote returns one of the tied classes uniformly at random. The naive soft aggregation, in contrast, computes the accuracy on aggregation with a plain `np.argmax`, which always picks the first tied class instead of sampling among them.
This can lead to small differences in aggregation accuracy in practice.

In [5]:
from peerannot.runners.train import load_all_data

# Labels written by the majority-vote aggregation cell above.
labels_path_mv = DIRlabelme.joinpath("labels", "labels_labelme_mv.npy")
trainset, valset, testset = load_all_data(
    DIRlabelme,
    labels_path_mv,
    path_remove=None,
    labels=labels_path_mv,
    data_augmentation=False,
)

Loading datasets
Accuracy on aggregation: 77.800%


In [8]:
# Labels written by the NaiveSoft aggregation cell above.
labels_path_soft = DIRlabelme.joinpath("labels", "labels_labelme_naivesoft.npy")
trainset, valset, testset = load_all_data(
    DIRlabelme,
    labels_path_soft,
    path_remove=None,
    labels=labels_path_soft,
    data_augmentation=False,
)

Loading datasets
Accuracy on aggregation: 76.900%


# Train a network

In [6]:
! peerannot train $DIRlabelme -o labelme_mv \
            -K 8 --labels=$labels_path_mv \
            --model resnet18 --n-epochs=1 --lr=0.1 --scheduler -m 50 -m 100 \
            --scheduler --num-workers=8 --pretrained

Running the following configuration:
----------
- Data at /home/tlefort/Documents/peerannot/peerannot/datasets/labelme will be saved with prefix labelme_mv
- number of classes: 8
- labels: /home/tlefort/Documents/peerannot/peerannot/datasets/labelme/labels/labels_labelme_mv.npy
- model: resnet18
- n_epochs: 1
- lr: 0.1
- scheduler: True
- milestones: (50, 100)
- num_workers: 8
- pretrained: True
- optimizer: SGD
- metadata_path: None
- img_size: 224
- data_augmentation: False
- path_remove: None
- momentum: 0.9
- decay: 0.0005
- n_params: 3072
- lr_decay: 0.1
- batch_size: 64
----------
Loading datasets
Accuracy on aggregation: 77.800%
Train set: 1000 tasks
Test set: 1188 tasks
Validation set: 500 tasks
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Successfully loaded resnet18 with n_classes=8
Training epoch: 100%|████

In [9]:
! peerannot train $DIRlabelme -o labelme_soft \
            -K 8 --labels=$labels_path_soft \
            --model resnet18 --n-epochs=1 --lr=0.1 --scheduler -m 50 -m 100 \
            --scheduler --num-workers=8 --pretrained

Running the following configuration:
----------
- Data at /home/tlefort/Documents/peerannot/peerannot/datasets/labelme will be saved with prefix labelme_soft
- number of classes: 8
- labels: /home/tlefort/Documents/peerannot/peerannot/datasets/labelme/labels/labels_labelme_naivesoft.npy
- model: resnet18
- n_epochs: 1
- lr: 0.1
- scheduler: True
- milestones: (50, 100)
- num_workers: 8
- pretrained: True
- optimizer: SGD
- metadata_path: None
- img_size: 224
- data_augmentation: False
- path_remove: None
- momentum: 0.9
- decay: 0.0005
- n_params: 3072
- lr_decay: 0.1
- batch_size: 64
----------
Loading datasets
Accuracy on aggregation: 76.900%
Train set: 1000 tasks
Test set: 1188 tasks
Validation set: 500 tasks
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Successfully loaded resnet18 with n_classes=8
Training epoch: 

# WAUM identification

In [16]:
# Plain-string versions of the paths for the shell commands below.
# Wrapping in Path() keeps this cell re-runnable: once DIRlabelme has been
# rebound to a str, `DIRlabelme / "answers.json"` would raise a TypeError.
path_votes = str(Path(DIRlabelme) / "answers.json")
DIRlabelme = str(DIRlabelme)

In [17]:
# Run the WAUM identification method on the raw votes (path_votes) with a
# pretrained ResNet-18, 2 epochs, Adam optimizer.
! peerannot identify {DIRlabelme} -K 8 \
                     --labels={path_votes} --method WAUM  \
                     --model resnet18 --n-epochs 2 --lr 0.005 --optimizer=adam \
                     --maxiter-DS=100 --pretrained

Running the following configuration:
----------
- Data at /home/tlefort/Documents/peerannot/peerannot/datasets/labelme
- number of classes: 8
- labels: /home/tlefort/Documents/peerannot/peerannot/datasets/labelme/answers.json
- model: resnet18
- n_epochs: 2
- lr: 0.005
- optimizer: adam
- maxiter_ds: 100
- pretrained: True
- use_pleiss: False
- alpha: 0.01
- n_params: 3072
- momentum: 0.9
- metadata_path: None
- decay: 0.0005
- img_size: 224
- data_augmentation: False
- freeze: False
----------
Train set: 2547 tasks
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Successfully loaded resnet18 with n_classes=8
Running identification with method: WAUMstacked
Finished:  62%|███████████████████▊            | 62/100 [00:09<00:06,  6.23it/s]
epoch: 100%|██████████████████████████████████████| 2/2 [00:24<00:00, 12.00s/it]
Scores

# AUM computation

In [27]:
# Run the AUM identification method with a pretrained ResNet-34, 200 epochs, Adam.
# NOTE(review): no --labels is passed here, so the CLI falls back to its default;
# the recorded output shows an answers.json under the examples/ directory rather
# than the dataset folder -- confirm this is intended.
!peerannot identify {DIRlabelme} -K 8 --method AUM \
                    --model resnet34 \
                    --n-epochs 200 --lr=0.005 --optimizer=adam \
                    --pretrained

Running the following configuration:
----------
- Data at /home/tlefort/Documents/peerannot/peerannot/datasets/labelme
- number of classes: 8
- model: resnet34
- n_epochs: 200
- lr: 0.005
- optimizer: adam
- pretrained: True
- labels: /home/tlefort/Documents/peerannot/peerannot/examples/answers.json
- use_pleiss: False
- alpha: 0.01
- n_params: 3072
- momentum: 0.9
- metadata_path: None
- decay: 0.0005
- img_size: 224
- maxiter_ds: 50
- data_augmentation: False
----------
Train set: 1000 tasks
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Successfully loaded resnet34 with n_classes=8
Running identification with method: AUM
{'train_accuracy': [100.0], 'train_loss': [5.355819093529135e-05]}              
Saved full log at /home/tlefort/Documents/peerannot/peerannot/datasets/labelme/identification/aum/full_aum_records.csv

# CoNAL with WAUM

In [24]:
!peerannot aggregate-deep {DIRlabelme} \
            -o cifar10h_CoNAL[scale=0]_test \
            --answers={DIR_answers} --model=resnet18 --n-classes=10 --n-epochs 500 \
            --lr 0.1 --optimizer sgd --batch-size 64 --num-workers 8 --img-size=32 \
            --strategy=CoNAL[scale=0] --scheduler -m 250 -m 400 --path-remove {DIR_remove}

Running the following configuration:
----------
- Data at /home/tlefort/Documents/peerannot/peerannot/datasets/labelme will be saved with prefix cifar10h_CoNAL[scale=0]_test
- number of classes: 10
- output_name: cifar10h_CoNAL[scale=0]_test
- answers: /home/tlefort/Documents/peerannot/peerannot/datasets/labelme/answers.json
- model: resnet18
- n_classes: 10
- n_epochs: 500
- lr: 0.1
- optimizer: sgd
- batch_size: 64
- num_workers: 8
- img_size: 32
- strategy: CoNAL[scale=0]
- scheduler: True
- milestones: (250, 400)
- path_remove: /home/tlefort/Documents/peerannot/peerannot/datasets/labelme/identification/waum_stacked_0.01_yang/too_hard_0.01.txt
- dataset: /home/tlefort/Documents/peerannot/peerannot/datasets/labelme
- pretrained: False
- momentum: 0.9
- decay: 0.0005
- n_params: 3072
- lr_decay: 0.1
- data_augmentation: False
- metadata_path: None
- freeze: False
----------
Loading datasets
Using cache found in /home/tlefort/.cache/torch/hub/pytorch_vision_main
Using cache found in /h

In [25]:
! cat /home/tlefort/Documents/peerannot/peerannot/datasets/labelme/results/cifar10h_CoNAL[scale=0]_test.json

{
   "train_loss": [
      5115.41748046875,
      7743.6201171875,
      5239.8125,
      2619.397705078125,
      1581.6348876953125,
      882.5059814453125,
      621.0291748046875,
      444.9188537597656,
      351.3404235839844,
      275.0377197265625,
      282.8267822265625,
      201.20033264160156,
      168.8780517578125,
      153.2613983154297,
      159.59243774414062,
      165.41409301757812,
      166.67279052734375,
      141.35427856445312,
      131.70228576660156,
      97.05409240722656,
      89.56207275390625,
      88.33782196044922,
      97.79270935058594,
      75.00166320800781,
      78.74836730957031,
      70.40473937988281,
      76.33447265625,
      70.73409271240234,
      70.9795150756836,
      90.16873931884766,
      73.5377426147461,
      86.9969253540039,
      61.48834991455078,
      64.47010803222656,
      57.928218841552734,
      80.01107788085938,
      76.48992156982422,
      69.59379577636719,