In [1]:
#  Copyright 2022 Institute of Advanced Research in Artificial Intelligence (IARAI) GmbH.
#  IARAI licenses this file to You under the Apache License, Version 2.0
#  (the "License"); you may not use this file except in compliance with
#  the License. You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

In [2]:
import os
import sys

In [3]:
# Alternatively, to make the module imports work properly, set PYTHONPATH=$PWD before launching the notebook server from the repo root folder.
sys.path.insert(0, os.path.abspath("../"))  # noqa:E402

![t4c20logo](../t4c20logo.png)

In [4]:
from IPython.core.display import HTML
from IPython.display import display
from pathlib import Path
import torch
import numpy as np
import t4c22
import tqdm
from t4c22.misc.t4c22_logging import t4c_apply_basic_logging_config
from t4c22.t4c22_config import class_fractions
from t4c22.t4c22_config import load_basedir
from t4c22.t4c22_config import load_cc_labels
from t4c22.dataloading.t4c22_dataset import T4c22Dataset
from t4c22.metric.masked_crossentropy import get_weights_from_class_fractions
from t4c22.misc.notebook_helpers import restartkernel # noqa:E402,F811

In [5]:
%matplotlib inline
%load_ext autoreload
%load_ext time
%autoreload 2
%autosave 60

display(HTML("<style>.container { width:80% !important; }</style>"))

The time module is not an IPython extension.


Autosaving every 60 seconds


In [6]:
# apply the project's basic logging configuration with log level DEBUG
t4c_apply_basic_logging_config(loglevel="DEBUG")

In [7]:
# BASEDIR is the data root; it is read from the t4c22_config.json file of the
# t4c22 package -- edit that file to point to your own data root.
BASEDIR = load_basedir(pkg=t4c22, fn="t4c22_config.json")

## Dummy baselines

In [8]:
# use the first CUDA device when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [9]:
def evaluate_static_baseline(city, ds, probas):
    """Print the class-weighted cross-entropy loss of a constant prediction.

    Every label found in ``ds`` is scored against the same logits, which
    ``static_baseline`` derives from ``probas``.

    Args:
        city: key into ``class_fractions``; also the prefix of the printed line.
        ds: sized iterable yielding ``(x, y)`` pairs. Only ``y`` is used; all
            ``y`` tensors are concatenated along dim 0.
        probas: class probabilities in the order green, yellow, red.

    Returns:
        None. The loss is printed as ``"{city} {probas}: {loss}"``.
    """
    # `import tqdm` alone does not guarantee that the `notebook` submodule is
    # loaded, so import it explicitly before using `tqdm.notebook.tqdm`.
    import tqdm.notebook

    # class weights for the cross entropy, in the order green, yellow, red
    city_class_fractions = class_fractions[city]
    city_class_weights = torch.tensor(get_weights_from_class_fractions([city_class_fractions[c] for c in ["green", "yellow", "red"]])).float()

    # true labels, read from the dataset that was passed in (not from a notebook global)
    y_list = [y for _, y in tqdm.notebook.tqdm(ds, total=len(ds))]

    y = torch.cat(y_list, 0)
    # NaN entries become -1, which the loss below skips via ignore_index
    y = y.nan_to_num(-1)
    y = y.long()

    # constant logits for green, yellow, red -- one row per label
    y_hat = static_baseline(len(y), probas)

    # compute loss
    loss_f = torch.nn.CrossEntropyLoss(weight=city_class_weights, ignore_index=-1)

    loss_f = loss_f.to(device)
    y_hat = y_hat.to(device)
    y = y.to(device)

    loss = loss_f(y_hat, y)
    print(f"{city} {probas}: {loss.cpu().numpy()}")

In [10]:
def static_baseline(num, probas):
    """Return ``num`` identical rows of log-probabilities as a float32 tensor.

    Args:
        num: number of rows to produce.
        probas: sequence of per-class probabilities.

    Returns:
        Tensor of shape ``(num, len(probas))`` whose every row is
        ``log(p + eps)`` for each ``p`` in ``probas``.
    """
    # tiny offset so that a probability of exactly 0 yields a finite logit
    eps = 0.0000000000000000001
    row = torch.tensor([np.log(p + eps) for p in probas]).float()
    # Repeat the single row on the tensor side instead of building a Python
    # list of `num` rows; this also gives a (0, len(probas)) result for num == 0.
    return row.repeat(num, 1)

### London

In [11]:
# city key: selects the dataset below and the entry of `class_fractions` used for the loss weights
city = "london"

In [12]:
# Training split for the selected city.
# NOTE(review): `limit=200` presumably caps the number of samples (the progress bars below show a total of 200) -- confirm against T4c22Dataset.
# NOTE(review): the cache directory is a machine-local absolute path; change it if /tmp is not suitable.
train_dataset = T4c22Dataset(root=BASEDIR, city=city, split="train", cachedir=Path("/tmp/processed"), limit=200)

In [13]:
# baseline: all probability mass on green (class order is green, yellow, red)
evaluate_static_baseline(city=city, ds=train_dataset, probas=[1, 0, 0])

  0%|          | 0/200 [00:00<?, ?it/s]

london [1, 0, 0]: 27.34807014465332


In [14]:
# baseline: all probability mass on yellow (class order is green, yellow, red)
evaluate_static_baseline(city=city, ds=train_dataset, probas=[0, 1, 0])

  0%|          | 0/200 [00:00<?, ?it/s]

london [0, 1, 0]: 30.468948364257812


In [15]:
# baseline: all probability mass on red (class order is green, yellow, red)
evaluate_static_baseline(city=city, ds=train_dataset, probas=[0, 0, 1])

  0%|          | 0/200 [00:00<?, ?it/s]

london [0, 0, 1]: 29.589311599731445


In [16]:
# baseline: uniform probabilities over green, yellow, red
evaluate_static_baseline(city=city, ds=train_dataset, probas=[1 / 3, 1 / 3, 1 / 3])

  0%|          | 0/200 [00:00<?, ?it/s]

london [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]: 1.0995548963546753


In [17]:
# free resources by restarting kernel
# restartkernel()