In [7]:
%load_ext autoreload
%autoreload 2

import torch
from tqdm import tqdm

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Import the clean dataloader

In [8]:
from data_utils.data_loaders import FocusCNNLoader, ClassifierLoader, FocusLoader
# Build the FocusCNN loader over the PASCAL FocusCNN train split.
# No transform_mean / transform_std are passed in this call.
# NOTE(review): presumably the loader then yields un-normalized images, which is
# what the statistics computation below needs — confirm against FocusCNNLoader's defaults.
dl = FocusCNNLoader(
    images_dir='/root/focus-convolutional-neural-network/data/processed/PASCAL/PascalFocusCNN/train/images',
    batch_size=16,
    csv_path='/root/focus-convolutional-neural-network/data/processed/PASCAL/PascalFocusCNN/train/labels.csv',
    save_out_dir = "/root/focus-convolutional-neural-network/res/focus_cnn/pascal_focuscnn/trainer/labels/",
    shuffle=True,
    validation_split=0.15,
    num_workers=32,
    # Class-id (as string) -> class-name mapping handed to the loader.
    labels={
        "0": "none",
        "1": "person",
        "2": "car",
        "3": "bicycle"
    },
    balance_train=True,
    # NOTE(review): semantics of "remove_only_0" are defined by the loader, not visible here.
    balance_methods=["remove_only_0"],
    tf_image_size=[640, 640],
    is_test=False
)
# Only the train loader is used by the cells that follow.
dl_train = dl.get_train_loader()

# Function to calculate mean and std of the dataset

In [9]:
def get_mean_and_std(dataloader):
    """Compute the per-channel mean and standard deviation over a dataloader.

    Every batch is reduced over dims 0, 2 and 3, so batches are expected to be
    4-D tensors whose dim 1 is the channel axis. Each batch's contribution is
    weighted by the number of values it holds per channel, so a smaller final
    batch (or any mix of batch sizes) does not bias the result.

    Args:
        dataloader: Iterable yielding batches. A batch may be a list/tuple
            (the first element is used), a dict (the ``'image'`` entry is
            used), or a tensor.

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors with one entry per channel, in
        the dtype and on the device of the last non-empty batch.

    Raises:
        TypeError: If a batch is not a list, tuple, dict or tensor.
        ValueError: If the dataloader yields no values at all.
    """
    weighted_sum, weighted_squared_sum, num_values = 0, 0, 0
    last_batch = None
    for data in tqdm(dataloader, desc="Calculating mean and std"):
        if isinstance(data, (list, tuple)):
            data = data[0]
        elif isinstance(data, dict):
            data = data['image']
        elif not isinstance(data, torch.Tensor):
            raise TypeError("Unknown data type")

        # Number of values that contribute to each channel in this batch.
        batch_values = data.shape[0] * data.shape[2] * data.shape[3]
        if batch_values == 0:
            # An empty batch carries no information (its mean would be NaN).
            continue
        last_batch = data

        # Accumulate the small per-channel statistics on the CPU in float64 so
        # the running totals stay precise over many batches.
        batch_mean = torch.mean(data, dim=[0, 2, 3]).cpu().double()
        batch_squared_mean = torch.mean(data ** 2, dim=[0, 2, 3]).cpu().double()
        weighted_sum += batch_mean * batch_values
        weighted_squared_sum += batch_squared_mean * batch_values
        num_values += batch_values

    if num_values == 0:
        raise ValueError("dataloader yielded no data to compute mean and std from")

    mean = weighted_sum / num_values

    # std = sqrt(E[X^2] - (E[X])^2); clamp guards against a tiny negative
    # variance caused by floating-point rounding, which would produce NaN.
    variance = (weighted_squared_sum / num_values - mean ** 2).clamp(min=0)
    std = variance ** 0.5

    return (
        mean.to(device=last_batch.device, dtype=last_batch.dtype),
        std.to(device=last_batch.device, dtype=last_batch.dtype),
    )

In [10]:
# Per-channel (mean, std) of the train loader built above; the recorded values are
# reused as transform_mean / transform_std further down in this notebook.
get_mean_and_std(dl_train)

Calculating mean and std:   0%|          | 0/446 [00:00<?, ?it/s]

Calculating mean and std: 100%|██████████| 446/446 [00:38<00:00, 11.70it/s]


(tensor([0.4309, 0.4104, 0.3840]), tensor([0.2620, 0.2618, 0.2679]))

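# Check that it works: with the computed mean/std passed to the loader, the recomputed mean should be ≈ 0 and the std ≈ 1 :)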

In [11]:
from data_utils.data_loaders import FocusCNNLoader, ClassifierLoader, FocusLoader
# Same arguments as the first FocusCNNLoader call in this notebook, plus
# transform_mean / transform_std set to the statistics printed by the earlier
# get_mean_and_std run. Note this rebinds `dl` and `dl_train`.
dl = FocusCNNLoader(
    images_dir='/root/focus-convolutional-neural-network/data/processed/PASCAL/PascalFocusCNN/train/images',
    batch_size=16,
    csv_path='/root/focus-convolutional-neural-network/data/processed/PASCAL/PascalFocusCNN/train/labels.csv',
    save_out_dir = "/root/focus-convolutional-neural-network/res/focus_cnn/pascal_focuscnn/trainer/labels/",
    shuffle=True,
    validation_split=0.15,
    num_workers=32,
    # Class-id (as string) -> class-name mapping handed to the loader.
    labels={
        "0": "none",
        "1": "person",
        "2": "car",
        "3": "bicycle"
    },
    balance_train=True,
    # NOTE(review): semantics of "remove_only_0" are defined by the loader, not visible here.
    balance_methods=["remove_only_0"],
    tf_image_size=[640, 640],
    is_test=False,
    # Per-channel statistics measured on the loader without these two arguments.
    transform_mean=[0.4309, 0.4104, 0.3840],
    transform_std=[0.2620, 0.2618, 0.2679]
)
dl_train = dl.get_train_loader()

In [12]:
# Re-measure the statistics on the loader configured with transform_mean / transform_std;
# the recorded output is ≈ 0 mean and ≈ 1 std per channel.
get_mean_and_std(dl_train)

Calculating mean and std:   0%|          | 0/446 [00:00<?, ?it/s]

Calculating mean and std: 100%|██████████| 446/446 [00:39<00:00, 11.41it/s]


(tensor([-8.0582e-05,  2.2756e-03,  1.3948e-03]),
 tensor([1.0005, 0.9994, 1.0001]))

In [8]:
# Fetch a single batch from the train loader.
# NOTE(review): `data` is not used by any later cell visible here, and this cell's
# execution count (In [8]) is out of sequence — likely a leftover; confirm before keeping.
data = next(iter(dl_train))

# Classifiers:
## Person:
+ "transform_mean": [0.5462,0.5803,0.6043]
+ "transform_std": [0.2952,0.2886,0.2965]
## Car:
+ "transform_mean": [0.5615,0.5820,0.5965]
+ "transform_std": [0.2911,0.2849,0.2908]
## Dog:
+ "transform_mean": [0.5243,0.5564,0.5869]
+ "transform_std": [0.2958,0.2923,0.3011]
## Bicycle:
+ "transform_mean": [0.5647,0.5906,0.6088]
+ "transform_std": [0.2894,0.283,0.2891]
## Cat:
+ "transform_mean": [0.5295,0.5672,0.5936]
+ "transform_std": [0.3045,0.3015,0.3107]
## Multi:
+ "transform_mean": [0.5438,0.5786,0.6030]
+ "transform_std": [0.2944,0.2882,0.2966]

# Focus:
## Person:
+ "transform_mean": [0.4686, 0.4496, 0.4134]
+ "transform_std": [0.2553, 0.2530, 0.2708]
## Car:
+ "transform_mean": [0.4572, 0.4423, 0.4103]
+ "transform_std": [0.2577, 0.2591, 0.2712]

# Computing class weights (multi-class classifier)

In [14]:
import pandas as pd
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

In [13]:
# Label tables written by the multi classifier trainer: the base train labels and a
# second file, labels_train_aug.csv (presumably the augmented samples — TODO confirm).
df_train = pd.read_csv("/root/focus-convolutional-neural-network/res/classifiers/pascal_classifier_multi/trainer/labels/labels_train.csv")
df_train_aug = pd.read_csv("/root/focus-convolutional-neural-network/res/classifiers/pascal_classifier_multi/trainer/labels/labels_train_aug.csv")

In [14]:
# Class distribution of the base train labels.
df_train['label'].value_counts()

label
0    7843
1    6111
3     824
2     384
Name: count, dtype: int64

In [15]:
# Class distribution of the labels in labels_train_aug.csv (only classes 2 and 3 appear in the recorded output).
df_train_aug['label'].value_counts()

label
2    384
3    178
Name: count, dtype: int64

In [16]:
# Plain Python list of the base train labels.
list_train = df_train['label'].tolist()

In [17]:
# Plain Python list of the labels from the second (aug) table.
list_train_aug = df_train_aug['label'].tolist()

In [18]:
# Base labels followed by the aug labels, as one list.
list_train_full = [*list_train, *list_train_aug]

In [19]:
# 'balanced' weights are n_samples / (n_classes * count(class)), returned in the order
# of np.unique(...) — i.e. sorted class ids. E.g. class 0 here: 15724 / (4 * 7843) ≈ 0.5012.
compute_class_weight(class_weight='balanced', classes=np.unique(list_train_full), y=list_train_full)

array([0.50121127, 0.64326624, 5.11848958, 3.92315369])

# Class weights (FocusCNN, undersampled train labels):

In [5]:
import pandas as pd
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

In [6]:
# Labels table written by the FocusCNN trainer (undersampled train split, per the file name).
df = pd.read_csv("/root/focus-convolutional-neural-network/res/focus_cnn/pascal_focuscnn/trainer/labels/labels_train_undersampled.csv")

In [7]:
# Pool the three label columns into one flat array so class frequencies are counted
# over every label slot, not per column.
arr = np.array(df['label_1'].to_list() + df['label_2'].to_list() + df['label_3'].to_list())

In [8]:
# Balanced class weights over the pooled labels; output order follows np.unique(arr) (sorted class ids).
compute_class_weight(class_weight='balanced', classes=np.unique(arr), y=arr)

array([ 0.39267881,  0.82231278, 13.07457213,  6.21802326])

In [9]:
# Toy 10%/90% example: 1000 ones followed by 9000 zeros (rebinds `arr`).
arr = np.array([label for label, count in ((1, 1000), (0, 9000)) for _ in range(count)])

In [10]:
# Sanity check of the 'balanced' formula on the toy array:
# class 0 -> 10000 / (2 * 9000) ≈ 0.556, class 1 -> 10000 / (2 * 1000) = 5.0.
compute_class_weight(class_weight='balanced', classes=np.unique(arr), y=arr)

array([0.55555556, 5.        ])