In [1]:
%load_ext autoreload
%autoreload 2

import torch
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


# Import of clean dataloader

In [5]:
from data_utils.data_loaders import FocusLoader

# Loader over the CocoCarFocus training split. Neither transform_mean nor
# transform_std is passed here, so whatever FocusLoader defaults to applies
# (presumably no normalisation -- TODO confirm in FocusLoader).
dl = FocusLoader(
    # -- where the data lives / where label files are written --
    images_dir="../../data/processed/COCO-2017/CocoCarFocus/train/images",
    csv_path="../../data/processed/COCO-2017/CocoCarFocus/train/labels.csv",
    save_out_dir="../../res/focus/coco_focus_car/trainer/labels/",
    # -- what is loaded --
    labels={"0": "none", "1": "car"},
    tf_image_size=(650, 650),
    # -- batching / splitting --
    batch_size=96,
    num_workers=12,
    shuffle=True,
    validation_split=0.15,
    # -- class balancing of the training part --
    balance_train=True,
    balance_methods=["undersample"],
    balance_max_multiplicity=3,
)
dl_train = dl.get_train_loader()

# Function to calculate mean and std of the dataset

In [15]:
def get_mean_and_std(dataloader):
    """Compute the per-channel mean and standard deviation over a dataloader.

    Every batch is weighted by the number of values it contributes per
    channel, so a smaller final batch does not skew the result (averaging the
    per-batch means would give it the same weight as a full batch).

    Args:
        dataloader: Iterable of batches. Each batch is indexed with
            ``'image'`` to obtain a 4-D tensor, which is reduced over dims
            0, 2 and 3; dim 1 is kept (assumed to be the channel axis, i.e.
            N x C x H x W -- TODO confirm against the loader).

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors with one entry per channel, in
        the dtype of the image batches.

    Raises:
        ValueError: If the dataloader yields no image values at all.
    """
    channels_sum, channels_squared_sum, num_values = 0, 0, 0
    out_dtype = None
    for data in tqdm(dataloader, desc="Calculating mean and std"):
        images = data['image']
        out_dtype = images.dtype
        # Sum over batch, height and width, but not over the channels.
        # Accumulate in float64: summing billions of float32 pixels in
        # float32 would lose precision.
        channels_sum += images.sum(dim=[0, 2, 3], dtype=torch.float64)
        channels_squared_sum += (images ** 2).sum(dim=[0, 2, 3], dtype=torch.float64)
        # Number of values that went into each channel's sum for this batch.
        num_values += images.shape[0] * images.shape[2] * images.shape[3]

    if num_values == 0:
        raise ValueError("dataloader yielded no image data; cannot compute mean and std")

    mean = channels_sum / num_values

    # var = E[X^2] - (E[X])^2; rounding can push it marginally below zero,
    # which would turn the square root into NaN, so clamp at 0 first.
    variance = torch.clamp(channels_squared_sum / num_values - mean ** 2, min=0)
    std = variance ** 0.5

    return mean.to(out_dtype), std.to(out_dtype)

In [16]:
# Per-channel (mean, std) of the training loader that was built without
# transform_mean / transform_std.
get_mean_and_std(dl_train)

Calculating mean and std: 100%|██████████| 109/109 [01:05<00:00,  1.67it/s]


(tensor([0.4572, 0.4423, 0.4103]), tensor([0.2577, 0.2591, 0.2712]))

# Check that the normalisation works :)

In [17]:
from data_utils.data_loaders import FocusLoader

# Same loader configuration as the earlier FocusLoader cell, plus
# transform_mean / transform_std set to the per-channel statistics that
# get_mean_and_std reported for the loader without them.
dl = FocusLoader(
    # -- where the data lives / where label files are written --
    images_dir="../../data/processed/COCO-2017/CocoCarFocus/train/images",
    csv_path="../../data/processed/COCO-2017/CocoCarFocus/train/labels.csv",
    save_out_dir="../../res/focus/coco_focus_car/trainer/labels/",
    # -- what is loaded --
    labels={"0": "none", "1": "car"},
    tf_image_size=(650, 650),
    # -- normalisation statistics (per channel) --
    transform_mean=[0.4572, 0.4423, 0.4103],
    transform_std=[0.2577, 0.2591, 0.2712],
    # -- batching / splitting --
    batch_size=96,
    num_workers=12,
    shuffle=True,
    validation_split=0.15,
    # -- class balancing of the training part --
    balance_train=True,
    balance_methods=["undersample"],
    balance_max_multiplicity=3,
)
dl_train = dl.get_train_loader()

In [18]:
# Sanity check: statistics of the loader that was given transform_mean /
# transform_std (the recorded output is ~0 mean and ~1 std per channel).
get_mean_and_std(dl_train)

Calculating mean and std: 100%|██████████| 109/109 [01:03<00:00,  1.72it/s]


(tensor([0.0022, 0.0015, 0.0005]), tensor([1.0019, 1.0020, 1.0012]))

# Classifiers:
## Person:
+ "transform_mean": [0.5462,0.5803,0.6043]
+ "transform_std": [0.2952,0.2886,0.2965]
## Car:
+ "transform_mean": [0.5615,0.5820,0.5965]
+ "transform_std": [0.2911,0.2849,0.2908]
## Dog:
+ "transform_mean": [0.5243,0.5564,0.5869],
+ "transform_std": [0.2958,0.2923,0.3011]
## Bicycle:
+ "transform_mean": [0.5647,0.5906,0.6088]
+ "transform_std": [0.2894,0.283,0.2891]
## Cat:
+ "transform_mean": [0.5295,0.5672,0.5936]
+ "transform_std": [0.3045,0.3015,0.3107]
## Multi:
+ "transform_mean": [0.5438,0.5786,0.6030]
+ "transform_std": [0.2944,0.2882,0.2966]

# Focus:
## Person:
+ "transform_mean": [0.4686, 0.4496, 0.4134]
+ "transform_std": [0.2553, 0.2530, 0.2708]
## Car:
+ "transform_mean": [0.4572, 0.4423, 0.4103]
+ "transform_std": [0.2577, 0.2591, 0.2712]

# Computing class weights

In [29]:
import pandas as pd
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

In [30]:
# Load the two label tables of the multi-class classifier run.
# NOTE(review): these are machine-specific absolute paths, unlike the
# "../../..." relative paths used for the FocusLoader cells -- consider
# switching to a path relative to the repository.
df_train, df_train_aug = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/home/ubuntu/focus-convolutional-neural-network/res/classifiers/coco_classifier_multi/trainer/labels/labels_train.csv",
        "/home/ubuntu/focus-convolutional-neural-network/res/classifiers/coco_classifier_multi/trainer/labels/labels_train_aug.csv",
    )
)

In [31]:
# Number of rows per class in labels_train.csv.
df_train.loc[:, "label"].value_counts()

0    409917
1    196848
2     32900
3      5335
Name: label, dtype: int64

In [32]:
# Number of rows per class in labels_train_aug.csv.
df_train_aug.loc[:, "label"].value_counts()

3    5335
2     865
Name: label, dtype: int64

In [33]:
# Labels of labels_train.csv as a plain Python list.
list_train = df_train["label"].tolist()

In [34]:
# Labels of labels_train_aug.csv as a plain Python list.
list_train_aug = df_train_aug["label"].tolist()

In [35]:
# All labels: the rows of labels_train.csv followed by those of labels_train_aug.csv.
list_train_full = [*list_train, *list_train_aug]

In [36]:
# 'balanced' class weights over the combined label list, one weight per
# distinct label value (in the sorted order returned by np.unique).
compute_class_weight(
    class_weight="balanced",
    classes=np.unique(list_train_full),
    y=list_train_full,
)

array([ 0.39715357,  0.82703406,  4.82156079, 15.25773196])