# Instructions

Implement probability calibration transformer.

As was explained in the blog https://blog.deepsense.ai/deep-learning-right-whale-recognition-kaggle/
probability calibration was an important step that improved the results considerably.

The idea is simple. You should take the output `y_pred` from the network and raise it to a power between `1.1` and `1.6`.

You will learn how to do it cleanly with a BaseTransformer object that is used heavily in this project.

It was used to improve the log loss on classification, so use `-s classification` in the execution command.

# Your Solution
Your solution function should be called solution. In this case we leave it for consistency but you don't need to do anything with it. 

CONFIG is a dictionary with all parameters that you want to pass to your solution function.

In [1]:
# Parameters passed to the solution function. This must be a plain dict;
# a trailing comma here would silently turn it into a one-element tuple.
CONFIG = {}

def solution():
    """
    Define the DatasetLocalizer skeleton and return the class itself.

    The three data-access methods are exercise stubs: each raises
    NotImplementedError until it is implemented.

    NOTE(review): an earlier docstring here spoke of a ProbabilityCalibration
    implementation and of returning an instance; the code defines
    DatasetLocalizer and returns the class, not an instance.

    Returns:
        The DatasetLocalizer class.
    """
    class DatasetLocalizer(Dataset):
        """Dataset skeleton wired to the localizer preprocessing function."""

        def __init__(self, X, y, img_dirpath, augmentation, target_size, bins_nr):
            super().__init__()
            self.img_dirpath = img_dirpath
            # Re-index from 0 so positional and label lookups agree.
            self.X = X.reset_index(drop=True)
            # NOTE(review): `y` is accepted but not stored here; a working
            # __getitem__ will presumably need it -- confirm when implementing.
            self.target_size = target_size
            self.bins_nr = bins_nr
            self.augmentation = augmentation
            self.preprocessing_function = localizer_preprocessing
            self.normalization_function = normalization

        def load_image(self, img_name):
            """
            Read image from disk to numpy array
            """
            # Raise (not return) so an unimplemented stub fails loudly instead
            # of handing the exception class back to the caller as "an image".
            raise NotImplementedError('load_image is not implemented yet')

        def __len__(self):
            """
            Determine the length of the dataset
            """
            raise NotImplementedError('__len__ is not implemented yet')

        def __getitem__(self, index):
            """
            Fetch one sample; expected to return (Xi_tensor, yi_tensors)
            """
            raise NotImplementedError('__getitem__ is not implemented yet')

    return DatasetLocalizer

In [2]:
class DatasetBasic(Dataset):
    """Base dataset that serves (image tensor, target tensor) pairs.

    Image file names are read from the ``'Image'`` column of ``X`` and the
    matching targets from ``y``. The preprocessing and normalization hooks
    are initialised to ``None`` here, while ``__getitem__`` calls both, so
    they have to be replaced with callables before items are fetched.
    """

    def __init__(self, X, y, img_dirpath, augmentation, target_size, bins_nr):
        """Store the (re-indexed) inputs and configuration.

        Raises:
            NotImplementedError: if ``y`` is None.
        """
        super().__init__()
        self.img_dirpath = img_dirpath
        self.X = X.reset_index(drop=True)
        if y is None:
            # Wouldn't work with kaggle submission -- fix it.
            raise NotImplementedError('Not working with y being None')
        self.y = y.reset_index(drop=True)
        self.target_size = target_size
        self.bins_nr = bins_nr
        self.augmentation = augmentation
        # Hooks invoked by __getitem__; left unset in this base class.
        self.preprocessing_function = None
        self.normalization_function = None

    def load_image(self, img_name):
        """Open the image file ``img_name`` located under ``img_dirpath``."""
        return Image.open(os.path.join(self.img_dirpath, img_name), 'r')

    def __len__(self):
        """Return the number of rows in ``X``."""
        return len(self.X.index)

    def __getitem__(self, index):
        """Load, preprocess and normalize the sample at position ``index``.

        Returns:
            A pair ``(image_tensor, target_tensor)``: the image converted to
            a FloatTensor with its axes permuted by ``(2, 0, 1)``, and the
            target converted to a LongTensor.
        """
        file_name = self.X['Image'].iloc[index]
        target = self.y.iloc[index]

        pixels = np.asarray(self.load_image(file_name))

        pixels, target = self.preprocessing_function(
            pixels, target, self.augmentation, self.target_size, self.bins_nr)
        pixels = self.normalization_function(pixels)

        # Move the last axis to the front before casting to float.
        image_tensor = torch.from_numpy(pixels).permute(2, 0, 1).type(torch.FloatTensor)
        target_tensor = torch.from_numpy(target).type(torch.LongTensor)
        return image_tensor, target_tensor

In [None]:
def localizer_preprocessing(img, target, augmentation, target_size, bins_nr):
    """Resize (and optionally augment) an image together with its two bbox points.

    The same deterministic transformer is applied to the image and to the
    keypoints, so the points stay aligned with the transformed image.

    Args:
        img: image array; its ``shape`` is used as the keypoint canvas shape.
        target: object exposing ``bbox1_x``, ``bbox1_y``, ``bbox2_x`` and
            ``bbox2_y`` attributes (each is cast to ``int``).
        augmentation: when truthy, a random affine (rotation in [-10, 10] and
            scale in [1/1.2, 1.2]) is applied before resizing.
        target_size: ``(height, width)`` of the output image.
        bins_nr: forwarded to ``bin_quantizer`` unchanged.

    Returns:
        ``(aug_X, aug_points_binned)``: the transformed image and the result
        of ``bin_quantizer`` on the flattened, transformed point coordinates.
    """
    height, width = target_size

    scale = iaa.Scale({"height": height, "width": width}).to_deterministic()
    augmenter = iaa.Sequential([iaa.Affine(rotate=(-10, 10),
                                           scale=(1 / 1.2, 1.2)),
                                #  KirzhevskyColorPerturbation
                                ]).to_deterministic()

    if augmentation:
        transformations = [augmenter, scale]
    else:
        transformations = [scale]
    # Deterministic so the image and the keypoints get identical parameters.
    transformer = iaa.Sequential(transformations).to_deterministic()

    aug_X = transformer.augment_image(img)

    keypoints = ia.KeypointsOnImage([
        ia.Keypoint(x=int(target.bbox1_x), y=int(target.bbox1_y)),
        ia.Keypoint(x=int(target.bbox2_x), y=int(target.bbox2_y))],
        shape=img.shape)
    aug_points = transformer.augment_keypoints([keypoints])
    # `np.float` was only an alias of the builtin `float` and was removed in
    # NumPy 1.24; the builtin yields the same float64 dtype on every version.
    # Presumably the flattened order is [x1, y1, x2, y2] -- confirm against imgaug.
    aug_points_formatted = np.reshape(aug_points[0].get_coords_array(), -1).astype(float)
    aug_points_binned = bin_quantizer(aug_points_formatted, (height, width), bins_nr)

    return aug_X, aug_points_binned