In [None]:
!pip install timm

import os
import gc
import cv2
import math
import copy
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# For Image Models
import timm
from math import cos, pi

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch.cuda.amp as amp

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
#model_name:b7
#img_size:384
#weights:
#in_features
#test_df
class Config:
    """Static settings for this inference / validation notebook.

    Every value is a plain class attribute, so it can be read either from the
    class itself or from the ``CONFIG`` instance created right below.
    """

    # --- run identity / backbone -------------------------------------------
    version = "merge"
    model_name = "tf_efficientnet_b7"
    #model_name = 'seresnext50_32x4d'
    seed = 2022
    img_size = (512, 512)

    # Number of output classes the margin / binary heads are built with.
    num_classes = 15587

    # Batch size used by every DataLoader created in this notebook.
    valid_batch_size = 10

    # --- input locations ---------------------------------------------------
    TEST_DIR = '../input/backfin-test-new/test_images/'
    TRAIN_DIR = '../input/backfin-train-new/train_images/'

    train_csv = "../input/train-csv-new/train_box_clear.csv"
    label_path = "../input/happywhalelabel/label.csv"
    sample_submission = "../input/happy-whale-and-dolphin/sample_submission.csv"
    work_dirs = "./"

    n_fold = 5
    num_workers = 2

    # --- ArcFace hyper-parameters --------------------------------------------
    s = 30.0
    m = 0.50
    ls_eps = 0.0
    easy_margin = False

    # Number of neighbours requested from the nearest-neighbour index.
    KNN = 100

    # First CUDA device when one is available, otherwise the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


CONFIG = Config()
print(CONFIG.device)

def set_seed(seed=42):
    '''Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode, so repeated runs
    draw the same random numbers. This is for REPRODUCIBILITY.

    Args:
        seed: integer seed shared by all generators.
    '''
    # Python's own generator was previously left unseeded.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed. The running interpreter has already
    # fixed its hash seed at start-up, so this only reaches processes that are
    # started afterwards and inherit the environment.
    os.environ['PYTHONHASHSEED'] = str(seed)

In [None]:
def _compose_pipeline(*augmentations):
    """Build one albumentations pipeline: resize, optional augmentations, normalise, to-tensor.

    Every pipeline in ``data_transforms`` shares the same first step (resize to
    ``CONFIG.img_size``) and the same last two steps (normalisation with the
    mean/std below, then ``ToTensorV2``); only the middle steps differ.
    """
    # NOTE(review): img_size[1] is passed first and img_size[0] second, exactly
    # as before; the configured size is square, so the order has no effect today.
    return A.Compose(
        [
            A.Resize(CONFIG.img_size[1], CONFIG.img_size[0]),
            *augmentations,
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0
            ),
            ToTensorV2(),
        ],
        p=1.,
    )


data_transforms = {
    # Training-style augmentation: colour jitter, flip, blur, brightness/contrast, affine.
    "train": _compose_pipeline(
        A.HueSaturationValue(p=0.5, hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=50),
        A.HorizontalFlip(),
        A.GaussianBlur(),
        A.RandomBrightnessContrast(p=0.5),
        A.ShiftScaleRotate(),
    ),
    # Plain evaluation pipeline without augmentation.
    "test": _compose_pipeline(),
    # Test-time-augmentation view: always flipped horizontally.
    "test_flip": _compose_pipeline(A.HorizontalFlip(p=1.0)),
    # Test-time-augmentation view: always shifted / scaled / rotated.
    "test_rotate": _compose_pipeline(A.ShiftScaleRotate(p=1.0)),
}

In [None]:
class HappyWhaleDataset(Dataset):
    """Image dataset that reads files listed in ``df['file_path']``.

    The ``model`` argument selects the operating mode:

    * ``"test_valid"`` - labels are taken from ``df['individual_id']``.
    * ``"test"``       - no labels are available; ``-1`` is used for every item.
    * ``"test_tta"``   - like ``"test"``, but each item additionally carries the
      ``"test_flip"`` and ``"test_rotate"`` views from the global ``data_transforms``.

    Args:
        df: DataFrame with a ``file_path`` column (and ``individual_id`` in
            ``"test_valid"`` mode).
        model: one of the three mode names above.
        transforms: callable used as ``transforms(image=img)["image"]``.
        df_boxes: accepted for compatibility; not used by this class.

    Raises:
        ValueError: if ``model`` is not one of the supported mode names.
    """

    _SUPPORTED_MODES = ("test_valid", "test", "test_tta")

    def __init__(self, df, model = "test_valid", transforms=None, df_boxes = None):
        # An explicit exception (rather than ``assert False``) still fires when
        # Python runs with assertions disabled.
        if model not in self._SUPPORTED_MODES:
            raise ValueError(f"{model} is not in [test_valid, test, test_tta]")
        self.model = model

        self.file_paths = df['file_path'].values
        #self.check_image_path()

        if self.model == "test_valid":
            self.labels = df['individual_id'].values
        else:
            # "test" / "test_tta": placeholder label for every image.
            self.labels = [-1] * len(self.file_paths)

        self.transforms = transforms

    def check_image_path(self):
        """Print every path in ``file_paths`` that does not exist on disk."""
        for file in self.file_paths:
            if not os.path.exists(file):
                print (file + " if not exists!")
        print ("check over!")
        return

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        """Load, convert to RGB and transform the image at ``index``.

        Returns:
            ``(image, label, image_name)`` in ``"test_valid"`` / ``"test"`` mode, or
            ``(image, label, image_name, flipped_image, rotated_image)`` in
            ``"test_tta"`` mode.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_path = self.file_paths[index]
        label = int(self.labels[index])
        # Keep the last path component, then only what follows its last "_".
        img_name = img_path.split('/')[-1]
        img_name = img_name.split("_")[-1]

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"cv2.imread could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        img1 = self.transforms(image=img)["image"]
        if self.model in ["test_valid", "test"]:
            return img1, label, img_name
        if self.model == "test_tta":
            img2 = data_transforms["test_flip"](image=img)["image"]
            img3 = data_transforms["test_rotate"](image=img)["image"]
            return img1, label, img_name, img2, img3

In [None]:
#GeM Pooling
class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    Values are clamped to at least ``eps``, raised to the power ``p``,
    averaged over the full spatial extent and raised to ``1 / p``.
    ``p`` is a learnable one-element parameter.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Apply generalized-mean pooling to ``x`` with exponent ``p``."""
        full_extent = (x.size(-2), x.size(-1))
        powered = x.clamp(min=eps).pow(p)
        pooled = F.avg_pool2d(powered, full_extent)
        return pooled.pow(1. / p)

    def __repr__(self):
        p_value = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})"


In [None]:
def cosine_sim(x1, x2, dim=1, eps=1e-8):
    """Pairwise cosine similarity between the rows of ``x1`` and the rows of ``x2``.

    The matrix of dot products is divided by the outer product of the row
    norms; the denominator is clamped to at least ``eps``.
    """
    dot_products = torch.mm(x1, x2.t())
    norm_products = torch.ger(torch.norm(x1, 2, dim), torch.norm(x2, 2, dim))
    return dot_products / norm_products.clamp(min=eps)


class CosMarginProduct(nn.Module):
    r"""Large-margin cosine head: ``s * (cos(theta) - m)`` on the labelled class.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        s: scale applied to the logits
        m: margin subtracted from the labelled class' cosine
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.40):
        super().__init__()
        self.in_features, self.out_features = in_features, out_features
        self.s, self.m = s, m
        # One weight row per class, Xavier-uniform initialised.
        self.weight = nn.Parameter(torch.empty(out_features, in_features, dtype=torch.float32))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        cos_theta = cosine_sim(input, self.weight)
        # 1.0 at each sample's labelled class, 0.0 everywhere else.
        label_mask = torch.zeros_like(cos_theta).scatter_(1, label.view(-1, 1), 1.0)
        return self.s * (cos_theta - label_mask * self.m)


class ArcMarginProduct(nn.Module):
    r"""Large-margin arc-distance head: ``cos(theta + m)`` on the labelled class.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        s: scale applied to the logits
        m: angular margin added to the labelled class' angle
        easy_margin: if True, apply the margin only where ``cos(theta) > 0``
        ls_eps: label-smoothing factor applied to the one-hot targets
    """
    def __init__(self, in_features, out_features, s=30.0,
                 m=0.50, easy_margin=False, ls_eps=0.0):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # Constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Beyond this cosine, theta + m would exceed pi; a linear fallback is used.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        """Return scaled logits with the angular margin applied to ``label``'s class.

        Args:
            input: 2-D batch of embeddings (one row per sample).
            label: class index per sample; converted to ``long`` internally.
        """
        # --------------------------- cos(theta) & phi(theta) ---------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push |cosine| marginally above 1, which would make the
        # radicand negative and the square root NaN; clamp it into [0, 1].
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine.to(dtype=phi.dtype) > self.th, phi, cosine.to(dtype=phi.dtype) - self.mm)
        # --------------------------- convert label to one-hot ---------------------
        # Allocate on the logits' own device instead of a global setting, so the
        # head works wherever the module and its inputs live.
        one_hot = torch.zeros(cosine.size(), device=cosine.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output



def l2_norm(input,axis=1):
    """Divide ``input`` by its L2 norm taken along ``axis`` (norm keeps that dimension).

    NOTE: this helper is defined in more than one cell of this notebook; the
    definition executed last is the one in effect.
    """
    return torch.div(input, input.norm(2, axis, True))

#binary
class BinaryHead(nn.Module):
    """Linear classifier on L2-normalised features, with logits scaled by ``s``."""

    def __init__(self, emb_size = 2048, num_class=10008,  s = 16.0):
        super().__init__()
        self.s = s
        # Kept inside a Sequential so parameter names stay ``fc.0.*``.
        self.fc = nn.Sequential(nn.Linear(emb_size, num_class))

    def forward(self, fea):
        unit_fea = l2_norm(fea)
        return self.fc(unit_fea)*self.s

#arc
class ArcMarginHead(nn.Module):
    """Thin wrapper that owns an ``ArcMarginProduct`` built with its default settings."""

    def __init__(self, embedding_size, num_classes):
        super().__init__()
        self.arc = ArcMarginProduct(embedding_size, num_classes)

    def forward(self, fea, label):
        # Delegate straight to the wrapped margin product.
        return self.arc(fea, label)

#cos
class CosMarginHead(nn.Module):
    """Thin wrapper that owns a ``CosMarginProduct`` built with its default settings."""

    def __init__(self, embedding_size, num_classes):
        super().__init__()
        self.cos = CosMarginProduct(embedding_size, num_classes)

    def forward(self, fea, label):
        # Delegate straight to the wrapped margin product.
        return self.cos(fea, label)

In [None]:

def l2_norm(input,axis=1):
    """Divide ``input`` by its L2 norm taken along ``axis`` (norm keeps that dimension).

    NOTE: this helper is defined in more than one cell of this notebook; the
    definition executed last is the one in effect.
    """
    return torch.div(input, input.norm(2, axis, True))

#attention
class ChannelAttention(nn.Module):
    """Per-channel gate computed from globally max- and average-pooled features.

    Both pooled tensors go through the same two-layer 1x1-convolution
    bottleneck (``channel -> channel // reduction -> channel``); the two
    results are summed and passed through a sigmoid.
    """

    def __init__(self, channel, reduction):
        super().__init__()
        bottleneck = channel // reduction
        self.maxpool = nn.AdaptiveMaxPool2d(1)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.se = nn.Sequential(
            nn.Conv2d(channel, bottleneck, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(bottleneck, channel, 1, bias=False),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        from_max = self.se(self.maxpool(x))
        from_avg = self.se(self.avgpool(x))
        return self.sigmoid(from_max + from_avg)
    
class SpatialAttention(nn.Module):
    """Per-location gate computed from channel-wise max and mean maps.

    The two single-channel maps are concatenated, convolved down to one
    channel (padding keeps the spatial size for odd kernels) and passed
    through a sigmoid.
    """

    def __init__(self, kernel_size=3):
        super().__init__()
        same_padding = (kernel_size -1) // 2
        self.conv = nn.Conv2d(2, 1, kernel_size=kernel_size, padding=same_padding)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        channel_max = torch.max(x, dim=1, keepdim=True)[0]
        channel_mean = torch.mean(x, dim=1, keepdim=True)
        stacked = torch.cat([channel_max, channel_mean], 1)
        return self.sigmoid(self.conv(stacked))

    
class CBAMBlock(nn.Module):
    """Channel attention followed by spatial attention, with a residual connection.

    ``forward`` computes ``x * ca(x)``, multiplies that by ``sa(...)`` of itself
    and adds the untouched input back.

    Args:
        channel: number of input channels, forwarded to ``ChannelAttention``.
        reduction: bottleneck reduction factor, forwarded to ``ChannelAttention``.
    """

    def __init__(self, channel, reduction):
        super().__init__()
        self.ca = ChannelAttention(channel=channel, reduction=reduction)
        self.sa = SpatialAttention()

    def init_weights(self):
        """Re-initialise every Conv2d / BatchNorm2d / Linear sub-module in place.

        Uses ``nn.init`` explicitly; the bare name ``init`` that was referenced
        here before is never imported in this notebook.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        residual = x
        out = x * self.ca(x)      # re-weight channels
        out = out * self.sa(out)  # re-weight spatial positions
        return out + residual

In [None]:
class HappyWhaleModel(nn.Module):
    """timm backbone + CBAM attention + GeM pooling + three parallel embedding necks.

    Only ``inference`` is defined here: it returns the concatenation of the three
    neck embeddings and a fourth embedding computed from them by ``self.fc``.
    The margin / binary heads are instantiated so that their parameters exist
    under the same names, but ``inference`` does not call them.

    Args:
        model_name: one of ``tf_efficientnet_b5`` / ``b6`` / ``b7`` or
            ``seresnext50_32x4d``.
        pretrained: forwarded to ``timm.create_model``.

    Raises:
        ValueError: if ``model_name`` is not one of the supported backbones.
    """

    _EFFICIENTNETS = ("tf_efficientnet_b5", "tf_efficientnet_b6", "tf_efficientnet_b7")

    def __init__(self, model_name, pretrained=False):
        super(HappyWhaleModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)

        # Replace the backbone's own pooling and classifier with pass-throughs;
        # attention and GeM pooling are applied to its output in `inference`.
        if model_name in self._EFFICIENTNETS:
            in_features = self.model.classifier.in_features
            self.model.classifier = nn.Identity()
            self.model.global_pool = nn.Identity()
            print (model_name)
        elif model_name == 'seresnext50_32x4d':
            in_features = self.model.fc.in_features
            self.model.global_pool = nn.Identity()
            self.model.fc = nn.Identity()
            for param in self.model.layer1.parameters():
                param.requires_grad = False
        else:
            # An explicit exception still fires when assertions are disabled and
            # lists every backbone that is actually handled above.
            raise ValueError(
                f"unsupported model_name {model_name!r}: only support "
                "tf_efficientnet_b5, tf_efficientnet_b6, tf_efficientnet_b7, seresnext50_32x4d"
            )

        self.pooling = GeM()

        self.attention = CBAMBlock(channel=in_features, reduction=4)

        embedding_size = 1750
        #embedding_size = in_features
        # Three structurally identical necks with independent parameters.
        self.neck1 = self._make_neck(in_features, embedding_size)
        self.neck2 = self._make_neck(in_features, embedding_size)
        self.neck3 = self._make_neck(in_features, embedding_size)

        self.arc= ArcMarginHead(embedding_size, CONFIG.num_classes)
        self.cos = CosMarginHead(embedding_size, CONFIG.num_classes)
        self.binary = BinaryHead(embedding_size, CONFIG.num_classes)

        self.b4 = BinaryHead(embedding_size, CONFIG.num_classes)
        # Fuses the three neck embeddings into a fourth one of the same width.
        self.fc = nn.Sequential(
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(embedding_size * 3, embedding_size, bias=True),
            nn.BatchNorm1d(embedding_size)
        )

    @staticmethod
    def _make_neck(in_features, embedding_size):
        """Return one Dropout -> Linear -> BatchNorm1d -> PReLU neck."""
        return nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_features, embedding_size, bias=True),
            nn.BatchNorm1d(embedding_size),
            nn.PReLU(),
        )

    def inference(self, images):
        """Return the concatenated embedding (four blocks of ``embedding_size``) for ``images``."""
        features = self.model(images)

        features = self.attention(features)
        gem_fea = self.pooling(features).flatten(1)

        embedding1 = self.neck1(gem_fea)
        embedding2 = self.neck2(gem_fea)
        embedding3 = self.neck3(gem_fea)
        embedding = torch.cat((embedding1, embedding2, embedding3), dim = 1)

        embedding4 = self.fc(embedding)
        embedding = torch.cat((embedding1, embedding2, embedding3, embedding4), dim = 1)
        return embedding



In [None]:
def l2_norm(input,axis=1):
    """Divide ``input`` by its L2 norm taken along ``axis`` (norm keeps that dimension).

    NOTE: this helper is defined in more than one cell of this notebook; the
    definition executed last is the one in effect.
    """
    return torch.div(input, input.norm(2, axis, True))

In [None]:
# Fallback ids: EmbedFeatureCom.get_predictions appends entries from this list
# when an image ends up with fewer than five predictions.
sample_list = ['938b7e931166', '5bf17305f073', '7593d2aee842', '7362d7a01d00','956562ff2888']
from sklearn.neighbors import NearestNeighbors

class EmbedFeatureCom():
    """Embed images with one or more trained models and rank identities by nearest neighbours.

    The train images (plus one mean "centre" embedding per label) form a gallery
    that is indexed with a cosine ``NearestNeighbors`` search. Query images are
    matched against it and turned into five predictions per image, where
    ``'new_individual'`` is placed first or second depending on a confidence
    threshold.

    Args:
        weights: iterable of checkpoint paths; one ``HappyWhaleModel`` is built
            and loaded per path.
        resuled_csv_path: where ``get_test_Neighbors`` writes the prediction csv.
    """

    def __init__(self, weights, resuled_csv_path):
        target_df = pd.read_csv(CONFIG.label_path)

        self.resuled_csv_path = resuled_csv_path

        # id <-> individual_name lookup tables built from the label csv.
        self.inverse_encodings = target_df.set_index(["id"])["individual_name"].to_dict()
        self.encodings = target_df.set_index(["individual_name"])["id"].to_dict()

        # Train files are addressed as "<TRAIN_DIR><individual_id>_<image>".
        self.train_df = pd.read_csv(CONFIG.train_csv)
        self.train_df['file_path'] = (
            CONFIG.TRAIN_DIR
            + self.train_df['individual_id'].astype("str")
            + "_"
            + self.train_df['image']
        )

        self.test_df = pd.read_csv(CONFIG.sample_submission)
        self.test_df['file_path'] = CONFIG.TEST_DIR + self.test_df['image']
        self.test_df["predictions"] = ''

        #self.train_df = self.train_df.iloc[:1000]
        #self.test_df = self.test_df.iloc[:100]

        self.models = []
        for weight in weights:
            model = HappyWhaleModel(CONFIG.model_name)
            # map_location lets a checkpoint saved on another device load onto
            # CONFIG.device (e.g. a GPU checkpoint on a CPU-only machine).
            # NOTE: torch.load unpickles the file - only load trusted checkpoints.
            model.load_state_dict(torch.load(weight, map_location=CONFIG.device))
            model.to(CONFIG.device)
            model.eval()
            self.models.append(model)

    def map_per_image(self, label, predictions):
        """Return ``1 / rank`` of ``label`` within the first five predictions, or 0.0 if absent."""
        try:
            return 1 / (predictions[:5].index(label) + 1)
        except ValueError:
            return 0.0

    def get_loaders(self, df, mode, trans_model = "test"):
        """Wrap ``df`` in a ``HappyWhaleDataset`` and return a non-shuffling DataLoader."""
        dataset = HappyWhaleDataset(df, mode, transforms=data_transforms[trans_model])
        loader = DataLoader(dataset, batch_size=CONFIG.valid_batch_size,
                            num_workers=CONFIG.num_workers, shuffle=False, pin_memory=True)

        return loader

    def _embed_batch(self, images, reduce):
        """Embed one batch with every model, L2-normalise, and combine across models with ``reduce``."""
        images = images.to(CONFIG.device, dtype=torch.float)
        per_model = [
            l2_norm(model.inference(images)).cpu().numpy()
            for model in self.models
        ]
        return reduce(np.stack(per_model), axis=0)

    def get_embeddings_train(self, data_loader):
        """Embed every batch of a 3-tuple loader (images, targets, names).

        Returns:
            ``(embeddings, labels, names)`` as concatenated numpy arrays.
        """
        targets_container  = []
        embeddings_container  = []
        names_container = []
        with torch.no_grad():
            for images, targets, image_names in tqdm(data_loader, total=len(data_loader)):
                # NOTE(review): models are combined with an element-wise max here,
                # while get_embeddings_test uses the mean. With a single model the
                # two agree; confirm the difference is intended for ensembles.
                embeddings_container.append(self._embed_batch(images, np.max))
                targets_container.append(np.array(targets))
                names_container.append(np.array(image_names))

            embeddings_container = np.concatenate(embeddings_container)
            labels_container = np.concatenate(targets_container)
            names_container = np.concatenate(names_container)

        return embeddings_container, labels_container, names_container

    def get_embeddings_test(self, data_loader):
        """Embed every batch of a 5-tuple TTA loader (images, targets, names, flipped, rotated).

        For each batch the original, flipped and rotated views are embedded in
        that order, each combined across models with the mean; targets and names
        are repeated once per view.

        Returns:
            ``(embeddings, labels, names)`` as concatenated numpy arrays.
        """
        targets_container  = []
        embeddings_container  = []
        names_container = []
        with torch.no_grad():
            for images, targets, image_names, images_flip, images_rotate in tqdm(data_loader, total=len(data_loader)):
                for view in (images, images_flip, images_rotate):
                    embeddings_container.append(self._embed_batch(view, np.mean))
                    targets_container.append(np.array(targets))
                    names_container.append(np.array(image_names))

            embeddings_container = np.concatenate(embeddings_container)
            labels_container = np.concatenate(targets_container)
            names_container = np.concatenate(names_container)

        return embeddings_container, labels_container, names_container

    def get_Neighbors_dis(self, neigh, embeddings, names, train_targets):
        """Query ``neigh`` for ``CONFIG.KNN`` neighbours of every embedding.

        Returns:
            Long-format DataFrame with one row per (query, neighbour) pair and
            the columns ``target``, ``distances``, ``image``, ``confidence``
            where ``confidence = 1 - distances``.
        """
        test_nn_distances, test_nn_idxs = neigh.kneighbors(embeddings, CONFIG.KNN, return_distance=True)

        # Build the whole table in one shot instead of one small frame per query.
        test_nn_idxs = np.asarray(test_nn_idxs)
        neighbours_per_query = test_nn_idxs.shape[1]
        test_df = pd.DataFrame({
            'target': np.asarray(train_targets)[test_nn_idxs].ravel().astype('int'),
            'distances': np.asarray(test_nn_distances).ravel().astype('float'),
            'image': np.repeat(np.asarray(names), neighbours_per_query),
        })
        test_df['confidence'] = 1 - test_df['distances']

        return test_df

    def get_center(self, embeddings, type):
        """Stack a list of ``(1, D)`` arrays and reduce over rows with ``"mean"`` or ``"max"``."""
        embeddings = np.concatenate(embeddings)
        if type == "mean":
            embeddings = np.mean(embeddings, axis=0)
        if type == "max":
            embeddings = np.max(embeddings, axis=0)
        return embeddings

    def get_center_features(self, train_embeddings, train_targets, type = "mean"):
        """Compute one centre embedding per distinct label.

        Returns:
            ``(centres, labels)`` - arrays ordered by first appearance of each label.
        """
        labels_to_embeddings = {}
        for i, label in enumerate(train_targets):
            labels_to_embeddings.setdefault(label, []).append(train_embeddings[i].reshape(1, -1))

        for label in labels_to_embeddings.keys():
            labels_to_embeddings[label] = self.get_center(labels_to_embeddings[label], type)

        return np.array(list(labels_to_embeddings.values())), np.array(list(labels_to_embeddings.keys()))

    def _fit_gallery(self, embeddings, targets):
        """Append per-label mean centres to the gallery and fit a cosine kNN index on it."""
        center_embeddings, center_targets = self.get_center_features(embeddings, targets, type="mean")

        gallery_embeddings = np.concatenate([embeddings, center_embeddings])
        gallery_targets = np.concatenate([targets, center_targets])

        neigh = NearestNeighbors(n_neighbors=CONFIG.KNN, metric='cosine')
        neigh.fit(gallery_embeddings)
        return neigh, gallery_targets

    def _rank_candidates(self, dis_df):
        """Keep the best confidence per (image, target), sort descending, decode target ids to names."""
        ranked = dis_df.groupby(['image', 'target']).confidence.max().reset_index()
        ranked = ranked.sort_values('confidence', ascending=False).reset_index(drop=True)
        ranked['target'] = ranked['target'].map(self.inverse_encodings)
        return ranked

    def get_test_Neighbors(self, name, threshold=0.7):
        """Predict identities for the test set and write the submission csv.

        Side effects: writes ``"<name>_test_df.csv"`` (raw neighbour table) and
        ``self.resuled_csv_path`` (columns ``image``, ``predictions``).

        Args:
            name: prefix for the raw neighbour csv.
            threshold: confidence above which the best match is ranked before
                ``'new_individual'``.
        """
        train_loader = self.get_loaders(self.train_df, mode="test_valid")
        train_embeddings, train_targets, _ = self.get_embeddings_train(train_loader)
        neigh, gallery_targets = self._fit_gallery(train_embeddings, train_targets)

        test_loader = self.get_loaders(self.test_df, mode="test_tta")
        test_embeddings, _, test_names = self.get_embeddings_test(test_loader)
        test_df = self.get_Neighbors_dis(neigh, test_embeddings, test_names, gallery_targets)
        test_df.to_csv("{}_test_df.csv".format(name), index=False)

        test_df = self._rank_candidates(test_df)

        predictions = self.get_predictions(test_df, threshold=threshold)

        for x in predictions:
            predictions[x] = ' '.join(predictions[x])

        predictions = pd.Series(predictions).reset_index()
        predictions.columns = ['image', 'predictions']
        predictions.to_csv(self.resuled_csv_path, index=False)

    def get_predictions(self, dis_df, threshold=0.2):
        """Turn a confidence-sorted candidate table into up to five predictions per image.

        The first row seen for an image decides whether ``'new_individual'`` goes
        second (confidence above ``threshold``) or first. Later rows are appended
        until five entries exist. Short lists are padded from ``sample_list``.

        Args:
            dis_df: DataFrame with ``image``, ``target`` and ``confidence`` columns,
                already sorted by descending confidence.
            threshold: confidence cut-off described above.

        Returns:
            dict mapping image name to a list of at most five predictions.
        """
        predictions = {}

        for row in dis_df.itertuples(index=False):

            if row.image in predictions:
                if len(predictions[row.image]) == 5:
                    continue
                predictions[row.image].append(row.target)
            elif row.confidence > threshold:
                predictions[row.image] = [row.target, 'new_individual']
            else:
                predictions[row.image] = ['new_individual', row.target]

        for x in predictions:
            if len(predictions[x]) < 5:
                # Pad only with ids this image does not already list. Membership
                # must be tested against this image's list, not the dict of images.
                remaining = [y for y in sample_list if y not in predictions[x]]
                predictions[x] = predictions[x] + remaining
                predictions[x] = predictions[x][:5]

        return predictions

    def get_Neighbors_score(self, name, fold):
        """Estimate the best ``new_individual`` threshold on a 10% hold-out of the train set.

        Prints the score for every threshold from 0.60 to 0.89 and writes the raw
        neighbour table to ``"<name>_train0.1_test_df.csv"``.

        Args:
            name: prefix for the raw neighbour csv.
            fold: accepted for compatibility; the hold-out is a fixed random
                sample and does not depend on it.

        Returns:
            ``(best_threshold, best_cv)``; ``best_threshold`` is ``None`` if no
            threshold scored above 0.
        """
        valid_proportion = 0.1

        df_valid = self.train_df.sample(frac=valid_proportion, replace=False, random_state=1).copy()
        df_train = self.train_df[~self.train_df['image'].isin(df_valid['image'])].copy()

        train_loader = self.get_loaders(df_train, mode="test_valid")
        valid_loader = self.get_loaders(df_valid, mode="test_valid")
        train_embeddings, train_targets, _ = self.get_embeddings_train(train_loader)
        valid_embeddings, valid_targets, valid_names = self.get_embeddings_train(valid_loader)

        neigh, gallery_targets = self._fit_gallery(train_embeddings, train_targets)

        test_df = self.get_Neighbors_dis(neigh, valid_embeddings, valid_names, gallery_targets)

        test_df.to_csv(name + "_train0.1_test_df.csv", index=False)

        test_df = self._rank_candidates(test_df)

        # Hold-out labels that never occur in the gallery count as 'new_individual'.
        allowed_targets = set([self.inverse_encodings[x] for x in np.unique(gallery_targets)])
        val_targets_df = pd.DataFrame(np.stack([valid_names, valid_targets], axis=1), columns=['image', 'target'])
        val_targets_df['target'] = val_targets_df['target'].astype(int).map(self.inverse_encodings)

        val_targets_df.loc[~val_targets_df.target.isin(allowed_targets), 'target'] = 'new_individual'

        ## Compute CV
        best_cv = 0
        best_th = None
        for th in [0.01 * x for x in range(60, 90)]:
            all_preds = self.get_predictions(test_df, threshold=th)
            val_targets_df[th] = [
                self.map_per_image(target, all_preds[image])
                for image, target in zip(val_targets_df['image'], val_targets_df['target'])
            ]
            cv = val_targets_df[th].mean()
            print(f"CV at threshold {round(th, 2)}: {cv}")
            if cv > best_cv:
                best_th = th
                best_cv = cv

        print(f"Best threshold: {best_th} (CV {best_cv})")
        return best_th, best_cv

                
# Checkpoint(s) to evaluate; add more paths to `weights` to ensemble several models.
weight1 = "../input/v38-b7-512-29/best_v38_b7_512_fold0_epoch_29_Loss0.9888.bin"
weights = [weight1]

# Run name = checkpoint file name without its 4-character ".bin" suffix.
name = weight1.rsplit('/', 1)[-1][:-4]
resuled_csv_path = f"./{name}_0.7.csv"

comper = EmbedFeatureCom(weights, resuled_csv_path)
comper.get_Neighbors_score(name, fold=0)
#comper.get_test_Neighbors(name)