In [None]:
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch
import torchvision.models as models
import math
from torch.nn import init
from torchvision import transforms
import torchvision.transforms as transforms
import time
from torch.backends import cudnn
import numpy as np
import json
import cv2

import sys 
sys.path.insert(0, '../input/resnestpy/')
from resnest import resnest50

class ModelResnest50_Embeding(nn.Module):
    """ResNeSt-50 trunk with a 512-d embedding head and a linear classifier.

    The trunk is every child of `resnest50` except the last two. Its output is
    average-pooled to 1x1, projected from 2048 to 512 features (`fc5`),
    batch-normalised (`bn5`) and finally fed to `classifier`.

    Args:
        enet: Not used by this implementation.
        num_class: Number of output classes of the classifier layer.
        out_dim: Not used by this implementation.
    """

    def __init__(self, enet="Resnest", num_class=203093, out_dim=1):
        super().__init__()
        trunk_channels = 2048
        embedding_dim = 512

        # Keep everything but the last two children of the backbone.
        backbone = resnest50(pretrained=False)
        trunk_layers = list(backbone.children())[:-2]
        self.conv = nn.Sequential(*trunk_layers)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))  # original note: 194390
        print("fc , ", num_class)

        # Classification layer on top of the embedding.
        self.classifier = nn.Linear(
            in_features=embedding_dim, out_features=num_class, bias=True)
        nn.init.xavier_uniform_(self.classifier.weight)
        nn.init.constant_(self.classifier.bias, 0)

        # Projection from pooled trunk features to the embedding space.
        self.fc5 = nn.Linear(trunk_channels, embedding_dim)
        nn.init.xavier_uniform_(self.fc5.weight)
        nn.init.constant_(self.fc5.bias, 0)

        self.bn5 = nn.BatchNorm1d(embedding_dim)
        # Initialise the BN scale (gamma) from a normal distribution N(1, 0.02).
        self.bn5.weight.data.normal_(1.0, 0.02)
        # Initialise the BN shift (beta) to 0.
        self.bn5.bias.data.fill_(0)

    def forward(self, x):
        """Returns `(logits, feat)`: classifier output and the BN'd embedding."""
        feature_map = self.conv(x)
        batch_size = feature_map.size(0)
        pooled = self.avg_pool(feature_map).view(batch_size, -1)
        feat = self.bn5(self.fc5(pooled))
        logits = self.classifier(feat)

        return logits, feat

# Evaluation-time preprocessing: array -> PIL image -> 512x512 resize ->
# tensor -> per-channel normalisation.
# NOTE: `transform_test` is assigned again in a later cell (CenterCrop
# variant), so this definition only matters if that cell is not run.
transform_test = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize([512, 512]),
    # transforms.CenterCrop((448,448)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

In [None]:
import os

def com_cos_dist(x1, x2):
    """Returns the cosine of the angle between vectors `x1` and `x2`.

    The value is `dot(x1, x2) / (|x1| * |x2|)`; no guard is applied for
    zero-length vectors.
    """
    dot_product = np.dot(x1, x2)
    norm_product = np.linalg.norm(x1) * np.linalg.norm(x2)
    return dot_product / norm_product
    

#os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Side length of the centre crop; also read by Feature_Extract.run below.
cropsize = 448
# Only referenced by the disabled Resize step in the pipeline below.
resize = 512
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Evaluation-time preprocessing: array -> PIL image -> centre crop ->
# tensor -> per-channel normalisation.
transform_test = transforms.Compose([
    transforms.ToPILImage(),
    # transforms.Resize(resize),
    transforms.CenterCrop(cropsize),
    transforms.ToTensor(),
    normalize,
])


def get_affine_transform(size1, size2):
    """Builds the 2x3 affine matrix that letterboxes `size1` into `size2`.

    Both sizes are `(width, height)` pairs. A single scale factor (the smaller
    of the two axis ratios) is used, and the source centre is mapped onto the
    destination centre.

    Args:
        size1: Source `(width, height)`.
        size2: Destination `(width, height)`.

    Returns:
        The matrix returned by `cv2.getAffineTransform` for three point pairs:
        centre, left-centre and top-centre.
    """
    src_w, src_h = size1[0], size1[1]
    dst_w, dst_h = size2[0], size2[1]
    scale = min(dst_w * 1.0 / src_w, dst_h * 1.0 / src_h)

    src_points = np.array(
        [
            [src_w / 2.0, src_h / 2.0],  # centre
            [0.0, src_h / 2.0],          # middle of the left edge
            [src_w / 2.0, 0.0],          # middle of the top edge
        ],
        dtype=np.float32)
    dst_points = np.array(
        [
            [dst_w / 2.0, dst_h / 2.0],                          # centre
            [dst_w / 2.0 - scale * src_w / 2.0, dst_h / 2.0],    # left border of the scaled image
            [dst_w / 2.0, dst_h / 2.0 - scale * src_h / 2.0],    # top border of the scaled image
        ],
        dtype=np.float32)

    return cv2.getAffineTransform(src_points, dst_points)


class Feature_Extract():
    """Loads a `ModelResnest50_Embeding` checkpoint and extracts embeddings.

    Attributes:
        model_dict: The model in eval mode (kept on CPU).
        Size: `(width, height)` of the letterboxed image fed to `transforms`.
        transforms: Callable applied to the letterboxed RGB array; expected to
            return a `torch.Tensor`.
    """

    # File name used when no checkpoint path is supplied.
    DEFAULT_CKPT_NAME = "resnest-0038.pth"

    def __init__(self, ckpt_path=None, transforms = None ):
        """Builds the model and restores its weights from `ckpt_path`.

        Args:
            ckpt_path: Path to a checkpoint whose `'model'` entry is a state
                dict. When None, `DEFAULT_CKPT_NAME` in the current working
                directory is used.
            transforms: Optional preprocessing callable stored for `run`.
        """
        if ckpt_path is None:
            # The previous fallback referenced an undefined name and raised
            # NameError; resolving against the working directory is an
            # assumption -- pass `ckpt_path` explicitly when in doubt.
            ckpt_path = os.path.join(os.getcwd(), self.DEFAULT_CKPT_NAME)
        NUM_CLASS = 203093
        model = ModelResnest50_Embeding(num_class=NUM_CLASS)

        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        ckpt = torch.load(ckpt_path, map_location="cpu")
        model.load_state_dict(self._strip_module_prefix(ckpt['model']))
        print("finish resume ... ")

        #self.model_dict = model.cuda().eval()
        self.model_dict = model.eval()
        self.Size = (512, 512)
        self.transforms = transforms

    @staticmethod
    def _strip_module_prefix(state_dict, prefix="module."):
        """Returns a copy of `state_dict` with a leading `prefix` removed from keys.

        Keys that do not start with `prefix` are kept unchanged, so state
        dicts saved with or without the prefix both load correctly.
        """
        from collections import OrderedDict

        cleaned = OrderedDict()
        for key, value in state_dict.items():
            if key.startswith(prefix):
                key = key[len(prefix):]
            cleaned[key] = value
        return cleaned

    def run(self, cv2_img):
        """Extracts the embedding of one BGR image.

        Args:
            cv2_img: HxWx3 array in BGR channel order (as from `cv2.imread`).

        Returns:
            Dict with key `"fea"` holding the second output of the model
            (the embedding tensor).

        Raises:
            ValueError: If `cv2_img` is None (e.g. the image failed to load).
            TypeError: If the preprocessing did not produce a `torch.Tensor`.
        """
        if cv2_img is None:
            raise ValueError("cv2_img is None; the image could not be read")

        img = cv2_img[:, :, ::-1]  # BGR -> RGB
        img_w, img_h = img.shape[1], img.shape[0]
        trans_input = get_affine_transform((img_w, img_h), self.Size)
        image = cv2.warpAffine(img, trans_input, self.Size, flags=cv2.INTER_LINEAR)

        if self.transforms is not None:
            image = self.transforms(image)
        if not torch.is_tensor(image):
            raise TypeError(
                "Feature_Extract.run needs `transforms` that return a torch.Tensor, "
                "got {}".format(type(image).__name__))

        # Add the batch axis without relying on a module-level crop size.
        if image.dim() == 3:
            image = image.unsqueeze(0)
        #image = image.cuda()

        res = {}
        with torch.no_grad():
            _, logits_m = self.model_dict(image)
            res["fea"] = logits_m
            return res

In [None]:
import copy
import csv
import gc
import operator
import os
import pathlib
import shutil
import cv2

import numpy as np
import PIL
import pydegensac
from scipy import spatial
import tensorflow as tf
import sys
import json
#sys.path.append("/root/mnt/shuhuigao/workspace/backup_from_venus/classification/extract_retrieval_feature/")

import torch
#from extract_retrieval_feature import inference
#from extract_retrieval_feature import tfs
import torchvision.transforms as transforms

# Number of columns of the embedding matrix allocated in
# extract_global_features (matches the 512-d output of the model's fc5 layer).
NUM_EMBEDDING_DIMENSIONS = 512 
# Dataset parameters:
INPUT_DIR = os.path.join('../', 'input/')
#INPUT_DIR = os.path.join('', '')

DATASET_DIR = os.path.join(INPUT_DIR, 'landmark-recognition-2021')
#DATASET_DIR = os.path.join(INPUT_DIR, '')
TEST_IMAGE_DIR = os.path.join(DATASET_DIR, 'test')
TRAIN_IMAGE_DIR = os.path.join(DATASET_DIR, 'train')
TRAIN_LABELMAP_PATH = os.path.join(DATASET_DIR, 'train.csv')

# DEBUGGING PARAMS:
# NOTE: NUM_PUBLIC_TRAIN_IMAGES is only referenced from the disabled
# (string-literal) section of main() in this file.
NUM_PUBLIC_TRAIN_IMAGES = 1580470  # Used to detect if in session or re-run.
MAX_NUM_EMBEDDINGS = -1  # Set to > 1 to subsample dataset while debugging.

# Retrieval & re-ranking parameters:
# NUM_TO_RERANK: nearest train neighbours kept per test image in get_predictions.
NUM_TO_RERANK = 6
TOP_K = 3  # Number of retrieved images used to make prediction for a test image.

# RANSAC parameters:
# MAX_INLIER_SCORE caps the inlier count in get_total_score; the other three
# are passed to pydegensac.findHomography in get_num_inliers.
MAX_INLIER_SCORE = 26
MAX_REPROJECTION_ERROR = 6.0
MAX_RANSAC_ITERATIONS = 900000
HOMOGRAPHY_CONFIDENCE = 0.95

# DELG model:
# The model load below is commented out, so DELG_MODEL is not defined by
# this section.
SAVED_MODEL_DIR = '../input/delg-saved-models/local_and_global'
#DELG_MODEL = tf.saved_model.load(SAVED_MODEL_DIR)
DELG_IMAGE_SCALES_TENSOR = tf.convert_to_tensor([0.70710677, 1.0, 1.4142135])
DELG_SCORE_THRESHOLD_TENSOR = tf.constant(175.)
DELG_INPUT_TENSOR_NAMES = [
    'input_image:0', 'input_scales:0', 'input_abs_thres:0'
]
# The block below is a bare string literal, i.e. disabled code: nothing in it
# is executed. In particular LOCAL_FEATURE_EXTRACTION_FN and
# LOCAL_FEATURE_NUM_TENSOR, which extract_local_features uses, are not
# defined in the visible part of this file.
"""
# Global feature extraction:
NUM_EMBEDDING_DIMENSIONS = 2048
GLOBAL_FEATURE_EXTRACTION_FN = DELG_MODEL.prune(DELG_INPUT_TENSOR_NAMES,
                                                ['global_descriptors:0'])

# Local feature extraction:
LOCAL_FEATURE_NUM_TENSOR = tf.constant(1000)
LOCAL_FEATURE_EXTRACTION_FN = DELG_MODEL.prune(
    DELG_INPUT_TENSOR_NAMES + ['input_max_feature_num:0'],
    ['boxes:0', 'features:0'])
"""

def to_hex(image_id) -> str:
    """Formats an integer id as lowercase hex, zero-padded to 16 characters."""
    return format(image_id, '016x')


def get_image_path(subset, image_id):
    """Returns the path of image `image_id` inside `subset` of the dataset.

    Images are sharded by the first three hex characters of their id:
    `<DATASET_DIR>/<subset>/<c0>/<c1>/<c2>/<hex id>.jpg`.
    """
    hex_name = to_hex(image_id)
    shard_dirs = (hex_name[0], hex_name[1], hex_name[2])
    file_name = hex_name + '.jpg'
    return os.path.join(DATASET_DIR, subset, *shard_dirs, file_name)


def load_image_tensor(image_path):
    """Reads the image at `image_path` as RGB and returns it as a TF tensor."""
    rgb_image = PIL.Image.open(image_path).convert('RGB')
    pixel_array = np.array(rgb_image)
    return tf.convert_to_tensor(pixel_array)


def extract_global_features(image_root_dir):
    """Extracts embeddings for all the images in given `image_root_dir`.

    Every `*.jpg` found recursively under `image_root_dir` is read with
    OpenCV, passed through `Feature_Extract` and L2-normalised. When
    `MAX_NUM_EMBEDDINGS` is positive only that many images are processed.

    Returns:
        `(ids, embeddings)`: `ids` is a list of integer image ids parsed from
        the hexadecimal file names; `embeddings` is an array of shape
        `(len(ids), NUM_EMBEDDING_DIMENSIONS)`.
    """
    crop_side = 448
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # Preprocessing applied to each letterboxed image before the model.
    transform_test = transforms.Compose([
        transforms.ToPILImage(),
        transforms.CenterCrop(crop_side),
        transforms.ToTensor(),
        normalize,
    ])

    model = Feature_Extract(ckpt_path="../input/ckpt-init-true/resnest_0015.pth", transforms=transform_test)
    image_paths = list(pathlib.Path(image_root_dir).rglob('*.jpg'))

    num_embeddings = len(image_paths)
    if MAX_NUM_EMBEDDINGS > 0:
        num_embeddings = min(MAX_NUM_EMBEDDINGS, num_embeddings)

    ids = [None] * num_embeddings
    embeddings = np.empty((num_embeddings, NUM_EMBEDDING_DIMENSIONS))

    for row, image_path in enumerate(image_paths[:num_embeddings]):
        # File names are the hexadecimal image id followed by the extension.
        ids[row] = int(image_path.name.split('.')[0], 16)

        bgr_image = cv2.imread(str(image_path))
        descriptor = model.run(bgr_image)["fea"]
        # L2-normalise; the epsilon avoids a division by zero.
        descriptor = descriptor / (torch.norm(descriptor, keepdim=True) + 1e-6)
        embeddings[row, :] = descriptor.view(-1).detach().float().cpu().numpy()

    return ids, embeddings


def extract_local_features(image_path):
    """Extracts local features for the given `image_path`.

    Returns:
        `(keypoints, descriptors)` as numpy arrays. Keypoints are the mean of
        columns [0, 1] and columns [2, 3] of the first extractor output;
        descriptors are the second output, L2-normalised per row.
    """
    image_tensor = load_image_tensor(image_path)

    features = LOCAL_FEATURE_EXTRACTION_FN(image_tensor, DELG_IMAGE_SCALES_TENSOR,
                                           DELG_SCORE_THRESHOLD_TENSOR,
                                           LOCAL_FEATURE_NUM_TENSOR)
    boxes = features[0]

    # Shape: (N, 2)
    first_corner = tf.gather(boxes, [0, 1], axis=1)
    second_corner = tf.gather(boxes, [2, 3], axis=1)
    keypoints = tf.divide(tf.add(first_corner, second_corner), 2.0).numpy()

    # Shape: (N, 128)
    normalized = tf.nn.l2_normalize(features[1], axis=1, name='l2_normalization')
    descriptors = normalized.numpy()

    return keypoints, descriptors


def get_putative_matching_keypoints(test_keypoints,
                                    test_descriptors,
                                    train_keypoints,
                                    train_descriptors,
                                    max_distance=0.9):
    """Finds matches from `test_descriptors` to KD-tree of `train_descriptors`.

    Args:
        test_keypoints: Array of test keypoints, one row per descriptor.
        test_descriptors: Array of test descriptors.
        train_keypoints: Array of train keypoints, one row per descriptor.
        train_descriptors: Array of train descriptors used to build the tree.
        max_distance: Largest descriptor distance accepted as a match.

    Returns:
        `(test_matching_keypoints, train_matching_keypoints)`: row-aligned
        arrays with the keypoints of every test descriptor that found a train
        neighbour within `max_distance`. When nothing matches, both arrays
        have zero rows but keep the keypoint column dimension.
    """
    train_descriptor_tree = spatial.cKDTree(train_descriptors)
    _, matches = train_descriptor_tree.query(
        test_descriptors, distance_upper_bound=max_distance)

    test_kp_count = test_keypoints.shape[0]
    train_kp_count = train_keypoints.shape[0]

    # The tree reports a missing neighbour with an index equal to the number
    # of train points; keep only the rows that found a real neighbour.
    matches = np.asarray(matches)[:test_kp_count]
    has_match = matches != train_kp_count

    test_matching_keypoints = test_keypoints[has_match]
    train_matching_keypoints = train_keypoints[matches[has_match]]

    return test_matching_keypoints, train_matching_keypoints


def get_num_inliers(test_keypoints, test_descriptors, train_keypoints,
                    train_descriptors):
    """Returns the number of RANSAC inliers.

    Putative matches are found first; 0 is returned when there are 4 or fewer
    of them, or when the homography estimation raises `LinAlgError`.
    """
    matched_test, matched_train = get_putative_matching_keypoints(
        test_keypoints, test_descriptors, train_keypoints, train_descriptors)

    # Min keypoints supported by `pydegensac.findHomography()`
    too_few_matches = matched_test.shape[0] <= 4
    if too_few_matches:
        return 0

    try:
        _, inlier_mask = pydegensac.findHomography(matched_test, matched_train,
                                                   MAX_REPROJECTION_ERROR,
                                                   HOMOGRAPHY_CONFIDENCE,
                                                   MAX_RANSAC_ITERATIONS)
    except np.linalg.LinAlgError:  # When det(H)=0, can't invert matrix.
        return 0

    # `astype` already returns a fresh array, so no extra copy is needed.
    inlier_total = inlier_mask.astype(np.float32).sum()
    return int(inlier_total)


def get_total_score(num_inliers, global_score):
    """Adds the inlier-based local score to `global_score`.

    The local score is `num_inliers` capped at `MAX_INLIER_SCORE` and divided
    by `MAX_INLIER_SCORE`.
    """
    capped_inliers = min(num_inliers, MAX_INLIER_SCORE)
    return capped_inliers / MAX_INLIER_SCORE + global_score


def rescore_and_rerank_by_num_inliers(test_image_id,
                                      train_ids_labels_and_scores):
    """Returns rescored and sorted training images by local feature extraction.

    `train_ids_labels_and_scores` is a list of `(train_id, label, score)`
    tuples. It is updated in place (scores replaced, then sorted by
    descending score) and the same list object is returned.
    """
    test_image_path = get_image_path('test', test_image_id)
    test_keypoints, test_descriptors = extract_local_features(test_image_path)

    for position, candidate in enumerate(train_ids_labels_and_scores):
        train_image_id, label, global_score = candidate

        train_keypoints, train_descriptors = extract_local_features(
            get_image_path('train', train_image_id))

        num_inliers = get_num_inliers(test_keypoints, test_descriptors,
                                      train_keypoints, train_descriptors)
        train_ids_labels_and_scores[position] = (
            train_image_id, label, get_total_score(num_inliers, global_score))

    # Highest total score first.
    train_ids_labels_and_scores.sort(key=operator.itemgetter(2), reverse=True)

    return train_ids_labels_and_scores


def load_labelmap():
    """Reads `TRAIN_LABELMAP_PATH` and maps each `id` to its `landmark_id`."""
    labelmap = {}
    with open(TRAIN_LABELMAP_PATH, mode='r') as csv_file:
        for row in csv.DictReader(csv_file):
            labelmap[row['id']] = row['landmark_id']

    return labelmap


def get_prediction_map(test_ids, train_ids_labels_and_scores):
    """Makes dict from test ids and ranked training ids, labels, scores.

    For every test id the scores of its first `TOP_K` candidates are summed
    per label, and the label with the largest sum wins.

    Returns:
        Dict keyed by the hex test id with values `{'score': ..., 'class': ...}`.
    """
    prediction_map = {}

    for test_index, test_id in enumerate(test_ids):
        top_candidates = train_ids_labels_and_scores[test_index][:TOP_K]

        score_by_label = {}
        for _, label, score in top_candidates:
            score_by_label[label] = score_by_label.get(label, 0) + score

        best_label, best_score = max(score_by_label.items(),
                                     key=operator.itemgetter(1))

        prediction_map[to_hex(test_id)] = {'score': best_score, 'class': best_label}

    return prediction_map

def load_train_feature(f_in_path, max_lines=200000):
    """Loads pre-computed train embeddings from a tab-separated text file.

    Each non-blank line holds four tab-separated fields:
    `img_id`, `img_cls`, `img_dir` and a JSON list of floats.

    Args:
        f_in_path: Path of the feature library file.
        max_lines: Maximum number of records to load; `None` loads all of
            them. Defaults to 200000.

    Returns:
        `(list_fea, list_label, list_img_dir)`:
        `list_fea` is a list of float32 arrays (values rounded to 8 decimals),
        `list_label` the class field of each record as a string, and
        `list_img_dir` the integer id parsed from the hexadecimal file name
        at the end of `img_dir`.
    """
    list_fea = []
    list_label = []
    list_img_dir = []

    # The context manager guarantees the file is closed, also on errors.
    with open(f_in_path) as f_in:
        for raw_line in f_in:
            if max_lines is not None and len(list_fea) >= max_lines:
                break

            record = raw_line.strip()
            if not record:
                # Tolerate blank lines (e.g. a trailing newline at the end).
                continue

            _, img_cls, img_dir, fea = record.split("\t")
            values = [round(v, 8) for v in json.loads(fea)]
            list_fea.append(np.array(values).astype(np.float32))
            list_label.append(img_cls)

            # File names are the hexadecimal image id followed by the extension.
            file_stem = img_dir.split("/")[-1].split('.')[0]
            list_img_dir.append(int(file_stem, 16))

    return list_fea, list_label, list_img_dir


def get_predictions(labelmap):
    """Gets predictions using embedding similarity (re-ranking is disabled).

    Test embeddings are extracted from `TEST_IMAGE_DIR`, train embeddings are
    loaded from a pre-computed feature library, and for each test image the
    `NUM_TO_RERANK` nearest train images by cosine distance are kept.

    Args:
        labelmap: Dict mapping hex train image ids to landmark labels.

    Returns:
        `(None, predictions)`, where `predictions` is the dict produced by
        `get_prediction_map`. The first element is None because the
        local-feature verification step is not run.
    """
    # Extract test features.
    test_ids, test_embeddings = extract_global_features(TEST_IMAGE_DIR)

    # Load the pre-computed train features.
    f_in = "../input/lib-init/fea_lib_class_resnest_base_true_15_img_lst_train_round8.txt"
    list_fea, _, train_ids = load_train_feature(f_in)
    train_embeddings = np.array(list_fea)
    del list_fea  # The stacked array is a copy; drop the per-row arrays.

    num_train = train_embeddings.shape[0]
    # argpartition needs kth < len(distances); clamp for small libraries.
    num_candidates = min(NUM_TO_RERANK, num_train)

    train_ids_labels_and_scores = [None] * test_embeddings.shape[0]

    # Using (slow) for-loop, as distance matrix doesn't fit in memory.
    for test_index in range(test_embeddings.shape[0]):
        distances = spatial.distance.cdist(
            test_embeddings[np.newaxis, test_index, :], train_embeddings,
            'cosine')[0]
        if num_candidates < num_train:
            partition = np.argpartition(distances, num_candidates)[:num_candidates]
        else:
            # Fewer train images than requested neighbours: keep them all.
            partition = np.arange(num_train)

        nearest = sorted([(train_ids[p], distances[p]) for p in partition],
                         key=lambda x: x[1])

        # Convert cosine distance to similarity so that larger is better.
        train_ids_labels_and_scores[test_index] = [
            (train_id, labelmap[to_hex(train_id)], 1. - cosine_distance)
            for train_id, cosine_distance in nearest
        ]

    del test_embeddings
    del train_embeddings
    del labelmap
    gc.collect()

    pre_verification_predictions = get_prediction_map(
        test_ids, train_ids_labels_and_scores)
    print("pre_verification_predictions is ", pre_verification_predictions )

    # Local-feature re-ranking is disabled. The disabled step was:
    #
    #     for test_index, test_id in enumerate(test_ids):
    #         train_ids_labels_and_scores[test_index] = rescore_and_rerank_by_num_inliers(
    #             test_id, train_ids_labels_and_scores[test_index])
    #
    #     post_verification_predictions = get_prediction_map(
    #         test_ids, train_ids_labels_and_scores)
    #
    #     return pre_verification_predictions, post_verification_predictions
    return None, pre_verification_predictions


def save_submission_csv(predictions=None):
    """Saves optional `predictions` as submission.csv.

    The csv has columns {id, landmarks}. The landmarks column is a string
    containing the label and score for the id, separated by a space.

    If `predictions` is `None` (default), submission.csv is copied from
    sample_submission.csv in `DATASET_DIR`.

    Args:
        predictions: Optional dict of image ids to dicts with keys
            {class, score}.
    """
    if predictions is None:
        # Dummy submission!
        shutil.copyfile(
            os.path.join(DATASET_DIR, 'sample_submission.csv'), 'submission.csv')
        return

    # newline='' lets the csv module control line endings itself, as its
    # documentation requires (avoids doubled line endings on some platforms).
    with open('submission.csv', 'w', newline='') as submission_csv:
        csv_writer = csv.DictWriter(submission_csv, fieldnames=['id', 'landmarks'])
        csv_writer.writeheader()
        for image_id, prediction in predictions.items():
            label = prediction['class']
            score = prediction['score']
            csv_writer.writerow({'id': image_id, 'landmarks': f'{label} {score}'})


def main():
    """Loads the label map, computes predictions and writes submission.csv."""
    labelmap = load_labelmap()
    # The string literal below is disabled code kept for reference; it is
    # not executed.
    """
    
    num_training_images = len(labelmap.keys())
    print(f'Found {num_training_images} training images.')

    if num_training_images == NUM_PUBLIC_TRAIN_IMAGES:
        print(
            f'Found {NUM_PUBLIC_TRAIN_IMAGES} training images. Copying sample submission.'
        )
        save_submission_csv()
        return

    _, post_verification_predictions = get_predictions(labelmap)
    save_submission_csv(post_verification_predictions)
    """
    # Only the second element of the result is used here.
    predictions = get_predictions(labelmap)[1]
    print("test_fea shape", predictions )
    save_submission_csv(predictions)


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == '__main__':
    main()