In [None]:
'''
20200826
这是一个start code,用来学习reid项目。
仅仅使用官方给的train数据进行训练，没有数据增强，没有使用外部数据。
使用densenet121网络训练和抽取特征。
使用fp16进行训练。
没有re-ranking。
lb 0.2357

代码的基本框架来源于郑哲东大神的开源项目：https://github.com/layumi/Person_reID_baseline_pytorch
环境：pytorch 1.4
训练10个epoch，就能得到0.23x的分数。
'''

In [1]:
import numpy as np
import pandas as pd

In [2]:
# train_df=pd.read_csv('../train/label.txt',sep=':',header=None,names=['img_id','target']).sample(frac=1,random_state=42)
train_df=pd.read_csv('../train/label.txt',sep=':',header=None,names=['img_id','target'])
train_df

Unnamed: 0,img_id,target
0,00040591.png,15178
1,00066284.png,15178
2,00025569.png,15178
3,00024054.png,15178
4,00028221.png,4664
...,...,...
72819,00024418.png,4332
72820,00029689.png,15371
72821,00026579.png,15371
72822,00017589.png,17149


In [3]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72824 entries, 0 to 72823
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   img_id  72824 non-null  object
 1   target  72824 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.1+ MB


In [4]:
train_df['target'].value_counts()

8499     779
1107     749
7753     657
14331    495
17495    477
        ... 
2656       1
5288       1
12839      1
16072      1
12250      1
Name: target, Length: 19658, dtype: int64

In [6]:
# import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import datasets, transforms
import torch.backends.cudnn as cudnn
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
#from PIL import Image
import time
import os
# from model import ft_net, ft_net_dense, ft_net_NAS, PCB
# from random_erasing import RandomErasing
# import yaml
import math
from shutil import copyfile

In [7]:
#fp16
try:
    from apex.fp16_utils import *
    from apex import amp, optimizers
except ImportError: # will be 3.x series
    print('This is not an error. If you want to use low precision, i.e., fp16, please install the apex with cuda support (https://github.com/NVIDIA/apex) and update pytorch to 1.0')
######################################################################

In [8]:
import random
from torch.utils.data import Dataset, DataLoader
import cv2
fp16 = True
DIR_INPUT = '../train'
name = 'reid'# 模型保存的目录

gpu_ids = [0,1]

if not os.path.exists('./model/'+name):
    os.mkdir('./model/'+name)
    
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    torch.backends.cudnn.benchmark = True  # type: ignore

In [9]:
set_seed()

## 设置数据增强

In [12]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
transforms_train = A.Compose([
#     A.Resize(height=256, width=128, p=1.0),
#     A.RandomResizedCrop(height=256, width=128, p=1.0),
#     A.Flip(),
#     A.ShiftScaleRotate(rotate_limit=1.0, p=0.8),

    # Pixels
#     A.OneOf([
#         A.IAAEmboss(p=1.0),
#         A.IAASharpen(p=1.0),
#         A.Blur(p=1.0),
#     ], p=0.5),

    # Affine
#     A.OneOf([
#         A.ElasticTransform(p=1.0),
#         A.IAAPiecewiseAffine(p=1.0)
#     ], p=0.5),

    A.Normalize(p=1.0),
    ToTensorV2(p=1.0),
])

transforms_valid = A.Compose([
#     A.Resize(height=256, width=128, p=1.0),
    A.Normalize(p=1.0),
    ToTensorV2(p=1.0),
])

In [47]:
class ReIDDataset(Dataset):
    
    def __init__(self, df, transforms=None):
    
        self.df = df
        self.transforms=transforms
        
    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx):
        image_src = DIR_INPUT + '/images/' + self.df.loc[idx, 'img_id']

        image = cv2.imread(image_src, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        labels = self.df.loc[idx, ['target']].values
        labels = torch.from_numpy(labels.astype(np.int8))
        labels = labels.squeeze(-1)
        
        if self.transforms:
            transformed = self.transforms(image=image)
            image = transformed['image']


        return image, labels

    
#读取速度更快
class ReIDDataset2(Dataset):
    
    def __init__(self, df,targets, transforms=None):
    
        self.df = df
        self.targets=targets
        self.transforms=transforms
        
    def __len__(self):

        return len(self.targets)
    
    def __getitem__(self, idx):

        image_src = DIR_INPUT + '/images/' + self.df.loc[idx, 'img_id']

        image = cv2.imread(image_src, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        labels=self.targets[idx]

        labels=torch.tensor(labels)

        
        if self.transforms:
            transformed = self.transforms(image=image)
            image = transformed['image']


        return image, labels

# 为提交文件而设计的
class ReIDDatasetTest(Dataset):
    
    def __init__(self, df, transforms=None):
    
        self.df = df
        
        self.transforms=transforms
        
    def __len__(self):
        return self.df.shape[0]

    
    def __getitem__(self, idx):

        image_src =  self.df.loc[idx, 'img_id']

        image = cv2.imread(image_src, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        
        if self.transforms:
            transformed = self.transforms(image=image)
            image = transformed['image']

        return image
    
    
# class ReIDUnlabeledDataset(Dataset):
    
#     def __init__(self, df, transforms_weak=None, transforms_strong=None):
    
#         self.df = df
#         self.transforms_weak=transforms_weak
#         self.transforms_strong=transforms_strong
        
#     def __len__(self):
#         return self.df.shape[0]
    
#     def __getitem__(self, idx):
#         image_src = DIR_INPUT + '/images/' + self.df.loc[idx, 'img_id'] + '.jpg'
#         # print(image_src)
#         image = cv2.imread(image_src, cv2.IMREAD_COLOR)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        
#         if self.transforms_strong:
#             transformed = self.transforms_strong(image=image)
#             image_new = transformed['image']
#             if self.transforms_weak:
#                 transformed = self.transforms_weak(image=image)
#                 image = transformed['image']
#             return image,image_new
        
#         return image,image    

In [16]:
from sklearn.model_selection import KFold, StratifiedKFold,StratifiedShuffleSplit,train_test_split

In [None]:
#把原始训练数据分割成0.8:0.2比例的train和valid数据

In [18]:
X_train, X_val, y_train, y_val=train_test_split(train_df,train_df.target.values, test_size=0.2, random_state=42)
print(X_train.shape,y_train.shape)

(58259, 2) (58259,)


In [19]:

DEBUG=False#True
if DEBUG:
    X_train=X_train.sample(n=10000,random_state=0)
    y_train=X_train.target.values

In [20]:
#避免index error 
X_train.reset_index(inplace=True)
X_val.reset_index(inplace=True)

In [21]:
# dataset_train = ReIDDataset(df=X_train, transforms=transforms_train)
dataset_train = ReIDDataset2(df=X_train,targets=X_train.target.values, transforms=transforms_train)

BATCH_SIZE=64#16
VAL_BATCH_SIZE=4*BATCH_SIZE
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=4, shuffle=True,pin_memory=True)

In [22]:
#测试dataloader
for x,y in dataloader_train:
    print(x.size(),y.size())
    break

torch.Size([64, 3, 256, 128]) torch.Size([64])


In [23]:
dataset_valid = ReIDDataset2(df=X_val,targets=X_val.target.values, transforms=transforms_train)
dataloader_valid = DataLoader(dataset_valid, batch_size=VAL_BATCH_SIZE, num_workers=2, shuffle=False,pin_memory=True)

# model

In [24]:
from model import ft_net, ft_net_dense, ft_net_NAS, PCB

# train

In [26]:
######################################################################
# Save model
#---------------------------

def save_network(network, epoch_label):
    save_filename = 'net_%s.pth'% epoch_label
    save_path = os.path.join('./model',name,save_filename)
    torch.save(network.cpu().state_dict(), save_path)
    if torch.cuda.is_available():
        network.cuda(gpu_ids[0])

In [27]:
y_loss = {} # loss history
y_loss['train'] = []
y_loss['val'] = []
y_err = {}
y_err['train'] = []
y_err['val'] = []


WARM_EPOCH=5
# NUM_EPOCHS=10
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    #best_model_wts = model.state_dict()
    #best_acc = 0.0
    warm_up = 0.1 # We start from the 0.1*lrRate
    warm_iteration = round(len(dataset_train)/BATCH_SIZE)*WARM_EPOCH # first 5 epoch

    for epoch in range(NUM_EPOCHS):
        print('Epoch {}/{}'.format(epoch, NUM_EPOCHS - 1))
        print('-' * 10)
        
        # Each epoch has a training and validation phase
        scheduler.step()
        model.train(True)  # Set model to training mode


        running_loss = 0.0
        running_corrects = 0.0
        # Iterate over data.
        for data in dataloader_train:
            # get the inputs
            inputs, labels = data

            now_batch_size,c,h,w = inputs.shape

            inputs = inputs.cuda()
            labels = labels.cuda()
                       

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward            
            outputs = model(inputs)
        
            
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)


            # backward + optimize only if in training phase
            if epoch<WARM_EPOCH : 
                warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                loss *= warm_up

            
            if fp16: # we use optimier to backward loss
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()

            # statistics
            # for the new version like 0.4.0, 0.5.0 and 1.0.0
            running_loss += loss.item() * now_batch_size
            
            running_corrects += float(torch.sum(preds == labels.data))

        epoch_loss = running_loss / len(dataset_train)
        epoch_acc = running_corrects / len(dataset_train)

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(
            'train', epoch_loss, epoch_acc))

        y_loss['train'].append(epoch_loss)
        y_err['train'].append(1.0-epoch_acc)  
        
        #valid the model
        running_loss = 0.0
        running_corrects = 0.0
        with torch.no_grad():
            for data in dataloader_valid:
                # get the inputs
                inputs, labels = data
                now_batch_size,c,h,w = inputs.shape
                
                inputs = inputs.cuda()
                labels = labels.cuda()
                outputs = model(inputs)
                
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)
                running_loss += loss.item() * now_batch_size
            
                running_corrects += float(torch.sum(preds == labels.data))
                
            epoch_loss = running_loss / len(dataset_valid)
            epoch_acc = running_corrects / len(dataset_valid)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                'valid', epoch_loss, epoch_acc))

            y_loss['val'].append(epoch_loss)
            y_err['val'].append(1.0-epoch_acc) 


        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    #print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
#     model.load_state_dict(last_model_wts)
    save_network(model, 'last')
    return model


In [28]:
torch.cuda.empty_cache()
#densenet121
model = ft_net_dense(19658, 0.1)

In [29]:
print(model)

ft_net_dense(
  (model): DenseNet(
    (features): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(inplace=True)
      (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        (denselayer1): _DenseLayer(
          (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (denselayer2): _DenseLayer(
          (norm1): BatchNorm2d(96, eps=1e-05, momentum=

In [30]:
lr=1e-2

ignored_params = list(map(id, model.classifier.parameters() ))
base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
optimizer_ft = optim.SGD([
         {'params': base_params, 'lr': 0.1*lr},
         {'params': model.classifier.parameters(), 'lr': lr}
     ], weight_decay=5e-4, momentum=0.9, nesterov=True)

In [31]:
# Decay LR by a factor of 0.1 every 40 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=40, gamma=0.1)

In [32]:
# model to gpu
model = model.cuda()
if fp16:
    model, optimizer_ft = amp.initialize(model, optimizer_ft, opt_level = "O1")

criterion = nn.CrossEntropyLoss()
NUM_EPOCHS=60
model = train_model(model, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=NUM_EPOCHS)

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Epoch 0/59
----------




train Loss: 1.7837 Acc: 0.0339
valid Loss: 9.0218 Acc: 0.0558
Training complete in 3m 44s

Epoch 1/59
----------
train Loss: 3.2036 Acc: 0.0808
valid Loss: 8.5045 Acc: 0.1023
Training complete in 7m 20s

Epoch 2/59
----------
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0
train Loss: 4.4383 Acc: 0.1247
valid Loss: 8.1080 Acc: 0.1279
Training complete in 10m 56s

Epoch 3/59
----------
train Loss: 5.3931 Acc: 0.1475
valid Loss: 7.6077 Acc: 0.1366
Training complete in 14m 34s

Epoch 4/59
----------
train Loss: 5.8904 Acc: 0.1790
valid Loss: 7.0424 Acc: 0.1574
Training complete in 18m 10s

Epoch 5/59
----------
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0
train Loss: 5.3778 Acc: 0.2674
valid Loss: 6.5086 Acc: 0.1939
Training complete in 21m 50s

Epoch 6/59
----------
train Loss: 4.1749 Acc: 0.4057
valid Loss: 6.1260 Acc: 0.2191
Training complete in 25m 29s

Epoch 7/59
----------
train Loss: 2.9127 Acc: 0.6086
valid Loss: 5

Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/tomqin/anaconda3/envs/torch14/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/tomqin/anaconda3/envs/torch14/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/tomqin/anaconda3/envs/torch14/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/tomqin/anaconda3/envs/torch14/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/home/tomqin/anaconda3/envs/torch14/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/tomqin/anaconda3/envs/torch14/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/tomqin/anaconda3/env

KeyboardInterrupt: 

In [33]:
#valid the model
running_loss = 0.0
running_corrects = 0.0
with torch.no_grad():
    for data in dataloader_valid:
        # get the inputs
        inputs, labels = data
        now_batch_size,c,h,w = inputs.shape

        inputs = inputs.cuda()
        labels = labels.cuda()
        outputs = model(inputs)

        _, preds = torch.max(outputs.data, 1)
        loss = criterion(outputs, labels)
        running_loss += loss.item() * now_batch_size

        running_corrects += float(torch.sum(preds == labels.data))

    epoch_loss = running_loss / len(dataset_valid)
    epoch_acc = running_corrects / len(dataset_valid)

    print('{} Loss: {:.4f} Acc: {:.4f}'.format(
        'valid', epoch_loss, epoch_acc))

valid Loss: 5.9169 Acc: 0.2673


# Test

In [None]:
#通拿query的一张图片，在gallery目录里查询符合这个query图片的可能图片（可能存在多张）
'''
mAP(mean average precision)：反应检索的人在数据库中所有正确的图片排在排序列表前面的程度，能更加全面的衡量ReID算法的性能。
如下图，假设该检索行人在gallery中有10张图片，在检索的list中位置（rank）分别为1、2、3、4、5、6、7、8、9，
则ap为(1/ 1 + 2 / 2 + 3 / 3 + 4 / 4 + 5 / 5 + 6 / 6 + 7 / 7 + 8 / 8 + 9 / 9) / 10 = 0.90；
ap较大时，该行人的检索结果都相对靠前，对所有query的ap取平均值得到mAP

'''

In [34]:
save_network(model, 'last')

In [None]:
# Change to test mode
model = model.eval()


In [None]:
def compute_mAP(index, good_index, junk_index):
    ap = 0
    cmc = torch.IntTensor(len(index)).zero_()
    if good_index.size==0:   # if empty
        cmc[0] = -1
        return ap,cmc

    # remove junk_index
    mask = np.in1d(index, junk_index, invert=True)
    index = index[mask]

    # find good_index index
    ngood = len(good_index)
    mask = np.in1d(index, good_index)
    rows_good = np.argwhere(mask==True)
    rows_good = rows_good.flatten()
    
    cmc[rows_good[0]:] = 1
    for i in range(ngood):
        d_recall = 1.0/ngood
        precision = (i+1)*1.0/(rows_good[i]+1)
        if rows_good[i]!=0:
            old_precision = i*1.0/rows_good[i]
        else:
            old_precision=1.0
        ap = ap + d_recall*(old_precision + precision)/2

    return ap, cmc


#######################################################################
# Evaluate
def evaluate(qf,ql,qc,gf,gl,gc):
    query = qf.view(-1,1)
    # print(query.shape)
    score = torch.mm(gf,query)
    score = score.squeeze(1).cpu()
    score = score.numpy()
    # predict index
    index = np.argsort(score)  #from small to large
    index = index[::-1]
    # index = index[0:2000]
    # good index
    query_index = np.argwhere(gl==ql)
    camera_index = np.argwhere(gc==qc)

    good_index = np.setdiff1d(query_index, camera_index, assume_unique=True)
    junk_index1 = np.argwhere(gl==-1)
    junk_index2 = np.intersect1d(query_index, camera_index)
    junk_index = np.append(junk_index2, junk_index1) #.flatten())
    
    CMC_tmp = compute_mAP(index, good_index, junk_index)
    return CMC_tmp

In [58]:
def extract_feature(model,dataloaders):
    features = torch.FloatTensor()
    count = 0
    for data in dataloaders:
        img=data.cuda()          
        outputs = model(img) 
        features = torch.cat((features,outputs.data.cpu()), 0)
    return features

## 准备数据

### query（查询）

In [48]:
import glob
#query
query_list=glob.glob(r'../image_A/query/*.png')
query_name_list=[]
for file_path   in query_list:
    file_name=file_path
    query_name_list.append(file_name)

query_df=pd.DataFrame(data={'img_id':query_name_list})
query_df

Unnamed: 0,img_id
0,../image_A/query/00000000.png
1,../image_A/query/00000001.png
2,../image_A/query/00000002.png
3,../image_A/query/00000003.png
4,../image_A/query/00000004.png
...,...
2895,../image_A/query/00002895.png
2896,../image_A/query/00002896.png
2897,../image_A/query/00002897.png
2898,../image_A/query/00002898.png


In [49]:
dataset_query = ReIDDatasetTest(df=query_df, transforms=transforms_train)
dataloader_query = DataLoader(dataset_query, batch_size=VAL_BATCH_SIZE, num_workers=2, shuffle=False,pin_memory=True)

In [53]:
for img in dataloader_query:
    print(img.size())
    break

torch.Size([256, 3, 256, 128])


### gallery（待搜索图库）

In [50]:
#gallery
gallery_list=glob.glob(r'../image_A/gallery/*.png')
gallery_name_list=[]
for file_path   in gallery_list:
    #     file_name=file_path.split('/')[-1]
    file_name=file_path
    gallery_name_list.append(file_name)
#     break
# print((query_list))
gallery_df=pd.DataFrame(data={'img_id':gallery_name_list})
gallery_df

Unnamed: 0,img_id
0,../image_A/gallery/00000000.png
1,../image_A/gallery/00000006.png
2,../image_A/gallery/00000009.png
3,../image_A/gallery/00000012.png
4,../image_A/gallery/00000016.png
...,...
40461,../image_A/gallery/00143698.png
40462,../image_A/gallery/00143707.png
40463,../image_A/gallery/00143708.png
40464,../image_A/gallery/00143710.png


In [72]:
#gallery字典，用于根据索引反向获得对应的图片id
gallery_dict={}
for i,file in enumerate(gallery_df.img_id.values):
    gallery_dict[i]=file.split('/')[-1]

In [51]:
dataset_gallery = ReIDDatasetTest(df=gallery_df, transforms=transforms_train)
dataloader_gallery = DataLoader(dataset_gallery, batch_size=VAL_BATCH_SIZE, num_workers=2, shuffle=False,pin_memory=True)

In [77]:
gallery_dict[0]

'00000000.png'

## 抽取query特征和gallery特征，准备计算距离做排序

In [61]:
# Extract feature
with torch.no_grad():
    query_feature = extract_feature(model,dataloader_query)

In [62]:
query_feature.size()

torch.Size([2900, 19658])

In [63]:
# Extract feature
with torch.no_grad():
    gallery_feature = extract_feature(model,dataloader_gallery)

In [64]:
gallery_feature.size()

torch.Size([40466, 19658])

In [80]:
m, n = query_feature.size(0), gallery_feature.size(0)
distmat = torch.pow(query_feature, 2).sum(dim=1, keepdim=True).expand(m, n) + \
          torch.pow(gallery_feature, 2).sum(dim=1, keepdim=True).expand(n, m).t()
distmat.addmm_(1, -2, query_feature, gallery_feature.t())
distmat = distmat.numpy()




'\nfind rank1 positional information\n'

In [None]:
#排序取前200
indices = np.argsort(distmat,1)
indices_200 = indices[:,:200]

In [68]:
distmat.shape,indices.shape

((2900, 40466), (2900, 40466))

In [69]:
distmat[0],indices[0]

(array([52697.695, 78874.87 , 57114.316, ..., 56367.2  , 49260.613,
        72397.86 ], dtype=float32),
 array([32001, 37225, 16589, ..., 39009, 22941, 15142]))

In [70]:
indices_200.shape

(2900, 200)

## 生成用来导出json的字典

In [149]:
#query_name_list
new_dict={}
for j,a in enumerate(indices_200.tolist()):
    line_arr=[]
    counter+=1
    for i in a:
        line_arr.append(gallery_dict[i])
    key=query_name_list[j].split('/')[-1]

    new_dict[key]=list(line_arr)

In [145]:
#query_name_list
# new_dict={}
# with open("submission_0826.json","w",encoding='utf-8') as fout:
#     fout.write('{')
#     for j,a in enumerate(indices_200.tolist()):
#         line_arr=[]
#         counter+=1
#         str_arr=''
#         for i in a:
#             line_arr.append(gallery_dict[i])
#             str_arr+='\"{}\",'.format(gallery_dict[i])
#         key=query_name_list[j].split('/')[-1]
# #         line_str='\"{}\":[{}]\n'.format(key,str_arr[:-1])
#         line_str='\"{}\":[{}]'.format(key,str_arr[:-1])

#         fout.write(line_str+',')
#     fout.write('}\n')

## 生成提交文件

In [150]:
import json
with open("submission_0826.json","w") as f:
    json.dump(new_dict,f)
    print("加载入文件完成...")

加载入文件完成...


In [151]:
#检查生成的文件
with open("submission_0826.json","r") as fin:
    for line in fin:
        print(line[:6000])
        break

{"00000000.png": ["00113784.png", "00132168.png", "00058828.png", "00111189.png", "00136251.png", "00049582.png", "00043037.png", "00124806.png", "00120209.png", "00106560.png", "00002368.png", "00098689.png", "00128112.png", "00059088.png", "00102430.png", "00128367.png", "00028445.png", "00128727.png", "00046388.png", "00102983.png", "00062888.png", "00114940.png", "00106806.png", "00121973.png", "00044722.png", "00041101.png", "00012701.png", "00055319.png", "00021437.png", "00082180.png", "00083046.png", "00102891.png", "00002120.png", "00093492.png", "00089232.png", "00051046.png", "00045632.png", "00125816.png", "00032149.png", "00110083.png", "00071599.png", "00019856.png", "00041288.png", "00100146.png", "00021927.png", "00081096.png", "00041469.png", "00019634.png", "00125209.png", "00028638.png", "00116918.png", "00041662.png", "00045368.png", "00021149.png", "00075311.png", "00129853.png", "00039597.png", "00010708.png", "00059308.png", "00009919.png", "00119033.png", "00139

In [None]:
print('Done!')
#lb 0.235