In [None]:
# Fetch the published HPA competition solutions; the bestfitting sources under
# bestfitting/src/ are put on sys.path in the following cell.
!git clone https://github.com/CellProfiling/HPA-competition-solutions.git

In [None]:
import sys

# Make the cloned repository's `src` packages (config, networks, datasets)
# importable. The membership check keeps the cell idempotent: re-running it
# no longer appends a duplicate entry to sys.path.
_bestfitting_src = './HPA-competition-solutions/bestfitting/src/'
if _bestfitting_src not in sys.path:
    sys.path.append(_bestfitting_src)

# NOTE(review): the original note here read "Need to restart the kernel after
# this". A kernel restart resets sys.path, so this cell must be re-run after
# any restart - TODO confirm what the restart was actually needed for.

In [None]:
from config.config import *
from networks.densenet import *
from networks.resnet_ml import *
from datasets.tool import *
from torch.utils.data import Dataset
import pandas as pd
import torchvision
from tqdm import tqdm
import torch
import cv2
import os

In [None]:
# Short alias for os.path.join, used when building image file paths.
opj = os.path.join

In [None]:
def read_rgby_test(img_id,
                   img_dir='../input/hpa-public-768-excl-0-16/hpa_public_excl_0_16_768/small',
                   suffix='.png'):
    """Load the four single-channel images of one sample into one array.

    For each colour in (red, green, blue, yellow) the file
    ``<img_dir>/<img_id>_<colour><suffix>`` is read as grayscale, and the four
    planes are stacked along a new last axis in that order.

    Args:
        img_id: Image identifier used as the file-name prefix.
        img_dir: Directory holding the per-channel images. Defaults to the
            input folder this notebook was written against.
        suffix: File extension, including the leading dot.

    Returns:
        Array of shape (H, W, 4) with channels ordered red, green, blue,
        yellow.

    Raises:
        FileNotFoundError: If any of the four channel files cannot be read.
    """
    colors = ['red', 'green', 'blue', 'yellow']
    channels = []
    for color in colors:
        path = opj(img_dir, img_id + '_' + color + suffix)
        channel = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread reports failure by returning None rather than raising;
        # without this check np.stack would silently build an object array of
        # Nones and the error would only surface later, far from its cause.
        if channel is None:
            raise FileNotFoundError('Could not read image file: ' + path)
        channels.append(channel)
    return np.stack(channels, axis=-1)

In [None]:
class ProteinTestDataset(Dataset):
    """Map-style dataset yielding one preprocessed 4-channel image per row.

    Each item is produced by reading the image whose id is in the ``ID``
    column of ``test_df``, resizing it to ``img_size`` x ``img_size``,
    scaling pixel values by 1/255 and passing the result through
    ``image_to_tensor``.

    Args:
        test_df: DataFrame with an ``ID`` column of image identifiers.
        img_size: Side length (pixels) the image is resized to.
        transform: Stored on the instance; NOTE(review): not applied anywhere
            in this class - confirm whether augmentation was intended.
        in_channels: Stored on the instance; NOTE(review): not used when
            loading - all four channels are always returned.
    """

    def __init__(self,
                 test_df,
                 img_size=512,
                 transform=None,
                 in_channels=4
                 ):
        self.test_df = test_df
        self.img_size = img_size
        self.in_channels = in_channels
        self.transform = transform

    def __getitem__(self, index):
        """Return the preprocessed image for the row at position ``index``."""
        # Samplers hand out positions 0..len-1, so look the row up by position
        # (.iloc). A label lookup (.loc) fails as soon as the frame has a
        # non-default index, e.g. after filtering rows.
        img_id = self.test_df.ID.iloc[index]
        image = read_rgby_test(img_id)
        # Honour the configured size instead of a hard-coded 512.
        image = cv2.resize(image, (self.img_size, self.img_size))
        image = image / 255.0
        # image_to_tensor comes from the project's datasets.tool; presumably it
        # converts the HWC array to a CHW float tensor - confirm there.
        image = image_to_tensor(image)
        return image

    def __len__(self):
        """Number of rows (images) in the dataframe."""
        return len(self.test_df)

In [None]:
# Metadata CSV shipped in the same input directory as the images loaded by
# read_rgby_test; its `ID` column supplies the image ids used by the dataset.
df = pd.read_csv('../input/hpa-public-768-excl-0-16/hpa_public_excl_0_16_768.csv')

In [None]:
# Preview the first rows to check that the table loaded as expected.
df.head()

In [None]:
# Number of rows, i.e. how many images the dataset built from `df` will yield.
len(df)

In [None]:
# DEBUG: uncomment the next line to run the notebook on the first 30 rows only.
# df = df.iloc[:30]

In [None]:
# Wrap every row of `df` in the inference dataset, using the class defaults.
test_dataset = ProteinTestDataset(df)

# With shuffle=False and no explicit sampler, DataLoader builds a
# SequentialSampler over the dataset itself, so batches follow dataframe order.
# The final, possibly smaller, batch is kept (drop_last=False).
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
    drop_last=False,
)

In [None]:
def convert_probs(all_probs):
    """Remap per-class probabilities onto a 19-column layout.

    Args:
        all_probs: 2-D array of shape (n_samples, n_classes); column indices
            up to 26 are read, so at least 27 columns are required.

    Returns:
        Float array of shape (n_samples, 19). Columns 0-16 are copied from a
        single source column or are the mean of two source columns, column 17
        is the mean of source columns 8, 9, 10 and 26, and column 18 is
        ``1 - max`` over all source columns of the row.
    """
    converted = np.zeros((all_probs.shape[0], 19))

    # Target column -> source column copied verbatim.
    copied = {dst: dst for dst in range(8)}
    copied.update({8: 11, 10: 14, 11: 17, 12: 19, 14: 23, 15: 24, 16: 25})
    for dst, src in copied.items():
        converted[:, dst] = all_probs[:, src]

    # Target column -> source columns whose mean is taken. The sum is
    # accumulated left to right, matching a plain `a + b + ...` expression.
    averaged = {9: (12, 13), 13: (21, 22), 17: (8, 9, 10, 26)}
    for dst, sources in averaged.items():
        total = all_probs[:, sources[0]]
        for src in sources[1:]:
            total = total + all_probs[:, src]
        converted[:, dst] = total / len(sources)

    # Last column: one minus the largest probability in the row.
    converted[:, 18] = 1 - np.max(all_probs, axis=-1)
    return converted

In [None]:
# NOTE(review): `dm` is not used again in the visible cells. Presumably this
# call exists only so torchvision downloads the ImageNet ResNet-50 checkpoint
# into the torch hub cache, which is the file passed as `pretrained_file`
# below - confirm.
dm = torchvision.models.resnet50(pretrained=True)
model = class_resnet50_dropout(num_classes=12815, in_channels=4, pretrained_file='/root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth')
# map_location='cpu' deserialises the checkpoint onto the CPU regardless of the
# device it was saved from; the model is moved to the GPU later, so this avoids
# holding a second copy of the weights in GPU memory and avoids failures when
# the saving device is not available here.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
weights = torch.load('../input/bestfittingml/045.pth', map_location='cpu')
model.load_state_dict(weights['state_dict'])

In [None]:
# Ask the network to also return its feature output (the forward pass below
# yields a (logits, feats) pair), move it to the GPU and switch to eval mode.
model.set_configs(extract_feature=True)
model.cuda()
model.eval()

# torch.no_grad() is what disables autograd here; the removed
# `Variable(..., volatile=True)` wrapper had no effect and is dropped.
with torch.no_grad():
    all_feats = []
    for images in tqdm(test_loader, total=len(test_loader)):
        images = images.cuda()
        logits, feats = model(images)
        # Only the features are kept; move them to host memory per batch so
        # GPU memory use does not grow with the dataset size.
        feats = feats.detach().cpu().numpy()
        all_feats.append(feats)
    # One row per image, in the order the loader yielded them.
    all_feats = np.vstack(all_feats)

In [None]:
# Sanity check: the first dimension should equal the number of images processed.
all_feats.shape

In [None]:
import pickle
# Persist the stacked feature array produced by the extraction loop.
with open('feats_ext.pickle', 'wb') as handle:
    pickle.dump(all_feats, handle)

In [None]:
# Save the dataframe next to the features so feature rows can be matched back
# to image ids (the features were extracted in dataframe order).
df.to_csv('feats_df_ext.csv', index=False)