In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master/')
from timm import create_model
from fastai.vision.all import *
# Seed the RNGs before anything else runs (`set_seed` arrives through the fastai star import).
set_seed(999, reproducible=True)
# Batch size handed to the DataLoaders built in the inference loop.
BATCH_SIZE = 16
# Number of folds: used for the stratified split and for the per-fold checkpoints that get ensembled.
N_FOLDS = 5
# Root directory of the competition data (train.csv, test.csv, sample_submission.csv and image folders).
dataset_path = Path('../input/petfinder-pawpularity-score/')
dataset_path.ls()

In [None]:
### Fast.ai with SVR head! 

In [None]:
import os

seed=999
# PyTorch's reproducibility notes require this variable for deterministic cuBLAS on CUDA >= 10.2.
# `setdefault` keeps any value the environment already provides.
os.environ.setdefault('CUBLAS_WORKSPACE_CONFIG', ':4096:8')
set_seed(seed, reproducible=True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
# `torch.use_deterministic_algorithms` is a function. The previous `= True` assignment enabled
# nothing and replaced the function on the torch module, so it must be called instead.
# `warn_only=True` downgrades "no deterministic kernel for this op" from an error to a warning,
# so inference cannot be aborted by it.
try:
    torch.use_deterministic_algorithms(True, warn_only=True)
except (TypeError, AttributeError):
    # Older PyTorch: either no `warn_only` argument or no such function. Strict mode there would
    # raise on ops without a deterministic kernel, so rely on the cuDNN flag above instead.
    pass

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# Load the training table and turn each Id into the path of its image file.
train_df = pd.read_csv(dataset_path/'train.csv')
train_df['path'] = train_df['Id'].map(lambda image_id: str(dataset_path/'train'/image_id)+'.jpg')
train_df = (
    train_df
    .drop(columns=['Id'])
    .sample(frac=1)  # shuffle the rows
    .reset_index(drop=True)
)
# Scale the target by 1/100.
train_df['norm_score'] = train_df['Pawpularity']/100

# Discretise the scaled score into equal-width bins so it can act as the stratification label.
# The bin count comes from 1 + 3.3*log2(n_rows), rounded down.
num_bins = int(np.floor(1+3.3*np.log2(len(train_df))))
train_df['bins'] = pd.cut(train_df['norm_score'], bins=num_bins, labels=False)
train_df['bins'].hist()

# Give every row a fold id: the row belongs to fold k when it is in the held-out part of split k.
train_df['fold'] = -1
strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)
fold_splits = strat_kfold.split(train_df.index, train_df['bins'])
for fold_id, (_, holdout_idx) in enumerate(fold_splits):
    # The index was reset above, so positional indices and index labels coincide.
    train_df.loc[holdout_idx, 'fold'] = fold_id

train_df['fold'] = train_df['fold'].astype('int')

In [None]:
# Load the test table and shape it like the training frame (target column + image path, no Id).
test_df = pd.read_csv(dataset_path/'test.csv')

# Constant placeholder target; presumably only there so the frame has the same label column as train_df.
test_df['Pawpularity'] = [1 for _ in range(len(test_df))]
test_df['path'] = [str(dataset_path/'test'/image_id)+'.jpg' for image_id in test_df['Id']]
test_df = test_df.drop(columns=['Id'])
test_df.head()

In [None]:
def petfinder_rmse(input,target):
    """RMSE between sigmoid(input) and target, multiplied by 100.

    `input` holds raw logits and is flattened before the sigmoid; `target`
    is compared against the result as-is.
    """
    probabilities = torch.sigmoid(input.flatten())
    mean_squared_error = F.mse_loss(probabilities, target)
    return 100*torch.sqrt(mean_squared_error)

## Detect cats and dogs

In [None]:
!mkdir /root/.config/Ultralytics/
!cp ../input/arial-front/Arial.ttf -r  /root/.config/Ultralytics/
!mkdir /root/.cache/torch/
!mkdir /root/.cache/torch/hub/
!cp ../input/swin-ck/ultralytics_yolov5_master -r /root/.cache/torch/hub/


In [None]:
# Detector used by get_image_info, which reads this name as a global.
# NOTE(review): presumably resolved from the torch.hub cache directory filled by the previous cell
# rather than downloaded — confirm for offline runs.
yolov5x6_model = torch.hub.load('ultralytics/yolov5', 'yolov5x6')
# Get Image Info
def get_image_info(file_path, plot=True):
    """Run the YOLOv5 detector on one image and summarise its cat/dog detections.

    Args:
        file_path: Location of the image; read with `imageio.imread`.
        plot: If True, show the image with the pet boxes drawn on it next to
            the binary pet-pixel mask.

    Returns:
        dict with the keys
            n_pets     - number of detections labelled 'cat' or 'dog'
            labels     - label of each kept detection
            thresholds - confidence score of each kept detection
            coords     - (x1, y1, x2, y2) tuple of each kept detection
            x_min, x_max, y_min, y_max
                       - box enclosing every kept detection, clamped to
                         [0, w-1] x [0, h-1]; the whole image when nothing
                         was kept
            pet_ratio  - share of image pixels covered by at least one kept box
    """
    # Read Image
    image = imageio.imread(file_path)
    # Only height and width are needed. Slicing avoids the unpacking error a
    # three-value unpack raises when the array has no channel axis.
    h, w = image.shape[:2]

    if plot: # Debug Plots
        _, ax = plt.subplots(1, 2, figsize=(8,8))
        ax[0].set_title('Pets detected in Image', size=16)
        ax[0].imshow(image)

    # Get YOLOV5 results using Test Time Augmentation for better result
    results = yolov5x6_model(image, augment=True)

    # Mask for pixels containing pets, initially all set to zero
    pet_pixels = np.zeros(shape=[h, w], dtype=np.uint8)

    # Dictionary to save image info; the extent defaults to the whole image
    image_info = {
        'n_pets': 0, # Number of pets in the image
        'labels': [], # Label assigned to found objects
        'thresholds': [], # confidence score
        'coords': [], # coordinates of bounding boxes
        'x_min': 0, # minimum x coordinate of pet bounding box
        'x_max': w - 1, # maximum x coordinate of pet bounding box
        'y_min': 0, # minimum y coordinate of pet bounding box
        'y_max': h - 1, # maximum y coordinate of pet bounding box
    }

    # Boxes kept for drawing, stored as [x1, x2, y1, y2, label]
    pets_found = []

    # Keep only the cat/dog detections
    for x1, y1, x2, y2, confidence, class_id in results.xyxy[0].cpu().detach().numpy():
        label = results.names[int(class_id)]
        if label not in ('cat', 'dog'):
            continue

        image_info['n_pets'] += 1
        image_info['labels'].append(label)
        image_info['thresholds'].append(confidence)
        image_info['coords'].append((x1, y1, x2, y2))

        # Set pixels containing pets to 1
        pet_pixels[int(y1):int(y2), int(x1):int(x2)] = 1

        pets_found.append([x1, x2, y1, y2, label])

    # Enclosing box of all kept detections. The previous running max(x1)/min(x2)
    # produced the intersection of the boxes as soon as more than one pet was
    # found, which contradicts the "minimum/maximum coordinate" meaning of
    # these keys. A single detection gives the same values as before.
    if image_info['coords']:
        lefts, tops, rights, bottoms = zip(*image_info['coords'])
        image_info['x_min'] = max(min(lefts), 0)
        image_info['x_max'] = min(max(rights), w - 1)
        image_info['y_min'] = max(min(tops), 0)
        image_info['y_max'] = min(max(bottoms), h - 1)

    if plot:
        for x1, x2, y1, y2, label in pets_found:
            color = 'red' if label == 'dog' else 'blue'
            rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=2, edgecolor=color, facecolor='none')
            # Add the patch to the Axes
            ax[0].add_patch(rect)
            ax[0].text(max(25, (x2+x1)/2), max(25, y1-h*0.02), label, c=color, ha='center', size=14)

    # Add Pet Ratio in Image
    image_info['pet_ratio'] = pet_pixels.sum() / (h*w)

    if plot:
        # Show pet pixels
        ax[1].set_title('Pixels Containing Pets', size=16)
        ax[1].imshow(pet_pixels)
        plt.show()

    return image_info

In [None]:
from tqdm.notebook import tqdm
import imageio
# Image Info
def detection_dog_cat(train_df):
    """Run `get_image_info` on every path in `train_df['path']` and store the results as columns.

    The frame is modified in place. It gains the columns n_pets, label,
    coords, x_min, x_max, y_min, y_max and pet_ratio (despite the parameter
    name, any frame with a 'path' column works). Always returns 0.
    """
    # Column order here is the order in which the columns are added to the frame.
    column_names = ('n_pets', 'label', 'coords', 'x_min', 'x_max', 'y_min', 'y_max', 'pet_ratio')
    columns = {name: [] for name in column_names}

    for file_path in tqdm(train_df['path']):
        info = get_image_info(file_path, plot=False)

        # Every column except 'label' is copied straight from the per-image info.
        for name in column_names:
            if name != 'label':
                columns[name].append(info[name])

        # One label per image: the first detection's label, or 'unknown' when no pet was found.
        # NOTE(review): taking the first entry presumably relies on detections being ordered by
        # confidence — confirm against the detector's output ordering.
        detected = info['labels']
        columns['label'].append(detected[0] if detected else 'unknown')

    # Write the collected values into the dataframe
    for name, values in columns.items():
        train_df[name] = values
    return 0



In [None]:
# Adds the detection columns (n_pets, label, coords, x/y extents, pet_ratio) to test_df in place.
detection_dog_cat(test_df)

In [None]:
# Count of test images per detected label ('cat', 'dog' or 'unknown').
test_df['label'].value_counts()

In [None]:
# 0/1 indicator columns for the detected species; a label of 'unknown' leaves both at 0.
test_df['Cat'] = (test_df['label'] == 'cat').astype('int64')
test_df['Dog'] = (test_df['label'] == 'dog').astype('int64')
# Tabular columns, in this order, that are concatenated in front of the image embeddings
# before the per-fold heads predict.
features_list = [
    'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
    'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur',
    'n_pets', 'Cat', 'Dog',
]


In [None]:

# Defaults read by PetNet: timm model name, pretrained flag, input channels,
# head output size and dropout probability.
params = dict(
    model='swin_large_patch4_window7_224',
    pretrained=True,
    inp_channels=3,
    out_features=1,
    dropout=0,
)
class PetNet(nn.Module):
    """timm backbone with a small MLP head whose forward pass also exposes the embedding.

    The backbone's `head` is replaced by a linear layer with 256 outputs.
    `forward` returns those 256 values and the MLP head's output concatenated
    along dim 1, so the head output is always in the last column(s).
    """

    def __init__(self, model_name=params['model'], out_features=params['out_features'], inp_channels=params['inp_channels'],
                 pretrained=params['pretrained']):
        super().__init__()
        # Backbone: swap its classification head for a 256-wide projection.
        backbone = create_model(model_name, pretrained=pretrained, in_chans=inp_channels)
        backbone.head = nn.Linear(backbone.head.in_features, 256)
        self.model = backbone

        # MLP head applied to the 256-wide embedding.
        head_layers = [
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, out_features),
        ]
        self.fc = nn.Sequential(*head_layers)

        # Registered but not applied in forward().
        self.dropout = nn.Dropout(params['dropout'])

    def forward(self, image):
        """Return `[embedding | head output]` for a batch of images."""
        embeddings = self.model(image)
        head_out = self.fc(embeddings)
        return torch.cat([embeddings, head_out], dim=1)
    
def get_learner(fold_num):
    """Create an fp16 fastai `Learner` around a new `PetNet` for fold `fold_num`.

    The data comes from `get_data(fold_num)`, the loss is
    `BCEWithLogitsLossFlat` and the reported metric is `petfinder_rmse`.

    NOTE(review): `get_data` is not defined in the visible part of this
    notebook — confirm it exists before calling this function.
    """
    # Arguments are evaluated left to right, so the data is built before the model.
    return Learner(
        get_data(fold_num),
        PetNet(),
        loss_func=BCEWithLogitsLossFlat(),
        metrics=petfinder_rmse,
    ).to_fp16()

In [None]:
# Submission template; the same file is read again right before the predictions are written.
sample_df = pd.read_csv(dataset_path/'sample_submission.csv')

In [None]:
import gc

# One prediction tensor per fold; averaged after the loop.
all_preds = []
dir_pt='../input/swin-ck/224_64_lrs'   # exported per-fold learners
dir_head='../input/swin-ck/SVR'        # pickled per-fold heads
for i in range(N_FOLDS):
    print(f'Fold {i} results')
    # These DataLoaders exist only to derive the test DataLoader below.
    # NOTE(review): nothing here depends on `i`, but the call is kept inside the loop because
    # building it with `seed=` may reset the torch RNG on every fold, which would affect the
    # random TTA augmentations — confirm before hoisting.
    dls = ImageDataLoaders.from_df(train_df, # training DataFrame
                               valid_pct=0.2, # 80-20 train-validation random split
                               seed=999, # seed for the split
                               fn_col='path', # column holding the image path
                               label_col='norm_score', # column holding the target
                               y_block=RegressionBlock, # the type of target
                               bs=BATCH_SIZE, # batch size
                               num_workers=8,
                               item_tfms=Resize(224), # per-item transform
                               batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]))

    learn = load_learner('{}/model_fold_{}.pkl'.format(dir_pt,i),cpu=False)
    # The exported model is a wrapper; take the wrapped network from `.module`.
    learn.model=learn.model.module
    learn.loss_func=None
    learn = learn.to_fp32()

    test_dl = dls.test_dl(test_df)

    preds, _ = learn.tta(dl=test_dl, n=5, beta=0)

    # Every column but the last is the image embedding; the last one is the network's own logit
    # (PetNet.forward concatenates them in that order).
    embds=pd.DataFrame(preds[:,:-1])
    features=test_df[features_list].reset_index(drop=True)
    # Holds the test-set features despite the name: tabular columns first, embeddings after.
    X_train=pd.concat([features, embds], axis=1)
    # Context manager closes the file; the previous bare open() leaked one handle per fold.
    # pickle.load can execute arbitrary code, so only load head files from a trusted source.
    with open('{}/head_{}.pkl'.format(dir_head,i), "rb") as head_file:
        clf = pickle.load(head_file)
    f=nn.Sigmoid()
    y0=f(preds[:,-1])          # network prediction in [0, 1]
    y1=clf.predict(X_train)    # head prediction
    # Equal-weight blend; y0 is scaled by 100 before averaging with y1.
    y=(y0*100+y1)/2

    all_preds.append(y.view(-1))

    # Release the learner and cached GPU memory before the next fold is loaded.
    del learn

    torch.cuda.empty_cache()

    gc.collect()

In [None]:
# Average the per-fold predictions element-wise and write them into the submission template.
sample_df = pd.read_csv(dataset_path/'sample_submission.csv')
preds = np.stack(all_preds).mean(axis=0)
sample_df['Pawpularity'] = preds
sample_df.to_csv('submission.csv',index=False)

In [None]:
# Writes sample_df to 'submission.csv' again; the preceding cell already performs the same write.
sample_df.to_csv('submission.csv',index=False)