In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
from timm import create_model
from IPython.display import display
import gc
import os

from fastai.vision.all import *

In [None]:
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
    os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/swin-large-models/swin_large_patch4_window7_224_22kto1k.pth' '/root/.cache/torch/hub/checkpoints/swin_large_patch4_window7_224_22kto1k.pth'

In [None]:
# Global seed; also read by the fold splitter and the dataloader factory.
seed = 402
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# pandas / scikit-learn fall back to NumPy's global RNG when no random_state is passed.
np.random.seed(seed)
# NOTE(review): Python's `random` module is not seeded here — confirm whether the
# augmentation / shuffling code draws from it.
torch.backends.cudnn.deterministic = True
# `use_deterministic_algorithms` is a function. Assigning `= True` to it replaced torch's
# API with a bool and enabled nothing, so call it instead. `warn_only=True` makes ops
# without a deterministic kernel warn rather than raise, so training still runs.
try:
    torch.use_deterministic_algorithms(True, warn_only=True)
except TypeError:
    # Older torch releases do not accept `warn_only`; strict mode could abort training
    # on the first non-deterministic op, so leave the switch untouched there.
    pass

# 1 Data

In [None]:
class Data:
    """Loads the competition CSVs, assigns stratified CV folds and builds fastai dataloaders.

    Reads the notebook-level `seed` for shuffling, fold assignment and dataloader seeding.
    """

    def __init__(self, data_path='../input/petfinder-pawpularity-score/', n_split=5,
                 batch_size=None, img_size=None):
        """Read `train.csv` / `test.csv` from `data_path` and assign every training row a fold.

        Args:
            data_path: directory containing `train.csv`, `test.csv` and the `train/` and
                `test/` image folders.
            n_split: number of stratified folds.
            batch_size: optional value stored on the instance; when omitted, falls back to
                the notebook global of the same name (the previous behaviour).
            img_size: same as `batch_size`, for the image size.
        """
        # These used to be read straight from notebook globals, which raised NameError if
        # the config cell had not run yet. Explicit arguments now win; globals are a fallback.
        self.batch_size = batch_size if batch_size is not None else globals().get('batch_size')
        self.img_size = img_size if img_size is not None else globals().get('img_size')

        # Honour `data_path`; it was previously ignored in favour of a hard-coded path.
        self.dataset_path = Path(data_path)
        train_val_df = self.preprocess_df(pd.read_csv(self.dataset_path/'train.csv'), is_train=True)
        self.test_df = self.preprocess_df(pd.read_csv(self.dataset_path/'test.csv'), is_train=False)
        self.train_val_df = self.get_split_df(train_val_df, n_split=n_split)

    def preprocess_df(self, df, is_train):
        """Add image paths and the scaled target; shuffle training rows reproducibly.

        Args:
            df: frame with an `Id` column (plus `Pawpularity` when `is_train`). Left unmodified.
            is_train: True selects the `train` image folder and shuffles the rows;
                False selects `test` and fills a placeholder `Pawpularity`.

        Returns:
            A new DataFrame without `Id`, with `path` and `norm_score` (= Pawpularity / 100).
        """
        img_dirname = "train" if is_train else "test"
        df = df.copy()  # do not mutate the caller's frame
        if not is_train:
            # Placeholder target so both frames share the same columns.
            df['Pawpularity'] = 1
        df['path'] = df['Id'].map(lambda x: str(self.dataset_path/img_dirname/x)+'.jpg')
        df = df.drop(columns=['Id'])
        df['norm_score'] = df['Pawpularity']/100
        if is_train:
            # Seeded shuffle: the folds are assigned by row position afterwards, so an
            # unseeded shuffle made fold membership differ between runs.
            df = df.sample(frac=1., random_state=seed).reset_index(drop=True)
        return df

    def get_split_df(self, df, n_split=5):
        """Bin `norm_score`, then assign a stratified fold id per row.

        Adds `bins` and `fold` columns to `df` in place, plots the bin histogram and the
        fold sizes, and returns the same frame.
        """
        from sklearn.model_selection import StratifiedKFold

        # NOTE(review): Sturges' rule is 1 + log2(n) (about 1 + 3.322*log10(n)); this
        # expression multiplies log2 by 3.3 and therefore yields more bins. Left unchanged
        # so the stratification stays as it was — confirm intent.
        num_bins = int(np.floor(1+(3.3)*(np.log2(len(df)))))
        print(num_bins)

        df['bins'] = pd.cut(df['norm_score'], bins=num_bins, labels=False)
        print()
        display(df['bins'].hist())
        plt.show()

        df['fold'] = -1
        # Look the column up by name instead of assuming it is the last one.
        fold_col = df.columns.get_loc('fold')

        strat_kfold = StratifiedKFold(n_splits=n_split, random_state=seed, shuffle=True)
        # split() yields (train_idx, held_out_idx); rows held out in round i become fold i.
        for i, (_, val_index) in enumerate(strat_kfold.split(df.index, df['bins'])):
            df.iloc[val_index, fold_col] = i

        df['fold'] = df['fold'].astype('int')
        df.fold.value_counts().plot.bar()
        plt.show()
        return df

    def get_dls(self, img_size, batch_size, fold):
        """Build fastai dataloaders where rows of `fold` are validation and the rest training.

        Args:
            img_size: size passed to the `Resize` item transform.
            batch_size: batch size for the loaders.
            fold: fold id (value of the `fold` column) to hold out for validation.
        """
        train_df_f = self.train_val_df.copy()
        train_df_f['is_valid'] = (train_df_f['fold'] == fold)

        dls = ImageDataLoaders.from_df(train_df_f,
                                       valid_col='is_valid',    # boolean column marking validation rows
                                       seed=seed,
                                       fn_col='path',           # column holding the image file path
                                       label_col='norm_score',  # column holding Pawpularity / 100
                                       y_block=RegressionBlock, # continuous target
                                       bs=batch_size,
                                       num_workers=2,
                                       item_tfms=Resize(img_size),
                                       batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]))
        return dls

# 2 Trainer

In [None]:
def petfinder_rmse(input,target):
    """RMSE between sigmoid(`input`) and `target`, multiplied by 100.

    Both tensors are flattened before comparison. Previously only `input` was flattened,
    so a column-vector target of shape (N, 1) would broadcast against the (N,) predictions
    and silently produce an N x N error matrix.

    Args:
        input: raw model outputs (logits), any shape with N elements.
        target: targets with N elements, on the same scale as sigmoid outputs.

    Returns:
        Scalar tensor: 100 * sqrt(mean squared error).
    """
    preds = torch.sigmoid(input.flatten())
    return 100*torch.sqrt(F.mse_loss(preds, target.flatten()))

def get_learner(data, batch_size, img_size, fold, model_name):
    """Create a mixed-precision fastai Learner around a pretrained timm model for one fold.

    Args:
        data: object exposing `get_dls(img_size=..., batch_size=..., fold=...)`.
        batch_size: batch size forwarded to `get_dls`.
        img_size: image size forwarded to `get_dls`.
        fold: fold id held out for validation.
        model_name: timm model identifier passed to `create_model`.

    Returns:
        The Learner (BCE-with-logits loss, `petfinder_rmse` metric) after `to_fp16()`.
    """
    loaders = data.get_dls(img_size=img_size, batch_size=batch_size, fold=fold)
    n_outputs = loaders.c
    # Prints the output count reported by the dataloaders (the message reads "number of classes is N").
    print("类别为%d"%n_outputs)
    net = create_model(model_name, pretrained=True, num_classes=n_outputs)
    learner = Learner(loaders, net, loss_func=BCEWithLogitsLossFlat(), metrics=petfinder_rmse)
    return learner.to_fp16()

In [None]:
# Experiment configuration
n_split = 5      # number of cross-validation folds
img_size = 224   # 224 selects the window7 model; any other value selects the 384 window12 one
with_1k = True   # False switches to the `_in22k` variant of the model name

# Each resolution has its own base model name, batch size and epoch count.
if img_size == 224:
    model_name, batch_size, num_epochs = 'swin_large_patch4_window7_224', 16, 6
else:
    model_name, batch_size, num_epochs = 'swin_large_patch4_window12_384', 8, 4
if not with_1k:
    model_name += '_in22k'
    
data = Data(n_split=n_split)

# One entry per fold: that fold's TTA predictions on the test set.
all_preds = []
for fold in range(n_split):
    print(f'Fold {fold} results')
    learn = get_learner(data, batch_size, img_size, fold, model_name)
    # NOTE(review): SaveModelCallback gets no `monitor=` while early stopping watches
    # `petfinder_rmse` — confirm the two are meant to track different quantities.
    learn.fit_one_cycle(
        num_epochs,
        2e-5,
        cbs=[
            SaveModelCallback(),
            EarlyStoppingCallback(monitor='petfinder_rmse', comp=np.less, patience=2),
        ],
    )
    learn.recorder.plot_loss()

    # Export in full precision.
    learn = learn.to_fp32()
    learn.export(f"model_fold_{fold}.pkl")

    # TTA
    # Build the test loader from the learner's own dataloaders instead of constructing a
    # second, identical DataLoaders object from the dataframe for every fold.
    test_dl = learn.dls.test_dl(data.test_df)
    preds, _ = learn.tta(dl=test_dl, n=5, beta=0)
    all_preds.append(preds)

    del learn, test_dl
    # Collect garbage first: objects caught in reference cycles are only freed by
    # gc.collect(), and emptying the CUDA cache before that cannot release their memory.
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
sample_df = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')
# Element-wise median across folds. ravel() guarantees a 1-D vector for the column
# assignment even if each fold's predictions carry a trailing singleton dimension
# (assumed shape (n_test, 1) — TODO confirm).
preds = np.median(np.stack(all_preds), axis=0).ravel()
# Fail with a clear message if predictions and submission template disagree in length.
assert len(preds) == len(sample_df), (
    f"got {len(preds)} predictions for {len(sample_df)} submission rows"
)
# Predictions are on the Pawpularity / 100 scale; convert back before writing.
sample_df['Pawpularity'] = preds*100
sample_df.to_csv('submission.csv',index=False)
# Read the file back to show what was actually written.
display(pd.read_csv('submission.csv').head())

In [None]:
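# NOTE: this whole cell is commented out and does not run; it is a disabled sketch of
# single-model inference from an exported learner.
# def test(model_path, batch_size, img_size):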
    # dataset_path = Path("../input/petfinder-pawpularity-score/")
    # path = './models'
    # learn1 = load_learner('model_fold_2.pkl')
    
    # train_df = pd.read_csv(dataset_path/'train.csv')
    # test_df = pd.read_csv(dataset_path/'test.csv')
    # test_df.head()

    # test_df['Pawpularity'] = [1]*len(test_df)
    # test_df['path'] = test_df['Id'].map(lambda x:str(dataset_path/'test'/x)+'.jpg')
    # test_df = test_df.drop(columns=['Id'])
    # train_df['norm_score'] = train_df['Pawpularity']/100

    # dls = ImageDataLoaders.from_df(train_df, #pass in train DataFrame
    #                                valid_pct=0.2, #80-20 train-validation random split
    #                                seed=999, #seed
    #                                fn_col='path', #filename/path is in the second column of the DataFrame
    #                                label_col='norm_score', #label is in the first column of the DataFrame
    #                                y_block=RegressionBlock, #The type of target
    #                                bs=8,                                       # was32, #pass in batch size
    #                                num_workers=8,
    #                                item_tfms=Resize(224), #pass in item_tfms
    #                                batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()])) 
    # test_dl = dls.test_dl(test_df)

    # test_dl.show_batch()

    # preds, _ = learn1.tta(dl=test_dl, n=5, beta=0)

    # sample_df = pd.read_csv(dataset_path/'sample_submission.csv')
    # sample_df['Pawpularity'] = preds.float().numpy()*100
    # sample_df.to_csv('submission.csv',index=False)

    # pd.read_csv('submission.csv').head()