In [6]:
import os
import gc
import cv2
import math
import copy
import time
import random
import glob
from PIL import Image
from matplotlib import pyplot as plt

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [10]:
CONFIG = {
    "seed": 0,
    "img_size": 2048,
    "model_name": "tf_efficientnet_b0_ns",
    "num_classes": 5,
    "valid_batch_size": 4,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}
def set_seed(seed=0):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(CONFIG['seed'])

In [13]:
ROOT_DIR = '~/kaggle/input/UBC-OCEAN'
TEST_DIR = '~/kaggle/input/UBC-OCEAN/test_thumbnails'

LABEL_ENCODER_BIN = "/kaggle/input/ubc-efficienetnetb0-fold1of10-2048pix-thumbnails/label_encoder.pkl"
BEST_WEIGHT = "/kaggle/input/ubc-efficienetnetb0-fold1of10-2048pix-thumbnails/Recall0.9178_Acc0.9437_Loss0.1685_epoch9.bin"

In [14]:
def get_test_file_path(image_id):
    return f"{TEST_DIR}/{image_id}_thumbnail.png"

In [15]:
df = pd.read_csv(f"{ROOT_DIR}/test.csv")
df['file_path'] = df['image_id'].apply(get_test_file_path)
df['label'] = 0 # dummy
df

Unnamed: 0,image_id,image_width,image_height,file_path,label
0,41,28469,16987,~/kaggle/input/UBC-OCEAN/test_thumbnails/41_th...,0


In [16]:
df_sub = pd.read_csv(f"{ROOT_DIR}/sample_submission.csv")
df_sub

Unnamed: 0,image_id,label
0,41,HGSC


In [17]:
encoder = joblib.load( LABEL_ENCODER_BIN )

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/ubc-efficienetnetb0-fold1of10-2048pix-thumbnails/label_encoder.pkl'