In [None]:
# Install the torch 1.9.0 wheel from the local /kaggle/input/fastaimaster path
# into the user site-packages (`--user`).
!pip install --user /kaggle/input/fastaimaster/torch-1.9.0-cp37-cp37m-manylinux1_x86_64.whl

In [None]:
# kaggle datasets download -d nasheqlbrm/fastaimaster

In [None]:
import fastai
from fastai.vision.all import *

In [None]:
# import fastbook
# from fastbook import *
from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
from fastai.tabular.all import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
#from dtreeviz.trees import *
from IPython.display import Image, display_svg, SVG

# Cap how much of a frame pandas prints: at most 20 rows and 12 columns.
pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 12)

# plt.rcParams['figure.dpi'] = 100
# plt.rcParams['savefig.dpi'] = 100
%config InlineBackend.figure_format = 'retina'

In [None]:
# Directory that holds the competition files.
path = Path('/kaggle/input/petfinder-pawpularity-score')

In [None]:
# Register the data root as BASE_PATH, then list what the directory contains.
Path.BASE_PATH = path
path.ls()

In [None]:
# Load the training table from train.csv.
df = pd.read_csv(path/('train.csv'), low_memory=False)

In [None]:
# Display the training frame.
df

In [None]:
df.columns

In [None]:
# Name of the target column to predict.
dep_var = 'Pawpularity'

In [None]:
df[dep_var]

In [None]:
# Row position at the 80% mark of the table.
split = int(len(df)*0.8)

In [None]:
# Positional indices: the first 80% of rows, then the remaining 20%.
# NOTE(review): both TabularPandas objects in this notebook are built with
# splits=None, so this tuple is only used to derive `valid_Id` — confirm that
# training on every row is intentional.
splits=list(np.arange(split)),list(np.arange(split, len(df)))

In [None]:
# Every column except the first and the last is treated as a categorical feature.
cat = list(df.columns[1:-1])
cat

In [None]:
# Tabular dataset over the columns from 'Subject Focus' onward, with no procs
# and no train/validation split.
to = TabularPandas(df.loc[:,'Subject Focus':], cat_names=cat,
                   y_names=dep_var, splits=None)

In [None]:
def r_mse(pred,y):
    "Root mean squared error between predictions `pred` and targets `y`."
    squared_error = (y - pred) ** 2
    return np.sqrt(squared_error.mean())


def m_rmse(mod,xs,y):
    "RMSE of `mod.predict(xs)` measured against `y`."
    fitted = mod.predict(xs)
    return r_mse(fitted, y)

In [None]:
# Features and target of the training subset, then of the validation subset.
# NOTE(review): `to` was built with splits=None, so the validation subset is
# presumably empty (val_xs / val_y are not used later in this notebook) — confirm.
xs,y = to.train.xs, to.train.y
val_xs,val_y = to.valid.xs, to.valid.y

In [None]:
# Seed NumPy's global RNG as before, and also pin the forest's own `random_state`:
# with random_state=None the estimator draws from the global RNG at fit time, so
# any NumPy randomness consumed between this cell and `mod.fit` (or a second
# `fit`) silently changed the trees.
RF_SEED = 42*8
np.random.seed(RF_SEED)
mod = RandomForestRegressor(n_jobs=-1, min_samples_leaf=4, max_features=0.2,
                            random_state=RF_SEED)

In [None]:
# Fit the forest on the training features and target.
mod.fit(xs,y)

In [None]:
# RMSE on the same rows the forest was fitted on (a training error, not a
# held-out score).
m_rmse(mod,xs,y)

### Now, let's try a neural net

In [None]:
# Cast the target column to float32 (overwrites the column in `df`).
df[dep_var] = df[dep_var].astype(np.float32)

In [None]:
# Same columns as `to`, but with the Categorify proc applied; again no split.
to_nn = TabularPandas(df.loc[:,'Subject Focus':], [Categorify], cat_names=cat,
                   y_names=dep_var, splits=None)

In [None]:
# DataLoaders over `to_nn`, created with 512 as the batch-size argument.
dls = to_nn.dataloaders(512)

In [None]:
np.random.seed(31)
learn = tabular_learner(dls, y_range=(-1,101), layers=[600,400,100], n_out=1,
                        loss_func=F.mse_loss, metrics=rmse, wd=0.05)

In [None]:
np.random.seed(42)
# LR = learn.lr_find();LR

In [None]:
# One-cycle schedule: 11 epochs with 5e-2 as the learning-rate argument.
learn.fit_one_cycle(11, 5e-2)

In [None]:
# 9 further epochs with a learning-rate slice of 1e-6..1e-4.
learn.fit(9, slice(1e-6, 1e-4))

In [None]:
# Predictions and targets for dataset index 0 (the training set).
nn_preds,targs = learn.get_preds(ds_idx=0)


In [None]:
# RMSE of the network on those same training rows.
r_mse(nn_preds,targs)

### Visual Neural Net

In [None]:
labels = df.set_index('Id')

In [None]:
def get_label(fname):
    return labels.loc[fname[:-4]]['Pawpularity']

In [None]:
# Ids of the rows selected by the second index list in `splits` (the last 20%).
valid_Id = list(df.iloc[splits[1]].Id)

In [None]:
def is_valid(fname):
    "Splitter predicate: always False, so no file is assigned to the validation set."
    # The Id-based check below is disabled; `valid_Id` is therefore unused here.
#     return fname[:-4] in valid_Id
    return False

In [None]:
# Image -> regression DataBlock: items come from get_image_files, the label is
# get_label applied to each file's `name`, every item is resized to 256 with
# zero padding, augmentation runs per batch, and the split is decided by is_valid.
# NOTE(review): RandomResizedCrop is passed in batch_tfms; fastai also provides
# RandomResizedCropGPU for batch-level use — confirm that the crop to 128
# actually takes effect on batches here.
dblock = DataBlock(blocks=(ImageBlock, RegressionBlock),
                   get_items = get_image_files,
                   get_y = using_attr(get_label, 'name'),
                   item_tfms=Resize(256, method='pad', pad_mode = PadMode.Zeros),
                   batch_tfms=[*aug_transforms(max_zoom=1.5, max_rotate=25., max_warp=0.4), RandomResizedCrop(128)],
                   splitter = FuncSplitter(using_attr(is_valid, 'name'))
                  )

In [None]:
# Image DataLoaders built from the train folder with bs=32.
# This rebinds `dls`, which previously held the tabular DataLoaders.
dls = dblock.dataloaders(path/'train', bs=32)

In [None]:
# ResNet-34 without pretrained weights (pretrained=False), wd=0.1, RMSE metric.
learn3 = cnn_learner(dls, resnet34, wd=0.1, pretrained=False, metrics=rmse)

In [None]:
learn3.lr_find()

In [None]:
# One-cycle schedule: 9 epochs with 1e-2 as the learning-rate argument.
learn3.fit_one_cycle(9, 1e-2)

In [None]:
learn3.recorder.plot_loss()

# Ensembling

In [None]:
rf_preds = mod.predict(xs)

In [None]:
rf_preds = torch.tensor(rf_preds)

In [None]:
resnet_preds = learn3.get_preds(ds_idx=0)[0]
resnet_preds = torch.tensor(resnet_preds)

In [None]:
# resnet_preds = resnet_preds[0]

In [None]:
pcts = torch.linspace(0.0,1.,50)
rmses = []
for i in pcts:
    i = float(i)
    ens_preds = rf_preds*i + nn_preds*(1-i)
    rmses.append(r_mse(ens_preds, targs))
# rmses = [rmse()]
# accs = [accuracy_multi(preds, targs, thresh=i, sigmoid=False) for i in xs]
plt.plot(pcts,rmses);


In [None]:
mini = pcts[rmses.index(min(rmses))]

ens_preds1 = rf_preds*mini + nn_preds*(1-mini)

In [None]:
pcts = torch.linspace(0.0,1.,50)
rmses = []
for i in pcts:
    i = float(i)
    ens_preds = resnet_preds*i + ens_preds1*(1-i)
    rmses.append(r_mse(ens_preds, targs))
plt.plot(pcts,rmses);


In [None]:
mini2 = pcts[rmses.index(min(rmses))]

ens_preds2 = resnet_preds*mini2 + ens_preds1*(1-mini2)

In [None]:
subm = pd.read_csv(path/'sample_submission.csv')

In [None]:
resnet_preds = [learn3.predict(path/'test'/(Id+'.jpg'))[1] for Id in subm.Id]
resnet_preds = torch.tensor(resnet_preds)

In [None]:
# Test-set table read from test.csv.
df_test = pd.read_csv(path/'test.csv')

In [None]:
df_test

In [None]:
# to = TabularPandas(df_test.loc[:,'Subject Focus':], cat_names=cat,
#                    y_names=None, splits=None)

In [None]:
# learn.predict(to.xs.iloc[0])

In [None]:
# Every column after the first, cast to int8; the trailing expression displays it.
my_x = df_test.iloc[:,1:].astype(np.int8); my_x

In [None]:
# Run the tabular network over the whole test frame in one batched pass instead
# of one `predict` call per row.
nn_test_dl = learn.dls.test_dl(my_x)
# Flatten to one prediction per row, the same shape the per-row loop produced.
nn_preds = learn.get_preds(dl=nn_test_dl)[0].flatten()

In [None]:
# Forest predictions on the test features, wrapped in a tensor.
# This rebinds `rf_preds`, which previously held the training-set predictions.
rf_preds = mod.predict(my_x)
rf_preds = torch.tensor(rf_preds)#.reshape(-1,1)

In [None]:
resnet_preds

In [None]:
preds1 = rf_preds*mini + nn_preds*(1-mini)
preds2 = resnet_preds*mini2 + preds1*(1-mini2)

In [None]:
list(preds2.numpy())

In [None]:
subm.Pawpularity = preds2

In [None]:
subm.to_csv('submission.csv', index=False)