In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Importing Fastai and checking the version 
!pip install ../input/fastai-whl/fastai-2.5.3-py3-none-any.whl -f ./ --no-index --no-deps
!pip install ../input/fastdownload/fastdownload-0.0.5-py3-none-any.whl -f ./ --no-index --no-deps

import fastai 
print(fastai.__version__)

In [None]:
# Importing the packages we need 
from fastai.vision.all import * 
from fastai import * 
import pandas as pd 
import numpy as np
import fastai 

# Print the fastai version in use (the wheel installed earlier in this notebook is 2.5.3)
print(fastai.__version__)

In [None]:
# Root folder of the competition data; every path below is derived from it so the
# location only has to be changed in one place.
DATA_DIR = '../input/petfinder-pawpularity-score'

# Image folders. They stay plain strings with a trailing slash because append_ext
# builds file names by simple string concatenation.
train_imgs_path = f'{DATA_DIR}/train/'
test_imgs_path = f'{DATA_DIR}/test/'

# CSV tables: train.csv provides `Id`, `Pawpularity` and the remaining feature
# columns used further down; test.csv provides at least `Id`.
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')


In [None]:
# Min-max scale the Pawpularity targets; the fitted scaler is kept so the
# scaling can be undone on predictions later.
from sklearn.preprocessing import MinMaxScaler

# The scaler works on 2-D input, so the target column is reshaped to (n, 1).
pawpularity_column = train_df['Pawpularity'].to_numpy().reshape(-1, 1)

scaler = MinMaxScaler()
scaler.fit(pawpularity_column)
scaled_targets = scaler.transform(pawpularity_column)
scaled_targets

In [None]:

# Condense the tabular feature columns of train_df into a single PCA component
# and attach it, together with the scaled target, to train_df.
from sklearn.decomposition import PCA

# Columns that are NOT features: the id, the target, and the columns this notebook
# itself adds to train_df. Dropping the derived ones as well (ignored when they do
# not exist yet) keeps the cell re-runnable; otherwise a second run would feed the
# scaled target and the string `full_path` column into PCA.
non_feature_cols = ['Id', 'Pawpularity', 'Pawpularity_scaled', 'reg_block', 'full_path']
X = train_df.drop(columns=non_feature_cols, errors='ignore')

# A fixed random_state makes the projection reproducible in case scikit-learn
# selects a randomized SVD solver for this input.
pca = PCA(n_components = 1, random_state = 42)

X_new = pca.fit_transform(X)

# Appending the new cols; ravel() turns the (n, 1) arrays into plain 1-D columns
train_df['Pawpularity_scaled'] = scaled_targets.ravel()
train_df['reg_block'] = X_new.ravel()


def append_ext(fn , train = True):
    """Return the path of the .jpg image for id `fn`.

    The id is appended to the module-level `train_imgs_path` when `train` is
    truthy (the default) and to `test_imgs_path` otherwise, followed by the
    `.jpg` extension.
    """
    base_dir = train_imgs_path if train else test_imgs_path
    return f'{base_dir}{fn}.jpg'

# Add a `full_path` column to both frames: train ids are resolved against the
# train image folder, test ids against the test image folder.
train_df['full_path'] = train_df['Id'].map(append_ext)
test_df['full_path'] = test_df['Id'].map(lambda image_id: append_ext(image_id , train = False))

# Preview the first ten training rows, including the new columns.
train_df.head(10)

In [None]:
# Defining the transforms (basic augmentations plus ImageNet normalisation)

# Per-item transform: random resized crop to 460 px.
item_tfms=RandomResizedCrop(460)
# Batch transforms: fastai's aug_transforms at 224 px with warping disabled
# (max_warp=0), followed by normalisation with the ImageNet statistics.
batch_tfms=[*aug_transforms(size=224, max_warp=0), Normalize.from_stats(*imagenet_stats)]
# Path object for the training image folder (not referenced by the cells visible here).
train_data = Path(train_imgs_path)


# Creating the DataBlock: images are read from the `full_path` column, the
# regression target from the `Pawpularity` column, and rows are split at random.
# NOTE(review): the target used here is the raw `Pawpularity`, while a scaled
# copy (`Pawpularity_scaled`) is computed earlier and predictions are later passed
# through scaler.inverse_transform — confirm which target the loaded model was
# actually trained on.
# NOTE(review): RandomSplitter() is called without a seed, so the split
# presumably differs between runs — confirm whether that matters here.
paw_block = DataBlock(blocks = (ImageBlock , RegressionBlock()) , 
                      get_x = ColReader('full_path') , 
                      get_y = ColReader('Pawpularity') , 
                      splitter = RandomSplitter() , 
                      item_tfms = item_tfms , 
                      batch_tfms = batch_tfms)

# Getting the summary of the pipeline when it is applied to train_df
paw_block.summary(train_df)

In [None]:
# Creating the DataLoaders from train_df with the DataBlock defined above,
# requesting a batch size of 32.
# NOTE(review): fastai's own keyword for this is `bs`; `batch_size` is presumably
# forwarded to the underlying DataLoader — confirm the batches really hold 32 items.
paw_dls = paw_block.dataloaders(train_df , batch_size= 32)

In [None]:
# Loading the inference model (per the original note, it was trained beforehand on Google Colab)
# NOTE(review): load_learner unpickles the file, which can execute arbitrary
# code — only load model files that come from a trusted source.
learn_inf = load_learner('../input/new-model/new_model.pkl')
learn_inf

In [None]:
# Getting the test filenames 
test_fns = get_image_files(test_imgs_path)

# Creating a dataloader for our test data (built from the existing paw_dls object)
test_dls = paw_dls.test_dl(test_fns)

# Show a sample batch of test images as a visual sanity check
test_dls.show_batch()

In [None]:
# Predict on the test images and start the submission dataframe.
import os 

# List the test images ONCE and reuse that list for both the DataLoader and the
# Id column, so that row i of the predictions always belongs to Id i.
# Taking the ids from a separate os.listdir() call guarantees neither the same
# order nor the same set of files as get_image_files().
test_files = get_image_files(test_imgs_path)

# Get the predictions 
test_dl = paw_dls.test_dl(test_files)
test_pred = learn_inf.get_preds(dl = test_dl)

# Path.stem drops the extension whatever its length; slicing off the last four
# characters would only be correct for extensions like ".jpg".
test_data = pd.DataFrame({'Id': [img_file.stem for img_file in test_files]})
test_data

In [None]:
# Convert the model outputs back to the original Pawpularity scale with the
# fitted scaler and store them next to the ids.
# NOTE(review): this assumes the loaded model was trained on the min-max scaled
# target; the training run is not part of this notebook — confirm.
raw_preds = test_pred[0].detach().numpy()
test_data['Pawpularity'] = scaler.inverse_transform(raw_preds)
test_data

In [None]:
# Making the submission: write the Id / Pawpularity table to submission.csv
# in the current working directory, without the dataframe index column
test_data.to_csv('submission.csv' , index=False)