In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pylab as plt
import os
import PIL

In [None]:
# Report the TensorFlow version in use and whether the first GPU device is visible
print('TensorFlow version: {}'.format(tf.__version__))
device_name = tf.test.gpu_device_name()
gpu_available = device_name == '/device:GPU:0'
if gpu_available:
    print('Found GPU at {}'.format(device_name))
else:
    print('GPU device not found - On for CPU time!')

In [None]:
# Let's define some helpers

# Image geometry used by the helpers below: 150px x 150px (an arbitrary first
# choice) with 3 colour channels. The helpers read these module-level names
# when they are called, so rebinding them later changes the size they produce.
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = 150, 150, 3

def read_and_decode(filename, reshape_dims):
    """Load a JPEG file and return it as a resized float32 image tensor.

    Args:
        filename: Path of the JPEG file to read (passed to `tf.io.read_file`).
        reshape_dims: Target `[height, width]` handed to `tf.image.resize`.

    Returns:
        The decoded image with `IMG_CHANNELS` channels, converted to float32
        and resized to `reshape_dims`.
    """
    # Raw file contents as a single string tensor
    raw_bytes = tf.io.read_file(filename)
    # Decode the bytes into a 3D uint8 tensor with IMG_CHANNELS colour channels
    pixels = tf.image.decode_jpeg(raw_bytes, channels=IMG_CHANNELS)
    # Convert the uint8 tensor to float32 (per the TensorFlow docs, integer
    # inputs are rescaled into the [0, 1] range by this call)
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)
    # Bring the image to the requested size
    resized = tf.image.resize(pixels, reshape_dims)
    return resized

def show_image(filename):
    """Plot the image stored at `filename`, resized to the configured size, without axes.

    Args:
        filename: Path of the JPEG file to display.
    """
    target_size = [IMG_HEIGHT, IMG_WIDTH]
    pixels = read_and_decode(filename, target_size).numpy()
    plt.imshow(pixels)
    plt.axis('off')

def decode_csv(csv_row):
    """Parse one `<image path>,<score>` CSV line into an (image, label) pair.

    Args:
        csv_row: String tensor holding one line of a header-less CSV whose
            first field is an image path and whose second field is a number.

    Returns:
        A tuple `(img, pawpularity)`: the image loaded by `read_and_decode`
        at `[IMG_HEIGHT, IMG_WIDTH]`, and the score as a float32 tensor.
    """
    # tf.io.decode_csv takes each column's dtype from its default value, so the
    # score column needs a float default to be parsed as a number. A string
    # default would yield a string tensor, and a Python-side float conversion
    # cannot run on the symbolic tensors seen inside `Dataset.map` (besides,
    # the `np.float` alias no longer exists in current NumPy releases).
    # NOTE: an empty score field falls back to 0.0.
    record_defaults = [
        tf.constant('', dtype=tf.string),
        tf.constant(0.0, dtype=tf.float32),
    ]
    filename, pawpularity = tf.io.decode_csv(csv_row, record_defaults)
    img = read_and_decode(filename, [IMG_HEIGHT, IMG_WIDTH])
    return img, pawpularity

In [None]:
data_path = '../input/petfinder-pawpularity-score/'
data = pd.read_csv(data_path+'train.csv')

# Use stratified sampling on the score so both subsets share its distribution.
# A fixed seed makes the split identical on every "Restart & Run All".
SPLIT_SEED = 42
sssplit = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=SPLIT_SEED)
# n_splits=1, so the generator yields exactly one (train, test) index pair
train_index, test_index = next(sssplit.split(data, data['Pawpularity']))
# .copy() gives each subset its own data, so assigning columns on them later
# does not raise pandas' SettingWithCopyWarning
training_set = data.iloc[train_index].copy()
eval_set = data.iloc[test_index].copy()

# Visually check distribution of pawpularity score in training and test sets
training_set['Pawpularity'].hist(label='Training set')
eval_set['Pawpularity'].hist(label='Eval set')
plt.title('Pawpularity score distribution in training and test set')
plt.xlabel('Pawpularity score')
plt.ylabel('Count')
plt.legend(loc='upper right')
plt.show()


In [None]:
# Export training and test sets as header-less `<image path>,<score>` .csv files
TRAIN_IMAGE_DIR = '../input/petfinder-pawpularity-score/train/'


def export_image_csv(frame, image_dir, csv_path):
    """Write `frame` to `csv_path` as header-less `<image path>,<Pawpularity>` rows.

    The image path is `image_dir + Id + '.jpg'`. `frame` itself is left
    untouched, so re-running the cell cannot prepend the directory twice.

    Args:
        frame: DataFrame with `Id` (bare image id) and `Pawpularity` columns.
        image_dir: Directory prefix (with trailing slash) of the image files.
        csv_path: Destination of the CSV file.
    """
    image_paths = image_dir + frame['Id'] + '.jpg'
    exported = frame.assign(Id=image_paths)[['Id', 'Pawpularity']]
    exported.to_csv(csv_path, header=False, index=False)


export_image_csv(training_set, TRAIN_IMAGE_DIR, '/kaggle/working/training_set.csv')
export_image_csv(eval_set, TRAIN_IMAGE_DIR, '/kaggle/working/eval_set.csv')

In [None]:
BATCH_SIZE = 32
# decode_csv reads these module-level names when the datasets are mapped below,
# so the image size must be set before the datasets are built.
IMG_HEIGHT = 164
IMG_WIDTH = 164
IMG_CHANNELS = 3


def make_dataset(csv_path):
    """Build a batched (image, score) dataset from a header-less CSV file.

    Args:
        csv_path: Path of a CSV whose lines `decode_csv` can parse.

    Returns:
        A `tf.data.Dataset` yielding batches of `BATCH_SIZE` decoded rows.
    """
    return (
        tf.data.TextLineDataset(csv_path)
        # Decode images in parallel; per the tf.data docs element order is
        # kept by default, so only throughput changes
        .map(decode_csv, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(BATCH_SIZE)
        # Prepare upcoming batches while the model is busy with the current one
        .prefetch(tf.data.AUTOTUNE)
    )


train_dataset = make_dataset('/kaggle/working/training_set.csv')

eval_dataset = make_dataset('/kaggle/working/eval_set.csv')

# Our neural network is a Sequential model: the image is flattened, passed
# through two hidden ReLU layers (300 and 30 units) and reduced to a single
# linear output, the predicted score
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)),
    tf.keras.layers.Dense(units=300, activation='relu'),
    tf.keras.layers.Dense(units=30, activation='relu'),
    tf.keras.layers.Dense(units=1, activation=None)
])
# Compile with the Adam optimiser; MSE is the training loss and RMSE is
# reported as the metric
model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError(), metrics=[tf.keras.metrics.RootMeanSquaredError()])

# And let's now train our model with the training and evaluation data
history = model.fit(train_dataset, validation_data=eval_dataset, epochs=20)

In [None]:
# Draw the model as a layer graph, annotated with tensor shapes instead of
# layer names, to see how data flows through the network
tf.keras.utils.plot_model(
    model,
    show_shapes=True,
    show_layer_names=False,
)

In [None]:
import pandas as pd
# Intermediate header-less `<image path>,<score>` file for the test images.
# The same absolute path is used for writing and reading, so the cell does not
# depend on the notebook's working directory.
TEST_CSV_PATH = '/kaggle/working/submission.csv'
TEST_IMAGE_DIR = '../input/petfinder-pawpularity-score/test/'

test_frame = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')
# Replace each bare image id with the path of its JPEG file
test_frame = test_frame.assign(Id=TEST_IMAGE_DIR + test_frame['Id'] + '.jpg')
test_frame.to_csv(TEST_CSV_PATH, index=False, header=False)

# `submission` is only ever the tf.data pipeline; the DataFrame above has its
# own name so one variable does not hold two kinds of object
submission = tf.data.TextLineDataset(
    TEST_CSV_PATH
).map(decode_csv).batch(BATCH_SIZE)

# Make predictions with our model
sample_prediction = model.predict(submission)

In [None]:
# Format predictions to output for submission
sample_ids = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')['Id']
# Flatten the prediction array so it forms a single column
predicted_scores = np.asarray(sample_prediction).reshape(-1)
# Ids and predictions are paired by position, so the lengths must agree
assert len(sample_ids) == len(predicted_scores), 'prediction count does not match the number of test ids'

# Build the frame with flat column labels; assigning a nested list such as
# [['Id', 'Pawpularity']] to `.columns` would create a MultiIndex instead
submission_output = pd.DataFrame({'Id': sample_ids, 'Pawpularity': predicted_scores})
print(submission_output)

# Output submission file to csv
submission_output.to_csv('submission.csv', index=False)