In [None]:
import numpy as np
import pandas as pd
import os
import cv2
import random
from tqdm import tqdm
import seaborn as sns
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.python.ops.numpy_ops import np_utils
from tensorflow.keras.utils import to_categorical

In [None]:
# Directories whose files are listed with os.listdir and read with cv2.imread
# by the image-loading loops further down.
train_dir = '../input/petfinder-pawpularity-score/train'
test_dir = '../input/petfinder-pawpularity-score/test'

In [None]:
# Load one sample training image, report its dimensions and preview it.
path0 = '../input/petfinder-pawpularity-score/train/0007de18844b0dbbb5e1f607da0606e0.jpg'
image = cv2.imread(path0)
print(image.shape)
# The preview is converted from BGR to RGB before being handed to matplotlib.
sample_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(sample_rgb)

In [None]:
# Shrink the sample image to 60x60 (the size used for the model input) and preview it.
target_size = (60, 60)
image2 = cv2.resize(image, dsize=target_size, interpolation=cv2.INTER_CUBIC)
print(image2.shape)
thumb_rgb = cv2.cvtColor(image2, cv2.COLOR_BGR2RGB)
plt.imshow(thumb_rgb)

In [None]:
# Read the training metadata table; later cells use its 'Id' and 'Pawpularity' columns.
train=pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
train

In [None]:
# List the distinct Pawpularity values present in the training table.
train['Pawpularity'].unique()

In [None]:
# Translate Pawpularity scores 1..100 to zero-based class indices 0..99 and back.
N0 = [index for index in range(100)]        # class indices
N1 = [score for score in range(1, 101)]     # Pawpularity scores
normal_mapping = {score: index for score, index in zip(N1, N0)}     # score -> class index
reverse_mapping = {index: score for index, score in zip(N0, N1)}    # class index -> score

In [None]:
# Look up the Pawpularity label of the sample image by its Id.
sample_rows = train.loc[train['Id'] == '0007de18844b0dbbb5e1f607da0606e0']
sample_rows['Pawpularity'].tolist()[0]

In [None]:
# Load every training image, resize it to 60x60 and collect it with its label.
#
# Labels are resolved through a dict built once up front; filtering the whole
# DataFrame for each file would make the loop quadratic in the number of images.
# drop_duplicates(keep='first') means the first row wins if an Id were repeated.
unique_rows = train.drop_duplicates(subset='Id', keep='first')
label_by_id = dict(zip(unique_rows['Id'], unique_rows['Pawpularity'].tolist()))

trainimg0 = []
trainlabel0 = []
for im in tqdm(os.listdir(train_dir)):
    img_path = os.path.join(train_dir, im)
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None, not by raising;
        # fail here with the offending path instead of a cryptic resize error.
        raise ValueError(f'Could not read image: {img_path}')
    image2 = cv2.resize(image, dsize=(60, 60), interpolation=cv2.INTER_CUBIC)
    trainimg0.append(image2)
    # The Id is the file name without its extension, whatever the extension length.
    trainlabel0.append(label_by_id[os.path.splitext(im)[0]])

In [None]:
# Convert the collected Pawpularity scores to class indices by looking each one up in normal_mapping.
trainlabel1=pd.Series(trainlabel0).map(normal_mapping)

In [None]:
# Turn the image list and the mapped labels into NumPy arrays so they can be
# indexed with lists of positions when the data is split.
trainimg=np.array(trainimg0)
trainlabel=np.array(trainlabel1)

In [None]:
# Build a repeatable random ordering of all sample positions.
m = len(trainimg)
M = [*range(m)]
random.seed(2021)   # fixed seed so the shuffled order is the same on every run
random.shuffle(M)

# Train/validation split of the training data

In [None]:
# Use the first (m//4)*3 shuffled positions for fitting and keep the
# remaining positions as a hold-out set.
cut = (m // 4) * 3
fit_idx, holdout_idx = M[:cut], M[cut:]

trainX = trainimg[fit_idx]
trainY0 = trainlabel[fit_idx]

testX = trainimg[holdout_idx]
testY0 = trainlabel[holdout_idx]

In [None]:
# One-hot encode the class indices.
# num_classes is pinned to the number of classes in normal_mapping so the
# encoded width always matches the model's 100-unit softmax output; without it
# to_categorical infers the width as max(label) + 1, which would be too narrow
# whenever the highest class happens to be absent from this subset.
labels1 = to_categorical(trainY0, num_classes=len(normal_mapping))
trainY = np.array(labels1)

In [None]:
# Split trainX/trainY once more: 20% is set aside as testx/testy, the rest is
# used for fitting. random_state fixes the split.
trainx,testx,trainy,testy=train_test_split(trainX,trainY,test_size=0.2,random_state=44)

In [None]:
# Report the shapes of the four arrays produced by the split.
for split_array in (trainx, testx, trainy, testy):
    print(split_array.shape)

# Model

In [None]:
# Small CNN for 60x60x3 inputs: two convolution/pooling stages, dropout, then a
# dense head ending in a 100-unit softmax.
model = Sequential([
    Conv2D(32, (4, 4), input_shape=(60, 60, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),
    Flatten(),
    Dense(400, activation='relu'),
    Dense(100, activation='softmax'),
])

In [None]:
# Configure training: categorical cross-entropy loss (labels are one-hot),
# Adam optimizer, and accuracy tracked as a metric.
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train for 30 epochs with batches of 92; validation_split=0.2 reserves 20% of
# trainx/trainy for per-epoch validation. The returned History is kept in `his`
# for the curve plots.
# NOTE(review): the images are passed as loaded and resized; no pixel rescaling
# appears in the visible cells — confirm that this is intended.
his = model.fit(trainx, trainy, validation_split=0.2, epochs=30, batch_size=92, verbose=2)

In [None]:
# Evaluate on the testx/testy split: compare predicted and true class indices.
y_pred = model.predict(testx)
pred = y_pred.argmax(axis=1)      # most probable class per sample
ground = testy.argmax(axis=1)     # position of the 1 in each one-hot label
report = classification_report(ground, pred)
print(report)

In [None]:
# Per-epoch metrics recorded by model.fit (the loss series are used by the
# loss plot as well).
get_acc = his.history['accuracy']
value_acc = his.history['val_accuracy']
get_loss = his.history['loss']
validation_loss = his.history['val_loss']

# Training vs validation accuracy per epoch.
# plt.figure() is called *before* plotting: calling it after the plot commands
# opens a second, empty figure that plt.show() then renders as a blank chart.
epochs = range(len(get_acc))
plt.figure()
plt.plot(epochs, get_acc, 'r', label='Accuracy of Training data')
plt.plot(epochs, value_acc, 'b', label='Accuracy of Validation data')
plt.title('Training vs validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc=0)
plt.show()

In [None]:
# Training vs validation loss per epoch.
# plt.figure() is called *before* plotting: calling it after the plot commands
# opens a second, empty figure that plt.show() then renders as a blank chart.
epochs = range(len(get_loss))
plt.figure()
plt.plot(epochs, get_loss, 'r', label='Loss of Training data')
plt.plot(epochs, validation_loss, 'b', label='Loss of Validation data')
plt.title('Training vs validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc=0)
plt.show()

In [None]:
# Predict on the hold-out images and tally how often each class index is chosen.
pred2 = model.predict(testX)
print(pred2.shape)

# Most probable class index for every hold-out sample.
PRED = [np.argmax(class_probs) for class_probs in pred2]
pred_counts = pd.Series(PRED).value_counts()
print(pred_counts)

In [None]:
# Compare the hold-out labels with the predictions: label distribution first,
# then the exact-match accuracy.
ANS = testY0
ans_counts = pd.Series(ANS).value_counts()
print(ans_counts)
accuracy = accuracy_score(ANS, PRED)
print(accuracy)

In [None]:
import seaborn as sns
# Overlay histograms of the true (ANS) and predicted (PRED) class indices.
fig, ax = plt.subplots(figsize=(14, 5))
for values, tag, shade in ((ANS, 'ANS', 'black'), (PRED, 'PRED', 'C1')):
    sns.histplot(values, label=tag, ax=ax, color=shade, bins=100)
ax.legend()
ax.grid()
plt.show()

In [None]:
# Show up to nine hold-out images in a 3x3 grid, each titled
# "<actual score>==<predicted score>".
fig, axs = plt.subplots(3, 3, figsize=(12, 12))
panels = list(axs.flat)          # row-major order: panel i sits at row i//3, column i%3
for panel in panels:
    panel.axis("off")            # hide axes everywhere, including unused panels
for i in range(min(len(panels), len(testX))):
    actual = reverse_mapping[testY0[i]]
    predict = reverse_mapping[PRED[i]]
    panels[i].set_title(str(actual) + '==' + str(predict))
    # The images were read with cv2.imread, so they are converted from BGR to
    # RGB before display; without this the colours shown are wrong.
    panels[i].imshow(cv2.cvtColor(testX[i], cv2.COLOR_BGR2RGB))
plt.show()

# Predict on the test data

In [None]:
# Load every test image resized to 60x60 and remember its Id, taken as the
# file name with its last four characters removed.
TESTX = []
testim = []
for im in tqdm(os.listdir(test_dir)):
    image = cv2.imread(os.path.join(test_dir, im))
    image2 = cv2.resize(image, dsize=(60, 60), interpolation=cv2.INTER_CUBIC)
    TESTX.append(image2)
    testim.append(im[:-4])

In [None]:
# Convert the list of resized test images into a single NumPy array (the form
# passed to model.predict) and print its shape.
TESTX=np.array(TESTX)
print(TESTX.shape)

In [None]:
# Predict class probabilities for the test images, then turn each most probable
# class index back into a Pawpularity score (stored as a float).
test_pred2 = model.predict(TESTX)

TESTPRED = [float(reverse_mapping[np.argmax(class_probs)]) for class_probs in test_pred2]
testpred_counts = pd.Series(TESTPRED).value_counts()
print(testpred_counts)

In [None]:
# Load and display the sample submission file.
sample=pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')
sample

In [None]:
# Assemble the submission table from the test Ids and their predicted scores.
# The columns are named at construction time, so the frame is well-formed even
# when the lists are empty and no positional column label has to be renamed.
result = pd.DataFrame({'Id': testim, 'Pawpularity': TESTPRED})
# Sort by Id so the written file has a stable row order.
result2 = result.sort_values('Id')
result2

In [None]:
# Write the sorted predictions to submission.csv without the DataFrame index column.
result2.to_csv('submission.csv',index=False)