# Artificial Neural Network

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
import joblib

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Load the training data; the path is relative to the notebook's working directory.
data = pd.read_csv("train.csv")

In [None]:
# Quick look at the first rows to check that the file parsed as expected.
data.head()

In [None]:
# Column dtypes and non-null counts; useful for spotting columns with missing values.
data.info()

## Impute Missing Data

In [316]:
# Distribution of the condition codes before imputation. value_counts() leaves
# NaN out by default, so rows with no recorded condition are not counted here.
data["condition"].value_counts()

1.0    6819
0.0    6281
2.0    4257
Name: condition, dtype: int64

In [317]:
# Rows with no recorded condition get their own code, 3.0, so that "missing"
# becomes a separate category (the codes already present are 0.0, 1.0 and 2.0).
condition_missing = data["condition"].isna()
data.loc[condition_missing, "condition"] = 3.0

In [318]:
# Re-display the first rows to check the imputed condition column.
data.head()

Unnamed: 0,pet_id,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2,breed_category,pet_category
0,ANSL_69903,2016-07-10 00:00:00,2016-09-21 16:25:00,2.0,Brown Tabby,0.8,7.78,13,9,0.0,1
1,ANSL_66892,2013-11-21 00:00:00,2018-12-27 17:47:00,1.0,White,0.72,14.19,13,9,0.0,2
2,ANSL_69750,2014-09-28 00:00:00,2016-10-19 08:24:00,3.0,Brown,0.15,40.9,15,4,2.0,4
3,ANSL_71623,2016-12-31 00:00:00,2019-01-25 18:30:00,1.0,White,0.62,17.82,0,1,0.0,2
4,ANSL_57969,2017-09-28 00:00:00,2017-11-19 09:38:00,2.0,Black,0.5,11.06,18,4,0.0,1


In [319]:
# Re-count after imputation: the 3.0 bucket holds the rows that had no condition.
data["condition"].value_counts()

1.0    6819
0.0    6281
2.0    4257
3.0    1477
Name: condition, dtype: int64

## Encode Data

### Encode Independent Variable

In [320]:
# Disabled: integer label-encoding of color_type. The column is one-hot encoded
# with OneHotEncoder further down instead, so this cell intentionally does nothing.
#le = LabelEncoder()
#data["color_type"] = le.fit_transform(data["color_type"])

In [321]:
# color_type is still the raw colour name at this point (no label encoding was applied).
data.head()

Unnamed: 0,pet_id,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2,breed_category,pet_category
0,ANSL_69903,2016-07-10 00:00:00,2016-09-21 16:25:00,2.0,Brown Tabby,0.8,7.78,13,9,0.0,1
1,ANSL_66892,2013-11-21 00:00:00,2018-12-27 17:47:00,1.0,White,0.72,14.19,13,9,0.0,2
2,ANSL_69750,2014-09-28 00:00:00,2016-10-19 08:24:00,3.0,Brown,0.15,40.9,15,4,2.0,4
3,ANSL_71623,2016-12-31 00:00:00,2019-01-25 18:30:00,1.0,White,0.62,17.82,0,1,0.0,2
4,ANSL_57969,2017-09-28 00:00:00,2017-11-19 09:38:00,2.0,Black,0.5,11.06,18,4,0.0,1


In [322]:
# One-hot encode the colour names into a sparse indicator matrix, one column per
# distinct colour found in this frame.
# handle_unknown="ignore" does not change the encoding of the data it is fitted
# on, but lets the fitted encoder transform a colour it has never seen (the row
# becomes all zeros) instead of raising, which matters when it is reused on
# data other than train.csv.
color_type_enc = OneHotEncoder(handle_unknown="ignore")
color_type_1h = color_type_enc.fit_transform(data[["color_type"]])

In [323]:
# Numeric predictors as a plain array. They occupy the first five columns of X,
# which is what the later [:, :5] scaling slices rely on.
numeric_columns = ["condition", "length(m)", "height(cm)", "X1", "X2"]
X = data[numeric_columns].to_numpy()

In [324]:
# Append the dense one-hot colour columns to the right of the numeric columns.
# NOTE: X is rebuilt from itself, so running this cell twice appends the colour
# columns twice; re-run the cell that creates X first.
color_columns = color_type_1h.toarray()
X = np.hstack((X, color_columns))

In [339]:
# Show the assembled feature matrix followed by its (rows, columns) shape.
print(X, X.shape, sep="\n")

[[ 2.    0.8   7.78 ...  0.    0.    0.  ]
 [ 1.    0.72 14.19 ...  1.    0.    0.  ]
 [ 3.    0.15 40.9  ...  0.    0.    0.  ]
 ...
 [ 0.    0.99 28.13 ...  0.    0.    0.  ]
 [ 0.    0.55 44.82 ...  0.    0.    0.  ]
 [ 0.    0.86 37.4  ...  0.    0.    0.  ]]
(18834, 61)


### Encode Dependent Variable

In [340]:
# One-hot encode the breed_category target. The fitted encoder is kept so that
# predictions can be mapped back to the original category values later.
enc_y1 = OneHotEncoder()
enc_y1.fit(data[["breed_category"]])
y = enc_y1.transform(data[["breed_category"]]).toarray()

In [341]:
# Inspect the one-hot encoded breed_category target.
print(y)

[[1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 ...
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]


## Split Train and Test Set

In [345]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Scale Data

In [346]:
# Standardise only the five numeric columns; the one-hot columns after them are
# left as 0/1. The scaler is fitted on the training rows alone and reused for
# the held-out rows later. The one-hot targets are not scaled.
# NOTE: the result is written back into X_train, so re-running this cell without
# re-running the split would standardise the same values a second time.
sc_X = StandardScaler()
sc_X.fit(X_train[:, :5])
X_train[:, :5] = sc_X.transform(X_train[:, :5])

In [347]:
# Inspect the training features after the numeric columns were standardised.
print(X_train)

[[-0.05587848  0.78685575 -0.80279881 ...  0.          0.
   0.        ]
 [-1.1297573   1.13345267  0.50396968 ...  0.          0.
   0.        ]
 [ 2.09187917 -0.98078854  0.88523861 ...  0.          0.
   0.        ]
 ...
 [ 1.01800035  1.51470928  0.31102916 ...  0.          0.
   0.        ]
 [-1.1297573  -1.46602423  0.60927985 ...  0.          0.
   0.        ]
 [ 2.09187917 -0.35691408 -0.58679767 ...  0.          0.
   0.        ]]


In [348]:
# Inspect the one-hot training targets (left unscaled).
print(y_train)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 ...
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


## Build the ANN for Breed Category

In [349]:
# Breed-category classifier: three 64-unit ReLU hidden layers followed by a
# 3-unit softmax, one output per one-hot column of y. It is defined in a single
# cell so the model is always rebuilt as a whole.
# TensorFlow is seeded before the layers are created so that repeated runs
# start from the same random state. The train/test split is already seeded,
# but without this the network itself is not reproducible.
tf.random.set_seed(0)

ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=3, activation='softmax'),
])

# Categorical cross-entropy pairs with the one-hot targets and the softmax output.
ann.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [355]:
# Train for 100 epochs in mini-batches of 32. No validation data is passed, so
# only training-set metrics are reported while fitting.
ann.fit(x=X_train, y=y_train, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f15608d5d68>

In [357]:
# Apply the scaling learned on the training rows to the held-out rows
# (transform only, the scaler is not refitted).
# NOTE: this overwrites X_test in place; run it exactly once per split, otherwise
# the numeric columns are scaled twice.
X_test[:,:5] = sc_X.transform(X_test[:,:5])

In [362]:
# Softmax outputs for the held-out rows: one row per sample, one column per
# breed-category class.
y_pred = ann.predict(X_test)
print(y_pred)

[[1.00000000e+00 1.91509881e-22 0.00000000e+00]
 [0.00000000e+00 1.00000000e+00 0.00000000e+00]
 [9.90512550e-01 9.48741939e-03 1.61775127e-08]
 ...
 [0.00000000e+00 1.00000000e+00 0.00000000e+00]
 [7.28043830e-08 5.60059879e-15 9.99999881e-01]
 [1.07526015e-16 1.29327399e-27 1.00000000e+00]]


In [359]:
# Disabled: undoing a target scaling. No sc_y scaler is fitted in this notebook,
# so there is nothing to invert. The array shown under this cell is leftover
# output from an earlier run and does not correspond to the current code.
#y_pred = sc_y.inverse_transform(y_pred)
#print(y_pred)

[[0.9789672  0.4434194  0.07718856]
 [0.47939205 0.9402077  0.07718856]
 [0.9742275  0.44813263 0.07718856]
 ...
 [0.47939205 0.9402077  0.07718856]
 [0.47939208 0.4434194  0.34407893]
 [0.47939205 0.4434194  0.34407896]]


In [363]:
# Side-by-side labels for the held-out rows: column 0 = predicted, column 1 = actual.
# The softmax probabilities are handed straight to inverse_transform; the result
# shown below is consistent with it picking the highest-probability column.
breed_pred_labels = enc_y1.inverse_transform(y_pred)
breed_true_labels = enc_y1.inverse_transform(y_test)
np.hstack((breed_pred_labels, breed_true_labels))

array([[0., 0.],
       [1., 1.],
       [0., 1.],
       ...,
       [1., 1.],
       [2., 2.],
       [2., 2.]])

In [364]:
# Confusion matrix on the held-out rows: rows are the true breed categories,
# columns are the predicted ones.
breed_actual = enc_y1.inverse_transform(y_test)
breed_predicted = enc_y1.inverse_transform(y_pred)
confusion_matrix(y_true=breed_actual, y_pred=breed_predicted)

array([[1650,  127,    0],
       [ 245, 1431,    0],
       [   0,    0,  314]])

In [417]:
# Weighted F1 on the held-out rows: per-class F1 averaged with class support as
# the weight. Kept in f1_breed for the combined score at the end.
f1_breed = f1_score(
    y_true=enc_y1.inverse_transform(y_test),
    y_pred=enc_y1.inverse_transform(y_pred),
    average='weighted',
)
print(f1_breed)

0.9010327799982764


In [374]:
# Persist the trained breed-category network to an HDF5 file in the working directory.
# NOTE(review): only the network is saved here. The fitted scaler (sc_X) and the
# encoders (color_type_enc, enc_y1) are needed to prepare new inputs and decode
# predictions; confirm they are persisted somewhere (joblib is imported but
# not used in the cells shown).
ann.save("breed_category_ann.h5")

## Build the ANN for Pet Category

In [375]:
# One-hot encode the pet_category target. The fitted encoder is kept so that
# predictions can be mapped back to the original category values later.
enc_y2 = OneHotEncoder()
enc_y2.fit(data[["pet_category"]])
y2 = enc_y2.transform(data[["pet_category"]]).toarray()

In [376]:
# Inspect the one-hot encoded pet_category target.
print(y2)

[[0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 ...
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 1. 0.]]


In [390]:
# Same feature matrix, same 20% hold-out and same random_state as the breed
# split; only the target differs (pet_category).
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X,
    y2,
    test_size=0.2,
    random_state=0,
)

In [392]:
# Reuse the scaler fitted for the breed model instead of fitting a new one; the
# split above uses the same X, test_size and random_state as the breed split.
# NOTE: this overwrites X2_train in place; run it once per split.
X2_train[:,:5] = sc_X.transform(X2_train[:,:5])
print(X2_train)

[[-0.05587848  0.78685575 -0.80279881 ...  0.          0.
   0.        ]
 [-1.1297573   1.13345267  0.50396968 ...  0.          0.
   0.        ]
 [ 2.09187917 -0.98078854  0.88523861 ...  0.          0.
   0.        ]
 ...
 [ 1.01800035  1.51470928  0.31102916 ...  0.          0.
   0.        ]
 [-1.1297573  -1.46602423  0.60927985 ...  0.          0.
   0.        ]
 [ 2.09187917 -0.35691408 -0.58679767 ...  0.          0.
   0.        ]]


In [393]:
# Inspect the one-hot pet_category training targets.
print(y2_train)

[[0. 0. 1. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 ...
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]]


In [401]:
# Pet-category classifier: same hidden stack as the breed model (three 64-unit
# ReLU layers) with a 4-unit softmax, one output per one-hot column of y2. It is
# defined in a single cell so the model is always rebuilt as a whole.
# TensorFlow is seeded before the layers are created so that repeated runs
# start from the same random state. The train/test split is already seeded,
# but without this the network itself is not reproducible.
tf.random.set_seed(0)

pet_ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='softmax'),
])

# Categorical cross-entropy pairs with the one-hot targets and the softmax output.
pet_ann.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [407]:
# Train for 100 epochs in mini-batches of 32. No validation data is passed, so
# only training-set metrics are reported while fitting.
pet_ann.fit(x=X2_train, y=y2_train, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f1561b6b1d0>

In [408]:
# Apply the training-set scaling to the held-out rows (transform only).
# NOTE: this overwrites X2_test in place; run it exactly once per split, otherwise
# the numeric columns are scaled twice.
X2_test[:,:5] = sc_X.transform(X2_test[:,:5])

In [411]:
# Softmax outputs for the held-out rows: one row per sample, one column per
# pet-category class.
y2_pred = pet_ann.predict(X2_test)
print(y2_pred)

[[1.2823130e-19 1.5472609e-05 9.9998343e-01 1.1003017e-06]
 [1.1985372e-17 9.6176769e-16 1.0000000e+00 4.0730246e-09]
 [0.0000000e+00 1.0000000e+00 1.1623100e-31 1.2732335e-27]
 ...
 [3.2498997e-17 5.5150402e-01 4.4849595e-01 2.7092371e-09]
 [0.0000000e+00 1.0000000e+00 6.6274489e-23 1.0314866e-17]
 [4.6566786e-04 8.5665590e-09 9.5328887e-06 9.9952483e-01]]


In [412]:
# Side-by-side labels for the held-out rows: column 0 = predicted, column 1 = actual.
# The softmax probabilities are handed straight to inverse_transform; the result
# shown below is consistent with it picking the highest-probability column.
pet_pred_labels = enc_y2.inverse_transform(y2_pred)
pet_true_labels = enc_y2.inverse_transform(y2_test)
np.hstack((pet_pred_labels, pet_true_labels))

array([[2, 2],
       [2, 2],
       [1, 1],
       ...,
       [1, 2],
       [1, 1],
       [4, 4]])

In [413]:
# Confusion matrix on the held-out rows: rows are the true pet categories,
# columns are the predicted ones.
pet_actual = enc_y2.inverse_transform(y2_test)
pet_predicted = enc_y2.inverse_transform(y2_pred)
confusion_matrix(y_true=pet_actual, y_pred=pet_predicted)

array([[   1,    2,    5,    5],
       [   0, 1097,  349,   13],
       [   2,   89, 2011,   12],
       [   1,   11,   19,  150]])

In [419]:
# Weighted F1 on the held-out rows: per-class F1 averaged with class support as
# the weight. Kept in f1_pet for the combined score at the end.
f1_pet = f1_score(
    y_true=enc_y2.inverse_transform(y2_test),
    y_pred=enc_y2.inverse_transform(y2_pred),
    average='weighted',
)
print(f1_pet)

0.861836346964682


In [422]:
# Persist the trained pet-category network to an HDF5 file in the working directory.
# NOTE(review): the fitted scaler (sc_X) and the encoders (color_type_enc, enc_y2)
# are not saved by this cell; confirm they are persisted elsewhere if the model
# is to be reused on new data.
pet_ann.save("pet_category_ann.h5")

In [421]:
# Combined score: the mean of the two weighted F1 values, expressed as a percentage.
f1_total = f1_breed + f1_pet
net_f1 = 100 * f1_total / 2
print(net_f1)

88.14345634814792
