# Predicting the health of horses using an Artificial Neural Network

## Importing libraries

In [29]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [30]:
# Display the installed TensorFlow version so the run environment is recorded with the notebook.
tf.__version__

'2.13.0'

## Data preprocessing

### Importing the datasets

In [31]:
# Load the training table; literal 'None' strings become NaN so that the
# imputers (which look for np.nan) treat them as missing values.
df = pd.read_csv('train_modified.csv').replace('None', np.nan)

# Every column except the last is a feature; the last column is the target.
feature_frame = df.iloc[:, :-1]
target_column = df.iloc[:, -1]
X = feature_frame.values
y = target_column.values

In [32]:
# Print the raw feature matrix as loaded (NumPy elides the middle rows and columns).
print(X)

[['yes' 'adult' 'cool' ... 38.1 132 24]
 ['yes' 'adult' 'cool' ... 37.5 88 12]
 ['yes' 'adult' 'cool' ... 38.3 120 28]
 ...
 ['yes' 'young' 'normal' ... 37.5 84 40]
 ['yes' 'adult' 'normal' ... 38.1 70 16]
 ['yes' 'adult' 'normal' ... 38.1 54 36]]


### Taking care of missing data

In [33]:
from sklearn.impute import SimpleImputer

# The first 15 columns are filled with their most frequent value and the
# last 8 columns with their mean.
# NOTE(review): both imputers are fitted on the full matrix, i.e. before the
# train/test split, so held-out rows contribute to the fill values.
leading_cols = slice(None, 15)
trailing_cols = slice(-8, None)

imputerString = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
X[:, leading_cols] = imputerString.fit_transform(X[:, leading_cols])

imputerNumeric = SimpleImputer(missing_values=np.nan, strategy='mean')
X[:, trailing_cols] = imputerNumeric.fit_transform(X[:, trailing_cols])

### Encoding categorical data

#### Encoding the independent variables

In [34]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode columns 0..14 (dropping the first level of each) and pass
# the remaining columns through unchanged.
one_hot = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')
ct = ColumnTransformer(
    transformers=[('encoder', one_hot, list(range(15)))],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

#### Encoding the dependent variable

In [35]:
from sklearn.preprocessing import LabelEncoder

# Map the class labels to integer codes, then expand the codes into one-hot
# rows to match the softmax output and categorical cross-entropy loss.
le = LabelEncoder()
class_codes = le.fit_transform(y)
y = tf.keras.utils.to_categorical(class_codes)

### Splitting the dataset into the Training set and Test set

In [36]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [37]:
# Print the full feature matrix after one-hot encoding (this is X, not the X_train/X_test splits).
print(X)

[[1.0 0.0 1.0 ... 38.1 132.0 24.0]
 [1.0 0.0 1.0 ... 37.5 88.0 12.0]
 [1.0 0.0 1.0 ... 38.3 120.0 28.0]
 ...
 [1.0 1.0 0.0 ... 37.5 84.0 40.0]
 [1.0 0.0 0.0 ... 38.1 70.0 16.0]
 [1.0 0.0 0.0 ... 38.1 54.0 36.0]]


In [38]:
# (rows, columns) of the encoded feature matrix
X.shape

(1235, 50)

### Feature Scaling

In [39]:
from sklearn.preprocessing import StandardScaler

# Standardise only the last 8 columns. The scaler learns its statistics from
# the training rows and re-applies them to the test rows.
sc = StandardScaler()
last_eight = slice(-8, None)
X_train[:, last_eight] = sc.fit_transform(X_train[:, last_eight])
X_test[:, last_eight] = sc.transform(X_test[:, last_eight])

In [40]:
# Cast both splits to float32 arrays before they are fed to the network.
X_train, X_test = (
    np.asarray(split).astype(np.float32) for split in (X_train, X_test)
)

## Building the ANN

### Initializing the ANN

In [41]:
# Seed Python, NumPy and TensorFlow in one call so that the weights created
# for this model, and therefore the metrics reported below, are repeatable
# across kernel restarts. The value matches the random_state used for the split.
tf.keras.utils.set_random_seed(0)

# Empty sequential model; layers are appended in the following cells.
ann = tf.keras.models.Sequential()

### Adding the input layer and the first hidden layer

In [42]:
# First hidden layer: 6 ReLU units. No input shape is declared here.
first_hidden = tf.keras.layers.Dense(6, activation='relu')
ann.add(first_hidden)

### Adding the second hidden layer

In [43]:
# Second hidden layer: another 6 ReLU units.
second_hidden = tf.keras.layers.Dense(6, activation='relu')
ann.add(second_hidden)

### Adding the output layer

In [44]:
# Output layer: 3 softmax units, giving a probability distribution over 3 classes.
output_layer = tf.keras.layers.Dense(3, activation='softmax')
ann.add(output_layer)

## Training the ANN

### Compiling the ANN

In [45]:
# Adam optimiser with categorical cross-entropy (targets are one-hot rows);
# accuracy is tracked as the reporting metric.
ann.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Training the ANN on the Training set

In [46]:
# Train silently (verbose=0) for 100 epochs in mini-batches of 64 rows.
# The returned History object is the cell's displayed value.
ann.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=100,
    verbose=0,
)

<keras.src.callbacks.History at 0x7e8a4aadee60>

## Predicting the Test set results

In [47]:
# Per-class probabilities for every held-out row.
y_pred = ann.predict(x=X_test)



## Making the Confusion Matrix

In [48]:
# Turn each row of y_pred into a one-hot vector marking its highest-scoring class.
# num_classes is pinned to the current width of y_pred: without it,
# to_categorical sizes the result from the largest label present, so the
# matrix would lose columns whenever the highest class is never predicted.
y_pred = tf.keras.utils.to_categorical(
    np.argmax(y_pred, axis=1),
    num_classes=y_pred.shape[1],
    dtype="int64",
)

In [49]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Collapse the one-hot rows back to integer class labels once, then reuse them.
true_labels = y_test.argmax(axis=1)
predicted_labels = y_pred.argmax(axis=1)

# Rows of the matrix are true classes, columns are predicted classes.
cm = confusion_matrix(true_labels, predicted_labels)
print(cm)
accuracy_score(true_labels, predicted_labels)

[[69  3 17]
 [ 5 24 13]
 [32  9 75]]


0.680161943319838

## Predicting values for the unlabeled test dataset (`test_modified.csv`)

In [50]:
# Load the test table and mark literal 'None' strings as NaN, mirroring the
# treatment of the training file. Every column is used as a feature.
df_res = pd.read_csv('test_modified.csv').replace('None', np.nan)
X_res = df_res.values

In [51]:
# Show the raw feature array loaded from test_modified.csv, before any preprocessing.
X_res

array([['no', 'adult', 'normal', ..., 38.6, 40, 20],
       ['yes', 'adult', 'cool', ..., 38.2, 112, 48],
       ['yes', 'adult', 'cool', ..., 37.7, 66, 12],
       ...,
       ['yes', 'adult', 'cool', ..., 39.2, 132, 12],
       ['no', 'adult', 'normal', ..., 38.3, 54, 66],
       ['yes', 'adult', 'cold', ..., 38.1, 66, 12]], dtype=object)

In [52]:
# Fill missing values with the imputers fitted earlier (transform only, no
# refit): mode for the first 15 columns, mean for the last 8.
first_fifteen = slice(None, 15)
final_eight = slice(-8, None)
X_res[:, first_fifteen] = imputerString.transform(X_res[:, first_fifteen])
X_res[:, final_eight] = imputerNumeric.transform(X_res[:, final_eight])

In [53]:
# Apply the already-fitted column transformer (one-hot encoding + passthrough).
encoded_res = ct.transform(X_res)
X_res = np.array(encoded_res)



In [54]:
# Standardise the last 8 columns with the scaler fitted on the training split.
tail_cols = slice(-8, None)
X_res[:, tail_cols] = sc.transform(X_res[:, tail_cols])

In [55]:
# Cast to a float32 array, the same dtype the network was trained on.
X_res = np.asarray(X_res)
X_res = X_res.astype(np.float32)

In [56]:
# Predict class probabilities, then keep the index of the most likely class per row.
class_probabilities = ann.predict(X_res)
y_res = np.argmax(class_probabilities, axis=1)



In [57]:
# Map the integer class indices back to the original label values using the
# LabelEncoder that was fitted on the training target.
y_res_str = le.inverse_transform(y_res)

In [58]:
# Write the decoded predictions to disk.
# NOTE(review): the frame has no column names and to_csv keeps the default
# integer index, so the file gets an unnamed index column and a "0" header —
# confirm this matches the format the consumer of submission.csv expects.
submission = pd.DataFrame(y_res_str)
submission.to_csv("submission.csv")