# Predicting the health of horses using an Artificial Neural Network

## Importing libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
tf.__version__

'2.10.0'

## Data preprocessing

### Importing the datasets

In [3]:
df = pd.read_csv('train.csv')
df = df.drop(labels=['hospital_number', 'lesion_2', 'lesion_3', 'cp_data'], axis=1)
X = df.iloc[:, 1:-1].values
y = df.iloc[:, -1].values

In [4]:
print(X)

[['yes' 'adult' 38.1 ... 3.4 'yes' 2209]
 ['yes' 'adult' 37.5 ... 2.0 'yes' 2208]
 ['yes' 'adult' 38.3 ... 3.4 'yes' 5124]
 ...
 ['yes' 'young' 37.5 ... 7.0 'yes' 400]
 ['yes' 'adult' 38.1 ... 2.0 'yes' 2209]
 ['yes' 'adult' 38.1 ... 3.6 'yes' 2124]]


### Taking care of missing data

In [5]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values="None", strategy='most_frequent')
imputer.fit(X)
X = imputer.transform(X)

ValueError: Input contains NaN

### Encoding categorical data

#### Encoding the independent variables

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(drop='first', sparse_output=False), [0, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 19, 21])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

#### Encoding the dependent variable

In [None]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit_transform(y)

### Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
print(X)

In [None]:
np.shape(X)

### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Building the ANN

### Initializing the ANN

In [None]:
ann = tf.keras.models.Sequential()

### Adding the input layer and the first hidden layer

In [None]:
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

### Adding the second hidden layer

In [None]:
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

### Adding the output layer

In [None]:
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

## Training the ANN

### Compiling the ANN

In [None]:
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

### Training the ANN on the Training set

In [None]:
ann.fit(X_train, y_train, batch_size=32, epochs=100)

## Predicting the Test set results

In [None]:
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)

## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)