In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
# Assuming your dataset is in a CSV file named 'dataset.csv'
df = pd.read_csv('G:/GitHub/Zootopia/animaldisease/animal_disease_dataset/data.csv')

In [4]:
df.head()

Unnamed: 0,AnimalName,symptoms1,symptoms2,symptoms3,symptoms4,symptoms5,Dangerous
0,Dog,Fever,Diarrhea,Vomiting,Weight loss,Dehydration,Yes
1,Dog,Fever,Diarrhea,Coughing,Tiredness,Pains,Yes
2,Dog,Fever,Diarrhea,Coughing,Vomiting,Anorexia,Yes
3,Dog,Fever,Difficulty breathing,Coughing,Lethargy,Sneezing,Yes
4,Dog,Fever,Diarrhea,Coughing,Lethargy,Blue Eye,Yes


In [5]:
from sklearn.preprocessing import LabelEncoder

# Define the columns to be encoded
categorical_cols = ['AnimalName', 'symptoms1', 'symptoms2', 
                    'symptoms3', 'symptoms4', 'symptoms5']

# Initialize the LabelEncoder
label_encoder_X = {}

# Encode categorical variables
for col in categorical_cols:
    label_encoder_X[col] = LabelEncoder()
    df[col + '_en'] = label_encoder_X[col].fit_transform(df[col])

# Display the first few rows of the dataset with encoded variables
print("\nEncoded Dataset:")
print(df.head())


Encoded Dataset:
  AnimalName symptoms1             symptoms2 symptoms3    symptoms4  \
0        Dog     Fever              Diarrhea  Vomiting  Weight loss   
1        Dog     Fever              Diarrhea  Coughing    Tiredness   
2        Dog     Fever              Diarrhea  Coughing     Vomiting   
3        Dog     Fever  Difficulty breathing  Coughing     Lethargy   
4        Dog     Fever              Diarrhea  Coughing     Lethargy   

     symptoms5 Dangerous  AnimalName_en  symptoms1_en  symptoms2_en  \
0  Dehydration       Yes              6            63            31   
1        Pains       Yes              6            63            31   
2     Anorexia       Yes              6            63            31   
3     Sneezing       Yes              6            63            34   
4     Blue Eye       Yes              6            63            31   

   symptoms3_en  symptoms4_en  symptoms5_en  
0           179           182            32  
1            31           165       

In [6]:
# Separate features (X) and target variable (y)
selected_columns = ['AnimalName_en', 'symptoms1_en', 'symptoms2_en', 'symptoms3_en', 'symptoms4_en', 'symptoms5_en']
X = df[selected_columns]
y = df['Dangerous']

In [7]:
from sklearn.preprocessing import LabelEncoder

# Encode target variable using LabelEncoder
label_encoder_Y = LabelEncoder()
y = label_encoder_Y.fit_transform(y)

In [8]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
X.dtypes

AnimalName_en    int32
symptoms1_en     int32
symptoms2_en     int32
symptoms3_en     int32
symptoms4_en     int32
symptoms5_en     int32
dtype: object

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define the neural network model
model = Sequential([
    # Input layer with 10 nodes
    Dense(128, activation='relu', input_shape=(6,)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    # Output layer with appropriate activation function
    Dense(5, activation='softmax')  # Assuming 3 classes for the output
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Use sparse categorical crossentropy for integer labels
              metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
model.summary()

In [12]:
import numpy as np

# Assuming y_train contains your training labels
num_classes = np.unique(y_train).shape[0]
print("Number of classes:", num_classes)

Number of classes: 3


In [13]:
# Train the model (assuming X_train and y_train are your training data)
history = model.fit(X_train, y_train, validation_split=0.2, epochs=30, batch_size=32)

Epoch 1/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - accuracy: 0.5643 - loss: 16.9296 - val_accuracy: 0.9643 - val_loss: 3.4609
Epoch 2/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9426 - loss: 3.1306 - val_accuracy: 0.9643 - val_loss: 3.5140
Epoch 3/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9545 - loss: 2.2129 - val_accuracy: 0.9643 - val_loss: 2.8681
Epoch 4/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9396 - loss: 2.8908 - val_accuracy: 0.9643 - val_loss: 2.4591
Epoch 5/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9514 - loss: 2.1381 - val_accuracy: 0.9643 - val_loss: 2.1377
Epoch 6/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9306 - loss: 2.5674 - val_accuracy: 0.9643 - val_loss: 1.8044
Epoch 7/30
[1m18/18[0m [32m━━━━━━━━

In [14]:
import numpy as np

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_accuracy)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 419us/step - accuracy: 0.9938 - loss: 0.0515
Test Accuracy: 0.9885714054107666


In [15]:
# Save the model
model.save("model.h5")



In [16]:
from joblib import dump

# Save the label encoder classes
for col, encoder in label_encoder_X.items():
    dump(encoder, f'label_encoder_{col}_classes.joblib')
    
dump(label_encoder_Y, 'label_encoder_Y_classes.joblib')

['label_encoder_Y_classes.joblib']

In [18]:
import pickle

# Save the label encoder to a file
with open('label_encoder_X.pkl', 'wb') as f:
    pickle.dump(label_encoder_X, f)

In [19]:
import pickle

# Save the label encoder to a file
with open('label_encoder_Y.pkl', 'wb') as f:
    pickle.dump(label_encoder_Y, f)

In [24]:
from joblib import load

# Load the label encoder classes
label_encoder_X_classes = {}
for col in ['AnimalName', 'symptoms1', 'symptoms2', 'symptoms3', 'symptoms4', 'symptoms5']:
    label_encoder_X_classes[col] = load(f'label_encoder_{col}_classes.joblib')
label_encoder_Y_classes = load('label_encoder_Y_classes.joblib')

# Print the classes for each feature
for col, encoder in label_encoder_X_classes.items():
    print(f'Classes for {col}: {encoder.classes_}')

print(f'Classes for Y: {label_encoder_Y_classes.classes_}')

Classes for AnimalName: ['Birds' 'Black-tailed deer' 'Buffaloes' 'Cattle' 'Chicken' 'Deer' 'Dog'
 'Dogs' 'Donkey' 'Duck' 'Elephant' 'Elk' 'Fowl' 'Fox' 'Fox ' 'Goat'
 'Goats' 'Hamster' 'Horse' 'Hyaenas' 'Lion' 'Monkey' 'Moos' 'Mule deer'
 'Other Birds' 'Pig' 'Pigs' 'Rabbit' 'Reindeer' 'Sheep' 'Sika deer'
 'Tiger' 'Turtle' 'Wapiti' 'White-tailed deer' 'Wolves' 'cat' 'cattle'
 'chicken' 'cow' 'donkey' 'duck' 'horse' 'mammal' 'mules' 'snake']
Classes for symptoms1: [' abortion at the end of gestation' 'Abdominal pain' 'Abortion '
 'Abortion on late pregancy' 'Air sacculitis' 'Allergic Reaction'
 'Animal gets uneasy' 'Apathy' 'Appetite varies' 'Attacks' 'Bad breath'
 'Balance problem' 'Bald patches' 'Barren ewes' 'Berberi'
 'Bleeding from other parts of the body' 'Blood from mouth'
 'Blood in urine' 'Blood stool' 'Bloody Diarhhea' 'Bloody Urine'
 'Bluish white opacity' 'Bony Lesion' 'Bubbles in the mouth' 'Cell Damage'
 'Cessation of eructation' 'Changes in outer layer skin' 'Constant pacin

In [28]:
import numpy as np

# Function to preprocess input data for testing
def preprocess_input(input_data, label_encoders_X):
    encoded_input = []
    for col, value in input_data.items():
        if col in label_encoder_X:
            # Use the corresponding LabelEncoder to transform feature variables
            encoded_input.append(label_encoder_X[col].transform([value])[0])
        else:
            # If the column is not categorical, use the value as is
            encoded_input.append(value)
    return np.array(encoded_input).reshape(1, -1)

# Define input data for testing
input_data = {
    'AnimalName': 'Buffaloes',
    'symptoms1': 'Indigestion',
    'symptoms2': 'Infection',
    'symptoms3': 'Drooling',
    'symptoms4': 'Appetite',
    'symptoms5': 'Muscle stiffness'
}

# Preprocess input data for testing
preprocessed_input = preprocess_input(input_data, label_encoder_X)

# Make predictions using the trained model
predictions = model.predict(preprocessed_input)

# Get the predicted class
predicted_class_index = np.argmax(predictions)
predicted_class = label_encoder_Y.classes_[predicted_class_index]

print("IS it dangerous:", predicted_class)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
Predicted Disease: Yes
