In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [60]:
# Load the animal-disease dataset from its CSV file into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='F:/KA Projects/zootopia/animaldisease/animal_disease_dataset/animal_disease_dataset.csv',
)

In [61]:
# Preview the first rows of the raw dataset to check that it loaded as expected.
df.head()

Unnamed: 0,Animal,Age,Temperature,Symptom 1,Symptom 2,Symptom 3,Disease
0,cow,3,103.1,depression,painless lumps,loss of appetite,pneumonia
1,buffalo,13,104.5,painless lumps,loss of appetite,depression,lumpy virus
2,sheep,1,100.5,depression,painless lumps,loss of appetite,lumpy virus
3,cow,14,100.3,loss of appetite,swelling in limb,crackling sound,blackleg
4,sheep,2,103.6,painless lumps,loss of appetite,depression,pneumonia


In [62]:
from sklearn.preprocessing import LabelEncoder

# Categorical feature columns that must be converted to integer codes
categorical_cols = ['Animal', 'Symptom 1', 'Symptom 2', 'Symptom 3']

# One independent LabelEncoder per categorical column, keyed by column name
label_encoder_X = {name: LabelEncoder() for name in categorical_cols}

# Fit each encoder on its own column and store the codes in a new '<column>_en' column
for name, column_encoder in label_encoder_X.items():
    df[name + '_en'] = column_encoder.fit_transform(df[name])

# Show the first rows, now including the encoded columns
print("\nEncoded Dataset:")
print(df.head())


Encoded Dataset:
    Animal  Age  Temperature         Symptom 1         Symptom 2  \
0      cow    3        103.1        depression    painless lumps   
1  buffalo   13        104.5    painless lumps  loss of appetite   
2    sheep    1        100.5        depression    painless lumps   
3      cow   14        100.3  loss of appetite  swelling in limb   
4    sheep    2        103.6    painless lumps  loss of appetite   

          Symptom 3      Disease  Animal_en  Symptom 1_en  Symptom 2_en  \
0  loss of appetite    pneumonia          1             7            12   
1        depression  lumpy virus          0            12            11   
2  loss of appetite  lumpy virus          3             7            12   
3   crackling sound     blackleg          1            11            21   
4        depression    pneumonia          3            12            11   

   Symptom 3_en  
0            11  
1             7  
2            11  
3             6  
4             7  


In [63]:
# Model inputs: the encoded categorical columns plus the two numeric columns.
selected_columns = [
    'Animal_en',
    'Age',
    'Temperature',
    'Symptom 1_en',
    'Symptom 2_en',
    'Symptom 3_en',
]

# X holds the features, y holds the (still textual) disease label.
X, y = df[selected_columns], df['Disease']

In [64]:
from sklearn.preprocessing import LabelEncoder

# Encode the target variable using LabelEncoder.
# The encoder is always fitted on the original 'Disease' column rather than on
# `y` itself: `y` is overwritten with integer codes below, so fitting on `y`
# would make a second run of this cell learn the integers as classes and lose
# the disease names that are later read from `label_encoder_Y.classes_`.
label_encoder_Y = LabelEncoder()
y = label_encoder_Y.fit_transform(df['Disease'])

In [65]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [67]:
# Inspect the dtype of every feature column before handing X to the network.
X.dtypes

Animal_en         int32
Age               int64
Temperature     float64
Symptom 1_en      int32
Symptom 2_en      int32
Symptom 3_en      int32
dtype: object

In [68]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define the neural network model.
# The input width and the number of output units are derived from the data so
# the architecture cannot drift out of sync with the feature list or the target
# encoder (in this notebook: 6 feature columns and 5 disease classes).
n_features = X_train.shape[1]
n_classes = len(label_encoder_Y.classes_)

model = Sequential([
    # Explicit Input object instead of `input_shape=` on the first Dense layer,
    # which Keras warns about when used inside a Sequential model.
    tf.keras.Input(shape=(n_features,)),
    # First hidden layer: 128 ReLU units, followed by 50% dropout
    Dense(128, activation='relu'),
    Dropout(0.5),
    # Second hidden layer: 64 ReLU units, followed by 50% dropout
    Dense(64, activation='relu'),
    Dropout(0.5),
    # Output layer: one softmax unit per disease class
    Dense(n_classes, activation='softmax'),
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Use sparse categorical crossentropy for integer labels
              metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [69]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [70]:
import numpy as np

# Count how many distinct encoded labels occur in the training targets.
distinct_train_labels = np.unique(y_train)
num_classes = len(distinct_train_labels)
print("Number of classes:", num_classes)

Number of classes: 5


In [72]:
# Fit the network on the training split; 20% of it is set aside by Keras
# for validation, and the per-epoch metrics are kept in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=32,
)

Epoch 1/30
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.7344 - loss: 0.5302 - val_accuracy: 0.7824 - val_loss: 0.3843
Epoch 2/30
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7367 - loss: 0.5237 - val_accuracy: 0.7981 - val_loss: 0.3528
Epoch 3/30
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7373 - loss: 0.5118 - val_accuracy: 0.7854 - val_loss: 0.3693
Epoch 4/30
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7369 - loss: 0.5141 - val_accuracy: 0.7837 - val_loss: 0.3762
Epoch 5/30
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7387 - loss: 0.5201 - val_accuracy: 0.7964 - val_loss: 0.3454
Epoch 6/30
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7387 - loss: 0.5260 - val_accuracy: 0.7997 - val_loss: 0.3573
Epoch 7/30
[1m876/876[0m 

In [73]:
import numpy as np

# Score the trained model on the held-out test split and report its accuracy.
evaluation_result = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation_result
print("Test Accuracy:", test_accuracy)

[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8007 - loss: 0.3448
Test Accuracy: 0.8039059042930603


In [84]:
# Save the trained model to 'model.h5' (a relative path, so it is written
# to the notebook's current working directory).
model.save("model.h5")



In [76]:
from joblib import dump

# Persist every fitted feature encoder to its own joblib file,
# named after the column the encoder belongs to.
for feature_name in label_encoder_X:
    dump(label_encoder_X[feature_name], f'label_encoder_{feature_name}_classes.joblib')

# Persist the target encoder as well (kept as the last expression so the
# written file name is shown as the cell output).
dump(label_encoder_Y, 'label_encoder_Y_classes.joblib')

['label_encoder_Y_classes.joblib']

In [78]:
import pickle

# Save the dict of fitted feature encoders (keyed by original column name)
# to a single pickle file.
# The dict built during encoding is named `label_encoder_X`; the previously
# used name `label_encoders_X` is never defined in this notebook and raises
# NameError on a fresh kernel.
with open('label_encoder_X.pkl', 'wb') as f:
    pickle.dump(label_encoder_X, f)

In [79]:
import pickle

# Write the fitted target encoder to 'label_encoder_Y.pkl' with pickle.
with open('label_encoder_Y.pkl', mode='wb') as encoder_file:
    pickle.dump(label_encoder_Y, encoder_file)

In [80]:
from joblib import load

# Reload the saved encoders from their joblib files: one per categorical
# feature column, plus the target encoder.
encoded_feature_names = ['Animal', 'Symptom 1', 'Symptom 2', 'Symptom 3']
label_encoder_X_classes = {
    name: load(f'label_encoder_{name}_classes.joblib')
    for name in encoded_feature_names
}
label_encoder_Y_classes = load('label_encoder_Y_classes.joblib')

# Show the classes each reloaded encoder knows
for name in label_encoder_X_classes:
    print(f'Classes for {name}: {label_encoder_X_classes[name].classes_}')

print(f'Classes for Y: {label_encoder_Y_classes.classes_}')

Classes for Animal: ['buffalo' 'cow' 'goat' 'sheep']
Classes for Symptom 1: ['blisters on gums' 'blisters on hooves' 'blisters on mouth'
 'blisters on tongue' 'chest discomfort' 'chills' 'crackling sound'
 'depression' 'difficulty walking' 'fatigue' 'lameness' 'loss of appetite'
 'painless lumps' 'shortness of breath' 'sores on gums' 'sores on hooves'
 'sores on mouth' 'sores on tongue' 'sweats' 'swelling in abdomen'
 'swelling in extremities' 'swelling in limb' 'swelling in muscle'
 'swelling in neck']
Classes for Symptom 2: ['blisters on gums' 'blisters on hooves' 'blisters on mouth'
 'blisters on tongue' 'chest discomfort' 'chills' 'crackling sound'
 'depression' 'difficulty walking' 'fatigue' 'lameness' 'loss of appetite'
 'painless lumps' 'shortness of breath' 'sores on gums' 'sores on hooves'
 'sores on mouth' 'sores on tongue' 'sweats' 'swelling in abdomen'
 'swelling in extremities' 'swelling in limb' 'swelling in muscle'
 'swelling in neck']
Classes for Symptom 3: ['blisters o

In [83]:
import numpy as np

# Function to preprocess input data for testing
def preprocess_input(input_data, label_encoders_X):
    """Turn one raw observation into a (1, n_features) array for the model.

    Parameters
    ----------
    input_data : dict
        Maps feature name to raw value. Values are emitted in the dict's
        iteration order, so the keys must be listed in the feature order
        the model expects.
    label_encoders_X : dict
        Maps a categorical feature name to an object whose
        ``transform([value])`` returns the encoded value as a one-element
        sequence. Features without an entry here are passed through as is.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, len(input_data))``.

    Raises
    ------
    Whatever the encoder's ``transform`` raises for a value it cannot encode.
    """
    encoded_input = []
    for col, value in input_data.items():
        # Use the encoders passed in as an argument; the previous version
        # ignored this parameter and read a global variable instead.
        if col in label_encoders_X:
            # Categorical feature: replace the label with its integer code
            encoded_input.append(label_encoders_X[col].transform([value])[0])
        else:
            # If the column is not categorical, use the value as is
            encoded_input.append(value)
    return np.array(encoded_input).reshape(1, -1)

# Define input data for testing.
# The keys are listed in the same order as the feature columns the model
# was trained on, because preprocess_input keeps the dict's order.
input_data = {
    'Animal': 'buffalo',
    'Age': 5,
    'Temperature': 102,
    'Symptom 1': 'difficulty walking',
    'Symptom 2': 'chest discomfort',
    'Symptom 3': 'chills'
}

# Preprocess input data for testing.
# Pass `label_encoder_X`, the dict of fitted feature encoders; the name
# `label_encoders_X` used previously is never defined in this notebook.
preprocessed_input = preprocess_input(input_data, label_encoder_X)

# Make predictions using the trained model
predictions = model.predict(preprocessed_input)

# Get the predicted class: index of the highest softmax probability,
# mapped back to the disease name through the target encoder
predicted_class_index = np.argmax(predictions)
predicted_class = label_encoder_Y.classes_[predicted_class_index]

print("Predicted Disease:", predicted_class)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385ms/step
Predicted Disease: anthrax
