In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [2]:

# Read the CSV named by `file_path` into the working DataFrame `data`.
file_path = 'breeds.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Preview the dataset: first rows, column dtypes / non-null counts, and
# summary statistics of the numeric columns.
print(data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
data.info()
print(data.describe())

   temperature  pulse_rate  heart_rate   breed
0         95.7        77.3        85.5     pug
1        106.3        89.4        92.2     pug
2        100.3       122.1       118.0  beagle
3        102.0       114.5       119.6  beagle
4        106.5       133.0       111.0   labra
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 666 entries, 0 to 665
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   temperature  666 non-null    float64
 1   pulse_rate   666 non-null    float64
 2   heart_rate   666 non-null    float64
 3   breed        666 non-null    object 
dtypes: float64(3), object(1)
memory usage: 20.9+ KB
None
       temperature  pulse_rate  heart_rate
count   666.000000  666.000000  666.000000
mean    102.922973  106.556757  104.835135
std       3.435931   16.560644   13.917588
min      94.000000   73.900000   74.800000
25%     100.500000   91.025000   93.500000
50%     103.000000  114.000000  107.000000
75%

In [4]:
# Count the missing entries in each column (isna is the alias of isnull).
missing_per_column = data.isna().sum()
print(missing_per_column)

temperature    0
pulse_rate     0
heart_rate     0
breed          0
dtype: int64


In [5]:
# Discard every row that contains at least one missing value
# (the dropna defaults, written out explicitly).
data = data.dropna(axis='index', how='any')

In [6]:
# Create the standardizing scaler, with its default options spelled out.
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [7]:
# Standardize the three numeric columns, overwriting the raw values in `data`.
# NOTE(review): the scaler is fit on the whole frame, ahead of the later
# train/test split, and because the columns are overwritten in place,
# re-running this cell would refit the scaler on already-scaled values.
feature_columns = ['temperature', 'pulse_rate', 'heart_rate']
scaled_features = scaler.fit_transform(data[feature_columns])
data[feature_columns] = scaled_features

In [8]:
# Lower and upper bound of the normal range for each measurement,
# expressed in the original (unscaled) units.
normal_ranges = {
    'temperature': dict(min=99.5, max=102.5),
    'pulse_rate': dict(min=70, max=120),
    'heart_rate': dict(min=60, max=100),
}


In [9]:
# Two-row frame of the normal bounds (row 0 = minimums, row 1 = maximums),
# using the same column names that were passed to the scaler.
normal_values_df = pd.DataFrame({
    column: [normal_ranges[column]['min'], normal_ranges[column]['max']]
    for column in ('temperature', 'pulse_rate', 'heart_rate')
})

In [10]:
# Push the bounds through the fitted scaler so they can be compared with the
# standardized data, then rebuild the nested {'min': ..., 'max': ...} mapping
# (row 0 holds the scaled minimums, row 1 the scaled maximums).
scaled_normal_ranges_df = pd.DataFrame(
    scaler.transform(normal_values_df),
    columns=normal_values_df.columns,
)
scaled_normal_ranges = {
    column: {
        'min': scaled_normal_ranges_df[column][0],
        'max': scaled_normal_ranges_df[column][1],
    }
    for column in ('temperature', 'pulse_rate', 'heart_rate')
}

In [11]:
# Inclusive range check used when labelling rows.
def is_normal(value, min_val, max_val):
    """Return whether ``value`` lies in the closed interval [min_val, max_val]."""
    at_or_above_min = min_val <= value
    return at_or_above_min and value <= max_val

In [12]:
# Rule that labels a single row from its three measurements.
def determine_state(row, normal_ranges):
    """Classify a row as 'normal' or 'abnormal'.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'temperature', 'pulse_rate' and 'heart_rate' entries.
    normal_ranges : dict
        Maps each of those names to a dict holding 'min' and 'max' bounds.

    Returns
    -------
    str
        'normal' when every measurement passes ``is_normal`` for its bounds,
        otherwise 'abnormal'.
    """
    # A list (not a generator) so all three checks are always evaluated.
    in_range = [
        is_normal(row[name], normal_ranges[name]['min'], normal_ranges[name]['max'])
        for name in ('temperature', 'pulse_rate', 'heart_rate')
    ]
    return 'normal' if all(in_range) else 'abnormal'

In [13]:
# Label every row by running determine_state against the scaled bounds.
data['state'] = data.apply(
    lambda row: determine_state(row, normal_ranges=scaled_normal_ranges),
    axis=1,
)

In [14]:
# Model inputs are the three measurement columns; the target is the
# rule-derived 'state' label.
feature_names = ['temperature', 'pulse_rate', 'heart_rate']
X = data.loc[:, feature_names]
y = data.loc[:, 'state']


In [15]:
# Hold out 20% of the rows for testing. The split is stratified on y and uses
# a fixed random_state so it is repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts


In [16]:
# Build a 100-tree random forest with a fixed seed and fit it on the
# training split.
forest_settings = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_settings)
model.fit(X_train, y_train)

In [17]:

# Run the fitted model over the held-out feature rows.
y_pred = model.predict(X=X_test)


In [18]:
# Report per-class precision/recall/F1 and the overall accuracy on the
# test split.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", test_accuracy)

              precision    recall  f1-score   support

    abnormal       1.00      1.00      1.00       121
      normal       1.00      1.00      1.00        13

    accuracy                           1.00       134
   macro avg       1.00      1.00      1.00       134
weighted avg       1.00      1.00      1.00       134

Accuracy: 1.0


In [20]:
# Persist the fitted classifier and the fitted scaler to disk with joblib.
joblib.dump(value=model, filename='dog_health_model.pkl')
joblib.dump(value=scaler, filename='scaler.pkl')

['scaler.pkl']